2 # Module for Bio::PhyloNetwork
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Gabriel Cardona <gabriel(dot)cardona(at)uib(dot)es>
8 # Copyright Gabriel Cardona, Gabriel Valiente
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
20 use Bio::PhyloNetwork;
22 # Create a PhyloNetwork object from a eNewick string
23 my $net1=Bio::PhyloNetwork->new(
24 -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
27 # Print all available data
30 # Rebuild $net1 from its mu_data
31 my %mudata=$net1->mudata();
32 my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
34 print "d=".$net1->mu_distance($net2)."\n";
36 # Get another one and compute distance
37 my $net3=Bio::PhyloNetwork->new(
38 -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
40 print "d=".$net1->mu_distance($net3)."\n";
42 # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
43 my ($weight,%alignment)=$net1->optimal_alignment($net3);
44 print "weight:$weight\n";
45 foreach my $node1 (keys %alignment) {
46 print "$node1 => ".$alignment{$node1}."\n";
48 # ...or the Hamming distance
50 my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
51 print "weight:$weightH\n";
52 foreach my $node1 (keys %alignmentH) {
53 print "$node1 => ".$alignmentH{$node1}."\n";
56 # Test for time consistency of $net1
57 if ($net1->is_time_consistent) {
58 print "net1 is time consistent\n"
61 print "net1 is not time consistent\n"
64 # create a network from the list of edges
65 my $net4=Bio::PhyloNetwork->new(-edges=>
66 [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
68 # Test for time consistency of $net3
69 if ($net4->is_time_consistent) {
70 print "net4 is time consistent\n"
73 print "net4 is not time consistent\n"
76 # And print all information on net4
79 # Compute some tripartitions
80 my %triparts=$net1->tripartitions();
82 # Now these are stored
85 # And can compute the tripartition error
86 print "dtr=".$net1->tripartition_error($net3)."\n";
90 =head2 Phylogenetic Networks
92 This is a module to work with phylogenetic networks. Phylogenetic networks
93 have been studied over the last years as a richer model of the evolutionary
94 history of sets of organisms than phylogenetic trees, because they take not
95 only mutation events but also recombination and horizontal gene transfer
98 The natural model for describing the evolutionary
99 history of a set of sequences under recombination events is a DAG, hence
100 this package relies on the package Graph::Directed to represent the
101 underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
102 for formal definitions related to phylogenetic networks.
104 =head2 eNewick description
106 With this package, phylogenetic networks can be given by its eNewick
107 string. This description appeared in other packages related to
108 phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
109 packages use different descriptions. The Bio::PhyloNetwork
110 package allows both of them, but uses the second one in its output.
112 The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
113 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
114 nodes; let each of the first k copies be a child of one of the u_1,...,u_k
115 (one for each) and have no children (hence we will have k extra leaves);
116 as for the last copy, let it have no parents and have v_1,...,v_l be its
117 children. This way we get a forest; each of the trees will be rooted at either
118 one root of the phylogenetic network or a hybrid node of it; the set of leaves
119 (of the whole forest) will be the set of leaves of the original network
120 together with the set of hybrid nodes (each of them repeated as many times
121 as its in-degree). Then, the eNewick representation of the phylogenetic network
122 will be the Newick representation of all the trees in the obtained forest,
123 each of them with its root labeled.
125 The second approach [NetGen] goes as follows: For each hybrid node H, say with
126 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
127 nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
128 its children; let the other copies be child of u_2,...,u_k (one for each)
129 and have no children. This way, we get a tree whose set of leaves is the
130 set of leaves of the original network together with the set of hybrid nodes
131 (possibly repeated). Then the Newick string of the obtained tree (note that
132 some internal nodes will be labeled and some leaves will be repeated) is
133 the eNewick string of the phylogenetic network.
135 For example, consider the network depicted below:
147 If the first approach is taken, we get the forest:
160 Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
162 As for the second one, one gets the tree:
174 Hence, the eNewick string is '((1,H),((2)H,3))r;'.
176 Note: when rooting a tree, this package allows the notations
177 '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
178 the first one is used when writing eNewick strings.
180 =head2 Tree-child phylogenetic networks
182 Tree-child (TC) phylogenetic networks are a special class of phylogenetic
183 networks for which a distance, called mu-distance, is defined [CRV2]
184 based on certain data (mu-data) associated to every node.
185 Moreover, this distance extends the
186 Robinson-Foulds on phylogenetic trees. This package allows testing for a
187 phylogenetic network if it is TC and computes mu-distances between networks
188 over the same set of leaves.
190 Moreover, the mu-data allows one to define the optimal
191 (in some precise sense) alignment between networks
192 over the same set of leaves. This package also computes this optimal alignment.
196 Although tripartitions (see [CRV1] and the references therein) do not allow
197 to define distances, this package outputs tripartitions and computes a weak
198 form of the tripartition error.
200 =head2 Time-consistency
202 Another useful property of Phylogenetic Networks that appears in the literature
203 is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
204 network admits a temporal representation if it can be drawn in such a way
205 that tree arcs (those whose end is a tree node) are inclined downwards, while
206 hybridization arcs (those whose end is a hybrid node) are horizontal.
207 This package checks for time-consistency and, if so, a temporal representation
212 Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
213 Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
221 G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
222 discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
226 G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
227 Tree-Child Phylogenetic Networks. Preprint.
231 M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
232 with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
236 PhyloNet: "Phylogenetic Networks Toolkit".
237 http://bioinfo.cs.rice.edu/phylonet
241 M. Baroni, C. Semple, and M. Steel. Hybrids in Real
242 Time. Syst. Biol. 55(1):46-56, 2006
248 The rest of the documentation details each of the object methods.
252 package Bio
::PhyloNetwork
;
257 use base
qw(Bio::Root::Root);
259 use Bio
::PhyloNetwork
::muVector
;
265 use Algorithm
::Munkres
;
272 Usage : my $obj = new Bio::PhyloNetwork();
273 Function: Creates a new Bio::PhyloNetwork object
274 Returns : Bio::PhyloNetwork
279 -graph => Graph::Directed object
281 -edges => reference to an array
283 -tree => Bio::Tree::Tree object
285 -mudata => reference to a hash,
286 -leaves => reference to an array
288 -mudata => reference to a hash,
289 -numleaves => integer
291 Returns a Bio::PhyloNetwork object, created according to the data given:
297 creates an empty network.
299 =item new(-eNewick =E<gt> $str)
301 creates the network whose
302 Extended Newick representation (see description above) is the string $str.
304 =item new(-graph =E<gt> $graph)
306 creates the network with underlying
307 graph given by the Graph::Directed object $graph
309 =item new(-tree =E<gt> $tree)
311 creates a network as a copy of the
312 Bio::Tree::Tree object in $tree
314 =item new(-mudata =E<gt> \%mudata, -leaves =E<gt> \@leaves)
316 creates the network by reconstructing it from its mu-data stored in
317 \%mudata and with set of leaves in \@leaves.
319 =item new(-mudata =E<gt> \%mudata, -numleaves =E<gt> $numleaves)
321 creates the network by reconstructing it from its mu-data stored in
322 \%mudata and with set of leaves in ("l1".."l$numleaves").
330 my $self=$pkg->SUPER::new
(@args);
331 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
332 $self->_rearrange([qw(ENEWICK
341 $self->build_from_eNewick($eNewick) if defined $eNewick;
342 $self->build_from_edges(@
$edgesR) if defined $edgesR;
343 $self->build_from_graph($graph) if defined $graph;
344 $self->build_from_tree($tree) if defined $tree;
345 if ((! defined $leavesR) && (defined $numleaves)) {
346 my @leaves=map {"l$_"} (1..$numleaves);
349 $self->build_from_mudata($mudataR,$leavesR)
350 if ((defined $mudataR) && (defined $leavesR));
356 sub build_from_edges
{
357 my ($self,@edges)=@_;
358 my $graph=Graph
::Directed
->new();
359 $graph->add_edges(@edges);
360 $self->{graph
}=$graph;
363 foreach my $node ($self->nodes()) {
364 $labels->{$node}=$node;
366 $self->{labels
}=$labels;
369 sub build_from_graph
{
370 my ($self,$graph)=@_;
371 my $graphcp=$graph->copy();
372 $self->{graph
}=$graphcp;
375 foreach my $node ($self->nodes()) {
376 $labels->{$node}=$node;
378 $self->{labels
}=$labels;
383 sub build_from_eNewick
{
384 my ($self,$string)=@_;
386 my $graph=Graph
::Directed
->new();
388 my @blocks=split(/; */,$string);
389 foreach my $block (@blocks) {
390 my ($rt,$str)=get_root_and_subtree
($block);
391 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length
($rt);
392 process_block
($graph,$labels,$block,$rtid);
393 $labels->{$rtid}=$rtlbl.'';
395 $self->{graph
}=$graph;
396 $self->{labels
}=$labels;
401 my ($graph,$labels,$block,$rtid)=@_;
402 my ($rt,$str)=get_root_and_subtree
($block);
403 my @substrs=my_split
($str);
404 foreach my $substr (@substrs) {
405 my ($subrt,$subblock)=get_root_and_subtree
($substr);
406 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
407 get_label_type_id_length
($subrt);
408 if (! $subrtlng eq '') {
409 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
412 $graph->add_edges($rtid,$subrtid);
414 if (! $subrttype eq '') {
415 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
418 # if (! $subrtlbl eq '') {
419 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
420 $labels->{$subrtid}=$subrtlbl;
421 } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
423 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
426 if ($subblock ne "") {
427 process_block
($graph,$labels,$subblock,$subrtid);
432 sub get_root_and_subtree
{
434 my ($rt,$str)=("","");
435 # ($rt,$str)=split(/:|=/,$block);
436 ($rt,$str)=split(/=/,$block);
438 # try to look for root label at the end
439 my $pos=length($rt)-1;
440 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
443 $rt=substr($block,$pos+1,length($block)-$pos);
444 $str=substr($block,0,$pos+1);
451 sub get_label_type_id_length
{
455 if (index($string,'#')==-1) {
457 my ($label,$length)=split(':',$string);
460 if ((! defined $label) || ($label eq '')) {
467 return ($label,'',$id,$length);
471 my ($label,$string2)=split('#',$string);
472 my ($typeid,$length)=split(':',$string2);
477 return ($label,$type,'#'.$id,$length);
494 for my $i ( 1 .. length( $string ) ) {
495 my $char=substr($string,$i,1);
501 push @substrings, $temp;
506 if (($char eq ",") && ($level==1)) {
507 push @substrings, $temp;
516 sub build_from_mudata
{
517 my ($self,$mus,$leavesR)=@_;
518 my $graph=Graph
::Directed
->new();
519 my @nodes=keys %{$mus};
520 my @leaves=@
{$leavesR};
527 foreach my $node (@nodes) {
528 push(@internal, $node) unless exists $seen{$node};
531 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
532 @nodes=(@internal,@leaves);
534 for (my $i=0;$i<$numnodes;$i++) {
535 my $mu=$mus->{$nodes[$i]};
537 while ($mu->is_positive() && $j<$numnodes) {
538 if ($mu->geq_poset($mus->{$nodes[$j]})) {
539 $graph->add_edges(($nodes[$i],$nodes[$j]));
540 $mu = $mu - $mus->{$nodes[$j]};
545 $self->build_from_graph($graph);
552 # my $root=$tree->get_root_node();
553 # foreach my $node ($tree->get_nodes()) {
554 # if ($node == $root) {
555 # $node->{'_id'}="r";
557 # elsif (! $node->is_Leaf) {
558 # $node->{'_id'}="t$i";
562 # if ($node->{'_id'} eq "") {
563 # $node->{'_id'}="l$j";
571 # sub build_subtree {
572 # my ($graph,$root)=@_;
573 # foreach my $child ($root->each_Descendent) {
574 # $graph->add_edge($root->id,$child->id);
575 # $graph=build_subtree($graph,$child);
580 sub build_from_tree
{
582 # relabel_tree($tree);
583 # my $treeroot=$tree->get_root_node;
584 # my $graph=Graph::Directed->new();
585 # $graph=build_subtree($graph,$treeroot);
586 # $self->build_from_graph($graph);
588 my $io=IO
::String
->new($str);
589 my $treeio=Bio
::TreeIO
->new(-format
=> 'newick', -fh
=> $io);
590 $treeio->write_tree($tree);
591 # print "intern: $str\n";
592 $self->build_from_eNewick($str);
597 $self->throw("Graph is not DAG:".$self->{graph
})
598 unless $self->{graph
}->is_dag();
599 my @leaves=$self->{graph
}->successorless_vertices();
600 @leaves=sort @leaves;
601 my $numleaves=@leaves;
602 my @roots=$self->{graph
}->predecessorless_vertices();
604 #$self->throw("Graph is not rooted") unless ($numroots == 1);
605 my @nodes=$self->{graph
}->vertices();
608 foreach my $node (@nodes) {
609 if (! defined $self->{labels
}->{$node}) {
610 $self->{labels
}->{$node}='';
613 $self->{leaves
}=\
@leaves;
614 $self->{numleaves
}=$numleaves;
615 $self->{roots
}=\
@roots;
616 $self->{numroots
}=$numroots;
617 $self->{nodes
}=\
@nodes;
618 $self->{numnodes
}=$numnodes;
621 $self->compute_height();
629 my ($self,$u1,$v1,$u2,$v2)=@_;
630 if ( $self->is_hybrid_node($v1) ||
631 $self->is_hybrid_node($v2) ||
632 $self->graph->is_reachable($v2,$u1) ||
633 (($u1 eq $u2)&&($v1 eq $v2)) ||
634 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
635 $self->graph->successors($u2)))
643 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
644 my $graph=$self->{graph
};
645 $graph->delete_edge($u1,$v1);
646 $graph->delete_edge($u2,$v2);
647 $graph->add_edge($u1,"T$lbl");
648 $graph->add_edge("T$lbl",$v1);
649 $graph->add_edge($u2,"#H$lbl");
650 $graph->add_edge("#H$lbl",$v2);
651 $graph->add_edge("T$lbl","#H$lbl");
652 $self->build_from_graph($graph);
656 # Computation of mu-data
660 my $graph=$self->{graph
};
661 my $mudata=$self->{mudata
};
662 my @leaves=@
{$self->{leaves
}};
663 my $numleaves=$self->{numleaves
};
664 for (my $i=0;$i<$numleaves;$i++) {
665 my $vec=Bio
::PhyloNetwork
::muVector
->new($numleaves);
667 $mudata->{$leaves[$i]}=$vec;
670 while (my @nodes=grep {$self->{h
}->{$_} == $h} @
{$self->{nodes
}} )
672 foreach my $u (@nodes) {
673 my $vec=Bio
::PhyloNetwork
::muVector
->new($numleaves);
674 foreach my $son ($graph->successors($u)) {
675 $vec+=$mudata->{$son};
685 my $graph=$self->{graph
};
686 my @leaves=@
{$self->{leaves
}};
687 foreach my $leaf (@leaves) {
688 $self->{h
}->{$leaf}=0;
691 while (my @nodes=grep {(defined $self->{h
}->{$_})&&($self->{h
}->{$_} == $h)}
694 foreach my $node (@nodes) {
695 foreach my $parent ($graph->predecessors($node)) {
696 $self->{h
}->{$parent}=$h+1;
708 Usage : my $b=$net->is_leaf($u)
709 Function: tests if $u is a leaf in $net
717 if ($self->{graph
}->out_degree($node) == 0) {return 1;}
724 Usage : my $b=$net->is_root($u)
725 Function: tests if $u is the root of $net
733 if ($self->{graph
}->in_degree($node) == 0) {return 1;}
740 Usage : my $b=$net->is_tree_node($u)
741 Function: tests if $u is a tree node in $net
749 if ($self->{graph
}->in_degree($node) <= 1) {return 1;}
753 =head2 is_hybrid_node
755 Title : is_hybrid_node
756 Usage : my $b=$net->is_hybrid_node($u)
757 Function: tests if $u is a hybrid node in $net
765 if ($self->{graph
}->in_degree($node) > 1) {return 1;}
770 # has_tree_child(g,u) returns 1 if u has a tree child in graph g
774 my @Sons=$g->successors($node);
775 foreach my $son (@Sons) {
776 if ($g->in_degree($son)==1) {
785 Title : is_tree_child
786 Usage : my $b=$net->is_tree_child()
787 Function: tests if $net is a Tree-Child phylogenetic network
789 Args : Bio::PhyloNetwork
795 if (defined $self->{is_tree_child
}) {
796 return $self->{is_tree_child
};
798 $self->{is_tree_child
}=0;
799 my $graph=$self->{graph
};
800 foreach my $node (@
{$self->{nodes
}}) {
801 return 0 unless ($graph->out_degree($node)==0 ||
802 has_tree_child
($graph,$node));
804 $self->{is_tree_child
}=1;
813 Usage : my @nodes=$net->nodes()
814 Function: returns the set of nodes of $net
822 return @
{$self->{nodes
}};
828 Usage : my @leaves=$net->leaves()
829 Function: returns the set of leaves of $net
837 return @
{$self->{leaves
}};
843 Usage : my @roots=$net->roots()
844 Function: returns the set of roots of $net
852 return @
{$self->{roots
}};
855 =head2 internal_nodes
857 Title : internal_nodes
858 Usage : my @internal_nodes=$net->internal_nodes()
859 Function: returns the set of internal nodes of $net
867 return grep {! $self->is_leaf($_)} $self->nodes();
873 Usage : my @tree_nodes=$net->tree_nodes()
874 Function: returns the set of tree nodes of $net
882 return grep {$self->is_tree_node($_)} $self->nodes();
888 Usage : my @hybrid_nodes=$net->hybrid_nodes()
889 Function: returns the set of hybrid nodes of $net
897 return grep {$self->is_hybrid_node($_)} $self->nodes();
903 Usage : my $graph=$net->graph()
904 Function: returns the underlying graph of $net
905 Returns : Graph::Directed
912 return $self->{graph
};
918 Usage : my @edges=$net->edges()
919 Function: returns the set of edges of $net
923 Each element in the array is an anonimous array whose first element is the
924 head of the edge and the second one is the tail.
930 return $self->{graph
}->edges();
936 Usage : my @tree_edges=$net->tree_edges()
937 Function: returns the set of tree edges of $net
938 (those whose tail is a tree node)
946 return grep {$self->is_tree_node($_->[1])} $self->edges();
952 Usage : my @hybrid_edges=$net->hybrid_edges()
953 Function: returns the set of hybrid edges of $net
954 (those whose tail is a hybrid node)
962 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
968 Usage : my @trees=$net->explode()
969 Function: returns the representation of $net by a set of
970 Bio::Tree:Tree objects
979 $self->explode_rec(\
@trees);
984 my ($self,$trees)=@_;
985 my @h = $self->hybrid_nodes;
988 for my $u ($self->{graph
}->predecessors($v)) {
989 $self->{graph
}->delete_edge($u,$v);
990 $self->explode_rec($trees);
991 $self->{graph
}->add_edge($u,$v);
994 my $io = IO
::String
->new($self->eNewick);
995 my $treeio = Bio
::TreeIO
->new(-format
=> 'newick', -fh
=> $io);
996 my $tree = $treeio->next_tree;
997 $tree->contract_linear_paths;
998 push @
{$trees}, $tree;
1005 Usage : my %mudata=$net->mudata()
1006 Function: returns the representation of $net by its mu-data
1010 $net-E<gt>mudata() returns a hash with keys the nodes of $net and each value is a
1011 muVector object holding its mu-vector.
1017 return %{$self->{mudata
}};
1022 return $self->{mudata
}{$u};
1028 Usage : my %heights=$net->heights()
1029 Function: returns the heights of the nodes of $net
1033 $net-E<gt>heights() returns a hash with keys the nodes of $net and each value
1040 return %{$self->{h
}};
1045 return $self->{h
}{$u};
1051 Usage : my $dist=$net1->mu_distance($net2)
1052 Function: Computes the mu-distance between the networks $net1 and $net2 on
1053 the same set of leaves
1055 Args : Bio::PhyloNetwork
1060 my ($net1,$net2)=@_;
1061 my @nodes1=$net1->nodes;
1062 my @nodes2=$net2->nodes;
1063 my $comp = Array
::Compare
->new;
1064 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1065 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1066 $net1->warn("Not a tree-child phylogenetic network")
1067 unless $net1->is_tree_child();
1068 $net2->warn("Not a tree-child phylogenetic network")
1069 unless $net2->is_tree_child();
1070 my @leaves=@
{$net1->{leaves
}};
1073 OUTER
: foreach my $node1 (@nodes1) {
1074 foreach my $node2 (@nodes2) {
1076 (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1077 ($net1->{mudata
}{$node1} == $net2->{mudata
}{$node2})
1079 $matched1{$node1}=$node2;
1080 $matched2{$node2}=$node1;
1085 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1088 =head2 mu_distance_generalized
1090 Title : mu_distance_generalized
1091 Usage : my $dist=$net1->mu_distance($net2)
1092 Function: Computes the mu-distance between the topological restrictions of
1093 networks $net1 and $net2 on its common set of leaves
1095 Args : Bio::PhyloNetwork
1099 sub mu_distance_generalized
{
1100 my ($net1,$net2)=@_;
1101 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1102 return $netr1->mu_distance($netr2);
1105 # mudata_string (code mu_data in a string; useful for isomorphism testing)
1107 sub mudata_string_node
{
1109 return $self->{mudata
}->{$u}->display();
1114 return $self->{mudata_string
} if defined $self->{mudata_string
};
1115 my @internal=$self->internal_nodes;
1116 my $mus=$self->{mudata
};
1117 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
1119 foreach my $node (@internal) {
1120 $str=$str.$self->mudata_string_node($node);
1122 $self->{mudata_string
}=$str;
1126 sub is_mu_isomorphic
{
1127 my ($net1,$net2)=@_;
1128 return ($net1->mudata_string() eq $net2->mudata_string());
1133 sub compute_tripartition_node
{
1135 $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1136 unless ($self->{numroots
} == 1);
1137 my $root=$self->{roots
}->[0];
1138 my $graph=$self->{graph
};
1139 my $graphPruned=$graph->copy();
1140 $graphPruned->delete_vertex($u);
1141 my $tripartition="";
1142 foreach my $leaf (@
{$self->{leaves
}}) {
1144 if ($graph->is_reachable($u,$leaf)) {
1145 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
1149 $tripartition .= $type;
1151 $self->{tripartitions
}->{$u}=$tripartition;
1154 sub compute_tripartitions
{
1156 foreach my $node (@
{$self->{nodes
}}) {
1157 $self->compute_tripartition_node($node);
1161 =head2 tripartitions
1163 Title : tripartitions
1164 Usage : my %tripartitions=$net->tripartitions()
1165 Function: returns the set of tripartitions of $net
1169 $net-E<gt>tripartitions() returns a hash with keys the nodes of $net and each value
1170 is a string representing the tripartition of the leaves induced by the node.
1171 A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1172 the set B(u), the second one in C(u), the third one in A(u), and so on.
1178 $self->compute_tripartitions() unless defined $self->{tripartitions
};
1179 return %{$self->{tripartitions
}};
1182 # to do: change to tri_distance and test for TC and time-cons
1184 sub tripartition_error
{
1185 my ($net1,$net2)=@_;
1186 my $comp = Array
::Compare
->new;
1187 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1188 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1189 $net1->warn("Not a tree-child phylogenetic network")
1190 unless $net1->is_tree_child();
1191 $net2->warn("Not a tree-child phylogenetic network")
1192 unless $net2->is_tree_child();
1193 $net1->warn("Not a time-consistent network")
1194 unless $net1->is_time_consistent();
1195 $net2->warn("Not a time-consistent network")
1196 unless $net2->is_time_consistent();
1197 $net1->compute_tripartitions() unless defined $net1->{tripartitions
};
1198 $net2->compute_tripartitions() unless defined $net2->{tripartitions
};
1199 my @edges1=$net1->{graph
}->edges();
1200 my @edges2=$net2->{graph
}->edges();
1202 foreach my $edge1 (@edges1) {
1204 foreach my $edge2 (@edges2) {
1205 if ($net1->{tripartitions
}->{$edge1->[1]} eq
1206 $net2->{tripartitions
}->{$edge2->[1]}) {
1211 if (! $matched) {$FN++;}
1213 foreach my $edge2 (@edges2) {
1215 foreach my $edge1 (@edges1) {
1216 if ($net1->{tripartitions
}->{$edge1->[1]} eq
1217 $net2->{tripartitions
}->{$edge2->[1]}) {
1222 if (! $matched) {$FP++;}
1224 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1229 # to do: add weak time consistency
1231 =head2 is_time_consistent
1233 Title : is_time_consistent
1234 Usage : my $b=$net->is_time_consistent()
1235 Function: tests if $net is (strong) time-consistent
1241 sub is_time_consistent
{
1243 $self->compute_temporal_representation()
1244 unless exists $self->{has_temporal_representation
};
1245 return $self->{has_temporal_representation
};
1248 =head2 temporal_representation
1250 Title : temporal_representation
1251 Usage : my %time=$net->temporal_representation()
1252 Function: returns a hash containing a temporal representation of $net, or 0
1253 if $net is not time-consistent
1259 sub temporal_representation
{
1261 if ($self->is_time_consistent) {
1262 return %{$self->{temporal_representation
}};
1267 sub compute_temporal_representation
{
1269 my $quotient=Graph
::Directed
->new();
1270 my $classes=find_classes
($self);
1272 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
1273 foreach my $e ($self->tree_edges()) {
1274 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1278 while ($quotient->vertices()) {
1279 if (my @svs=$quotient->predecessorless_vertices()) {
1280 foreach my $sv (@svs) {
1283 $quotient->delete_vertices(@svs);
1289 foreach my $node (@
{$self->{nodes
}}) {
1290 $temp{$node}=$temp{$repr{$node}}
1292 $self->{temporal_representation
}=\
%temp;
1293 $self->{has_temporal_representation
}=1;
1299 map {$classes->{$_}=[$_]} $self->nodes();
1300 foreach my $e ($self->hybrid_edges()) {
1301 $classes=join_classes
($classes,$e->[0],$e->[1]);
1307 my ($classes,$u,$v)=@_;
1308 my @clu=@
{$classes->{$u}};
1309 my @clv=@
{$classes->{$v}};
1310 my @cljoin=(@clu,@clv);
1311 map {$classes->{$_}=\
@cljoin} @cljoin;
1317 =head2 contract_elementary
1320 Title : contract_elementary
1321 Usage : my ($contracted,$blocks)=$net->contract_elementary();
1322 Function: Returns the network $contracted, obtained by contracting elementary
1323 paths of $net into edges. The reference $blocks points to a hash
1324 where, for each node of $contracted, gives the corresponding nodes
1325 of $net that have been deleted.
1326 Returns : Bio::PhyloNetwork,reference to hash
1331 sub contract_elementary
{
1334 my $contracted=$self->graph->copy();
1335 my @nodes=$self->nodes();
1336 my $mus=$self->{mudata
};
1339 foreach my $u (@nodes) {
1342 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
1343 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1344 $hs->{$b} <=> $hs->{$a}} @elementary;
1345 foreach my $elem (@elementary) {
1346 my @children=$contracted->successors($elem);
1347 my $child=$children[0];
1348 if ($contracted->in_degree($elem) == 1) {
1349 my @parents=$contracted->predecessors($elem);
1350 my $parent=$parents[0];
1351 $contracted->add_edge($parent,$child);
1353 $contracted->delete_vertex($elem);
1354 my @blch=@
{$blocks{$child}};
1355 my @blem=@
{$blocks{$elem}};
1356 $blocks{$child}=[@blem,@blch];
1357 delete $blocks{$elem};
1359 my $contr=Bio
::PhyloNetwork
->new(-graph
=> $contracted);
1360 return $contr,\
%blocks;
1363 =head2 optimal_alignment
1365 Title : optimal_alignment
1366 Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1367 Function: returns the total weight of an optimal alignment,
1368 the alignment itself, and partial weights
1369 between the networks $net1 and $net2 on the same set of leaves.
1370 An optional argument allows one to use the Manhattan (default) or the
1371 Hamming distance between mu-vectors.
1372 Returns : scalar,reference to hash,reference to hash
1373 Args : Bio::PhyloNetwork,
1374 -metric => string (optional)
1376 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1380 sub optimal_alignment
{
1381 my ($net1,$net2,%params)=@_;
1383 my ($net1cont,$blocks1)=contract_elementary
($net1);
1384 my ($net2cont,$blocks2)=contract_elementary
($net2);
1385 my ($wc,$alignc,$weightc)=
1386 optimal_alignment_noelementary
($net1cont,$net2cont,%params);
1390 foreach my $u1 (keys %$alignc) {
1391 my $u2=$alignc->{$u1};
1392 my @block1=@
{$blocks1->{$u1}};
1393 my @block2=@
{$blocks2->{$u2}};
1394 while (@block1 && @block2) {
1395 my $u1dc=pop @block1;
1396 my $u2dc=pop @block2;
1397 $alignment{$u1dc}=$u2dc;
1398 $weigths{$u1dc}=$weightc->{$u1};
1399 $totalweigth+=$weigths{$u1dc};
1402 return $totalweigth,\
%alignment,\
%weigths;
1405 sub optimal_alignment_noelementary
{
1406 my ($net1,$net2,%params)=@_;
1408 my $comp = Array
::Compare
->new;
1409 $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1410 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1412 if ((defined $params{-metric
})and ($params{-metric
} eq 'Hamming')) {
1413 $distance='Hamming';
1415 $distance='Manhattan';
1417 my $numleaves=$net1->{numleaves
};
1418 my @nodes1=$net1->internal_nodes();
1419 my @nodes2=$net2->internal_nodes();
1420 my $numnodes1=@nodes1;
1421 my $numnodes2=@nodes2;
1423 for (my $i=0;$i<$numnodes1;$i++) {
1425 for (my $j=0;$j<$numnodes2;$j++) {
1426 push @row,weight
($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1431 assign
(\
@matrix,\
@alignment);
1435 foreach my $leaf (@
{$net1->{leaves
}}) {
1436 $alignmenthash{$leaf}=$leaf;
1437 $weighthash{$leaf}=0;
1439 for (my $i=0;$i<$numnodes1;$i++) {
1440 if (defined $nodes2[$alignment[$i]]) {
1441 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1442 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1443 $totalw += $matrix[$i][$alignment[$i]];
1446 return $totalw,\
%alignmenthash,\
%weighthash;
1449 =head2 optimal_alignment_generalized
1451 Title : optimal_alignment_generalized
1452 Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1453 Function: returns the wieght of an optimal alignment, and the alignment itself,
1454 between the topological restriction of the networks $net1 and $net2
1455 on the set of common leaves.
1456 An optional argument allows one to use the Manhattan (default) or the
1457 Hamming distance between mu-vectors.
1458 Returns : scalar,hash
1459 Args : Bio::PhyloNetwork,
1460 -metric => string (optional)
1462 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1466 sub optimal_alignment_generalized
{
1467 my ($net1,$net2,%params)=@_;
1468 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1469 return $netr1->optimal_alignment($netr2,%params);
1473 my ($net1,$v1,$net2,$v2,$distance)=@_;
1475 if (! defined $distance) {
1476 $distance='Manhattan';
1478 if ($distance eq 'Hamming') {
1479 $w=$net1->{mudata
}->{$v1}->hamming($net2->{mudata
}->{$v2});
1481 $w=$net1->{mudata
}->{$v1}->manhattan($net2->{mudata
}->{$v2});
1483 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
1484 ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1487 $w +=1/(2*$net1->{numleaves
});
1493 =head2 topological_restriction
1495 Title : topological_restriction
1496 Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1497 Function: returns the topological restriction of $net1 and $net2 on its
1498 common set of leaves
1499 Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1500 Args : Bio::PhyloNetwork
1504 sub topological_restriction
{
1505 my ($net1,$net2)=@_;
1507 my @leaves1=$net1->leaves();
1508 my @leaves2=$net2->leaves();
1509 my $numleaves1=scalar @leaves1;
1510 my $numleaves2=scalar @leaves2;
1512 for (my $i=0; $i<$numleaves1; $i++) {
1513 $position1{$leaves1[$i]}=$i;
1516 my @commonleaves=();
1517 for (my $j=0; $j<$numleaves2; $j++) {
1518 if (defined $position1{$leaves2[$j]}) {
1519 push @commonleaves,$leaves2[$j];
1520 $position2{$leaves2[$j]}=$j;
1523 my $graphred1=$net1->{graph
}->copy();
1524 my $graphred2=$net2->{graph
}->copy();
1526 foreach my $u ($graphred1->vertices()) {
1527 my $mu=$net1->mudata_node($u);
1528 foreach my $leaf (@commonleaves) {
1529 if ($mu->[$position1{$leaf}]>0) {
1533 $graphred1->delete_vertex($u);
1536 foreach my $u ($graphred2->vertices()) {
1537 my $mu=$net2->mudata_node($u);
1538 foreach my $leaf (@commonleaves) {
1539 if ($mu->[$position2{$leaf}]>0) {
1543 $graphred2->delete_vertex($u);
1545 my $netr1=Bio
::PhyloNetwork
->new(-graph
=> $graphred1);
1546 my $netr2=Bio
::PhyloNetwork
->new(-graph
=> $graphred2);
1547 return ($netr1,$netr2);
1550 # Functions for eNewick representation
1555 Usage : my $str=$net->eNewick()
1556 Function: returns the eNewick representation of $net without labeling
1567 foreach my $root ($self->roots()) {
1568 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1574 my ($self,$node,$seen,$parent)=@_;
1576 if ($self->is_leaf($node) ||
1577 (defined $seen->{$node}) )
1579 $str=make_label
($self,$parent,$node);
1583 my @sons=$self->{graph
}->successors($node);
1585 foreach my $son (@sons) {
1586 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1589 $str.=")".make_label
($self,$parent,$node);
1595 my ($self,$parent,$node)=@_;
1597 if ($self->is_hybrid_node($node)) {
1598 my $lbl=$self->{labels
}->{$node};
1602 $str.=$lbl; #$self->{labels}->{$node};
1604 if ((defined $parent) &&
1605 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1606 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1608 $str.=substr $node,1;
1610 $str.=$self->{labels
}->{$node};
1612 if ((defined $parent) &&
1613 ($self->graph->has_edge_weight($parent,$node))) {
1614 $str.=":".$self->graph->get_edge_weight($parent,$node);
1621 Title : eNewick_full
1622 Usage : my $str=$net->eNewick_full()
1623 Function: returns the eNewick representation of $net labeling
1634 foreach my $root ($self->roots()) {
1635 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1640 sub eNewick_full_aux
{
1641 my ($self,$node,$seen,$parent)=@_;
1643 if ($self->is_leaf($node) ||
1644 (defined $seen->{$node}) )
1646 $str=make_label_full
($self,$parent,$node);
1650 my @sons=$self->{graph
}->successors($node);
1652 foreach my $son (@sons) {
1653 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1656 $str.=")".make_label_full
($self,$parent,$node);
1661 sub make_label_full
{
1662 my ($self,$parent,$node)=@_;
1664 if ($self->is_hybrid_node($node)) {
1665 my $lbl=$self->{labels
}->{$node};
1669 $str.=$lbl; #$self->{labels}->{$node};
1671 if ((defined $parent) &&
1672 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1673 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1675 $str.=substr $node,1;
1677 if ((defined $self->{labels
}->{$node})&&($self->{labels
}->{$node} ne '')) {
1678 $str.=$self->{labels
}->{$node};
1684 if ((defined $parent) &&
1685 ($self->graph->has_edge_weight($parent,$node))) {
1686 $str.=":".$self->graph->get_edge_weight($parent,$node);
1691 # sub eNewick_full {
1695 # foreach my $root ($self->roots()) {
1696 # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1701 # sub eNewick_full_aux {
1702 # my ($self,$node,$seen,$parent)=@_;
1704 # if ($self->is_leaf($node) ||
1705 # (defined $seen->{$node}) )
1707 # if ($self->is_hybrid_node($node)) {
1708 # my $tag=substr $node,1;
1709 # if ((defined $parent) &&
1710 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1711 # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1721 # my @sons=$self->{graph}->successors($node);
1723 # foreach my $son (@sons) {
1724 # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1727 # if ($self->is_hybrid_node($node)) {
1728 # my $tag=substr $node,1;
1729 # if ((defined $parent) &&
1730 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1731 # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1739 # if ((defined $parent) &&
1740 # ($self->graph->has_edge_weight($parent,$node))) {
1741 # $str.=":".$self->graph->get_edge_weight($parent,$node);
1749 use overload
'""' => \
&display
;
1754 Usage : my $str=$net->display()
1755 Function: returns a string containing all the available information on $net
1764 my $graph=$self->{graph
};
1765 my @leaves=$self->leaves();
1766 my @nodes=@
{$self->{nodes
}};
1767 $str.= "Leaves:\t@leaves\n";
1768 $str.= "Nodes:\t@nodes\n";
1769 $str.= "Graph:\t$graph\n";
1770 $str.= "eNewick:\t".$self->eNewick()."\n";
1771 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1772 $str.= "Mu-data and heights:\n";
1773 foreach my $node (@nodes) {
1775 if (exists $self->{labels
}->{$node}) {
1776 $str.="\tlabel=".$self->{labels
}->{$node}.",";
1778 $str.="\tlabel=(none),";
1780 $str.= "\th=".$self->{h
}->{$node}.", \tmu=".$self->{mudata
}->{$node}."\n";
1782 if (exists $self->{has_temporal_representation
}) {
1783 $str.= "Temporal representation:\n";
1784 if ($self->{has_temporal_representation
}) {
1785 foreach my $node (@nodes) {
1787 $str.= "\tt=".$self->{temporal_representation
}->{$node}."\n";
1790 $str.= "Does not exist.\n";
1793 if (exists $self->{tripartitions
}) {
1794 $str.= "Tripartitions:\n";
1795 foreach my $node (@nodes) {
1797 $str.= "\ttheta=".$self->{tripartitions
}->{$node}."\n";