3 # Module for Bio::PhyloNetwork
5 # Cared for by Gabriel Cardona <gabriel(dot)cardona(at)uib(dot)es>
7 # Copyright Gabriel Cardona, Gabriel Valiente
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
19 use Bio::PhyloNetwork;
21 # Create a PhyloNetwork object from a eNewick string
22 my $net1=Bio::PhyloNetwork->new(
23 -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
26 # Print all available data
29 # Rebuild $net1 from its mu_data
30 my %mudata=$net1->mudata();
31 my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
33 print "d=".$net1->mu_distance($net2)."\n";
35 # Get another one and compute distance
36 my $net3=Bio::PhyloNetwork->new(
37 -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
39 print "d=".$net1->mu_distance($net3)."\n";
41 # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
42 my ($weight,%alignment)=$net1->optimal_alignment($net3);
43 print "weight:$weight\n";
44 foreach my $node1 (keys %alignment) {
45 print "$node1 => ".$alignment{$node1}."\n";
47 # ...or the Hamming distance
49 my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
50 print "weight:$weightH\n";
51 foreach my $node1 (keys %alignmentH) {
52 print "$node1 => ".$alignmentH{$node1}."\n";
55 # Test for time consistency of $net1
56 if ($net1->is_time_consistent) {
57 print "net1 is time consistent\n"
60 print "net1 is not time consistent\n"
63 # create a network from the list of edges
64 my $net4=Bio::PhyloNetwork->new(-edges=>
65 [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
67 # Test for time consistency of $net3
68 if ($net4->is_time_consistent) {
69 print "net4 is time consistent\n"
72 print "net4 is not time consistent\n"
75 # And print all information on net4
78 # Compute some tripartitions
79 my %triparts=$net1->tripartitions();
81 # Now these are stored
84 # And can compute the tripartition error
85 print "dtr=".$net1->tripartition_error($net3)."\n";
89 =head2 Phylogenetic Networks
91 This is a module to work with phylogenetic networks. Phylogenetic networks
92 have been studied over the last years as a richer model of the evolutionary
93 history of sets of organisms than phylogenetic trees, because they take not
94 only mutation events but also recombination and horizontal gene transfer
97 The natural model for describing the evolutionary
98 history of a set of sequences under recombination events is a DAG, hence
99 this package relies on the package Graph::Directed to represent the
100 underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
101 for formal definitions related to phylogenetic networks.
103 =head2 eNewick description
105 With this package, phylogenetic networks can be given by its eNewick
106 string. This description appeared in other packages related to
107 phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
108 packages use different descriptions. The Bio::PhyloNetwork
109 package allows both of them, but uses the second one in its output.
111 The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
112 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
113 nodes; let each of the first k copies be a child of one of the u_1,...,u_k
114 (one for each) and have no children (hence we will have k extra leaves);
115 as for the last copy, let it have no parents and have v_1,...,v_l be its
116 children. This way we get a forest; each of the trees will be rooted at either
117 one root of the phylogenetic network or a hybrid node of it; the set of leaves
118 (of the whole forest) will be the set of leaves of the original network
119 together with the set of hybrid nodes (each of them repeated as many times
120 as its in-degree). Then, the eNewick representation of the phylogenetic network
121 will be the Newick representation of all the trees in the obtained forest,
122 each of them with its root labeled.
124 The second approach [NetGen] goes as follows: For each hybrid node H, say with
125 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
126 nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
127 its children; let the other copies be child of u_2,...,u_k (one for each)
128 and have no children. This way, we get a tree whose set of leaves is the
129 set of leaves of the original network together with the set of hybrid nodes
130 (possibly repeated). Then the Newick string of the obtained tree (note that
131 some internal nodes will be labeled and some leaves will be repeated) is
132 the eNewick string of the phylogenetic network.
134 For example, consider the network depicted below:
146 If the first approach is taken, we get the forest:
159 Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
161 As for the second one, one gets the tree:
173 Hence, the eNewick string is '((1,H),((2)H,3))r;'.
175 Note: when rooting a tree, this package allows the notations
176 '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
177 the first one is used when writing eNewick strings.
179 =head2 Tree-child phylogenetic networks
181 Tree-child (TC) phylogenetic networks are a special class of phylogenetic
182 networks for which a distance, called mu-distance, is defined [CRV2]
183 based on certain data (mu-data) associated to every node.
184 Moreover, this distance extends the
185 Robinson-Foulds on phylogenetic trees. This package allows testing for a
186 phylogenetic network if it is TC and computes mu-distances between networks
187 over the same set of leaves.
189 Moreover, the mu-data allows to define the optimal
190 (in some precise sense) alignment between networks
191 over the same set of leaves. This package also computes this optimal alignment.
195 Although tripartitions (see [CRV1] and the references therein) do not allow
196 to define distances, this package outputs tripartitions and computes a weak
197 form of the tripartition error.
199 =head2 Time-consistency
201 Another useful property of Phylogenetic Networks that appears in the literature
202 is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
203 network admits a temporal representation if it can be drawn in such a way
204 that tree arcs (those whose end is a tree node) are inclined downwards, while
205 hybridization arcs (those whose end is a hybrid node) are horizontal.
206 This package checks for time-consistency and, if so, a temporal representation
211 Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
212 Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
220 G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
221 discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
225 G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
226 Tree-Child Phylogenetic Networks. Preprint.
230 M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
231 with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
235 PhyloNet: "Phylogenetic Networks Toolkit".
236 http://bioinfo.cs.rice.edu/phylonet
240 M. Baroni, C. Semple, and M. Steel. Hybrids in Real
241 Time. Syst. Biol. 55(1):46-56, 2006
247 The rest of the documentation details each of the object methods.
251 package Bio
::PhyloNetwork
;
256 use base
qw(Bio::Root::Root);
258 use Bio
::PhyloNetwork
::muVector
;
264 use Algorithm
::Munkres
;
271 Usage : my $obj = new Bio::PhyloNetwork();
272 Function: Creates a new Bio::PhyloNetwork object
273 Returns : Bio::PhyloNetwork
278 -graph => Graph::Directed object
280 -edges => reference to an array
282 -tree => Bio::Tree::Tree object
284 -mudata => reference to a hash,
285 -leaves => reference to an array
287 -mudata => reference to a hash,
288 -numleaves => integer
290 Returns a Bio::PhyloNetwork object, created according to the data given:
296 creates an empty network.
298 =item new(-eNewick =E<gt> $str)
300 creates the network whose
301 Extended Newick representation (see description above) is the string $str.
303 =item new(-graph =E<gt> $graph)
305 creates the network with underlying
306 graph given by the Graph::Directed object $graph
308 =item new(-tree =E<gt> $tree)
310 creates a network as a copy of the
311 Bio::Tree::Tree object in $tree
313 =item new(-mudata =E<gt> \%mudata, -leaves =E<gt> \@leaves)
315 creates the network by reconstructing it from its mu-data stored in
316 \%mudata and with set of leaves in \@leaves.
318 =item new(-mudata =E<gt> \%mudata, -numleaves =E<gt> $numleaves)
320 creates the network by reconstructing it from its mu-data stored in
321 \%mudata and with set of leaves in ("l1".."l$numleaves").
329 my $self=$pkg->SUPER::new
(@args);
330 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
331 $self->_rearrange([qw(ENEWICK
340 $self->build_from_eNewick($eNewick) if defined $eNewick;
341 $self->build_from_edges(@
$edgesR) if defined $edgesR;
342 $self->build_from_graph($graph) if defined $graph;
343 $self->build_from_tree($tree) if defined $tree;
344 if ((! defined $leavesR) && (defined $numleaves)) {
345 my @leaves=map {"l$_"} (1..$numleaves);
348 $self->build_from_mudata($mudataR,$leavesR)
349 if ((defined $mudataR) && (defined $leavesR));
355 sub build_from_edges
{
356 my ($self,@edges)=@_;
357 my $graph=Graph
::Directed
->new();
358 $graph->add_edges(@edges);
359 $self->{graph
}=$graph;
362 foreach my $node ($self->nodes()) {
363 $labels->{$node}=$node;
365 $self->{labels
}=$labels;
368 sub build_from_graph
{
369 my ($self,$graph)=@_;
370 my $graphcp=$graph->copy();
371 $self->{graph
}=$graphcp;
374 foreach my $node ($self->nodes()) {
375 $labels->{$node}=$node;
377 $self->{labels
}=$labels;
382 sub build_from_eNewick
{
383 my ($self,$string)=@_;
385 my $graph=Graph
::Directed
->new();
387 my @blocks=split(/; */,$string);
388 foreach my $block (@blocks) {
389 my ($rt,$str)=get_root_and_subtree
($block);
390 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length
($rt);
391 process_block
($graph,$labels,$block,$rtid);
392 $labels->{$rtid}=$rtlbl.'';
394 $self->{graph
}=$graph;
395 $self->{labels
}=$labels;
400 my ($graph,$labels,$block,$rtid)=@_;
401 my ($rt,$str)=get_root_and_subtree
($block);
402 my @substrs=my_split
($str);
403 foreach my $substr (@substrs) {
404 my ($subrt,$subblock)=get_root_and_subtree
($substr);
405 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
406 get_label_type_id_length
($subrt);
407 if (! $subrtlng eq '') {
408 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
411 $graph->add_edges($rtid,$subrtid);
413 if (! $subrttype eq '') {
414 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
417 # if (! $subrtlbl eq '') {
418 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
419 $labels->{$subrtid}=$subrtlbl;
420 } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
422 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
425 if ($subblock ne "") {
426 process_block
($graph,$labels,$subblock,$subrtid);
431 sub get_root_and_subtree
{
433 my ($rt,$str)=("","");
434 # ($rt,$str)=split(/:|=/,$block);
435 ($rt,$str)=split(/=/,$block);
437 # try to look for root label at the end
438 my $pos=length($rt)-1;
439 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
442 $rt=substr($block,$pos+1,length($block)-$pos);
443 $str=substr($block,0,$pos+1);
450 sub get_label_type_id_length
{
454 if (index($string,'#')==-1) {
456 my ($label,$length)=split(':',$string);
459 if ((! defined $label) || ($label eq '')) {
466 return ($label,'',$id,$length);
470 my ($label,$string2)=split('#',$string);
471 my ($typeid,$length)=split(':',$string2);
476 return ($label,$type,'#'.$id,$length);
493 for my $i ( 1 .. length( $string ) ) {
494 my $char=substr($string,$i,1);
500 push @substrings, $temp;
505 if (($char eq ",") && ($level==1)) {
506 push @substrings, $temp;
515 sub build_from_mudata
{
516 my ($self,$mus,$leavesR)=@_;
517 my $graph=Graph
::Directed
->new();
518 my @nodes=keys %{$mus};
519 my @leaves=@
{$leavesR};
526 foreach my $node (@nodes) {
527 push(@internal, $node) unless exists $seen{$node};
530 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
531 @nodes=(@internal,@leaves);
533 for (my $i=0;$i<$numnodes;$i++) {
534 my $mu=$mus->{$nodes[$i]};
536 while ($mu->is_positive() && $j<$numnodes) {
537 if ($mu->geq_poset($mus->{$nodes[$j]})) {
538 $graph->add_edges(($nodes[$i],$nodes[$j]));
539 $mu = $mu - $mus->{$nodes[$j]};
544 $self->build_from_graph($graph);
551 # my $root=$tree->get_root_node();
552 # foreach my $node ($tree->get_nodes()) {
553 # if ($node == $root) {
554 # $node->{'_id'}="r";
556 # elsif (! $node->is_Leaf) {
557 # $node->{'_id'}="t$i";
561 # if ($node->{'_id'} eq "") {
562 # $node->{'_id'}="l$j";
570 # sub build_subtree {
571 # my ($graph,$root)=@_;
572 # foreach my $child ($root->each_Descendent) {
573 # $graph->add_edge($root->id,$child->id);
574 # $graph=build_subtree($graph,$child);
579 sub build_from_tree
{
581 # relabel_tree($tree);
582 # my $treeroot=$tree->get_root_node;
583 # my $graph=Graph::Directed->new();
584 # $graph=build_subtree($graph,$treeroot);
585 # $self->build_from_graph($graph);
587 my $io=IO
::String
->new($str);
588 my $treeio=Bio
::TreeIO
->new(-format
=> 'newick', -fh
=> $io);
589 $treeio->write_tree($tree);
590 # print "intern: $str\n";
591 $self->build_from_eNewick($str);
596 $self->throw("Graph is not DAG:".$self->{graph
})
597 unless $self->{graph
}->is_dag();
598 my @leaves=$self->{graph
}->successorless_vertices();
599 @leaves=sort @leaves;
600 my $numleaves=@leaves;
601 my @roots=$self->{graph
}->predecessorless_vertices();
603 #$self->throw("Graph is not rooted") unless ($numroots == 1);
604 my @nodes=$self->{graph
}->vertices();
607 foreach my $node (@nodes) {
608 if (! defined $self->{labels
}->{$node}) {
609 $self->{labels
}->{$node}='';
612 $self->{leaves
}=\
@leaves;
613 $self->{numleaves
}=$numleaves;
614 $self->{roots
}=\
@roots;
615 $self->{numroots
}=$numroots;
616 $self->{nodes
}=\
@nodes;
617 $self->{numnodes
}=$numnodes;
620 $self->compute_height();
628 my ($self,$u1,$v1,$u2,$v2)=@_;
629 if ( $self->is_hybrid_node($v1) ||
630 $self->is_hybrid_node($v2) ||
631 $self->graph->is_reachable($v2,$u1) ||
632 (($u1 eq $u2)&&($v1 eq $v2)) ||
633 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
634 $self->graph->successors($u2)))
642 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
643 my $graph=$self->{graph
};
644 $graph->delete_edge($u1,$v1);
645 $graph->delete_edge($u2,$v2);
646 $graph->add_edge($u1,"T$lbl");
647 $graph->add_edge("T$lbl",$v1);
648 $graph->add_edge($u2,"#H$lbl");
649 $graph->add_edge("#H$lbl",$v2);
650 $graph->add_edge("T$lbl","#H$lbl");
651 $self->build_from_graph($graph);
655 # Computation of mu-data
659 my $graph=$self->{graph
};
660 my $mudata=$self->{mudata
};
661 my @leaves=@
{$self->{leaves
}};
662 my $numleaves=$self->{numleaves
};
663 for (my $i=0;$i<$numleaves;$i++) {
664 my $vec=Bio
::PhyloNetwork
::muVector
->new($numleaves);
666 $mudata->{$leaves[$i]}=$vec;
669 while (my @nodes=grep {$self->{h
}->{$_} == $h} @
{$self->{nodes
}} )
671 foreach my $u (@nodes) {
672 my $vec=Bio
::PhyloNetwork
::muVector
->new($numleaves);
673 foreach my $son ($graph->successors($u)) {
674 $vec+=$mudata->{$son};
684 my $graph=$self->{graph
};
685 my @leaves=@
{$self->{leaves
}};
686 foreach my $leaf (@leaves) {
687 $self->{h
}->{$leaf}=0;
690 while (my @nodes=grep {(defined $self->{h
}->{$_})&&($self->{h
}->{$_} == $h)}
693 foreach my $node (@nodes) {
694 foreach my $parent ($graph->predecessors($node)) {
695 $self->{h
}->{$parent}=$h+1;
707 Usage : my $b=$net->is_leaf($u)
708 Function: tests if $u is a leaf in $net
716 if ($self->{graph
}->out_degree($node) == 0) {return 1;}
723 Usage : my $b=$net->is_root($u)
724 Function: tests if $u is the root of $net
732 if ($self->{graph
}->in_degree($node) == 0) {return 1;}
739 Usage : my $b=$net->is_tree_node($u)
740 Function: tests if $u is a tree node in $net
748 if ($self->{graph
}->in_degree($node) <= 1) {return 1;}
752 =head2 is_hybrid_node
754 Title : is_hybrid_node
755 Usage : my $b=$net->is_hybrid_node($u)
756 Function: tests if $u is a hybrid node in $net
764 if ($self->{graph
}->in_degree($node) > 1) {return 1;}
769 # has_tree_child(g,u) returns 1 if u has a tree child in graph g
773 my @Sons=$g->successors($node);
774 foreach my $son (@Sons) {
775 if ($g->in_degree($son)==1) {
784 Title : is_tree_child
785 Usage : my $b=$net->is_tree_child()
786 Function: tests if $net is a Tree-Child phylogenetic network
788 Args : Bio::PhyloNetwork
794 if (defined $self->{is_tree_child
}) {
795 return $self->{is_tree_child
};
797 $self->{is_tree_child
}=0;
798 my $graph=$self->{graph
};
799 foreach my $node (@
{$self->{nodes
}}) {
800 return 0 unless ($graph->out_degree($node)==0 ||
801 has_tree_child
($graph,$node));
803 $self->{is_tree_child
}=1;
812 Usage : my @nodes=$net->nodes()
813 Function: returns the set of nodes of $net
821 return @
{$self->{nodes
}};
827 Usage : my @leaves=$net->leaves()
828 Function: returns the set of leaves of $net
836 return @
{$self->{leaves
}};
842 Usage : my @roots=$net->roots()
843 Function: returns the set of roots of $net
851 return @
{$self->{roots
}};
854 =head2 internal_nodes
856 Title : internal_nodes
857 Usage : my @internal_nodes=$net->internal_nodes()
858 Function: returns the set of internal nodes of $net
866 return grep {! $self->is_leaf($_)} $self->nodes();
872 Usage : my @tree_nodes=$net->tree_nodes()
873 Function: returns the set of tree nodes of $net
881 return grep {$self->is_tree_node($_)} $self->nodes();
887 Usage : my @hybrid_nodes=$net->hybrid_nodes()
888 Function: returns the set of hybrid nodes of $net
896 return grep {$self->is_hybrid_node($_)} $self->nodes();
902 Usage : my $graph=$net->graph()
903 Function: returns the underlying graph of $net
904 Returns : Graph::Directed
911 return $self->{graph
};
917 Usage : my @edges=$net->edges()
918 Function: returns the set of edges of $net
922 Each element in the array is an anonimous array whose first element is the
923 head of the edge and the second one is the tail.
929 return $self->{graph
}->edges();
935 Usage : my @tree_edges=$net->tree_edges()
936 Function: returns the set of tree edges of $net
937 (those whose tail is a tree node)
945 return grep {$self->is_tree_node($_->[1])} $self->edges();
951 Usage : my @hybrid_edges=$net->hybrid_edges()
952 Function: returns the set of hybrid edges of $net
953 (those whose tail is a hybrid node)
961 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
967 Usage : my @trees=$net->explode()
968 Function: returns the representation of $net by a set of
969 Bio::Tree:Tree objects
978 $self->explode_rec(\
@trees);
983 my ($self,$trees)=@_;
984 my @h = $self->hybrid_nodes;
987 for my $u ($self->{graph
}->predecessors($v)) {
988 $self->{graph
}->delete_edge($u,$v);
989 $self->explode_rec($trees);
990 $self->{graph
}->add_edge($u,$v);
993 my $io = IO
::String
->new($self->eNewick);
994 my $treeio = Bio
::TreeIO
->new(-format
=> 'newick', -fh
=> $io);
995 my $tree = $treeio->next_tree;
996 $tree->contract_linear_paths;
997 push @
{$trees}, $tree;
1004 Usage : my %mudata=$net->mudata()
1005 Function: returns the representation of $net by its mu-data
1009 $net-E<gt>mudata() returns a hash with keys the nodes of $net and each value is a
1010 muVector object holding its mu-vector.
1016 return %{$self->{mudata
}};
1021 return $self->{mudata
}{$u};
1027 Usage : my %heights=$net->heights()
1028 Function: returns the heights of the nodes of $net
1032 $net-E<gt>heights() returns a hash with keys the nodes of $net and each value
1039 return %{$self->{h
}};
1044 return $self->{h
}{$u};
1050 Usage : my $dist=$net1->mu_distance($net2)
1051 Function: Computes the mu-distance between the networks $net1 and $net2 on
1052 the same set of leaves
1054 Args : Bio::PhyloNetwork
1059 my ($net1,$net2)=@_;
1060 my @nodes1=$net1->nodes;
1061 my @nodes2=$net2->nodes;
1062 my $comp = Array
::Compare
->new;
1063 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1064 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1065 $net1->warn("Not a tree-child phylogenetic network")
1066 unless $net1->is_tree_child();
1067 $net2->warn("Not a tree-child phylogenetic network")
1068 unless $net2->is_tree_child();
1069 my @leaves=@
{$net1->{leaves
}};
1072 OUTER
: foreach my $node1 (@nodes1) {
1073 foreach my $node2 (@nodes2) {
1075 (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1076 ($net1->{mudata
}{$node1} == $net2->{mudata
}{$node2})
1078 $matched1{$node1}=$node2;
1079 $matched2{$node2}=$node1;
1084 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1087 =head2 mu_distance_generalized
1089 Title : mu_distance_generalized
1090 Usage : my $dist=$net1->mu_distance($net2)
1091 Function: Computes the mu-distance between the topological restrictions of
1092 networks $net1 and $net2 on its common set of leaves
1094 Args : Bio::PhyloNetwork
1098 sub mu_distance_generalized
{
1099 my ($net1,$net2)=@_;
1100 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1101 return $netr1->mu_distance($netr2);
1104 # mudata_string (code mu_data in a string; useful for isomorphism testing)
1106 sub mudata_string_node
{
1108 return $self->{mudata
}->{$u}->display();
1113 return $self->{mudata_string
} if defined $self->{mudata_string
};
1114 my @internal=$self->internal_nodes;
1115 my $mus=$self->{mudata
};
1116 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
1118 foreach my $node (@internal) {
1119 $str=$str.$self->mudata_string_node($node);
1121 $self->{mudata_string
}=$str;
1125 sub is_mu_isomorphic
{
1126 my ($net1,$net2)=@_;
1127 return ($net1->mudata_string() eq $net2->mudata_string());
1132 sub compute_tripartition_node
{
1134 $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1135 unless ($self->{numroots
} == 1);
1136 my $root=$self->{roots
}->[0];
1137 my $graph=$self->{graph
};
1138 my $graphPruned=$graph->copy();
1139 $graphPruned->delete_vertex($u);
1140 my $tripartition="";
1141 foreach my $leaf (@
{$self->{leaves
}}) {
1143 if ($graph->is_reachable($u,$leaf)) {
1144 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
1148 $tripartition .= $type;
1150 $self->{tripartitions
}->{$u}=$tripartition;
1153 sub compute_tripartitions
{
1155 foreach my $node (@
{$self->{nodes
}}) {
1156 $self->compute_tripartition_node($node);
1160 =head2 tripartitions
1162 Title : tripartitions
1163 Usage : my %tripartitions=$net->tripartitions()
1164 Function: returns the set of tripartitions of $net
1168 $net-E<gt>tripartitions() returns a hash with keys the nodes of $net and each value
1169 is a string representing the tripartition of the leaves induced by the node.
1170 A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1171 the set B(u), the second one in C(u), the third one in A(u), and so on.
1177 $self->compute_tripartitions() unless defined $self->{tripartitions
};
1178 return %{$self->{tripartitions
}};
1181 # to do: change to tri_distance and test for TC and time-cons
1183 sub tripartition_error
{
1184 my ($net1,$net2)=@_;
1185 my $comp = Array
::Compare
->new;
1186 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1187 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1188 $net1->warn("Not a tree-child phylogenetic network")
1189 unless $net1->is_tree_child();
1190 $net2->warn("Not a tree-child phylogenetic network")
1191 unless $net2->is_tree_child();
1192 $net1->warn("Not a time-consistent network")
1193 unless $net1->is_time_consistent();
1194 $net2->warn("Not a time-consistent network")
1195 unless $net2->is_time_consistent();
1196 $net1->compute_tripartitions() unless defined $net1->{tripartitions
};
1197 $net2->compute_tripartitions() unless defined $net2->{tripartitions
};
1198 my @edges1=$net1->{graph
}->edges();
1199 my @edges2=$net2->{graph
}->edges();
1201 foreach my $edge1 (@edges1) {
1203 foreach my $edge2 (@edges2) {
1204 if ($net1->{tripartitions
}->{$edge1->[1]} eq
1205 $net2->{tripartitions
}->{$edge2->[1]}) {
1210 if (! $matched) {$FN++;}
1212 foreach my $edge2 (@edges2) {
1214 foreach my $edge1 (@edges1) {
1215 if ($net1->{tripartitions
}->{$edge1->[1]} eq
1216 $net2->{tripartitions
}->{$edge2->[1]}) {
1221 if (! $matched) {$FP++;}
1223 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1228 # to do: add weak time consistency
1230 =head2 is_time_consistent
1232 Title : is_time_consistent
1233 Usage : my $b=$net->is_time_consistent()
1234 Function: tests if $net is (strong) time-consistent
1240 sub is_time_consistent
{
1242 $self->compute_temporal_representation()
1243 unless exists $self->{has_temporal_representation
};
1244 return $self->{has_temporal_representation
};
1247 =head2 temporal_representation
1249 Title : temporal_representation
1250 Usage : my %time=$net->temporal_representation()
1251 Function: returns a hash containing a temporal representation of $net, or 0
1252 if $net is not time-consistent
1258 sub temporal_representation
{
1260 if ($self->is_time_consistent) {
1261 return %{$self->{temporal_representation
}};
1266 sub compute_temporal_representation
{
1268 my $quotient=Graph
::Directed
->new();
1269 my $classes=find_classes
($self);
1271 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
1272 foreach my $e ($self->tree_edges()) {
1273 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1277 while ($quotient->vertices()) {
1278 if (my @svs=$quotient->predecessorless_vertices()) {
1279 foreach my $sv (@svs) {
1282 $quotient->delete_vertices(@svs);
1288 foreach my $node (@
{$self->{nodes
}}) {
1289 $temp{$node}=$temp{$repr{$node}}
1291 $self->{temporal_representation
}=\
%temp;
1292 $self->{has_temporal_representation
}=1;
1298 map {$classes->{$_}=[$_]} $self->nodes();
1299 foreach my $e ($self->hybrid_edges()) {
1300 $classes=join_classes
($classes,$e->[0],$e->[1]);
1306 my ($classes,$u,$v)=@_;
1307 my @clu=@
{$classes->{$u}};
1308 my @clv=@
{$classes->{$v}};
1309 my @cljoin=(@clu,@clv);
1310 map {$classes->{$_}=\
@cljoin} @cljoin;
1316 =head2 contract_elementary
1319 Title : contract_elementary
1320 Usage : my ($contracted,$blocks)=$net->contract_elementary();
1321 Function: Returns the network $contracted, obtained by contracting elementary
1322 paths of $net into edges. The reference $blocks points to a hash
1323 where, for each node of $contracted, gives the corresponding nodes
1324 of $net that have been deleted.
1325 Returns : Bio::PhyloNetwork,reference to hash
1330 sub contract_elementary
{
1333 my $contracted=$self->graph->copy();
1334 my @nodes=$self->nodes();
1335 my $mus=$self->{mudata
};
1338 foreach my $u (@nodes) {
1341 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
1342 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1343 $hs->{$b} <=> $hs->{$a}} @elementary;
1344 foreach my $elem (@elementary) {
1345 my @children=$contracted->successors($elem);
1346 my $child=$children[0];
1347 if ($contracted->in_degree($elem) == 1) {
1348 my @parents=$contracted->predecessors($elem);
1349 my $parent=$parents[0];
1350 $contracted->add_edge($parent,$child);
1352 $contracted->delete_vertex($elem);
1353 my @blch=@
{$blocks{$child}};
1354 my @blem=@
{$blocks{$elem}};
1355 $blocks{$child}=[@blem,@blch];
1356 delete $blocks{$elem};
1358 my $contr=Bio
::PhyloNetwork
->new(-graph
=> $contracted);
1359 return $contr,\
%blocks;
1362 =head2 optimal_alignment
1364 Title : optimal_alignment
1365 Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1366 Function: returns the total weight of an optimal alignment,
1367 the alignment itself, and partial weights
1368 between the networks $net1 and $net2 on the same set of leaves.
1369 An optional argument allows to use the Manhattan (default) or the
1370 Hamming distance between mu-vectors.
1371 Returns : scalar,reference to hash,reference to hash
1372 Args : Bio::PhyloNetwork,
1373 -metric => string (optional)
1375 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1379 sub optimal_alignment
{
1380 my ($net1,$net2,%params)=@_;
1382 my ($net1cont,$blocks1)=contract_elementary
($net1);
1383 my ($net2cont,$blocks2)=contract_elementary
($net2);
1384 my ($wc,$alignc,$weightc)=
1385 optimal_alignment_noelementary
($net1cont,$net2cont,%params);
1389 foreach my $u1 (keys %$alignc) {
1390 my $u2=$alignc->{$u1};
1391 my @block1=@
{$blocks1->{$u1}};
1392 my @block2=@
{$blocks2->{$u2}};
1393 while (@block1 && @block2) {
1394 my $u1dc=pop @block1;
1395 my $u2dc=pop @block2;
1396 $alignment{$u1dc}=$u2dc;
1397 $weigths{$u1dc}=$weightc->{$u1};
1398 $totalweigth+=$weigths{$u1dc};
1401 return $totalweigth,\
%alignment,\
%weigths;
1404 sub optimal_alignment_noelementary
{
1405 my ($net1,$net2,%params)=@_;
1407 my $comp = Array
::Compare
->new;
1408 $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1409 unless $comp->compare($net1->{leaves
},$net2->{leaves
});
1411 if ((defined $params{-metric
})and ($params{-metric
} eq 'Hamming')) {
1412 $distance='Hamming';
1414 $distance='Manhattan';
1416 my $numleaves=$net1->{numleaves
};
1417 my @nodes1=$net1->internal_nodes();
1418 my @nodes2=$net2->internal_nodes();
1419 my $numnodes1=@nodes1;
1420 my $numnodes2=@nodes2;
1422 for (my $i=0;$i<$numnodes1;$i++) {
1424 for (my $j=0;$j<$numnodes2;$j++) {
1425 push @row,weight
($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1430 assign
(\
@matrix,\
@alignment);
1434 foreach my $leaf (@
{$net1->{leaves
}}) {
1435 $alignmenthash{$leaf}=$leaf;
1436 $weighthash{$leaf}=0;
1438 for (my $i=0;$i<$numnodes1;$i++) {
1439 if (defined $nodes2[$alignment[$i]]) {
1440 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1441 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1442 $totalw += $matrix[$i][$alignment[$i]];
1445 return $totalw,\
%alignmenthash,\
%weighthash;
1448 =head2 optimal_alignment_generalized
1450 Title : optimal_alignment_generalized
1451 Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1452 Function: returns the wieght of an optimal alignment, and the alignment itself,
1453 between the topological restriction of the networks $net1 and $net2
1454 on the set of common leaves.
1455 An optional argument allows to use the Manhattan (default) or the
1456 Hamming distance between mu-vectors.
1457 Returns : scalar,hash
1458 Args : Bio::PhyloNetwork,
1459 -metric => string (optional)
1461 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1465 sub optimal_alignment_generalized
{
1466 my ($net1,$net2,%params)=@_;
1467 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1468 return $netr1->optimal_alignment($netr2,%params);
1472 my ($net1,$v1,$net2,$v2,$distance)=@_;
1474 if (! defined $distance) {
1475 $distance='Manhattan';
1477 if ($distance eq 'Hamming') {
1478 $w=$net1->{mudata
}->{$v1}->hamming($net2->{mudata
}->{$v2});
1480 $w=$net1->{mudata
}->{$v1}->manhattan($net2->{mudata
}->{$v2});
1482 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
1483 ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1486 $w +=1/(2*$net1->{numleaves
});
1492 =head2 topological_restriction
1494 Title : topological_restriction
1495 Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1496 Function: returns the topological restriction of $net1 and $net2 on its
1497 common set of leaves
1498 Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1499 Args : Bio::PhyloNetwork
1503 sub topological_restriction
{
1504 my ($net1,$net2)=@_;
1506 my @leaves1=$net1->leaves();
1507 my @leaves2=$net2->leaves();
1508 my $numleaves1=scalar @leaves1;
1509 my $numleaves2=scalar @leaves2;
1511 for (my $i=0; $i<$numleaves1; $i++) {
1512 $position1{$leaves1[$i]}=$i;
1515 my @commonleaves=();
1516 for (my $j=0; $j<$numleaves2; $j++) {
1517 if (defined $position1{$leaves2[$j]}) {
1518 push @commonleaves,$leaves2[$j];
1519 $position2{$leaves2[$j]}=$j;
1522 my $graphred1=$net1->{graph
}->copy();
1523 my $graphred2=$net2->{graph
}->copy();
1525 foreach my $u ($graphred1->vertices()) {
1526 my $mu=$net1->mudata_node($u);
1527 foreach my $leaf (@commonleaves) {
1528 if ($mu->[$position1{$leaf}]>0) {
1532 $graphred1->delete_vertex($u);
1535 foreach my $u ($graphred2->vertices()) {
1536 my $mu=$net2->mudata_node($u);
1537 foreach my $leaf (@commonleaves) {
1538 if ($mu->[$position2{$leaf}]>0) {
1542 $graphred2->delete_vertex($u);
1544 my $netr1=Bio
::PhyloNetwork
->new(-graph
=> $graphred1);
1545 my $netr2=Bio
::PhyloNetwork
->new(-graph
=> $graphred2);
1546 return ($netr1,$netr2);
1549 # Functions for eNewick representation
1554 Usage : my $str=$net->eNewick()
1555 Function: returns the eNewick representation of $net without labeling
1566 foreach my $root ($self->roots()) {
1567 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1573 my ($self,$node,$seen,$parent)=@_;
1575 if ($self->is_leaf($node) ||
1576 (defined $seen->{$node}) )
1578 $str=make_label
($self,$parent,$node);
1582 my @sons=$self->{graph
}->successors($node);
1584 foreach my $son (@sons) {
1585 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1588 $str.=")".make_label
($self,$parent,$node);
1594 my ($self,$parent,$node)=@_;
1596 if ($self->is_hybrid_node($node)) {
1597 my $lbl=$self->{labels
}->{$node};
1601 $str.=$lbl; #$self->{labels}->{$node};
1603 if ((defined $parent) &&
1604 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1605 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1607 $str.=substr $node,1;
1609 $str.=$self->{labels
}->{$node};
1611 if ((defined $parent) &&
1612 ($self->graph->has_edge_weight($parent,$node))) {
1613 $str.=":".$self->graph->get_edge_weight($parent,$node);
1620 Title : eNewick_full
1621 Usage : my $str=$net->eNewick_full()
1622 Function: returns the eNewick representation of $net labeling
1633 foreach my $root ($self->roots()) {
1634 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1639 sub eNewick_full_aux
{
1640 my ($self,$node,$seen,$parent)=@_;
1642 if ($self->is_leaf($node) ||
1643 (defined $seen->{$node}) )
1645 $str=make_label_full
($self,$parent,$node);
1649 my @sons=$self->{graph
}->successors($node);
1651 foreach my $son (@sons) {
1652 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1655 $str.=")".make_label_full
($self,$parent,$node);
1660 sub make_label_full
{
1661 my ($self,$parent,$node)=@_;
1663 if ($self->is_hybrid_node($node)) {
1664 my $lbl=$self->{labels
}->{$node};
1668 $str.=$lbl; #$self->{labels}->{$node};
1670 if ((defined $parent) &&
1671 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1672 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1674 $str.=substr $node,1;
1676 if ((defined $self->{labels
}->{$node})&&($self->{labels
}->{$node} ne '')) {
1677 $str.=$self->{labels
}->{$node};
1683 if ((defined $parent) &&
1684 ($self->graph->has_edge_weight($parent,$node))) {
1685 $str.=":".$self->graph->get_edge_weight($parent,$node);
1690 # sub eNewick_full {
1694 # foreach my $root ($self->roots()) {
1695 # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1700 # sub eNewick_full_aux {
1701 # my ($self,$node,$seen,$parent)=@_;
1703 # if ($self->is_leaf($node) ||
1704 # (defined $seen->{$node}) )
1706 # if ($self->is_hybrid_node($node)) {
1707 # my $tag=substr $node,1;
1708 # if ((defined $parent) &&
1709 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1710 # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1720 # my @sons=$self->{graph}->successors($node);
1722 # foreach my $son (@sons) {
1723 # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1726 # if ($self->is_hybrid_node($node)) {
1727 # my $tag=substr $node,1;
1728 # if ((defined $parent) &&
1729 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1730 # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1738 # if ((defined $parent) &&
1739 # ($self->graph->has_edge_weight($parent,$node))) {
1740 # $str.=":".$self->graph->get_edge_weight($parent,$node);
1748 use overload
'""' => \
&display
;
1753 Usage : my $str=$net->display()
1754 Function: returns a string containing all the available information on $net
1763 my $graph=$self->{graph
};
1764 my @leaves=$self->leaves();
1765 my @nodes=@
{$self->{nodes
}};
1766 $str.= "Leaves:\t@leaves\n";
1767 $str.= "Nodes:\t@nodes\n";
1768 $str.= "Graph:\t$graph\n";
1769 $str.= "eNewick:\t".$self->eNewick()."\n";
1770 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1771 $str.= "Mu-data and heights:\n";
1772 foreach my $node (@nodes) {
1774 if (exists $self->{labels
}->{$node}) {
1775 $str.="\tlabel=".$self->{labels
}->{$node}.",";
1777 $str.="\tlabel=(none),";
1779 $str.= "\th=".$self->{h
}->{$node}.", \tmu=".$self->{mudata
}->{$node}."\n";
1781 if (exists $self->{has_temporal_representation
}) {
1782 $str.= "Temporal representation:\n";
1783 if ($self->{has_temporal_representation
}) {
1784 foreach my $node (@nodes) {
1786 $str.= "\tt=".$self->{temporal_representation
}->{$node}."\n";
1789 $str.= "Does not exist.\n";
1792 if (exists $self->{tripartitions
}) {
1793 $str.= "Tripartitions:\n";
1794 foreach my $node (@nodes) {
1796 $str.= "\ttheta=".$self->{tripartitions
}->{$node}."\n";