bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / PhyloNetwork.pm
blob37426c10009eb5f8a9b0cd8d8fa5f733deb021b2
1 # $Id$
3 # Module for Bio::PhyloNetwork
5 # Cared for by Gabriel Cardona <gabriel(dot)cardona(at)uib(dot)es>
7 # Copyright Gabriel Cardona, Gabriel Valiente
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
17 =head1 SYNOPSIS
19 use Bio::PhyloNetwork;
21 # Create a PhyloNetwork object from a eNewick string
22 my $net1=Bio::PhyloNetwork->new(
23 -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
26 # Print all available data
27 print $net1;
29 # Rebuild $net1 from its mu_data
30 my %mudata=$net1->mudata();
31 my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
32 print $net2;
33 print "d=".$net1->mu_distance($net2)."\n";
35 # Get another one and compute distance
36 my $net3=Bio::PhyloNetwork->new(
37 -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
39 print "d=".$net1->mu_distance($net3)."\n";
41 # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
42 my ($weight,%alignment)=$net1->optimal_alignment($net3);
43 print "weight:$weight\n";
44 foreach my $node1 (keys %alignment) {
45 print "$node1 => ".$alignment{$node1}."\n";
47 # ...or the Hamming distance
49 my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
50 print "weight:$weightH\n";
51 foreach my $node1 (keys %alignmentH) {
52 print "$node1 => ".$alignmentH{$node1}."\n";
55 # Test for time consistency of $net1
56 if ($net1->is_time_consistent) {
57 print "net1 is time consistent\n"
59 else {
60 print "net1 is not time consistent\n"
63 # create a network from the list of edges
64 my $net4=Bio::PhyloNetwork->new(-edges=>
65 [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
67 # Test for time consistency of $net3
68 if ($net4->is_time_consistent) {
69 print "net4 is time consistent\n"
71 else {
72 print "net4 is not time consistent\n"
75 # And print all information on net4
76 print $net4;
78 # Compute some tripartitions
79 my %triparts=$net1->tripartitions();
81 # Now these are stored
82 print $net1;
84 # And can compute the tripartition error
85 print "dtr=".$net1->tripartition_error($net3)."\n";
87 =head1 DESCRIPTION
89 =head2 Phylogenetic Networks
91 This is a module to work with phylogenetic networks. Phylogenetic networks
92 have been studied over the last years as a richer model of the evolutionary
93 history of sets of organisms than phylogenetic trees, because they take not
94 only mutation events but also recombination and horizontal gene transfer
95 events into account.
97 The natural model for describing the evolutionary
98 history of a set of sequences under recombination events is a DAG, hence
99 this package relies on the package Graph::Directed to represent the
100 underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
101 for formal definitions related to phylogenetic networks.
103 =head2 eNewick description
105 With this package, phylogenetic networks can be given by its eNewick
106 string. This description appeared in other packages related to
107 phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
108 packages use different descriptions. The Bio::PhyloNetwork
109 package allows both of them, but uses the second one in its output.
111 The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
112 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
113 nodes; let each of the first k copies be a child of one of the u_1,...,u_k
114 (one for each) and have no children (hence we will have k extra leaves);
115 as for the last copy, let it have no parents and have v_1,...,v_l be its
116 children. This way we get a forest; each of the trees will be rooted at either
117 one root of the phylogenetic network or a hybrid node of it; the set of leaves
118 (of the whole forest) will be the set of leaves of the original network
119 together with the set of hybrid nodes (each of them repeated as many times
120 as its in-degree). Then, the eNewick representation of the phylogenetic network
121 will be the Newick representation of all the trees in the obtained forest,
122 each of them with its root labeled.
124 The second approach [NetGen] goes as follows: For each hybrid node H, say with
125 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
126 nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
127 its children; let the other copies be child of u_2,...,u_k (one for each)
128 and have no children. This way, we get a tree whose set of leaves is the
129 set of leaves of the original network together with the set of hybrid nodes
130 (possibly repeated). Then the Newick string of the obtained tree (note that
131 some internal nodes will be labeled and some leaves will be repeated) is
132 the eNewick string of the phylogenetic network.
134 For example, consider the network depicted below:
140 / \ / \
141 1 \ / 3
146 If the first approach is taken, we get the forest:
152 / \ / \
153 1 H H 3
159 Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
161 As for the second one, one gets the tree:
167 / \ / \
168 1 H | 3
173 Hence, the eNewick string is '((1,H),((2)H,3))r;'.
175 Note: when rooting a tree, this package allows the notations
176 '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
177 the first one is used when writing eNewick strings.
179 =head2 Tree-child phylogenetic networks
181 Tree-child (TC) phylogenetic networks are a special class of phylogenetic
182 networks for which a distance, called mu-distance, is defined [CRV2]
183 based on certain data (mu-data) associated to every node.
184 Moreover, this distance extends the
185 Robinson-Foulds on phylogenetic trees. This package allows testing for a
186 phylogenetic network if it is TC and computes mu-distances between networks
187 over the same set of leaves.
189 Moreover, the mu-data allows to define the optimal
190 (in some precise sense) alignment between networks
191 over the same set of leaves. This package also computes this optimal alignment.
193 =head2 Tripartitions
195 Although tripartitions (see [CRV1] and the references therein) do not allow
196 to define distances, this package outputs tripartitions and computes a weak
197 form of the tripartition error.
199 =head2 Time-consistency
201 Another useful property of Phylogenetic Networks that appears in the literature
202 is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
203 network admits a temporal representation if it can be drawn in such a way
204 that tree arcs (those whose end is a tree node) are inclined downwards, while
205 hybridization arcs (those whose end is a hybrid node) are horizontal.
206 This package checks for time-consistency and, if so, a temporal representation
207 is provided.
209 =head1 AUTHOR
211 Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
212 Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
214 =head1 SEE ALSO
216 =over
218 =item [CRV1]
220 G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
221 discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
223 =item [CRV2]
225 G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
226 Tree-Child Phylogenetic Networks. Preprint.
228 =item [NetGen]
230 M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
231 with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
233 =item [PhyloNet]
235 PhyloNet: "Phylogenetic Networks Toolkit".
236 http://bioinfo.cs.rice.edu/phylonet
238 =item [BSS]
240 M. Baroni, C. Semple, and M. Steel. Hybrids in Real
241 Time. Syst. Biol. 55(1):46-56, 2006
243 =back
245 =head1 APPENDIX
247 The rest of the documentation details each of the object methods.
249 =cut
251 package Bio::PhyloNetwork;
253 use strict;
254 use warnings;
256 use base qw(Bio::Root::Root);
258 use Bio::PhyloNetwork::muVector;
259 use Graph::Directed;
260 use Bio::TreeIO;
261 use Bio::Tree::Node;
262 use IO::String;
263 use Array::Compare;
264 use Algorithm::Munkres;
266 # Creator
268 =head2 new
270 Title : new
271 Usage : my $obj = new Bio::PhyloNetwork();
272 Function: Creates a new Bio::PhyloNetwork object
273 Returns : Bio::PhyloNetwork
274 Args : none
276 -eNewick => string
278 -graph => Graph::Directed object
280 -edges => reference to an array
282 -tree => Bio::Tree::Tree object
284 -mudata => reference to a hash,
285 -leaves => reference to an array
287 -mudata => reference to a hash,
288 -numleaves => integer
290 Returns a Bio::PhyloNetwork object, created according to the data given:
292 =over 3
294 =item new()
296 creates an empty network.
298 =item new(-eNewick =E<gt> $str)
300 creates the network whose
301 Extended Newick representation (see description above) is the string $str.
303 =item new(-graph =E<gt> $graph)
305 creates the network with underlying
306 graph given by the Graph::Directed object $graph
308 =item new(-tree =E<gt> $tree)
310 creates a network as a copy of the
311 Bio::Tree::Tree object in $tree
313 =item new(-mudata =E<gt> \%mudata, -leaves =E<gt> \@leaves)
315 creates the network by reconstructing it from its mu-data stored in
316 \%mudata and with set of leaves in \@leaves.
318 =item new(-mudata =E<gt> \%mudata, -numleaves =E<gt> $numleaves)
320 creates the network by reconstructing it from its mu-data stored in
321 \%mudata and with set of leaves in ("l1".."l$numleaves").
323 =back
325 =cut
327 sub new {
328 my ($pkg,@args)=@_;
329 my $self=$pkg->SUPER::new(@args);
330 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
331 $self->_rearrange([qw(ENEWICK
332 EDGES
333 LEAVES
334 NUMLEAVES
335 GRAPH
336 TREE
337 MUDATA)],@args);
338 bless($self,$pkg);
340 $self->build_from_eNewick($eNewick) if defined $eNewick;
341 $self->build_from_edges(@$edgesR) if defined $edgesR;
342 $self->build_from_graph($graph) if defined $graph;
343 $self->build_from_tree($tree) if defined $tree;
344 if ((! defined $leavesR) && (defined $numleaves)) {
345 my @leaves=map {"l$_"} (1..$numleaves);
346 $leavesR=\@leaves;
348 $self->build_from_mudata($mudataR,$leavesR)
349 if ((defined $mudataR) && (defined $leavesR));
350 return $self;
353 # Builders
355 sub build_from_edges {
356 my ($self,@edges)=@_;
357 my $graph=Graph::Directed->new();
358 $graph->add_edges(@edges);
359 $self->{graph}=$graph;
360 $self->recompute();
361 my $labels={};
362 foreach my $node ($self->nodes()) {
363 $labels->{$node}=$node;
365 $self->{labels}=$labels;
368 sub build_from_graph {
369 my ($self,$graph)=@_;
370 my $graphcp=$graph->copy();
371 $self->{graph}=$graphcp;
372 $self->recompute();
373 my $labels={};
374 foreach my $node ($self->nodes()) {
375 $labels->{$node}=$node;
377 $self->{labels}=$labels;
380 my $_eN_index;
382 sub build_from_eNewick {
383 my ($self,$string)=@_;
384 $_eN_index=0;
385 my $graph=Graph::Directed->new();
386 my $labels={};
387 my @blocks=split(/; */,$string);
388 foreach my $block (@blocks) {
389 my ($rt,$str)=get_root_and_subtree($block);
390 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
391 process_block($graph,$labels,$block,$rtid);
392 $labels->{$rtid}=$rtlbl.'';
394 $self->{graph}=$graph;
395 $self->{labels}=$labels;
396 $self->recompute();
399 sub process_block {
400 my ($graph,$labels,$block,$rtid)=@_;
401 my ($rt,$str)=get_root_and_subtree($block);
402 my @substrs=my_split($str);
403 foreach my $substr (@substrs) {
404 my ($subrt,$subblock)=get_root_and_subtree($substr);
405 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
406 get_label_type_id_length($subrt);
407 if (! $subrtlng eq '') {
408 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
410 else {
411 $graph->add_edges($rtid,$subrtid);
413 if (! $subrttype eq '') {
414 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
416 $subrtlbl.='';
417 # if (! $subrtlbl eq '') {
418 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
419 $labels->{$subrtid}=$subrtlbl;
420 } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
421 # error
422 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
425 if ($subblock ne "") {
426 process_block($graph,$labels,$subblock,$subrtid);
431 sub get_root_and_subtree {
432 my ($block)=@_;
433 my ($rt,$str)=("","");
434 # ($rt,$str)=split(/:|=/,$block);
435 ($rt,$str)=split(/=/,$block);
436 if ($rt eq $block) {
437 # try to look for root label at the end
438 my $pos=length($rt)-1;
439 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
440 $pos--;
442 $rt=substr($block,$pos+1,length($block)-$pos);
443 $str=substr($block,0,$pos+1);
445 $rt=trim($rt);
446 $str=trim($str);
447 return ($rt,$str);
450 sub get_label_type_id_length {
451 my ($string) = @_;
452 $string.='';
453 # print "$string\n";
454 if (index($string,'#')==-1) {
455 # no hybrid
456 my ($label,$length)=split(':',$string);
457 $label.='';
458 my $id;
459 if ((! defined $label) || ($label eq '')) {
460 # create id
461 $_eN_index++;
462 $id="T$_eN_index";
463 } else {
464 $id=$label;
466 return ($label,'',$id,$length);
468 else {
469 # hybrid
470 my ($label,$string2)=split('#',$string);
471 my ($typeid,$length)=split(':',$string2);
472 my $type=$typeid;
473 $type =~ s/\d//g;
474 my $id=$typeid;
475 $id =~ s/\D//g;
476 return ($label,$type,'#'.$id,$length);
480 sub trim
482 my ($string) = @_;
483 $string =~ s/^\s+//;
484 $string =~ s/\s+$//;
485 return $string;
488 sub my_split {
489 my ( $string ) = @_;
490 my $temp="";
491 my @substrings;
492 my $level=1;
493 for my $i ( 1 .. length( $string ) ) {
494 my $char=substr($string,$i,1);
495 if ($char eq "(") {
496 $level++;
498 if ($char eq ")") {
499 if ($level==1) {
500 push @substrings, $temp;
501 $temp="";
503 $level--;
505 if (($char eq ",") && ($level==1)) {
506 push @substrings, $temp;
507 $temp="";
508 $char="";
510 $temp = $temp.$char;
512 return @substrings;
515 sub build_from_mudata {
516 my ($self,$mus,$leavesR)=@_;
517 my $graph=Graph::Directed->new();
518 my @nodes=keys %{$mus};
519 my @leaves=@{$leavesR};
521 my %seen;
522 my @internal;
524 @seen{@leaves} = ();
526 foreach my $node (@nodes) {
527 push(@internal, $node) unless exists $seen{$node};
530 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
531 @nodes=(@internal,@leaves);
532 my $numnodes=@nodes;
533 for (my $i=0;$i<$numnodes;$i++) {
534 my $mu=$mus->{$nodes[$i]};
535 my $j=$i+1;
536 while ($mu->is_positive() && $j<$numnodes) {
537 if ($mu->geq_poset($mus->{$nodes[$j]})) {
538 $graph->add_edges(($nodes[$i],$nodes[$j]));
539 $mu = $mu - $mus->{$nodes[$j]};
541 $j++;
544 $self->build_from_graph($graph);
547 # sub relabel_tree {
548 # my ($tree)=@_;
549 # my $i=1;
550 # my $j=1;
551 # my $root=$tree->get_root_node();
552 # foreach my $node ($tree->get_nodes()) {
553 # if ($node == $root) {
554 # $node->{'_id'}="r";
556 # elsif (! $node->is_Leaf) {
557 # $node->{'_id'}="t$i";
558 # $i++;
560 # else {
561 # if ($node->{'_id'} eq "") {
562 # $node->{'_id'}="l$j";
563 # $j++;
567 # return $tree;
570 # sub build_subtree {
571 # my ($graph,$root)=@_;
572 # foreach my $child ($root->each_Descendent) {
573 # $graph->add_edge($root->id,$child->id);
574 # $graph=build_subtree($graph,$child);
576 # return $graph;
579 sub build_from_tree {
580 my ($self,$tree)=@_;
581 # relabel_tree($tree);
582 # my $treeroot=$tree->get_root_node;
583 # my $graph=Graph::Directed->new();
584 # $graph=build_subtree($graph,$treeroot);
585 # $self->build_from_graph($graph);
586 my $str;
587 my $io=IO::String->new($str);
588 my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
589 $treeio->write_tree($tree);
590 # print "intern: $str\n";
591 $self->build_from_eNewick($str);
594 sub recompute {
595 my ($self)=@_;
596 $self->throw("Graph is not DAG:".$self->{graph})
597 unless $self->{graph}->is_dag();
598 my @leaves=$self->{graph}->successorless_vertices();
599 @leaves=sort @leaves;
600 my $numleaves=@leaves;
601 my @roots=$self->{graph}->predecessorless_vertices();
602 my $numroots=@roots;
603 #$self->throw("Graph is not rooted") unless ($numroots == 1);
604 my @nodes=$self->{graph}->vertices();
605 @nodes=sort @nodes;
606 my $numnodes=@nodes;
607 foreach my $node (@nodes) {
608 if (! defined $self->{labels}->{$node}) {
609 $self->{labels}->{$node}='';
612 $self->{leaves}=\@leaves;
613 $self->{numleaves}=$numleaves;
614 $self->{roots}=\@roots;
615 $self->{numroots}=$numroots;
616 $self->{nodes}=\@nodes;
617 $self->{numnodes}=$numnodes;
618 $self->{mudata}={};
619 $self->{h}={};
620 $self->compute_height();
621 $self->compute_mu();
622 return $self;
625 # Hybridizing
627 sub is_attackable {
628 my ($self,$u1,$v1,$u2,$v2)=@_;
629 if ( $self->is_hybrid_node($v1) ||
630 $self->is_hybrid_node($v2) ||
631 $self->graph->is_reachable($v2,$u1) ||
632 (($u1 eq $u2)&&($v1 eq $v2)) ||
633 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
634 $self->graph->successors($u2)))
636 return 0;
638 return 1;
641 sub do_attack {
642 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
643 my $graph=$self->{graph};
644 $graph->delete_edge($u1,$v1);
645 $graph->delete_edge($u2,$v2);
646 $graph->add_edge($u1,"T$lbl");
647 $graph->add_edge("T$lbl",$v1);
648 $graph->add_edge($u2,"#H$lbl");
649 $graph->add_edge("#H$lbl",$v2);
650 $graph->add_edge("T$lbl","#H$lbl");
651 $self->build_from_graph($graph);
655 # Computation of mu-data
657 sub compute_mu {
658 my ($self)=@_;
659 my $graph=$self->{graph};
660 my $mudata=$self->{mudata};
661 my @leaves=@{$self->{leaves}};
662 my $numleaves=$self->{numleaves};
663 for (my $i=0;$i<$numleaves;$i++) {
664 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
665 $vec->[$i]=1;
666 $mudata->{$leaves[$i]}=$vec;
668 my $h=1;
669 while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
671 foreach my $u (@nodes) {
672 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
673 foreach my $son ($graph->successors($u)) {
674 $vec+=$mudata->{$son};
676 $mudata->{$u}=$vec;
678 $h++;
682 sub compute_height {
683 my ($self)=@_;
684 my $graph=$self->{graph};
685 my @leaves=@{$self->{leaves}};
686 foreach my $leaf (@leaves) {
687 $self->{h}->{$leaf}=0;
689 my $h=0;
690 while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
691 @{$self->{nodes}} )
693 foreach my $node (@nodes) {
694 foreach my $parent ($graph->predecessors($node)) {
695 $self->{h}->{$parent}=$h+1;
698 $h++;
702 # Tests
704 =head2 is_leaf
706 Title : is_leaf
707 Usage : my $b=$net->is_leaf($u)
708 Function: tests if $u is a leaf in $net
709 Returns : boolean
710 Args : scalar
712 =cut
714 sub is_leaf {
715 my ($self,$node)=@_;
716 if ($self->{graph}->out_degree($node) == 0) {return 1;}
717 return 0;
720 =head2 is_root
722 Title : is_root
723 Usage : my $b=$net->is_root($u)
724 Function: tests if $u is the root of $net
725 Returns : boolean
726 Args : scalar
728 =cut
730 sub is_root {
731 my ($self,$node)=@_;
732 if ($self->{graph}->in_degree($node) == 0) {return 1;}
733 return 0;
736 =head2 is_tree_node
738 Title : is_tree_node
739 Usage : my $b=$net->is_tree_node($u)
740 Function: tests if $u is a tree node in $net
741 Returns : boolean
742 Args : scalar
744 =cut
746 sub is_tree_node {
747 my ($self,$node)=@_;
748 if ($self->{graph}->in_degree($node) <= 1) {return 1;}
749 return 0;
752 =head2 is_hybrid_node
754 Title : is_hybrid_node
755 Usage : my $b=$net->is_hybrid_node($u)
756 Function: tests if $u is a hybrid node in $net
757 Returns : boolean
758 Args : scalar
760 =cut
762 sub is_hybrid_node {
763 my ($self,$node)=@_;
764 if ($self->{graph}->in_degree($node) > 1) {return 1;}
765 return 0;
768 sub has_tree_child {
769 # has_tree_child(g,u) returns 1 if u has a tree child in graph g
770 # and 0 otherwise
771 my $g=shift(@_);
772 my $node=shift(@_);
773 my @Sons=$g->successors($node);
774 foreach my $son (@Sons) {
775 if ($g->in_degree($son)==1) {
776 return 1;
779 return 0;
782 =head2 is_tree_child
784 Title : is_tree_child
785 Usage : my $b=$net->is_tree_child()
786 Function: tests if $net is a Tree-Child phylogenetic network
787 Returns : boolean
788 Args : Bio::PhyloNetwork
790 =cut
792 sub is_tree_child {
793 my ($self)=@_;
794 if (defined $self->{is_tree_child}) {
795 return $self->{is_tree_child};
797 $self->{is_tree_child}=0;
798 my $graph=$self->{graph};
799 foreach my $node (@{$self->{nodes}}) {
800 return 0 unless ($graph->out_degree($node)==0 ||
801 has_tree_child($graph,$node));
803 $self->{is_tree_child}=1;
804 return 1;
807 # Accessors
809 =head2 nodes
811 Title : nodes
812 Usage : my @nodes=$net->nodes()
813 Function: returns the set of nodes of $net
814 Returns : array
815 Args : none
817 =cut
819 sub nodes {
820 my ($self)=@_;
821 return @{$self->{nodes}};
824 =head2 leaves
826 Title : leaves
827 Usage : my @leaves=$net->leaves()
828 Function: returns the set of leaves of $net
829 Returns : array
830 Args : none
832 =cut
834 sub leaves {
835 my ($self)=@_;
836 return @{$self->{leaves}};
839 =head2 roots
841 Title : roots
842 Usage : my @roots=$net->roots()
843 Function: returns the set of roots of $net
844 Returns : array
845 Args : none
847 =cut
849 sub roots {
850 my ($self)=@_;
851 return @{$self->{roots}};
854 =head2 internal_nodes
856 Title : internal_nodes
857 Usage : my @internal_nodes=$net->internal_nodes()
858 Function: returns the set of internal nodes of $net
859 Returns : array
860 Args : none
862 =cut
864 sub internal_nodes {
865 my ($self)=@_;
866 return grep {! $self->is_leaf($_)} $self->nodes();
869 =head2 tree_nodes
871 Title : tree_nodes
872 Usage : my @tree_nodes=$net->tree_nodes()
873 Function: returns the set of tree nodes of $net
874 Returns : array
875 Args : none
877 =cut
879 sub tree_nodes {
880 my ($self)=@_;
881 return grep {$self->is_tree_node($_)} $self->nodes();
884 =head2 hybrid_nodes
886 Title : hybrid_nodes
887 Usage : my @hybrid_nodes=$net->hybrid_nodes()
888 Function: returns the set of hybrid nodes of $net
889 Returns : array
890 Args : none
892 =cut
894 sub hybrid_nodes {
895 my ($self)=@_;
896 return grep {$self->is_hybrid_node($_)} $self->nodes();
899 =head2 graph
901 Title : graph
902 Usage : my $graph=$net->graph()
903 Function: returns the underlying graph of $net
904 Returns : Graph::Directed
905 Args : none
907 =cut
909 sub graph {
910 my ($self)=@_;
911 return $self->{graph};
914 =head2 edges
916 Title : edges
917 Usage : my @edges=$net->edges()
918 Function: returns the set of edges of $net
919 Returns : array
920 Args : none
922 Each element in the array is an anonimous array whose first element is the
923 head of the edge and the second one is the tail.
925 =cut
927 sub edges {
928 my ($self)=@_;
929 return $self->{graph}->edges();
932 =head2 tree_edges
934 Title : tree_edges
935 Usage : my @tree_edges=$net->tree_edges()
936 Function: returns the set of tree edges of $net
937 (those whose tail is a tree node)
938 Returns : array
939 Args : none
941 =cut
943 sub tree_edges {
944 my ($self)=@_;
945 return grep {$self->is_tree_node($_->[1])} $self->edges();
948 =head2 hybrid_edges
950 Title : hybrid_edges
951 Usage : my @hybrid_edges=$net->hybrid_edges()
952 Function: returns the set of hybrid edges of $net
953 (those whose tail is a hybrid node)
954 Returns : array
955 Args : none
957 =cut
959 sub hybrid_edges {
960 my ($self)=@_;
961 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
964 =head2 explode
966 Title : explode
967 Usage : my @trees=$net->explode()
968 Function: returns the representation of $net by a set of
969 Bio::Tree:Tree objects
970 Returns : array
971 Args : none
973 =cut
975 sub explode {
976 my ($self)=@_;
977 my @trees;
978 $self->explode_rec(\@trees);
979 return @trees;
982 sub explode_rec {
983 my ($self,$trees)=@_;
984 my @h = $self->hybrid_nodes;
985 if (scalar @h) {
986 my $v = shift @h;
987 for my $u ($self->{graph}->predecessors($v)) {
988 $self->{graph}->delete_edge($u,$v);
989 $self->explode_rec($trees);
990 $self->{graph}->add_edge($u,$v);
992 } else {
993 my $io = IO::String->new($self->eNewick);
994 my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
995 my $tree = $treeio->next_tree;
996 $tree->contract_linear_paths;
997 push @{$trees}, $tree;
1001 =head2 mudata
1003 Title : mudata
1004 Usage : my %mudata=$net->mudata()
1005 Function: returns the representation of $net by its mu-data
1006 Returns : hash
1007 Args : none
1009 $net-E<gt>mudata() returns a hash with keys the nodes of $net and each value is a
1010 muVector object holding its mu-vector.
1012 =cut
1014 sub mudata {
1015 my ($self)=@_;
1016 return %{$self->{mudata}};
1019 sub mudata_node {
1020 my ($self,$u)=@_;
1021 return $self->{mudata}{$u};
1024 =head2 heights
1026 Title : heights
1027 Usage : my %heights=$net->heights()
1028 Function: returns the heights of the nodes of $net
1029 Returns : hash
1030 Args : none
1032 $net-E<gt>heights() returns a hash with keys the nodes of $net and each value
1033 is its height.
1035 =cut
1037 sub heights {
1038 my ($self)=@_;
1039 return %{$self->{h}};
1042 sub height_node {
1043 my ($self,$u)=@_;
1044 return $self->{h}{$u};
1047 =head2 mu_distance
1049 Title : mu_distance
1050 Usage : my $dist=$net1->mu_distance($net2)
1051 Function: Computes the mu-distance between the networks $net1 and $net2 on
1052 the same set of leaves
1053 Returns : scalar
1054 Args : Bio::PhyloNetwork
1056 =cut
1058 sub mu_distance {
1059 my ($net1,$net2)=@_;
1060 my @nodes1=$net1->nodes;
1061 my @nodes2=$net2->nodes;
1062 my $comp = Array::Compare->new;
1063 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1064 unless $comp->compare($net1->{leaves},$net2->{leaves});
1065 $net1->warn("Not a tree-child phylogenetic network")
1066 unless $net1->is_tree_child();
1067 $net2->warn("Not a tree-child phylogenetic network")
1068 unless $net2->is_tree_child();
1069 my @leaves=@{$net1->{leaves}};
1070 my %matched1;
1071 my %matched2;
1072 OUTER: foreach my $node1 (@nodes1) {
1073 foreach my $node2 (@nodes2) {
1074 if (
1075 (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1076 ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
1078 $matched1{$node1}=$node2;
1079 $matched2{$node2}=$node1;
1080 next OUTER;
1084 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1087 =head2 mu_distance_generalized
1089 Title : mu_distance_generalized
1090 Usage : my $dist=$net1->mu_distance($net2)
1091 Function: Computes the mu-distance between the topological restrictions of
1092 networks $net1 and $net2 on its common set of leaves
1093 Returns : scalar
1094 Args : Bio::PhyloNetwork
1096 =cut
1098 sub mu_distance_generalized {
1099 my ($net1,$net2)=@_;
1100 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1101 return $netr1->mu_distance($netr2);
1104 # mudata_string (code mu_data in a string; useful for isomorphism testing)
1106 sub mudata_string_node {
1107 my ($self,$u)=@_;
1108 return $self->{mudata}->{$u}->display();
1111 sub mudata_string {
1112 my ($self)=@_;
1113 return $self->{mudata_string} if defined $self->{mudata_string};
1114 my @internal=$self->internal_nodes;
1115 my $mus=$self->{mudata};
1116 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
1117 my $str="";
1118 foreach my $node (@internal) {
1119 $str=$str.$self->mudata_string_node($node);
1121 $self->{mudata_string}=$str;
1122 return $str;
1125 sub is_mu_isomorphic {
1126 my ($net1,$net2)=@_;
1127 return ($net1->mudata_string() eq $net2->mudata_string());
1130 # tripartitions
1132 sub compute_tripartition_node {
1133 my ($self,$u)=@_;
1134 $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1135 unless ($self->{numroots} == 1);
1136 my $root=$self->{roots}->[0];
1137 my $graph=$self->{graph};
1138 my $graphPruned=$graph->copy();
1139 $graphPruned->delete_vertex($u);
1140 my $tripartition="";
1141 foreach my $leaf (@{$self->{leaves}}) {
1142 my $type;
1143 if ($graph->is_reachable($u,$leaf)) {
1144 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
1145 else {$type="A";}
1147 else {$type="C";}
1148 $tripartition .= $type;
1150 $self->{tripartitions}->{$u}=$tripartition;
1153 sub compute_tripartitions {
1154 my ($self)=@_;
1155 foreach my $node (@{$self->{nodes}}) {
1156 $self->compute_tripartition_node($node);
1160 =head2 tripartitions
1162 Title : tripartitions
1163 Usage : my %tripartitions=$net->tripartitions()
1164 Function: returns the set of tripartitions of $net
1165 Returns : hash
1166 Args : none
1168 $net-E<gt>tripartitions() returns a hash with keys the nodes of $net and each value
1169 is a string representing the tripartition of the leaves induced by the node.
1170 A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1171 the set B(u), the second one in C(u), the third one in A(u), and so on.
1173 =cut
1175 sub tripartitions {
1176 my ($self)=@_;
1177 $self->compute_tripartitions() unless defined $self->{tripartitions};
1178 return %{$self->{tripartitions}};
1181 # to do: change to tri_distance and test for TC and time-cons
1183 sub tripartition_error {
1184 my ($net1,$net2)=@_;
1185 my $comp = Array::Compare->new;
1186 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1187 unless $comp->compare($net1->{leaves},$net2->{leaves});
1188 $net1->warn("Not a tree-child phylogenetic network")
1189 unless $net1->is_tree_child();
1190 $net2->warn("Not a tree-child phylogenetic network")
1191 unless $net2->is_tree_child();
1192 $net1->warn("Not a time-consistent network")
1193 unless $net1->is_time_consistent();
1194 $net2->warn("Not a time-consistent network")
1195 unless $net2->is_time_consistent();
1196 $net1->compute_tripartitions() unless defined $net1->{tripartitions};
1197 $net2->compute_tripartitions() unless defined $net2->{tripartitions};
1198 my @edges1=$net1->{graph}->edges();
1199 my @edges2=$net2->{graph}->edges();
1200 my ($FN,$FP)=(0,0);
1201 foreach my $edge1 (@edges1) {
1202 my $matched=0;
1203 foreach my $edge2 (@edges2) {
1204 if ($net1->{tripartitions}->{$edge1->[1]} eq
1205 $net2->{tripartitions}->{$edge2->[1]}) {
1206 $matched=1;
1207 last;
1210 if (! $matched) {$FN++;}
1212 foreach my $edge2 (@edges2) {
1213 my $matched=0;
1214 foreach my $edge1 (@edges1) {
1215 if ($net1->{tripartitions}->{$edge1->[1]} eq
1216 $net2->{tripartitions}->{$edge2->[1]}) {
1217 $matched=1;
1218 last;
1221 if (! $matched) {$FP++;}
1223 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1226 # Time-consistency
1228 # to do: add weak time consistency
1230 =head2 is_time_consistent
1232 Title : is_time_consistent
1233 Usage : my $b=$net->is_time_consistent()
1234 Function: tests if $net is (strong) time-consistent
1235 Returns : boolean
1236 Args : none
1238 =cut
1240 sub is_time_consistent {
1241 my ($self)=@_;
1242 $self->compute_temporal_representation()
1243 unless exists $self->{has_temporal_representation};
1244 return $self->{has_temporal_representation};
1247 =head2 temporal_representation
1249 Title : temporal_representation
1250 Usage : my %time=$net->temporal_representation()
1251 Function: returns a hash containing a temporal representation of $net, or 0
1252 if $net is not time-consistent
1253 Returns : hash
1254 Args : none
1256 =cut
1258 sub temporal_representation {
1259 my ($self)=@_;
1260 if ($self->is_time_consistent) {
1261 return %{$self->{temporal_representation}};
1263 return 0;
1266 sub compute_temporal_representation {
1267 my ($self)=@_;
1268 my $quotient=Graph::Directed->new();
1269 my $classes=find_classes($self);
1270 my %repr;
1271 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
1272 foreach my $e ($self->tree_edges()) {
1273 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1275 my %temp;
1276 my $depth=0;
1277 while ($quotient->vertices()) {
1278 if (my @svs=$quotient->predecessorless_vertices()) {
1279 foreach my $sv (@svs) {
1280 $temp{$sv}=$depth;
1282 $quotient->delete_vertices(@svs);
1283 } else {
1284 return 0;
1286 $depth++;
1288 foreach my $node (@{$self->{nodes}}) {
1289 $temp{$node}=$temp{$repr{$node}}
1291 $self->{temporal_representation}=\%temp;
1292 $self->{has_temporal_representation}=1;
1295 sub find_classes {
1296 my ($self)=@_;
1297 my $classes={};
1298 map {$classes->{$_}=[$_]} $self->nodes();
1299 foreach my $e ($self->hybrid_edges()) {
1300 $classes=join_classes($classes,$e->[0],$e->[1]);
1302 return $classes;
1305 sub join_classes {
1306 my ($classes,$u,$v)=@_;
1307 my @clu=@{$classes->{$u}};
1308 my @clv=@{$classes->{$v}};
1309 my @cljoin=(@clu,@clv);
1310 map {$classes->{$_}=\@cljoin} @cljoin;
1311 return $classes;
1314 # alignment
1316 =head2 contract_elementary
1319 Title : contract_elementary
1320 Usage : my ($contracted,$blocks)=$net->contract_elementary();
1321 Function: Returns the network $contracted, obtained by contracting elementary
1322 paths of $net into edges. The reference $blocks points to a hash
1323 where, for each node of $contracted, gives the corresponding nodes
1324 of $net that have been deleted.
1325 Returns : Bio::PhyloNetwork,reference to hash
1326 Args : none
1328 =cut
1330 sub contract_elementary {
1331 my ($self)=@_;
1333 my $contracted=$self->graph->copy();
1334 my @nodes=$self->nodes();
1335 my $mus=$self->{mudata};
1336 my $hs=$self->{h};
1337 my %blocks;
1338 foreach my $u (@nodes) {
1339 $blocks{$u}=[$u];
1341 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
1342 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1343 $hs->{$b} <=> $hs->{$a}} @elementary;
1344 foreach my $elem (@elementary) {
1345 my @children=$contracted->successors($elem);
1346 my $child=$children[0];
1347 if ($contracted->in_degree($elem) == 1) {
1348 my @parents=$contracted->predecessors($elem);
1349 my $parent=$parents[0];
1350 $contracted->add_edge($parent,$child);
1352 $contracted->delete_vertex($elem);
1353 my @blch=@{$blocks{$child}};
1354 my @blem=@{$blocks{$elem}};
1355 $blocks{$child}=[@blem,@blch];
1356 delete $blocks{$elem};
1358 my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
1359 return $contr,\%blocks;
1362 =head2 optimal_alignment
1364 Title : optimal_alignment
1365 Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1366 Function: returns the total weight of an optimal alignment,
1367 the alignment itself, and partial weights
1368 between the networks $net1 and $net2 on the same set of leaves.
1369 An optional argument allows to use the Manhattan (default) or the
1370 Hamming distance between mu-vectors.
1371 Returns : scalar,reference to hash,reference to hash
1372 Args : Bio::PhyloNetwork,
1373 -metric => string (optional)
1375 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1377 =cut
1379 sub optimal_alignment {
1380 my ($net1,$net2,%params)=@_;
1382 my ($net1cont,$blocks1)=contract_elementary($net1);
1383 my ($net2cont,$blocks2)=contract_elementary($net2);
1384 my ($wc,$alignc,$weightc)=
1385 optimal_alignment_noelementary($net1cont,$net2cont,%params);
1386 my %alignment=();
1387 my $totalweigth=0;
1388 my %weigths=();
1389 foreach my $u1 (keys %$alignc) {
1390 my $u2=$alignc->{$u1};
1391 my @block1=@{$blocks1->{$u1}};
1392 my @block2=@{$blocks2->{$u2}};
1393 while (@block1 && @block2) {
1394 my $u1dc=pop @block1;
1395 my $u2dc=pop @block2;
1396 $alignment{$u1dc}=$u2dc;
1397 $weigths{$u1dc}=$weightc->{$u1};
1398 $totalweigth+=$weigths{$u1dc};
1401 return $totalweigth,\%alignment,\%weigths;
1404 sub optimal_alignment_noelementary {
1405 my ($net1,$net2,%params)=@_;
1407 my $comp = Array::Compare->new;
1408 $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1409 unless $comp->compare($net1->{leaves},$net2->{leaves});
1410 my $distance;
1411 if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
1412 $distance='Hamming';
1413 } else {
1414 $distance='Manhattan';
1416 my $numleaves=$net1->{numleaves};
1417 my @nodes1=$net1->internal_nodes();
1418 my @nodes2=$net2->internal_nodes();
1419 my $numnodes1=@nodes1;
1420 my $numnodes2=@nodes2;
1421 my @matrix=();
1422 for (my $i=0;$i<$numnodes1;$i++) {
1423 my @row=();
1424 for (my $j=0;$j<$numnodes2;$j++) {
1425 push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1427 push @matrix,\@row;
1429 my @alignment=();
1430 assign(\@matrix,\@alignment);
1431 my %alignmenthash;
1432 my %weighthash;
1433 my $totalw=0;
1434 foreach my $leaf (@{$net1->{leaves}}) {
1435 $alignmenthash{$leaf}=$leaf;
1436 $weighthash{$leaf}=0;
1438 for (my $i=0;$i<$numnodes1;$i++) {
1439 if (defined $nodes2[$alignment[$i]]) {
1440 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1441 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1442 $totalw += $matrix[$i][$alignment[$i]];
1445 return $totalw,\%alignmenthash,\%weighthash;
1448 =head2 optimal_alignment_generalized
1450 Title : optimal_alignment_generalized
1451 Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1452 Function: returns the wieght of an optimal alignment, and the alignment itself,
1453 between the topological restriction of the networks $net1 and $net2
1454 on the set of common leaves.
1455 An optional argument allows to use the Manhattan (default) or the
1456 Hamming distance between mu-vectors.
1457 Returns : scalar,hash
1458 Args : Bio::PhyloNetwork,
1459 -metric => string (optional)
1461 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1463 =cut
1465 sub optimal_alignment_generalized {
1466 my ($net1,$net2,%params)=@_;
1467 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1468 return $netr1->optimal_alignment($netr2,%params);
1471 sub weight {
1472 my ($net1,$v1,$net2,$v2,$distance)=@_;
1473 my $w;
1474 if (! defined $distance) {
1475 $distance='Manhattan';
1477 if ($distance eq 'Hamming') {
1478 $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
1479 } else {
1480 $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
1482 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
1483 ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1486 $w +=1/(2*$net1->{numleaves});
1488 return $w;
1492 =head2 topological_restriction
1494 Title : topological_restriction
1495 Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1496 Function: returns the topological restriction of $net1 and $net2 on its
1497 common set of leaves
1498 Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1499 Args : Bio::PhyloNetwork
1501 =cut
1503 sub topological_restriction {
1504 my ($net1,$net2)=@_;
1506 my @leaves1=$net1->leaves();
1507 my @leaves2=$net2->leaves();
1508 my $numleaves1=scalar @leaves1;
1509 my $numleaves2=scalar @leaves2;
1510 my %position1;
1511 for (my $i=0; $i<$numleaves1; $i++) {
1512 $position1{$leaves1[$i]}=$i;
1514 my %position2;
1515 my @commonleaves=();
1516 for (my $j=0; $j<$numleaves2; $j++) {
1517 if (defined $position1{$leaves2[$j]}) {
1518 push @commonleaves,$leaves2[$j];
1519 $position2{$leaves2[$j]}=$j;
1522 my $graphred1=$net1->{graph}->copy();
1523 my $graphred2=$net2->{graph}->copy();
1524 OUTER1:
1525 foreach my $u ($graphred1->vertices()) {
1526 my $mu=$net1->mudata_node($u);
1527 foreach my $leaf (@commonleaves) {
1528 if ($mu->[$position1{$leaf}]>0) {
1529 next OUTER1;
1532 $graphred1->delete_vertex($u);
1534 OUTER2:
1535 foreach my $u ($graphred2->vertices()) {
1536 my $mu=$net2->mudata_node($u);
1537 foreach my $leaf (@commonleaves) {
1538 if ($mu->[$position2{$leaf}]>0) {
1539 next OUTER2;
1542 $graphred2->delete_vertex($u);
1544 my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
1545 my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
1546 return ($netr1,$netr2);
1549 # Functions for eNewick representation
1551 =head2 eNewick
1553 Title : eNewick
1554 Usage : my $str=$net->eNewick()
1555 Function: returns the eNewick representation of $net without labeling
1556 internal tree nodes
1557 Returns : string
1558 Args : none
1560 =cut
1562 sub eNewick {
1563 my ($self)=@_;
1564 my $str="";
1565 my $seen={};
1566 foreach my $root ($self->roots()) {
1567 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1569 return $str;
1572 sub eNewick_aux {
1573 my ($self,$node,$seen,$parent)=@_;
1574 my $str='';
1575 if ($self->is_leaf($node) ||
1576 (defined $seen->{$node}) )
1578 $str=make_label($self,$parent,$node);
1580 else {
1581 $seen->{$node}=1;
1582 my @sons=$self->{graph}->successors($node);
1583 $str="(";
1584 foreach my $son (@sons) {
1585 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1587 chop($str);
1588 $str.=")".make_label($self,$parent,$node);
1590 return $str;
1593 sub make_label {
1594 my ($self,$parent,$node)=@_;
1595 my $str='';
1596 if ($self->is_hybrid_node($node)) {
1597 my $lbl=$self->{labels}->{$node};
1598 if ($lbl =~ /#/) {
1599 $lbl='';
1601 $str.=$lbl; #$self->{labels}->{$node};
1602 $str.='#';
1603 if ((defined $parent) &&
1604 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1605 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1607 $str.=substr $node,1;
1608 } else {
1609 $str.=$self->{labels}->{$node};
1611 if ((defined $parent) &&
1612 ($self->graph->has_edge_weight($parent,$node))) {
1613 $str.=":".$self->graph->get_edge_weight($parent,$node);
1615 return $str;
1618 =head2 eNewick_full
1620 Title : eNewick_full
1621 Usage : my $str=$net->eNewick_full()
1622 Function: returns the eNewick representation of $net labeling
1623 internal tree nodes
1624 Returns : string
1625 Args : none
1627 =cut
1629 sub eNewick_full {
1630 my ($self)=@_;
1631 my $str="";
1632 my $seen={};
1633 foreach my $root ($self->roots()) {
1634 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1636 return $str;
1639 sub eNewick_full_aux {
1640 my ($self,$node,$seen,$parent)=@_;
1641 my $str='';
1642 if ($self->is_leaf($node) ||
1643 (defined $seen->{$node}) )
1645 $str=make_label_full($self,$parent,$node);
1647 else {
1648 $seen->{$node}=1;
1649 my @sons=$self->{graph}->successors($node);
1650 $str="(";
1651 foreach my $son (@sons) {
1652 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1654 chop($str);
1655 $str.=")".make_label_full($self,$parent,$node);
1657 return $str;
1660 sub make_label_full {
1661 my ($self,$parent,$node)=@_;
1662 my $str='';
1663 if ($self->is_hybrid_node($node)) {
1664 my $lbl=$self->{labels}->{$node};
1665 if ($lbl =~ /#/) {
1666 $lbl='';
1668 $str.=$lbl; #$self->{labels}->{$node};
1669 $str.='#';
1670 if ((defined $parent) &&
1671 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1672 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1674 $str.=substr $node,1;
1675 } else {
1676 if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
1677 $str.=$self->{labels}->{$node};
1679 else {
1680 $str.=$node;
1683 if ((defined $parent) &&
1684 ($self->graph->has_edge_weight($parent,$node))) {
1685 $str.=":".$self->graph->get_edge_weight($parent,$node);
1687 return $str;
1690 # sub eNewick_full {
1691 # my ($self)=@_;
1692 # my $str="";
1693 # my $seen={};
1694 # foreach my $root ($self->roots()) {
1695 # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1697 # return $str;
1700 # sub eNewick_full_aux {
1701 # my ($self,$node,$seen,$parent)=@_;
1702 # my $str;
1703 # if ($self->is_leaf($node) ||
1704 # (defined $seen->{$node}) )
1706 # if ($self->is_hybrid_node($node)) {
1707 # my $tag=substr $node,1;
1708 # if ((defined $parent) &&
1709 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1710 # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1711 # } else {
1712 # $str=$node;
1714 # } else {
1715 # $str=$node;
1718 # else {
1719 # $seen->{$node}=1;
1720 # my @sons=$self->{graph}->successors($node);
1721 # $str="(";
1722 # foreach my $son (@sons) {
1723 # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1725 # chop($str);
1726 # if ($self->is_hybrid_node($node)) {
1727 # my $tag=substr $node,1;
1728 # if ((defined $parent) &&
1729 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1730 # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1731 # } else {
1732 # $str.=")$node";
1734 # } else {
1735 # $str.=")$node";
1738 # if ((defined $parent) &&
1739 # ($self->graph->has_edge_weight($parent,$node))) {
1740 # $str.=":".$self->graph->get_edge_weight($parent,$node);
1742 # return $str;
1746 # displaying data
1748 use overload '""' => \&display;
1750 =head2 display
1752 Title : display
1753 Usage : my $str=$net->display()
1754 Function: returns a string containing all the available information on $net
1755 Returns : string
1756 Args : none
1758 =cut
1760 sub display {
1761 my ($self)=@_;
1762 my $str="";
1763 my $graph=$self->{graph};
1764 my @leaves=$self->leaves();
1765 my @nodes=@{$self->{nodes}};
1766 $str.= "Leaves:\t@leaves\n";
1767 $str.= "Nodes:\t@nodes\n";
1768 $str.= "Graph:\t$graph\n";
1769 $str.= "eNewick:\t".$self->eNewick()."\n";
1770 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1771 $str.= "Mu-data and heights:\n";
1772 foreach my $node (@nodes) {
1773 $str.= "v=$node: ";
1774 if (exists $self->{labels}->{$node}) {
1775 $str.="\tlabel=".$self->{labels}->{$node}.",";
1776 } else {
1777 $str.="\tlabel=(none),";
1779 $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
1781 if (exists $self->{has_temporal_representation}) {
1782 $str.= "Temporal representation:\n";
1783 if ($self->{has_temporal_representation}) {
1784 foreach my $node (@nodes) {
1785 $str.= "v=$node; ";
1786 $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
1788 } else {
1789 $str.= "Does not exist.\n";
1792 if (exists $self->{tripartitions}) {
1793 $str.= "Tripartitions:\n";
1794 foreach my $node (@nodes) {
1795 $str.= "v=$node; ";
1796 $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
1799 return $str;