spiced_seq() may need LWP
[bioperl-live.git] / Bio / PhyloNetwork.pm
blob212f40d1e50629b8483cd5b916d030d5782a8628
2 # Module for Bio::PhyloNetwork
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Gabriel Cardona <gabriel(dot)cardona(at)uib(dot)es>
8 # Copyright Gabriel Cardona, Gabriel Valiente
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
18 =head1 SYNOPSIS
20 use Bio::PhyloNetwork;
22 # Create a PhyloNetwork object from a eNewick string
23 my $net1=Bio::PhyloNetwork->new(
24 -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
27 # Print all available data
28 print $net1;
30 # Rebuild $net1 from its mu_data
31 my %mudata=$net1->mudata();
32 my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
33 print $net2;
34 print "d=".$net1->mu_distance($net2)."\n";
36 # Get another one and compute distance
37 my $net3=Bio::PhyloNetwork->new(
38 -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
40 print "d=".$net1->mu_distance($net3)."\n";
42 # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
43 my ($weight,%alignment)=$net1->optimal_alignment($net3);
44 print "weight:$weight\n";
45 foreach my $node1 (keys %alignment) {
46 print "$node1 => ".$alignment{$node1}."\n";
48 # ...or the Hamming distance
50 my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
51 print "weight:$weightH\n";
52 foreach my $node1 (keys %alignmentH) {
53 print "$node1 => ".$alignmentH{$node1}."\n";
56 # Test for time consistency of $net1
57 if ($net1->is_time_consistent) {
58 print "net1 is time consistent\n"
60 else {
61 print "net1 is not time consistent\n"
64 # create a network from the list of edges
65 my $net4=Bio::PhyloNetwork->new(-edges=>
66 [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
68 # Test for time consistency of $net3
69 if ($net4->is_time_consistent) {
70 print "net4 is time consistent\n"
72 else {
73 print "net4 is not time consistent\n"
76 # And print all information on net4
77 print $net4;
79 # Compute some tripartitions
80 my %triparts=$net1->tripartitions();
82 # Now these are stored
83 print $net1;
85 # And can compute the tripartition error
86 print "dtr=".$net1->tripartition_error($net3)."\n";
88 =head1 DESCRIPTION
90 =head2 Phylogenetic Networks
92 This is a module to work with phylogenetic networks. Phylogenetic networks
93 have been studied over the last years as a richer model of the evolutionary
94 history of sets of organisms than phylogenetic trees, because they take not
95 only mutation events but also recombination and horizontal gene transfer
96 events into account.
98 The natural model for describing the evolutionary
99 history of a set of sequences under recombination events is a DAG, hence
100 this package relies on the package Graph::Directed to represent the
101 underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
102 for formal definitions related to phylogenetic networks.
104 =head2 eNewick description
106 With this package, phylogenetic networks can be given by its eNewick
107 string. This description appeared in other packages related to
108 phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
109 packages use different descriptions. The Bio::PhyloNetwork
110 package allows both of them, but uses the second one in its output.
112 The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
113 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
114 nodes; let each of the first k copies be a child of one of the u_1,...,u_k
115 (one for each) and have no children (hence we will have k extra leaves);
116 as for the last copy, let it have no parents and have v_1,...,v_l be its
117 children. This way we get a forest; each of the trees will be rooted at either
118 one root of the phylogenetic network or a hybrid node of it; the set of leaves
119 (of the whole forest) will be the set of leaves of the original network
120 together with the set of hybrid nodes (each of them repeated as many times
121 as its in-degree). Then, the eNewick representation of the phylogenetic network
122 will be the Newick representation of all the trees in the obtained forest,
123 each of them with its root labeled.
125 The second approach [NetGen] goes as follows: For each hybrid node H, say with
126 parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
127 nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
128 its children; let the other copies be child of u_2,...,u_k (one for each)
129 and have no children. This way, we get a tree whose set of leaves is the
130 set of leaves of the original network together with the set of hybrid nodes
131 (possibly repeated). Then the Newick string of the obtained tree (note that
132 some internal nodes will be labeled and some leaves will be repeated) is
133 the eNewick string of the phylogenetic network.
135 For example, consider the network depicted below:
141 / \ / \
142 1 \ / 3
147 If the first approach is taken, we get the forest:
153 / \ / \
154 1 H H 3
160 Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
162 As for the second one, one gets the tree:
168 / \ / \
169 1 H | 3
174 Hence, the eNewick string is '((1,H),((2)H,3))r;'.
176 Note: when rooting a tree, this package allows the notations
177 '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
178 the first one is used when writing eNewick strings.
180 =head2 Tree-child phylogenetic networks
182 Tree-child (TC) phylogenetic networks are a special class of phylogenetic
183 networks for which a distance, called mu-distance, is defined [CRV2]
184 based on certain data (mu-data) associated to every node.
185 Moreover, this distance extends the
186 Robinson-Foulds on phylogenetic trees. This package allows testing for a
187 phylogenetic network if it is TC and computes mu-distances between networks
188 over the same set of leaves.
190 Moreover, the mu-data allows one to define the optimal
191 (in some precise sense) alignment between networks
192 over the same set of leaves. This package also computes this optimal alignment.
194 =head2 Tripartitions
196 Although tripartitions (see [CRV1] and the references therein) do not allow
197 to define distances, this package outputs tripartitions and computes a weak
198 form of the tripartition error.
200 =head2 Time-consistency
202 Another useful property of Phylogenetic Networks that appears in the literature
203 is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
204 network admits a temporal representation if it can be drawn in such a way
205 that tree arcs (those whose end is a tree node) are inclined downwards, while
206 hybridization arcs (those whose end is a hybrid node) are horizontal.
207 This package checks for time-consistency and, if so, a temporal representation
208 is provided.
210 =head1 AUTHOR
212 Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
213 Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
215 =head1 SEE ALSO
217 =over
219 =item [CRV1]
221 G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
222 discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
224 =item [CRV2]
226 G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
227 Tree-Child Phylogenetic Networks. Preprint.
229 =item [NetGen]
231 M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
232 with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
234 =item [PhyloNet]
236 PhyloNet: "Phylogenetic Networks Toolkit".
237 http://bioinfo.cs.rice.edu/phylonet
239 =item [BSS]
241 M. Baroni, C. Semple, and M. Steel. Hybrids in Real
242 Time. Syst. Biol. 55(1):46-56, 2006
244 =back
246 =head1 APPENDIX
248 The rest of the documentation details each of the object methods.
250 =cut
252 package Bio::PhyloNetwork;
254 use strict;
255 use warnings;
257 use base qw(Bio::Root::Root);
259 use Bio::PhyloNetwork::muVector;
260 use Graph::Directed;
261 use Bio::TreeIO;
262 use Bio::Tree::Node;
263 use IO::String;
264 use Array::Compare;
265 use Algorithm::Munkres;
267 # Creator
269 =head2 new
271 Title : new
272 Usage : my $obj = new Bio::PhyloNetwork();
273 Function: Creates a new Bio::PhyloNetwork object
274 Returns : Bio::PhyloNetwork
275 Args : none
277 -eNewick => string
279 -graph => Graph::Directed object
281 -edges => reference to an array
283 -tree => Bio::Tree::Tree object
285 -mudata => reference to a hash,
286 -leaves => reference to an array
288 -mudata => reference to a hash,
289 -numleaves => integer
291 Returns a Bio::PhyloNetwork object, created according to the data given:
293 =over 3
295 =item new()
297 creates an empty network.
299 =item new(-eNewick =E<gt> $str)
301 creates the network whose
302 Extended Newick representation (see description above) is the string $str.
304 =item new(-graph =E<gt> $graph)
306 creates the network with underlying
307 graph given by the Graph::Directed object $graph
309 =item new(-tree =E<gt> $tree)
311 creates a network as a copy of the
312 Bio::Tree::Tree object in $tree
314 =item new(-mudata =E<gt> \%mudata, -leaves =E<gt> \@leaves)
316 creates the network by reconstructing it from its mu-data stored in
317 \%mudata and with set of leaves in \@leaves.
319 =item new(-mudata =E<gt> \%mudata, -numleaves =E<gt> $numleaves)
321 creates the network by reconstructing it from its mu-data stored in
322 \%mudata and with set of leaves in ("l1".."l$numleaves").
324 =back
326 =cut
328 sub new {
329 my ($pkg,@args)=@_;
330 my $self=$pkg->SUPER::new(@args);
331 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
332 $self->_rearrange([qw(ENEWICK
333 EDGES
334 LEAVES
335 NUMLEAVES
336 GRAPH
337 TREE
338 MUDATA)],@args);
339 bless($self,$pkg);
341 $self->build_from_eNewick($eNewick) if defined $eNewick;
342 $self->build_from_edges(@$edgesR) if defined $edgesR;
343 $self->build_from_graph($graph) if defined $graph;
344 $self->build_from_tree($tree) if defined $tree;
345 if ((! defined $leavesR) && (defined $numleaves)) {
346 my @leaves=map {"l$_"} (1..$numleaves);
347 $leavesR=\@leaves;
349 $self->build_from_mudata($mudataR,$leavesR)
350 if ((defined $mudataR) && (defined $leavesR));
351 return $self;
354 # Builders
356 sub build_from_edges {
357 my ($self,@edges)=@_;
358 my $graph=Graph::Directed->new();
359 $graph->add_edges(@edges);
360 $self->{graph}=$graph;
361 $self->recompute();
362 my $labels={};
363 foreach my $node ($self->nodes()) {
364 $labels->{$node}=$node;
366 $self->{labels}=$labels;
369 sub build_from_graph {
370 my ($self,$graph)=@_;
371 my $graphcp=$graph->copy();
372 $self->{graph}=$graphcp;
373 $self->recompute();
374 my $labels={};
375 foreach my $node ($self->nodes()) {
376 $labels->{$node}=$node;
378 $self->{labels}=$labels;
381 my $_eN_index;
383 sub build_from_eNewick {
384 my ($self,$string)=@_;
385 $_eN_index=0;
386 my $graph=Graph::Directed->new();
387 my $labels={};
388 my @blocks=split(/; */,$string);
389 foreach my $block (@blocks) {
390 my ($rt,$str)=get_root_and_subtree($block);
391 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
392 process_block($graph,$labels,$block,$rtid);
393 $labels->{$rtid}=$rtlbl.'';
395 $self->{graph}=$graph;
396 $self->{labels}=$labels;
397 $self->recompute();
400 sub process_block {
401 my ($graph,$labels,$block,$rtid)=@_;
402 my ($rt,$str)=get_root_and_subtree($block);
403 my @substrs=my_split($str);
404 foreach my $substr (@substrs) {
405 my ($subrt,$subblock)=get_root_and_subtree($substr);
406 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
407 get_label_type_id_length($subrt);
408 if (! $subrtlng eq '') {
409 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
411 else {
412 $graph->add_edges($rtid,$subrtid);
414 if (! $subrttype eq '') {
415 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
417 $subrtlbl.='';
418 # if (! $subrtlbl eq '') {
419 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
420 $labels->{$subrtid}=$subrtlbl;
421 } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
422 # error
423 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
426 if ($subblock ne "") {
427 process_block($graph,$labels,$subblock,$subrtid);
432 sub get_root_and_subtree {
433 my ($block)=@_;
434 my ($rt,$str)=("","");
435 # ($rt,$str)=split(/:|=/,$block);
436 ($rt,$str)=split(/=/,$block);
437 if ($rt eq $block) {
438 # try to look for root label at the end
439 my $pos=length($rt)-1;
440 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
441 $pos--;
443 $rt=substr($block,$pos+1,length($block)-$pos);
444 $str=substr($block,0,$pos+1);
446 $rt=trim($rt);
447 $str=trim($str);
448 return ($rt,$str);
451 sub get_label_type_id_length {
452 my ($string) = @_;
453 $string.='';
454 # print "$string\n";
455 if (index($string,'#')==-1) {
456 # no hybrid
457 my ($label,$length)=split(':',$string);
458 $label.='';
459 my $id;
460 if ((! defined $label) || ($label eq '')) {
461 # create id
462 $_eN_index++;
463 $id="T$_eN_index";
464 } else {
465 $id=$label;
467 return ($label,'',$id,$length);
469 else {
470 # hybrid
471 my ($label,$string2)=split('#',$string);
472 my ($typeid,$length)=split(':',$string2);
473 my $type=$typeid;
474 $type =~ s/\d//g;
475 my $id=$typeid;
476 $id =~ s/\D//g;
477 return ($label,$type,'#'.$id,$length);
481 sub trim
483 my ($string) = @_;
484 $string =~ s/^\s+//;
485 $string =~ s/\s+$//;
486 return $string;
489 sub my_split {
490 my ( $string ) = @_;
491 my $temp="";
492 my @substrings;
493 my $level=1;
494 for my $i ( 1 .. length( $string ) ) {
495 my $char=substr($string,$i,1);
496 if ($char eq "(") {
497 $level++;
499 if ($char eq ")") {
500 if ($level==1) {
501 push @substrings, $temp;
502 $temp="";
504 $level--;
506 if (($char eq ",") && ($level==1)) {
507 push @substrings, $temp;
508 $temp="";
509 $char="";
511 $temp = $temp.$char;
513 return @substrings;
516 sub build_from_mudata {
517 my ($self,$mus,$leavesR)=@_;
518 my $graph=Graph::Directed->new();
519 my @nodes=keys %{$mus};
520 my @leaves=@{$leavesR};
522 my %seen;
523 my @internal;
525 @seen{@leaves} = ();
527 foreach my $node (@nodes) {
528 push(@internal, $node) unless exists $seen{$node};
531 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
532 @nodes=(@internal,@leaves);
533 my $numnodes=@nodes;
534 for (my $i=0;$i<$numnodes;$i++) {
535 my $mu=$mus->{$nodes[$i]};
536 my $j=$i+1;
537 while ($mu->is_positive() && $j<$numnodes) {
538 if ($mu->geq_poset($mus->{$nodes[$j]})) {
539 $graph->add_edges(($nodes[$i],$nodes[$j]));
540 $mu = $mu - $mus->{$nodes[$j]};
542 $j++;
545 $self->build_from_graph($graph);
548 # sub relabel_tree {
549 # my ($tree)=@_;
550 # my $i=1;
551 # my $j=1;
552 # my $root=$tree->get_root_node();
553 # foreach my $node ($tree->get_nodes()) {
554 # if ($node == $root) {
555 # $node->{'_id'}="r";
557 # elsif (! $node->is_Leaf) {
558 # $node->{'_id'}="t$i";
559 # $i++;
561 # else {
562 # if ($node->{'_id'} eq "") {
563 # $node->{'_id'}="l$j";
564 # $j++;
568 # return $tree;
571 # sub build_subtree {
572 # my ($graph,$root)=@_;
573 # foreach my $child ($root->each_Descendent) {
574 # $graph->add_edge($root->id,$child->id);
575 # $graph=build_subtree($graph,$child);
577 # return $graph;
580 sub build_from_tree {
581 my ($self,$tree)=@_;
582 # relabel_tree($tree);
583 # my $treeroot=$tree->get_root_node;
584 # my $graph=Graph::Directed->new();
585 # $graph=build_subtree($graph,$treeroot);
586 # $self->build_from_graph($graph);
587 my $str;
588 my $io=IO::String->new($str);
589 my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
590 $treeio->write_tree($tree);
591 # print "intern: $str\n";
592 $self->build_from_eNewick($str);
595 sub recompute {
596 my ($self)=@_;
597 $self->throw("Graph is not DAG:".$self->{graph})
598 unless $self->{graph}->is_dag();
599 my @leaves=$self->{graph}->successorless_vertices();
600 @leaves=sort @leaves;
601 my $numleaves=@leaves;
602 my @roots=$self->{graph}->predecessorless_vertices();
603 my $numroots=@roots;
604 #$self->throw("Graph is not rooted") unless ($numroots == 1);
605 my @nodes=$self->{graph}->vertices();
606 @nodes=sort @nodes;
607 my $numnodes=@nodes;
608 foreach my $node (@nodes) {
609 if (! defined $self->{labels}->{$node}) {
610 $self->{labels}->{$node}='';
613 $self->{leaves}=\@leaves;
614 $self->{numleaves}=$numleaves;
615 $self->{roots}=\@roots;
616 $self->{numroots}=$numroots;
617 $self->{nodes}=\@nodes;
618 $self->{numnodes}=$numnodes;
619 $self->{mudata}={};
620 $self->{h}={};
621 $self->compute_height();
622 $self->compute_mu();
623 return $self;
626 # Hybridizing
628 sub is_attackable {
629 my ($self,$u1,$v1,$u2,$v2)=@_;
630 if ( $self->is_hybrid_node($v1) ||
631 $self->is_hybrid_node($v2) ||
632 $self->graph->is_reachable($v2,$u1) ||
633 (($u1 eq $u2)&&($v1 eq $v2)) ||
634 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
635 $self->graph->successors($u2)))
637 return 0;
639 return 1;
642 sub do_attack {
643 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
644 my $graph=$self->{graph};
645 $graph->delete_edge($u1,$v1);
646 $graph->delete_edge($u2,$v2);
647 $graph->add_edge($u1,"T$lbl");
648 $graph->add_edge("T$lbl",$v1);
649 $graph->add_edge($u2,"#H$lbl");
650 $graph->add_edge("#H$lbl",$v2);
651 $graph->add_edge("T$lbl","#H$lbl");
652 $self->build_from_graph($graph);
656 # Computation of mu-data
658 sub compute_mu {
659 my ($self)=@_;
660 my $graph=$self->{graph};
661 my $mudata=$self->{mudata};
662 my @leaves=@{$self->{leaves}};
663 my $numleaves=$self->{numleaves};
664 for (my $i=0;$i<$numleaves;$i++) {
665 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
666 $vec->[$i]=1;
667 $mudata->{$leaves[$i]}=$vec;
669 my $h=1;
670 while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
672 foreach my $u (@nodes) {
673 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
674 foreach my $son ($graph->successors($u)) {
675 $vec+=$mudata->{$son};
677 $mudata->{$u}=$vec;
679 $h++;
683 sub compute_height {
684 my ($self)=@_;
685 my $graph=$self->{graph};
686 my @leaves=@{$self->{leaves}};
687 foreach my $leaf (@leaves) {
688 $self->{h}->{$leaf}=0;
690 my $h=0;
691 while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
692 @{$self->{nodes}} )
694 foreach my $node (@nodes) {
695 foreach my $parent ($graph->predecessors($node)) {
696 $self->{h}->{$parent}=$h+1;
699 $h++;
703 # Tests
705 =head2 is_leaf
707 Title : is_leaf
708 Usage : my $b=$net->is_leaf($u)
709 Function: tests if $u is a leaf in $net
710 Returns : boolean
711 Args : scalar
713 =cut
715 sub is_leaf {
716 my ($self,$node)=@_;
717 if ($self->{graph}->out_degree($node) == 0) {return 1;}
718 return 0;
721 =head2 is_root
723 Title : is_root
724 Usage : my $b=$net->is_root($u)
725 Function: tests if $u is the root of $net
726 Returns : boolean
727 Args : scalar
729 =cut
731 sub is_root {
732 my ($self,$node)=@_;
733 if ($self->{graph}->in_degree($node) == 0) {return 1;}
734 return 0;
737 =head2 is_tree_node
739 Title : is_tree_node
740 Usage : my $b=$net->is_tree_node($u)
741 Function: tests if $u is a tree node in $net
742 Returns : boolean
743 Args : scalar
745 =cut
747 sub is_tree_node {
748 my ($self,$node)=@_;
749 if ($self->{graph}->in_degree($node) <= 1) {return 1;}
750 return 0;
753 =head2 is_hybrid_node
755 Title : is_hybrid_node
756 Usage : my $b=$net->is_hybrid_node($u)
757 Function: tests if $u is a hybrid node in $net
758 Returns : boolean
759 Args : scalar
761 =cut
763 sub is_hybrid_node {
764 my ($self,$node)=@_;
765 if ($self->{graph}->in_degree($node) > 1) {return 1;}
766 return 0;
769 sub has_tree_child {
770 # has_tree_child(g,u) returns 1 if u has a tree child in graph g
771 # and 0 otherwise
772 my $g=shift(@_);
773 my $node=shift(@_);
774 my @Sons=$g->successors($node);
775 foreach my $son (@Sons) {
776 if ($g->in_degree($son)==1) {
777 return 1;
780 return 0;
783 =head2 is_tree_child
785 Title : is_tree_child
786 Usage : my $b=$net->is_tree_child()
787 Function: tests if $net is a Tree-Child phylogenetic network
788 Returns : boolean
789 Args : Bio::PhyloNetwork
791 =cut
793 sub is_tree_child {
794 my ($self)=@_;
795 if (defined $self->{is_tree_child}) {
796 return $self->{is_tree_child};
798 $self->{is_tree_child}=0;
799 my $graph=$self->{graph};
800 foreach my $node (@{$self->{nodes}}) {
801 return 0 unless ($graph->out_degree($node)==0 ||
802 has_tree_child($graph,$node));
804 $self->{is_tree_child}=1;
805 return 1;
808 # Accessors
810 =head2 nodes
812 Title : nodes
813 Usage : my @nodes=$net->nodes()
814 Function: returns the set of nodes of $net
815 Returns : array
816 Args : none
818 =cut
820 sub nodes {
821 my ($self)=@_;
822 return @{$self->{nodes}};
825 =head2 leaves
827 Title : leaves
828 Usage : my @leaves=$net->leaves()
829 Function: returns the set of leaves of $net
830 Returns : array
831 Args : none
833 =cut
835 sub leaves {
836 my ($self)=@_;
837 return @{$self->{leaves}};
840 =head2 roots
842 Title : roots
843 Usage : my @roots=$net->roots()
844 Function: returns the set of roots of $net
845 Returns : array
846 Args : none
848 =cut
850 sub roots {
851 my ($self)=@_;
852 return @{$self->{roots}};
855 =head2 internal_nodes
857 Title : internal_nodes
858 Usage : my @internal_nodes=$net->internal_nodes()
859 Function: returns the set of internal nodes of $net
860 Returns : array
861 Args : none
863 =cut
865 sub internal_nodes {
866 my ($self)=@_;
867 return grep {! $self->is_leaf($_)} $self->nodes();
870 =head2 tree_nodes
872 Title : tree_nodes
873 Usage : my @tree_nodes=$net->tree_nodes()
874 Function: returns the set of tree nodes of $net
875 Returns : array
876 Args : none
878 =cut
880 sub tree_nodes {
881 my ($self)=@_;
882 return grep {$self->is_tree_node($_)} $self->nodes();
885 =head2 hybrid_nodes
887 Title : hybrid_nodes
888 Usage : my @hybrid_nodes=$net->hybrid_nodes()
889 Function: returns the set of hybrid nodes of $net
890 Returns : array
891 Args : none
893 =cut
895 sub hybrid_nodes {
896 my ($self)=@_;
897 return grep {$self->is_hybrid_node($_)} $self->nodes();
900 =head2 graph
902 Title : graph
903 Usage : my $graph=$net->graph()
904 Function: returns the underlying graph of $net
905 Returns : Graph::Directed
906 Args : none
908 =cut
910 sub graph {
911 my ($self)=@_;
912 return $self->{graph};
915 =head2 edges
917 Title : edges
918 Usage : my @edges=$net->edges()
919 Function: returns the set of edges of $net
920 Returns : array
921 Args : none
923 Each element in the array is an anonimous array whose first element is the
924 head of the edge and the second one is the tail.
926 =cut
928 sub edges {
929 my ($self)=@_;
930 return $self->{graph}->edges();
933 =head2 tree_edges
935 Title : tree_edges
936 Usage : my @tree_edges=$net->tree_edges()
937 Function: returns the set of tree edges of $net
938 (those whose tail is a tree node)
939 Returns : array
940 Args : none
942 =cut
944 sub tree_edges {
945 my ($self)=@_;
946 return grep {$self->is_tree_node($_->[1])} $self->edges();
949 =head2 hybrid_edges
951 Title : hybrid_edges
952 Usage : my @hybrid_edges=$net->hybrid_edges()
953 Function: returns the set of hybrid edges of $net
954 (those whose tail is a hybrid node)
955 Returns : array
956 Args : none
958 =cut
960 sub hybrid_edges {
961 my ($self)=@_;
962 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
965 =head2 explode
967 Title : explode
968 Usage : my @trees=$net->explode()
969 Function: returns the representation of $net by a set of
970 Bio::Tree:Tree objects
971 Returns : array
972 Args : none
974 =cut
976 sub explode {
977 my ($self)=@_;
978 my @trees;
979 $self->explode_rec(\@trees);
980 return @trees;
983 sub explode_rec {
984 my ($self,$trees)=@_;
985 my @h = $self->hybrid_nodes;
986 if (scalar @h) {
987 my $v = shift @h;
988 for my $u ($self->{graph}->predecessors($v)) {
989 $self->{graph}->delete_edge($u,$v);
990 $self->explode_rec($trees);
991 $self->{graph}->add_edge($u,$v);
993 } else {
994 my $io = IO::String->new($self->eNewick);
995 my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
996 my $tree = $treeio->next_tree;
997 $tree->contract_linear_paths;
998 push @{$trees}, $tree;
1002 =head2 mudata
1004 Title : mudata
1005 Usage : my %mudata=$net->mudata()
1006 Function: returns the representation of $net by its mu-data
1007 Returns : hash
1008 Args : none
1010 $net-E<gt>mudata() returns a hash with keys the nodes of $net and each value is a
1011 muVector object holding its mu-vector.
1013 =cut
1015 sub mudata {
1016 my ($self)=@_;
1017 return %{$self->{mudata}};
1020 sub mudata_node {
1021 my ($self,$u)=@_;
1022 return $self->{mudata}{$u};
1025 =head2 heights
1027 Title : heights
1028 Usage : my %heights=$net->heights()
1029 Function: returns the heights of the nodes of $net
1030 Returns : hash
1031 Args : none
1033 $net-E<gt>heights() returns a hash with keys the nodes of $net and each value
1034 is its height.
1036 =cut
1038 sub heights {
1039 my ($self)=@_;
1040 return %{$self->{h}};
1043 sub height_node {
1044 my ($self,$u)=@_;
1045 return $self->{h}{$u};
1048 =head2 mu_distance
1050 Title : mu_distance
1051 Usage : my $dist=$net1->mu_distance($net2)
1052 Function: Computes the mu-distance between the networks $net1 and $net2 on
1053 the same set of leaves
1054 Returns : scalar
1055 Args : Bio::PhyloNetwork
1057 =cut
1059 sub mu_distance {
1060 my ($net1,$net2)=@_;
1061 my @nodes1=$net1->nodes;
1062 my @nodes2=$net2->nodes;
1063 my $comp = Array::Compare->new;
1064 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1065 unless $comp->compare($net1->{leaves},$net2->{leaves});
1066 $net1->warn("Not a tree-child phylogenetic network")
1067 unless $net1->is_tree_child();
1068 $net2->warn("Not a tree-child phylogenetic network")
1069 unless $net2->is_tree_child();
1070 my @leaves=@{$net1->{leaves}};
1071 my %matched1;
1072 my %matched2;
1073 OUTER: foreach my $node1 (@nodes1) {
1074 foreach my $node2 (@nodes2) {
1075 if (
1076 (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1077 ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
1079 $matched1{$node1}=$node2;
1080 $matched2{$node2}=$node1;
1081 next OUTER;
1085 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1088 =head2 mu_distance_generalized
1090 Title : mu_distance_generalized
1091 Usage : my $dist=$net1->mu_distance($net2)
1092 Function: Computes the mu-distance between the topological restrictions of
1093 networks $net1 and $net2 on its common set of leaves
1094 Returns : scalar
1095 Args : Bio::PhyloNetwork
1097 =cut
1099 sub mu_distance_generalized {
1100 my ($net1,$net2)=@_;
1101 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1102 return $netr1->mu_distance($netr2);
1105 # mudata_string (code mu_data in a string; useful for isomorphism testing)
1107 sub mudata_string_node {
1108 my ($self,$u)=@_;
1109 return $self->{mudata}->{$u}->display();
1112 sub mudata_string {
1113 my ($self)=@_;
1114 return $self->{mudata_string} if defined $self->{mudata_string};
1115 my @internal=$self->internal_nodes;
1116 my $mus=$self->{mudata};
1117 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
1118 my $str="";
1119 foreach my $node (@internal) {
1120 $str=$str.$self->mudata_string_node($node);
1122 $self->{mudata_string}=$str;
1123 return $str;
1126 sub is_mu_isomorphic {
1127 my ($net1,$net2)=@_;
1128 return ($net1->mudata_string() eq $net2->mudata_string());
1131 # tripartitions
1133 sub compute_tripartition_node {
1134 my ($self,$u)=@_;
1135 $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1136 unless ($self->{numroots} == 1);
1137 my $root=$self->{roots}->[0];
1138 my $graph=$self->{graph};
1139 my $graphPruned=$graph->copy();
1140 $graphPruned->delete_vertex($u);
1141 my $tripartition="";
1142 foreach my $leaf (@{$self->{leaves}}) {
1143 my $type;
1144 if ($graph->is_reachable($u,$leaf)) {
1145 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
1146 else {$type="A";}
1148 else {$type="C";}
1149 $tripartition .= $type;
1151 $self->{tripartitions}->{$u}=$tripartition;
1154 sub compute_tripartitions {
1155 my ($self)=@_;
1156 foreach my $node (@{$self->{nodes}}) {
1157 $self->compute_tripartition_node($node);
1161 =head2 tripartitions
1163 Title : tripartitions
1164 Usage : my %tripartitions=$net->tripartitions()
1165 Function: returns the set of tripartitions of $net
1166 Returns : hash
1167 Args : none
1169 $net-E<gt>tripartitions() returns a hash with keys the nodes of $net and each value
1170 is a string representing the tripartition of the leaves induced by the node.
1171 A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1172 the set B(u), the second one in C(u), the third one in A(u), and so on.
1174 =cut
1176 sub tripartitions {
1177 my ($self)=@_;
1178 $self->compute_tripartitions() unless defined $self->{tripartitions};
1179 return %{$self->{tripartitions}};
1182 # to do: change to tri_distance and test for TC and time-cons
1184 sub tripartition_error {
1185 my ($net1,$net2)=@_;
1186 my $comp = Array::Compare->new;
1187 $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1188 unless $comp->compare($net1->{leaves},$net2->{leaves});
1189 $net1->warn("Not a tree-child phylogenetic network")
1190 unless $net1->is_tree_child();
1191 $net2->warn("Not a tree-child phylogenetic network")
1192 unless $net2->is_tree_child();
1193 $net1->warn("Not a time-consistent network")
1194 unless $net1->is_time_consistent();
1195 $net2->warn("Not a time-consistent network")
1196 unless $net2->is_time_consistent();
1197 $net1->compute_tripartitions() unless defined $net1->{tripartitions};
1198 $net2->compute_tripartitions() unless defined $net2->{tripartitions};
1199 my @edges1=$net1->{graph}->edges();
1200 my @edges2=$net2->{graph}->edges();
1201 my ($FN,$FP)=(0,0);
1202 foreach my $edge1 (@edges1) {
1203 my $matched=0;
1204 foreach my $edge2 (@edges2) {
1205 if ($net1->{tripartitions}->{$edge1->[1]} eq
1206 $net2->{tripartitions}->{$edge2->[1]}) {
1207 $matched=1;
1208 last;
1211 if (! $matched) {$FN++;}
1213 foreach my $edge2 (@edges2) {
1214 my $matched=0;
1215 foreach my $edge1 (@edges1) {
1216 if ($net1->{tripartitions}->{$edge1->[1]} eq
1217 $net2->{tripartitions}->{$edge2->[1]}) {
1218 $matched=1;
1219 last;
1222 if (! $matched) {$FP++;}
1224 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1227 # Time-consistency
1229 # to do: add weak time consistency
1231 =head2 is_time_consistent
1233 Title : is_time_consistent
1234 Usage : my $b=$net->is_time_consistent()
1235 Function: tests if $net is (strong) time-consistent
1236 Returns : boolean
1237 Args : none
1239 =cut
1241 sub is_time_consistent {
1242 my ($self)=@_;
1243 $self->compute_temporal_representation()
1244 unless exists $self->{has_temporal_representation};
1245 return $self->{has_temporal_representation};
1248 =head2 temporal_representation
1250 Title : temporal_representation
1251 Usage : my %time=$net->temporal_representation()
1252 Function: returns a hash containing a temporal representation of $net, or 0
1253 if $net is not time-consistent
1254 Returns : hash
1255 Args : none
1257 =cut
1259 sub temporal_representation {
1260 my ($self)=@_;
1261 if ($self->is_time_consistent) {
1262 return %{$self->{temporal_representation}};
1264 return 0;
1267 sub compute_temporal_representation {
1268 my ($self)=@_;
1269 my $quotient=Graph::Directed->new();
1270 my $classes=find_classes($self);
1271 my %repr;
1272 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
1273 foreach my $e ($self->tree_edges()) {
1274 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1276 my %temp;
1277 my $depth=0;
1278 while ($quotient->vertices()) {
1279 if (my @svs=$quotient->predecessorless_vertices()) {
1280 foreach my $sv (@svs) {
1281 $temp{$sv}=$depth;
1283 $quotient->delete_vertices(@svs);
1284 } else {
1285 return 0;
1287 $depth++;
1289 foreach my $node (@{$self->{nodes}}) {
1290 $temp{$node}=$temp{$repr{$node}}
1292 $self->{temporal_representation}=\%temp;
1293 $self->{has_temporal_representation}=1;
1296 sub find_classes {
1297 my ($self)=@_;
1298 my $classes={};
1299 map {$classes->{$_}=[$_]} $self->nodes();
1300 foreach my $e ($self->hybrid_edges()) {
1301 $classes=join_classes($classes,$e->[0],$e->[1]);
1303 return $classes;
1306 sub join_classes {
1307 my ($classes,$u,$v)=@_;
1308 my @clu=@{$classes->{$u}};
1309 my @clv=@{$classes->{$v}};
1310 my @cljoin=(@clu,@clv);
1311 map {$classes->{$_}=\@cljoin} @cljoin;
1312 return $classes;
1315 # alignment
1317 =head2 contract_elementary
1320 Title : contract_elementary
1321 Usage : my ($contracted,$blocks)=$net->contract_elementary();
1322 Function: Returns the network $contracted, obtained by contracting elementary
1323 paths of $net into edges. The reference $blocks points to a hash
1324 where, for each node of $contracted, gives the corresponding nodes
1325 of $net that have been deleted.
1326 Returns : Bio::PhyloNetwork,reference to hash
1327 Args : none
1329 =cut
1331 sub contract_elementary {
1332 my ($self)=@_;
1334 my $contracted=$self->graph->copy();
1335 my @nodes=$self->nodes();
1336 my $mus=$self->{mudata};
1337 my $hs=$self->{h};
1338 my %blocks;
1339 foreach my $u (@nodes) {
1340 $blocks{$u}=[$u];
1342 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
1343 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1344 $hs->{$b} <=> $hs->{$a}} @elementary;
1345 foreach my $elem (@elementary) {
1346 my @children=$contracted->successors($elem);
1347 my $child=$children[0];
1348 if ($contracted->in_degree($elem) == 1) {
1349 my @parents=$contracted->predecessors($elem);
1350 my $parent=$parents[0];
1351 $contracted->add_edge($parent,$child);
1353 $contracted->delete_vertex($elem);
1354 my @blch=@{$blocks{$child}};
1355 my @blem=@{$blocks{$elem}};
1356 $blocks{$child}=[@blem,@blch];
1357 delete $blocks{$elem};
1359 my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
1360 return $contr,\%blocks;
1363 =head2 optimal_alignment
1365 Title : optimal_alignment
1366 Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1367 Function: returns the total weight of an optimal alignment,
1368 the alignment itself, and partial weights
1369 between the networks $net1 and $net2 on the same set of leaves.
1370 An optional argument allows one to use the Manhattan (default) or the
1371 Hamming distance between mu-vectors.
1372 Returns : scalar,reference to hash,reference to hash
1373 Args : Bio::PhyloNetwork,
1374 -metric => string (optional)
1376 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1378 =cut
1380 sub optimal_alignment {
1381 my ($net1,$net2,%params)=@_;
1383 my ($net1cont,$blocks1)=contract_elementary($net1);
1384 my ($net2cont,$blocks2)=contract_elementary($net2);
1385 my ($wc,$alignc,$weightc)=
1386 optimal_alignment_noelementary($net1cont,$net2cont,%params);
1387 my %alignment=();
1388 my $totalweigth=0;
1389 my %weigths=();
1390 foreach my $u1 (keys %$alignc) {
1391 my $u2=$alignc->{$u1};
1392 my @block1=@{$blocks1->{$u1}};
1393 my @block2=@{$blocks2->{$u2}};
1394 while (@block1 && @block2) {
1395 my $u1dc=pop @block1;
1396 my $u2dc=pop @block2;
1397 $alignment{$u1dc}=$u2dc;
1398 $weigths{$u1dc}=$weightc->{$u1};
1399 $totalweigth+=$weigths{$u1dc};
1402 return $totalweigth,\%alignment,\%weigths;
1405 sub optimal_alignment_noelementary {
1406 my ($net1,$net2,%params)=@_;
1408 my $comp = Array::Compare->new;
1409 $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1410 unless $comp->compare($net1->{leaves},$net2->{leaves});
1411 my $distance;
1412 if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
1413 $distance='Hamming';
1414 } else {
1415 $distance='Manhattan';
1417 my $numleaves=$net1->{numleaves};
1418 my @nodes1=$net1->internal_nodes();
1419 my @nodes2=$net2->internal_nodes();
1420 my $numnodes1=@nodes1;
1421 my $numnodes2=@nodes2;
1422 my @matrix=();
1423 for (my $i=0;$i<$numnodes1;$i++) {
1424 my @row=();
1425 for (my $j=0;$j<$numnodes2;$j++) {
1426 push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1428 push @matrix,\@row;
1430 my @alignment=();
1431 Algorithm::Munkres::assign(\@matrix,\@alignment);
1432 my %alignmenthash;
1433 my %weighthash;
1434 my $totalw=0;
1435 foreach my $leaf (@{$net1->{leaves}}) {
1436 $alignmenthash{$leaf}=$leaf;
1437 $weighthash{$leaf}=0;
1439 for (my $i=0;$i<$numnodes1;$i++) {
1440 if (defined $nodes2[$alignment[$i]]) {
1441 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1442 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1443 $totalw += $matrix[$i][$alignment[$i]];
1446 return $totalw,\%alignmenthash,\%weighthash;
1449 =head2 optimal_alignment_generalized
1451 Title : optimal_alignment_generalized
1452 Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1453 Function: returns the wieght of an optimal alignment, and the alignment itself,
1454 between the topological restriction of the networks $net1 and $net2
1455 on the set of common leaves.
1456 An optional argument allows one to use the Manhattan (default) or the
1457 Hamming distance between mu-vectors.
1458 Returns : scalar,hash
1459 Args : Bio::PhyloNetwork,
1460 -metric => string (optional)
1462 Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1464 =cut
1466 sub optimal_alignment_generalized {
1467 my ($net1,$net2,%params)=@_;
1468 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1469 return $netr1->optimal_alignment($netr2,%params);
1472 sub weight {
1473 my ($net1,$v1,$net2,$v2,$distance)=@_;
1474 my $w;
1475 if (! defined $distance) {
1476 $distance='Manhattan';
1478 if ($distance eq 'Hamming') {
1479 $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
1480 } else {
1481 $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
1483 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
1484 ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1487 $w +=1/(2*$net1->{numleaves});
1489 return $w;
1493 =head2 topological_restriction
1495 Title : topological_restriction
1496 Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1497 Function: returns the topological restriction of $net1 and $net2 on its
1498 common set of leaves
1499 Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1500 Args : Bio::PhyloNetwork
1502 =cut
1504 sub topological_restriction {
1505 my ($net1,$net2)=@_;
1507 my @leaves1=$net1->leaves();
1508 my @leaves2=$net2->leaves();
1509 my $numleaves1=scalar @leaves1;
1510 my $numleaves2=scalar @leaves2;
1511 my %position1;
1512 for (my $i=0; $i<$numleaves1; $i++) {
1513 $position1{$leaves1[$i]}=$i;
1515 my %position2;
1516 my @commonleaves=();
1517 for (my $j=0; $j<$numleaves2; $j++) {
1518 if (defined $position1{$leaves2[$j]}) {
1519 push @commonleaves,$leaves2[$j];
1520 $position2{$leaves2[$j]}=$j;
1523 my $graphred1=$net1->{graph}->copy();
1524 my $graphred2=$net2->{graph}->copy();
1525 OUTER1:
1526 foreach my $u ($graphred1->vertices()) {
1527 my $mu=$net1->mudata_node($u);
1528 foreach my $leaf (@commonleaves) {
1529 if ($mu->[$position1{$leaf}]>0) {
1530 next OUTER1;
1533 $graphred1->delete_vertex($u);
1535 OUTER2:
1536 foreach my $u ($graphred2->vertices()) {
1537 my $mu=$net2->mudata_node($u);
1538 foreach my $leaf (@commonleaves) {
1539 if ($mu->[$position2{$leaf}]>0) {
1540 next OUTER2;
1543 $graphred2->delete_vertex($u);
1545 my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
1546 my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
1547 return ($netr1,$netr2);
1550 # Functions for eNewick representation
1552 =head2 eNewick
1554 Title : eNewick
1555 Usage : my $str=$net->eNewick()
1556 Function: returns the eNewick representation of $net without labeling
1557 internal tree nodes
1558 Returns : string
1559 Args : none
1561 =cut
1563 sub eNewick {
1564 my ($self)=@_;
1565 my $str="";
1566 my $seen={};
1567 foreach my $root ($self->roots()) {
1568 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1570 return $str;
1573 sub eNewick_aux {
1574 my ($self,$node,$seen,$parent)=@_;
1575 my $str='';
1576 if ($self->is_leaf($node) ||
1577 (defined $seen->{$node}) )
1579 $str=make_label($self,$parent,$node);
1581 else {
1582 $seen->{$node}=1;
1583 my @sons=$self->{graph}->successors($node);
1584 $str="(";
1585 foreach my $son (@sons) {
1586 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1588 chop($str);
1589 $str.=")".make_label($self,$parent,$node);
1591 return $str;
1594 sub make_label {
1595 my ($self,$parent,$node)=@_;
1596 my $str='';
1597 if ($self->is_hybrid_node($node)) {
1598 my $lbl=$self->{labels}->{$node};
1599 if ($lbl =~ /#/) {
1600 $lbl='';
1602 $str.=$lbl; #$self->{labels}->{$node};
1603 $str.='#';
1604 if ((defined $parent) &&
1605 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1606 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1608 $str.=substr $node,1;
1609 } else {
1610 $str.=$self->{labels}->{$node};
1612 if ((defined $parent) &&
1613 ($self->graph->has_edge_weight($parent,$node))) {
1614 $str.=":".$self->graph->get_edge_weight($parent,$node);
1616 return $str;
1619 =head2 eNewick_full
1621 Title : eNewick_full
1622 Usage : my $str=$net->eNewick_full()
1623 Function: returns the eNewick representation of $net labeling
1624 internal tree nodes
1625 Returns : string
1626 Args : none
1628 =cut
1630 sub eNewick_full {
1631 my ($self)=@_;
1632 my $str="";
1633 my $seen={};
1634 foreach my $root ($self->roots()) {
1635 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1637 return $str;
1640 sub eNewick_full_aux {
1641 my ($self,$node,$seen,$parent)=@_;
1642 my $str='';
1643 if ($self->is_leaf($node) ||
1644 (defined $seen->{$node}) )
1646 $str=make_label_full($self,$parent,$node);
1648 else {
1649 $seen->{$node}=1;
1650 my @sons=$self->{graph}->successors($node);
1651 $str="(";
1652 foreach my $son (@sons) {
1653 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1655 chop($str);
1656 $str.=")".make_label_full($self,$parent,$node);
1658 return $str;
1661 sub make_label_full {
1662 my ($self,$parent,$node)=@_;
1663 my $str='';
1664 if ($self->is_hybrid_node($node)) {
1665 my $lbl=$self->{labels}->{$node};
1666 if ($lbl =~ /#/) {
1667 $lbl='';
1669 $str.=$lbl; #$self->{labels}->{$node};
1670 $str.='#';
1671 if ((defined $parent) &&
1672 ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1673 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1675 $str.=substr $node,1;
1676 } else {
1677 if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
1678 $str.=$self->{labels}->{$node};
1680 else {
1681 $str.=$node;
1684 if ((defined $parent) &&
1685 ($self->graph->has_edge_weight($parent,$node))) {
1686 $str.=":".$self->graph->get_edge_weight($parent,$node);
1688 return $str;
1691 # sub eNewick_full {
1692 # my ($self)=@_;
1693 # my $str="";
1694 # my $seen={};
1695 # foreach my $root ($self->roots()) {
1696 # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1698 # return $str;
1701 # sub eNewick_full_aux {
1702 # my ($self,$node,$seen,$parent)=@_;
1703 # my $str;
1704 # if ($self->is_leaf($node) ||
1705 # (defined $seen->{$node}) )
1707 # if ($self->is_hybrid_node($node)) {
1708 # my $tag=substr $node,1;
1709 # if ((defined $parent) &&
1710 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1711 # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1712 # } else {
1713 # $str=$node;
1715 # } else {
1716 # $str=$node;
1719 # else {
1720 # $seen->{$node}=1;
1721 # my @sons=$self->{graph}->successors($node);
1722 # $str="(";
1723 # foreach my $son (@sons) {
1724 # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1726 # chop($str);
1727 # if ($self->is_hybrid_node($node)) {
1728 # my $tag=substr $node,1;
1729 # if ((defined $parent) &&
1730 # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1731 # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1732 # } else {
1733 # $str.=")$node";
1735 # } else {
1736 # $str.=")$node";
1739 # if ((defined $parent) &&
1740 # ($self->graph->has_edge_weight($parent,$node))) {
1741 # $str.=":".$self->graph->get_edge_weight($parent,$node);
1743 # return $str;
1747 # displaying data
1749 use overload '""' => \&display;
1751 =head2 display
1753 Title : display
1754 Usage : my $str=$net->display()
1755 Function: returns a string containing all the available information on $net
1756 Returns : string
1757 Args : none
1759 =cut
1761 sub display {
1762 my ($self)=@_;
1763 my $str="";
1764 my $graph=$self->{graph};
1765 my @leaves=$self->leaves();
1766 my @nodes=@{$self->{nodes}};
1767 $str.= "Leaves:\t@leaves\n";
1768 $str.= "Nodes:\t@nodes\n";
1769 $str.= "Graph:\t$graph\n";
1770 $str.= "eNewick:\t".$self->eNewick()."\n";
1771 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1772 $str.= "Mu-data and heights:\n";
1773 foreach my $node (@nodes) {
1774 $str.= "v=$node: ";
1775 if (exists $self->{labels}->{$node}) {
1776 $str.="\tlabel=".$self->{labels}->{$node}.",";
1777 } else {
1778 $str.="\tlabel=(none),";
1780 $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
1782 if (exists $self->{has_temporal_representation}) {
1783 $str.= "Temporal representation:\n";
1784 if ($self->{has_temporal_representation}) {
1785 foreach my $node (@nodes) {
1786 $str.= "v=$node; ";
1787 $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
1789 } else {
1790 $str.= "Does not exist.\n";
1793 if (exists $self->{tripartitions}) {
1794 $str.= "Tripartitions:\n";
1795 foreach my $node (@nodes) {
1796 $str.= "v=$node; ";
1797 $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
1800 return $str;