nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / Tree / Draw / Cladogram.pm
blobdf6e66c7a0cef2d0dfc45de818b3db21cedf42c8
2 # BioPerl module for Cladogram
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Gabriel Valiente <valiente@lsi.upc.edu>
8 # Copyright Gabriel Valiente
10 # You may distribute this module under the same terms as Perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::Draw::Cladogram - Drawing phylogenetic trees in
17 Encapsulated PostScript (EPS) format.
19 =head1 SYNOPSIS
21 use Bio::Tree::Draw::Cladogram;
22 use Bio::TreeIO;
23 my $treeio = Bio::TreeIO->new('-format' => 'newick',
24 '-file' => 'input.nwk');
25 my $t1 = $treeio->next_tree;
26 my $t2 = $treeio->next_tree;
28 my $obj1 = Bio::Tree::Draw::Cladogram->new(-tree => $t1);
29 $obj1->print(-file => 'cladogram.eps');
31 if ($t2) {
32 my $obj2 = Bio::Tree::Draw::Cladogram->new(-tree => $t1, -second => $t2);
33 $obj2->print(-file => 'tanglegram.eps');
36 =head1 DESCRIPTION
38 Bio::Tree::Draw::Cladogram is a Perl tool for drawing Bio::Tree::Tree
39 objects in Encapsulated PostScript (EPS) format. It can be utilized
40 both for displaying a single phylogenetic tree (a cladogram) and for
41 the comparative display of two phylogenetic trees (a tanglegram) such
42 as a gene tree and a species tree, a host tree and a parasite tree,
43 two alternative trees for the same set of taxa, or two alternative
44 trees for overlapping sets of taxa.
46 Phylogenetic trees are drawn as rectangular cladograms, with
47 horizontal orientation and ancestral nodes centered over their
48 descendents. The font used for taxa is Courier at 10 pt. A single
49 Bio::Tree::Tree object is drawn with ancestors to the left and taxa
50 flushed to the right. Two Bio::Tree::Tree objects are drawn with the
51 first tree oriented left-to-right and the second tree oriented
52 right-to-left, and with corresponding taxa connected by straight lines
53 in a shade of gray. Each correspondence between a $taxon1 of the first
54 tree and a $taxon2 of the second tree is established by setting
55 $taxon1-E<gt>add_tag_value('connection',$taxon2). Thus, a taxon of the
56 first tree can be connected to more than one taxon of the second tree,
57 and vice versa.
59 The branch from the parent to a child $node, as well as the child
60 label, can be colored by setting $node-E<gt>add_tag_value('Rcolor',$r),
61 $node-E<gt>add_tag_value('Gcolor',$g), and
62 $node-E<gt>add_tag_value('Bcolor',$b), where $r, $g, and $b are the
63 desired values for red, green, and blue (zero for lowest, one for
64 highest intensity).
66 This is a preliminary release of Bio::Tree::Draw::Cladogram. Future
67 improvements include an option to output phylograms instead of
68 cladograms. Beware that cladograms are automatically scaled according
69 to branch lengths, but the current release has only been tested with
70 trees having unit branch lengths.
72 The print method could be extended to output graphic formats other
73 than EPS, although there are many graphics conversion programs around
74 that accept EPS input. For instance, most Linux distributions include
75 epstopdf, a Perl script that together with Ghostscript, converts EPS
76 to PDF.
78 =head1 FEEDBACK
80 =head2 Mailing Lists
82 User feedback is an integral part of the evolution of this and other
83 Bioperl modules. Send your comments and suggestions preferably to
84 the Bioperl mailing list. Your participation is much appreciated.
86 bioperl-l@bioperl.org - General discussion
87 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
89 =head2 Support
91 Please direct usage questions or support issues to the mailing list:
93 I<bioperl-l@bioperl.org>
95 rather than to the module maintainer directly. Many experienced and
96 reponsive experts will be able look at the problem and quickly
97 address it. Please include a thorough description of the problem
98 with code and data examples if at all possible.
100 =head2 Reporting Bugs
102 Report bugs to the Bioperl bug tracking system to help us keep track
103 of the bugs and their resolution. Bug reports can be submitted via
104 the web:
106 https://github.com/bioperl/bioperl-live/issues
108 =head1 AUTHOR - Gabriel Valiente
110 Email valiente@lsi.upc.edu
112 Code for coloring branches contributed by Georgii A Bazykin
113 (gbazykin@princeton.edu).
115 =head1 APPENDIX
117 The rest of the documentation details each of the object methods.
118 Internal methods are usually preceded with a _
120 =cut
122 package Bio::Tree::Draw::Cladogram;
123 use strict;
125 use PostScript::TextBlock;
127 use base qw(Bio::Root::Root);
129 # The following private package variables are set by the new method
130 # and used by the print method.
132 my %xx; # horizontal coordinate for each node
133 my %yy; # vertical coordinate for each node
134 my $t1; # first Bio::Tree::Tree object
135 my $t2; # second Bio::Tree::Tree object
136 my $font; # font name
137 my $size; # font size
138 my $width; # total drawing width
139 my $height; # total drawing height
140 my $xstep; # branch length in drawing
141 my $tip; # extra space between tip and label
142 my $tipwidth1; # width of longest label among $t1 taxa
143 my $tipwidth2; # width of longest label among $t2 taxa
144 my $compact; # whether or not to ignore branch lengths
145 my $ratio; # horizontal to vertical ratio
146 my $colors; # use colors to color edges
147 my %Rcolor; # red color for each node
148 my %Gcolor; # green color for each node
149 my %Bcolor; # blue color for each node
150 my $bootstrap; # Draw Bootstrap boolean
152 =head2 new
154 Title : new
155 Usage : my $obj = Bio::Tree::Draw::Cladogram->new();
156 Function: Builds a new Bio::Tree::Draw::Cladogram object
157 Returns : Bio::Tree::Draw::Cladogram
158 Args : -tree => Bio::Tree::Tree object
159 -second => Bio::Tree::Tree object (optional)
160 -font => font name [string] (optional)
161 -size => font size [integer] (optional)
162 -top => top margin [integer] (optional)
163 -bottom => bottom margin [integer] (optional)
164 -left => left margin [integer] (optional)
165 -right => right margin [integer] (optional)
166 -tip => extra tip space [integer] (optional)
167 -column => extra space between cladograms [integer] (optional)
168 -compact => ignore branch lengths [boolean] (optional)
169 -ratio => horizontal to vertical ratio [integer] (optional)
170 -colors => use colors to color edges [boolean] (optional)
171 -bootstrap => draw bootstrap or internal ids [boolean]
173 =cut
175 sub new {
176 my($class,@args) = @_;
178 my $self = $class->SUPER::new(@args);
179 ($t1, $t2, $font, $size, my $top, my $bottom, my $left, my $right,
180 $tip, my $column, $compact, $ratio, $colors,$bootstrap) =
181 $self->_rearrange([qw(TREE SECOND FONT SIZE TOP BOTTOM LEFT RIGHT
182 TIP COLUMN COMPACT RATIO COLORS BOOTSTRAP)],
183 @args);
184 $font ||= "Helvetica-Narrow";
185 $size ||= 12;
186 $top ||= 10;
187 $bottom ||= 10;
188 $left ||= 10;
189 $right ||= 10;
190 $tip ||= 5;
191 $column ||= 60;
192 $compact ||= 0;
193 $ratio ||= 1 / 1.6180339887;
194 $colors ||= 0;
195 $bootstrap ||= 0;
197 # Roughly, a cladogram is set according to the following parameters.
199 #################################
200 # # T # $top (T, top margin)
201 # +---------+ XXX # # $bottom (B, bottom margin)
202 # | # # $left (L, left margin)
203 # | # # $right (R, right margin)
204 # +----+ # # $tip (X, extra tip space)
205 # | +----+ XXXX # # $width (total drawing width)
206 # | | # # $height (total drawing height)
207 # +----+ # Y # $xstep (S, stem length)
208 # | # # $ystep (Y, space between taxa)
209 # +----+ XX # # $tiplen (string length of longest name)
210 # # B # $tipwidth (N, size of longest name)
211 #################################
212 # L S X N R #
213 #############################
215 # A tanglegram is roughly set as follows. The only additional
216 # parameter is $column (C, length of connection lines between taxa
217 # of the two trees), but $tip occurs four times, and $tiplen and
218 # $tipwidth differ for the first and the second tree.
220 ###########################################################
222 # +---------+ XXX ----- XXXXXX +----+ #
223 # | | #
224 # | +----+ #
225 # +----+ | | #
226 # | +----+ XXXX ----- XXX +----+ | #
227 # | | +----+ #
228 # +----+ | #
229 # | | #
230 # +----+ XX ----- XXXX +---------+ #
232 ###########################################################
233 # L X X C X X R #
234 ###########################################################
236 # An alternative would be to let the user set $width and $height in
237 # points and to scale down everything to fit the desired
238 # dimensions. However, the final EPS can later be scaled down to any
239 # desired size anyway.
241 my @taxa1 = $t1->get_leaf_nodes;
242 my $root1 = $t1->get_root_node;
244 $tipwidth1 = 0;
245 foreach my $taxon (@taxa1) {
246 my $w = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
247 if ($w > $tipwidth1) { $tipwidth1 = $w; }
250 my @taxa2;
251 my $root2;
253 my $ystep = 20;
255 if ($t2) {
256 @taxa2 = $t2->get_leaf_nodes;
257 $root2 = $t2->get_root_node;
258 $tipwidth2 = 0;
259 foreach my $taxon (@taxa2) {
260 my $w = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
261 if ($w > $tipwidth2) { $tipwidth2 = $w; }
265 my $stems = $root1->height + 1;
266 if ($t2) { $stems += $root2->height + 1; }
267 my $labels = $tipwidth1;
268 if ($t2) { $labels += $tipwidth2; }
269 $xstep = 20;
270 $width = $left + $stems * $xstep + $tip + $labels + $right;
271 if ($t2) { $width += $tip + $column + $tip + $tip; }
272 $height = $bottom + $ystep * (@taxa1 - 1) + $top;
273 if ($t2) {
274 if ( scalar(@taxa2) > scalar(@taxa1) ) {
275 $height = $bottom + $ystep * (@taxa2 - 1) + $top;
278 my $ystep1 = $height / scalar(@taxa1);
279 my $ystep2;
280 if ($t2) {
281 $ystep2 = $height / scalar(@taxa2);
284 my $x = $left + $xstep * ($root1->height + 1) + $tip;
285 my $y = $bottom;
287 for my $taxon (reverse @taxa1) {
288 $xx{$taxon} = $x - $tip;
289 $yy{$taxon} = $y;
290 $y += $ystep1;
292 $x -= $xstep;
294 my @stack;
295 my @queue; # postorder traversal
296 push @stack, $t1->get_root_node;
297 while (@stack) {
298 my $node = pop @stack;
299 push @queue, $node;
300 foreach my $child ($node->each_Descendent(-sortby => 'internal_id')) {
301 push @stack, $child;
304 @queue = reverse @queue;
306 for my $node (@queue) {
307 if (!$node->is_Leaf) {
308 my @children = $node->each_Descendent;
309 my $child = shift @children;
310 my $xmin = $xx{$child};
311 my $ymin = my $ymax = $yy{$child};
312 foreach $child (@children) {
313 $xmin = $xx{$child} if $xx{$child} < $xmin;
314 $ymax = $yy{$child} if $yy{$child} > $ymax;
315 $ymin = $yy{$child} if $yy{$child} < $ymin;
317 $xx{$node} = $xmin - $xstep;
318 $yy{$node} = ($ymin + $ymax)/2;
322 $xx{$t1->get_root_node} = $left + $xstep;
324 my @preorder = $t1->get_nodes(-order => 'depth');
326 for my $node (@preorder) {
327 #print "\n$node";
328 if ($colors) {
329 if ($node->has_tag('Rcolor')) {
330 $Rcolor{$node} = $node->get_tag_values('Rcolor')
331 } else {
332 $Rcolor{$node} = 0
334 if ($node->has_tag('Gcolor')) {
335 $Gcolor{$node} = $node->get_tag_values('Gcolor')
336 } else {
337 $Gcolor{$node} = 0
339 if ($node->has_tag('Bcolor')) {
340 $Bcolor{$node} = $node->get_tag_values('Bcolor')
341 } else {
342 $Bcolor{$node} = 0
344 #print "\t$Rcolor{$node}\t$Gcolor{$node}\t$Bcolor{$node}";
348 if ($compact) { # ragged right, ignoring branch lengths
350 $width = 0;
351 shift @preorder; # skip root
352 for my $node (@preorder) {
353 $xx{$node} = $xx{$node->ancestor} + $xstep;
354 $width = $xx{$node} if $xx{$node} > $width;
356 $width += $tip + $tipwidth1 + $right;
358 } else { # set to aspect ratio and use branch lengths if available
360 my $total_height = (scalar($t1->get_leaf_nodes) - 1) * $ystep;
361 my $scale_factor = $total_height * $ratio / $t1->get_root_node->height;
363 $width = $t1->get_root_node->height * $scale_factor;
364 $width += $left + $xstep;
365 $width += $tip + $tipwidth1 + $right;
367 shift @preorder; # skip root
368 for my $node (@preorder) {
369 my $bl = $node->branch_length;
370 $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
371 $xx{$node} = $xx{$node->ancestor} + $bl * $scale_factor;
376 if ($t2) {
378 $x = $left + $xstep * ($root1->height + 1) + $tip;
379 $x += $tipwidth1 + $tip + $column + $tip;
380 my $y = $bottom;
382 for my $taxon (reverse @taxa2) {
383 $xx{$taxon} = $x + $tipwidth2 + $tip;
384 $yy{$taxon} = $y;
385 $y += $ystep2;
387 $x += $xstep;
389 my @stack;
390 my @queue; # postorder traversal
391 push @stack, $t2->get_root_node;
392 while (@stack) {
393 my $node = pop @stack;
394 push @queue, $node;
395 foreach my $child ($node->each_Descendent(-sortby => 'internal_id')) {
396 push @stack, $child;
399 @queue = reverse @queue;
401 for my $node (@queue) {
402 if (!$node->is_Leaf) {
403 my @children = $node->each_Descendent;
404 my $child = shift @children;
405 my $xmax = $xx{$child};
406 my $ymin = my $ymax = $yy{$child};
407 foreach $child (@children) {
408 $xmax = $xx{$child} if $xx{$child} > $xmax;
409 $ymax = $yy{$child} if $yy{$child} > $ymax;
410 $ymin = $yy{$child} if $yy{$child} < $ymin;
412 $xx{$node} = $xmax + $xstep;
413 $yy{$node} = ($ymin + $ymax)/2;
419 return $self;
422 =head2 print
424 Title : print
425 Usage : $obj->print();
426 Function: Outputs $obj in Encapsulated PostScript (EPS) format
427 Returns :
428 Args : -file => filename (optional)
430 =cut
432 sub print {
433 my($self,@args) = @_;
435 my ($file) = $self->_rearrange([qw(FILE)], @args);
436 $file ||= "output.eps"; # stdout
438 open my $INFO, '>', $file or $self->throw("Could not write file '$file': $!");
439 print $INFO "%!PS-Adobe-\n";
440 print $INFO "%%BoundingBox: 0 0 ", $width, " ", $height, "\n";
441 print $INFO "1 setlinewidth\n";
442 print $INFO "/$font findfont\n";
443 print $INFO "$size scalefont\n";
444 print $INFO "setfont\n";
446 # taxa labels are centered to 1/3 the font size
448 for my $taxon (reverse $t1->get_leaf_nodes) {
449 if ($colors) {
450 print $INFO $Rcolor{$taxon}, " ", $Gcolor{$taxon}, " ", $Bcolor{$taxon}, " setrgbcolor\n";
452 print $INFO $xx{$taxon} + $tip, " ", $yy{$taxon} - $size / 3, " moveto\n";
453 print $INFO "(", $taxon->id, ") show\n";
456 my $root1 = $t1->get_root_node;
457 for my $node ($t1->get_nodes) {
458 if ($node->ancestor) {
459 # print $xx{$node->ancestor}, " ", $yy{$node->ancestor}, " moveto\n";
460 # print $xx{$node}, " ", $yy{$node}, " lineto\n";
461 if ($colors) {
462 print $INFO "stroke\n";
463 print $INFO $Rcolor{$node}, " ", $Gcolor{$node}, " ",
464 $Bcolor{$node}, " setrgbcolor\n";
466 print $INFO $xx{$node}, " ", $yy{$node}, " moveto\n";
467 print $INFO $xx{$node->ancestor}, " ", $yy{$node}, " lineto\n";
468 if( $bootstrap ) {
469 print $INFO $xx{$node->ancestor}+ $size/10, " ", $yy{$node->ancestor} - ($size / 3), " moveto\n";
470 print $INFO "(", $node->ancestor->id, ") show\n";
471 print $INFO $xx{$node->ancestor}, " ", $yy{$node}, " moveto\n";
473 print $INFO $xx{$node->ancestor}, " ", $yy{$node->ancestor}, " lineto\n";
478 my $ymin = $yy{$root1};
479 my $ymax = $yy{$root1};
480 foreach my $child ($root1->each_Descendent) {
481 $ymax = $yy{$child} if $yy{$child} > $ymax;
482 $ymin = $yy{$child} if $yy{$child} < $ymin;
484 my $zz = ($ymin + $ymax)/2;
485 if ($colors) {
486 print $INFO "stroke\n";
487 print $INFO $Rcolor{$root1}, " ", $Gcolor{$root1}, " ", $Bcolor{$root1}, " setrgbcolor\n";
489 print $INFO $xx{$root1}, " ", $zz, " moveto\n";
490 print $INFO $xx{$root1} - $xstep, " ", $zz, " lineto\n";
492 if ($t2) {
494 for my $taxon (reverse $t2->get_leaf_nodes) {
495 my $tiplen2 = PostScript::Metrics::stringwidth($taxon->id,$font,$size);
496 print $INFO $xx{$taxon} - $tiplen2 - $tip, " ",
497 $yy{$taxon} - $size / 3, " moveto\n";
498 printf $INFO "(%s) show\n", $taxon->id;
501 for my $node ($t2->get_nodes) {
502 if ($node->ancestor) {
503 print $INFO $xx{$node}, " ", $yy{$node}, " moveto\n";
504 print $INFO $xx{$node->ancestor}, " ", $yy{$node}, " lineto\n";
505 print $INFO $xx{$node->ancestor}, " ",
506 $yy{$node->ancestor}, " lineto\n";
510 my $root2 = $t2->get_root_node;
511 my $ymin = $yy{$root2};
512 my $ymax = $yy{$root2};
513 foreach my $child2 ($root2->each_Descendent) {
514 $ymax = $yy{$child2} if $yy{$child2} > $ymax;
515 $ymin = $yy{$child2} if $yy{$child2} < $ymin;
517 my $zz = ($ymin + $ymax)/2;
518 print $INFO $xx{$root2}, " ", $zz, " moveto\n";
519 print $INFO $xx{$root2} + $xstep, " ", $zz, " lineto\n";
521 my @taxa1 = $t1->get_leaf_nodes;
522 my @taxa2 = $t2->get_leaf_nodes;
524 # set default connection between $t1 and $t2 taxa, unless
525 # overridden by the user (the latter not implemented yet)
527 foreach my $taxon1 (@taxa1) {
528 foreach my $taxon2 (@taxa2) {
529 if ($taxon1->id eq $taxon2->id) {
530 $taxon1->add_tag_value('connection',$taxon2);
531 last;
536 # draw connection lines between $t1 and $t2 taxa
538 print $INFO "stroke\n";
539 print $INFO "0.5 setgray\n";
541 foreach my $taxon1 (@taxa1) {
542 my @match = $taxon1->get_tag_values('connection');
543 foreach my $taxon2 (@match) {
544 my $x0 = $xx{$taxon1} + $tip
545 + PostScript::Metrics::stringwidth($taxon1->id,$font,$size) + $tip;
546 my $x1 = $xx{$taxon1} + $tip + $tipwidth1 + $tip;
547 my $y1 = $yy{$taxon1};
548 my $x2 = $xx{$taxon2} - $tip - $tipwidth2 - $tip;
549 my $x3 = $xx{$taxon2} - $tip
550 - PostScript::Metrics::stringwidth($taxon2->id,$font,$size) - $tip;
551 my $y2 = $yy{$taxon2};
552 print $INFO $x0, " ", $y1, " moveto\n";
553 print $INFO $x1, " ", $y1, " lineto\n";
554 print $INFO $x2, " ", $y2, " lineto\n";
555 print $INFO $x3, " ", $y2, " lineto\n";
561 print $INFO "stroke\n";
562 print $INFO "showpage\n";