sync w/ main trunk
[bioperl-live.git] / Bio / TreeIO / newick.pm
bloba86f0a24b301b5d36960f5674c6db1a4274ceac3
1 # $Id$
3 # BioPerl module for Bio::TreeIO::newick
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason@bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::TreeIO::newick - TreeIO implementation for parsing
18 Newick/New Hampshire/PHYLIP format.
20 =head1 SYNOPSIS
22 # do not use this module directly
23 use Bio::TreeIO;
24 my $treeio = Bio::TreeIO->new(-format => 'newick',
25 -file => 't/data/LOAD_Ccd1.dnd');
26 my $tree = $treeio->next_tree;
28 =head1 DESCRIPTION
30 This module handles parsing and writing of Newick/PHYLIP/New Hampshire format.
32 =head1 FEEDBACK
34 =head2 Mailing Lists
36 User feedback is an integral part of the evolution of this and other
37 Bioperl modules. Send your comments and suggestions preferably to the
38 Bioperl mailing list. Your participation is much appreciated.
40 bioperl-l@bioperl.org - General discussion
41 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
43 =head2 Support
45 Please direct usage questions or support issues to the mailing list:
47 L<bioperl-l@bioperl.org>
49 rather than to the module maintainer directly. Many experienced and
50 responsive experts will be able to look at the problem and quickly
51 address it. Please include a thorough description of the problem
52 with code and data examples if at all possible.
54 =head2 Reporting Bugs
56 Report bugs to the Bioperl bug tracking system to help us keep track
57 of the bugs and their resolution. Bug reports can be submitted via the
58 web:
60 http://bugzilla.open-bio.org/
62 =head1 AUTHOR - Jason Stajich
64 Email jason-at-bioperl-dot-org
66 =head1 APPENDIX
68 The rest of the documentation details each of the object methods.
69 Internal methods are usually preceded with a _
71 =cut
74 # Let the code begin...
77 package Bio::TreeIO::newick;
78 use vars qw($DefaultBootstrapStyle);
79 use strict;
81 use Bio::Event::EventGeneratorI;
83 #initialize some package variables, could use 'our' but fails in perl < 5.6
85 $DefaultBootstrapStyle = 'traditional';
86 use base qw(Bio::TreeIO);
89 =head2 new
91 Title : new
92 Args : -print_count => boolean default is false
93 -bootstrap_style => set the bootstrap style (one of nobranchlength,
94 molphy, traditional)
95 -order_by => set the order by sort method
96 (see L<Bio::Tree::Node::each_Descendent()> )
98 =cut
# Object initialisation hook.
# Hands the full argument list to the parent class first, then picks the
# newick-specific named options out of it:
#   -print_count     => boolean, stored via print_tree_count (false -> 0)
#   -bootstrap_style => stored via bootstrap_style (false -> package default)
#   -order_by        => stored via order_by, only when defined
# Returns nothing.
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    my ($print_count, $style, $order_by) = $self->_rearrange(
        [ qw(PRINT_COUNT BOOTSTRAP_STYLE ORDER_BY) ],
        @args,
    );

    $self->print_tree_count( $print_count ? $print_count : 0 );
    $self->bootstrap_style( $style ? $style : $DefaultBootstrapStyle );
    if ( defined $order_by ) {
        $self->order_by($order_by);
    }
    return;
}
114 =head2 next_tree
116 Title : next_tree
117 Usage : my $tree = $treeio->next_tree
118 Function: Gets the next tree in the stream
119 Returns : L<Bio::Tree::TreeI>
120 Args : none
123 =cut
# Read the next tree record from the stream and feed it, character by
# character, to the event handler as start/end element events.
#
# Returns the object produced by the handler's end_document (with its score
# set when the record starts with a "[...]" block containing a number), or
# nothing when the stream is exhausted.
sub next_tree {
    my ($self) = @_;

    # One record per tree: records end with ';' followed by a newline.
    local $/ = ";\n";

    # Keep the record in a lexical. Assigning to the global $_ here would
    # overwrite the caller's $_ (and, inside a foreach, the aliased element).
    my $entry = $self->_readline;
    return unless $entry;
    $entry =~ s/[\r\n]//gs;

    my $score;
    my $despace = sub {my $dirty = shift; $dirty =~ s/\s+//gs; return $dirty};
    my $dequote = sub {my $dirty = shift; $dirty =~ s/^"?\s*(.+?)\s*"?$/$1/; return $dirty};

    # Strip whitespace outside double-quoted labels and unquote the labels.
    $entry =~ s/([^"]*)(".+?")([^"]*)/$despace->($1) . $dequote->($2) . $despace->($3)/egsx;

    # Optional leading "[...]" block: the first number-like run in it
    # (after dropping whitespace and a "lh=" prefix) becomes the tree score.
    if( $entry =~ s/^\s*\[([^\]]+)\]// ) {
        my $match = $1;
        $match =~ s/\s//g;
        $match =~ s/lh\=//;
        if( $match =~ /([-\d\.+]+)/ ) {
            $score = $1;
        }
    }

    $self->debug("entry is $entry\n");

    my $handler = $self->_eventHandler;

    # Emit <name>text</name> as a start/characters/end triple.
    my $emit = sub {
        my ($name, $text) = @_;
        $handler->start_element( { 'Name' => $name } );
        $handler->characters($text);
        $handler->end_element( { 'Name' => $name } );
        return;
    };

    my $chars = '';    # label / branch-length text accumulated so far
    $handler->start_document;

    # $lastevent is the most recent structural character seen; $prev_event
    # is the one before it, restored once a ':' branch length is consumed.
    my ($prev_event, $lastevent) = ('', '');

    foreach my $ch ( split(//, $entry) ) {
        if( $ch eq ';' ) {
            my $tree = $handler->end_document($chars);
            $tree->score($score) if defined $score;
            if( $self->internal_node_id eq 'bootstrap' ) {
                $tree->move_id_to_bootstrap;
            }
            return $tree;
        } elsif( $ch eq '(' ) {
            $chars = '';
            $handler->start_element( { 'Name' => 'tree' } );
        } elsif( $ch eq ')' ) {
            if( length($chars) ) {
                if( $lastevent eq ':' ) {
                    # Pending text is the branch length of an open node.
                    $emit->('branch_length', $chars);
                    $lastevent = $prev_event;
                } else {
                    $self->debug("internal node, id with no branchlength is $chars\n");
                    $handler->start_element( { 'Name' => 'node' } );
                    $emit->('id', $chars);
                }
                # A node that does not directly follow ')' is a leaf.
                $emit->('leaf', $lastevent ne ')' ? 1 : 0);
            } else {
                $handler->start_element( { 'Name' => 'node' } );
            }
            $handler->end_element( { 'Name' => 'node' } );
            $handler->end_element( { 'Name' => 'tree' } );
            $chars = '';
        } elsif( $ch eq ',' ) {
            if( length($chars) ) {
                if( $lastevent eq ':' ) {
                    $emit->('branch_length', $chars);
                    $lastevent = $prev_event;
                    $chars = '';
                } else {
                    $self->debug("leaf id with no branchlength is $chars\n");
                    $handler->start_element( { 'Name' => 'node' } );
                    $emit->('id', $chars);
                }
            } else {
                $handler->start_element( { 'Name' => 'node' } );
            }
            $emit->('leaf', $lastevent ne ')' ? 1 : 0);
            $handler->end_element( { 'Name' => 'node' } );
            $chars = '';
        } elsif( $ch eq ':' ) {
            # Text before ':' is a node id; the branch length follows.
            $self->debug("id with a branchlength coming is $chars\n");
            $handler->start_element( { 'Name' => 'node' } );
            $emit->('id', $chars);
            $chars = '';
        } else {
            # Ordinary character: keep accumulating, no event bookkeeping.
            $chars .= $ch;
            next;
        }
        $prev_event = $lastevent;
        $lastevent  = $ch;
    }

    # Record ended without a ';' terminator: still close the document.
    my $tree = $handler->end_document($chars);
    return $tree if $tree;
    return;
}
247 =head2 write_tree
249 Title : write_tree
250 Usage : $treeio->write_tree($tree);
251 Function: Write a tree out to data stream in newick/phylip format
252 Returns : none
253 Args : L<Bio::Tree::TreeI> object
255 =cut
# Write one or more trees to the output stream in newick format.
#
# Args    : list of Bio::Tree::TreeI objects
# Returns : nothing
# Throws  : via $self->throw when an argument is not a Bio::Tree::TreeI
#
# When print_tree_count is true, the number of trees is printed first.
# When newline_each_node is true, each node is written on its own line.
sub write_tree {
    my ($self, @trees) = @_;
    my $orderby         = $self->order_by;
    my $bootstrap_style = $self->bootstrap_style;
    if( $self->print_tree_count ) {
        $self->_print(sprintf(" %d\n", scalar @trees));
    }
    my $nl = $self->newline_each_node;
    foreach my $tree ( @trees ) {
        # Probe with eval so that unblessed references and other non-object
        # values reach the throw below instead of dying inside ->isa.
        my $is_tree = defined $tree
            && do { local $@; eval { $tree->isa('Bio::Tree::TreeI') } };
        if( ! $is_tree ) {
            $self->throw("Calling write_tree with non Bio::Tree::TreeI object\n");
        }
        my @data = _write_tree_Helper($tree->get_root_node,
                                      $bootstrap_style,
                                      $orderby,
                                      $nl);
        if( $nl ) {
            # Remove the last newline; guard against a tree with no nodes,
            # where $data[-1] does not exist and chomp would be fatal.
            chomp($data[-1]) if @data;
            $self->_print(join(",\n", @data), ";\n");
        } else {
            $self->_print(join(',', @data), ";\n");
        }
    }
    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return;
}
# Recursively serialise $node and its descendants into newick fragments.
#
# Args    : $node    - node object (undef yields an empty list)
#           $style   - bootstrap style string; matched case-insensitively
#                      against 'nobranchlength' and 'molphy', anything else
#                      is written in the traditional layout
#           $orderby - passed through to each_Descendent
#           $nl      - true to put a newline after '(' and ')'
# Returns : list of string fragments; the caller joins them with ','.
sub _write_tree_Helper {
    my ($node, $style, $orderby, $nl) = @_;
    $style = '' unless defined $style;
    return () if !defined $node;

    my @data;
    foreach my $n ( $node->each_Descendent($orderby) ) {
        push @data, _write_tree_Helper($n, $style, $orderby, $nl);
    }

    # let's explicitly write out the bootstrap if we've got it
    my $id = $node->id_output;
    my $bs = $node->bootstrap;    # bs better not have any spaces?
    $bs =~ s/\s+//g if defined $bs;
    my $bl = $node->branch_length;

    # True when no branch length should be written for this node.
    my $omit_bl = ( ! defined $bl || ! length($bl)
                    || ( $style && $style =~ /nobranchlength/i ) ) ? 1 : 0;

    # Label for leaf-style output. Tested with defined() rather than truth
    # so that a legitimate id of "0" is not replaced by the empty string.
    my $label = defined $id ? $id : '';

    if( @data ) {
        # Internal node: wrap the children's fragments in parentheses.
        if( $nl ) {
            $data[0] = "(\n" . $data[0];
            $data[-1] .= ")\n";
        } else {
            $data[0] = "(" . $data[0];
            $data[-1] .= ")";
        }

        if( $node->is_Leaf ) {
            $node->debug("node is a leaf! This is unexpected...");
            $data[-1] .= $omit_bl ? $label : "$label:$bl";
        } else {
            if( $omit_bl ) {
                if( defined $id || defined $bs ) {
                    $data[-1] .= defined $bs ? $bs : $id;
                }
            } elsif( $style =~ /molphy/i ) {
                # molphy layout: id, then branch length, then [bootstrap]
                if( defined $id ) {
                    $data[-1] .= $id;
                }
                if( $bl =~ /\#/ ) {
                    $data[-1] .= $bl;
                } else {
                    $data[-1] .= ":$bl";
                }
                if( defined $bs ) {
                    $data[-1] .= "[$bs]";
                }
            } else {
                # traditional style of
                # ((A:1,B:2)81:3); where 3 is internal node branch length
                # and 81 is bootstrap/node label
                if( defined $bs || defined $id ) {
                    $data[-1] .= defined $bs ? "$bs:$bl" : "$id:$bl";
                } elsif( $bl =~ /\#/ ) {
                    $data[-1] .= $bl;
                } else {
                    $data[-1] .= ":$bl";
                }
            }
        }
    } elsif( defined $id || defined $bl ) {
        # Leaf: "id" or "id:branch_length".
        push @data, $omit_bl ? $label : "$label:$bl";
    }
    return @data;
}
370 =head2 print_tree_count
372 Title : print_tree_count
373 Usage : $obj->print_tree_count($newval)
374 Function: Get/Set flag for printing out the tree count (paml,protml way)
375 Returns : value of print_tree_count (a scalar)
376 Args : on set, new value (a scalar or undef, optional)
379 =cut
# Accessor for the "print the number of trees first" flag.
# With an argument: stores it and returns the stored value.
# Without: returns the current flag, or 0 when it is unset or false.
sub print_tree_count {
    my ($self, @value) = @_;
    if ( @value ) {
        $self->{'_print_tree_count'} = $value[0];
        return $self->{'_print_tree_count'};
    }
    my $flag = $self->{'_print_tree_count'};
    return $flag ? $flag : 0;
}
387 =head2 bootstrap_style
389 Title : bootstrap_style
390 Usage : $obj->bootstrap_style($newval)
391 Function: A description of how bootstraps and branch lengths are
392 written, as the ID part of the internal node or else in []
393 in the branch length (Molphy-like; I am sure there is a
394 better name for this but am not sure where to go for some
395 sort of format documentation)
397 If no branch lengths are requested then no bootstraps are usually
398 written (unless someone REALLY wants this functionality...)
400 Can take on strings which contain the possible values of
401 'nobranchlength' --> don't draw any branch lengths - this
402 is helpful if you don't want to have to
403 go through and delete branch len on all nodes
404 'molphy' --> draw bootstraps (100) like
405 (A:0.11,B:0.22):0.33[100];
406 'traditional' --> draw bootstraps (100) like
407 (A:0.11,B:0.22)100:0.33;
408 Returns : value of bootstrap_style (a scalar)
409 Args : on set, new value (a scalar or undef, optional)
412 =cut
# Get/set how bootstraps and branch lengths are written.
#
# Args    : optional new value; accepted when it contains one of
#           'nobranchlength', 'molphy' or 'traditional' (case-insensitive).
#           Any other defined value triggers a warning and is not stored.
# Returns : the stored style, or $DefaultBootstrapStyle when none is stored.
sub bootstrap_style {
    my ($self, $val) = @_;
    if( defined $val ) {
        # No '^' anchor: in /^a|b|c/ the anchor applies to the first
        # alternative only, so the old pattern treated 'nobranchlength'
        # differently from the other two. The style is later matched by
        # substring, so validate it by substring as well.
        if( $val !~ /nobranchlength|molphy|traditional/i ) {
            $self->warn("requested an unknown bootstrap style $val, expect one of nobranchlength,molphy,traditional, not updating value. Default is $DefaultBootstrapStyle\n");
        } else {
            $self->{'_bootstrap_style'} = $val;
        }
    }
    return $self->{'_bootstrap_style'} || $DefaultBootstrapStyle;
}
428 =head2 order_by
430 Title : order_by
431 Usage : $obj->order_by($newval)
432 Function: Allow node order to be specified (typically "alpha")
433 See L<Bio::Tree::Node::each_Descendent()>
434 Returns : value of order_by (a scalar)
435 Args : on set, new value (a scalar or undef, optional)
438 =cut
# Accessor for the node ordering passed to each_Descendent when writing.
# With an argument (including undef): stores it. Always returns the value
# currently stored.
sub order_by {
    my ($self, @value) = @_;
    if ( @value ) {
        $self->{'order_by'} = $value[0];
    }
    return $self->{'order_by'};
}