bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / TreeIO / newick.pm
blob40ef82f20efe373e42e6758c38b8e9440fd80465
1 # $Id$
3 # BioPerl module for Bio::TreeIO::newick
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::TreeIO::newick - TreeIO implementation for parsing
16 Newick/New Hampshire/PHYLIP format.
18 =head1 SYNOPSIS
20 # do not use this module directly
21 use Bio::TreeIO;
22 my $treeio = Bio::TreeIO->new(-format => 'newick',
23 -file => 't/data/LOAD_Ccd1.dnd');
24 my $tree = $treeio->next_tree;
26 =head1 DESCRIPTION
28 This module handles parsing and writing of Newick/PHYLIP/New Hampshire format.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to the
36 Bioperl mailing list. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41 =head2 Reporting Bugs
43 Report bugs to the Bioperl bug tracking system to help us keep track
44 of the bugs and their resolution. Bug reports can be submitted via the
45 web:
47 http://bugzilla.open-bio.org/
49 =head1 AUTHOR - Jason Stajich
51 Email jason-at-bioperl-dot-org
53 =head1 APPENDIX
55 The rest of the documentation details each of the object methods.
56 Internal methods are usually preceded with a _
58 =cut
61 # Let the code begin...
64 package Bio::TreeIO::newick;
65 use vars qw($DefaultBootstrapStyle);
66 use strict;
68 use Bio::Event::EventGeneratorI;
70 #initialize some package variables, could use 'our' but fails in perl < 5.6
72 $DefaultBootstrapStyle = 'traditional';
73 use base qw(Bio::TreeIO);
76 =head2 new
78 Title : new
79 Args : -print_count => boolean default is false
80 -bootstrap_style => set the bootstrap style (one of nobranchlength,
81 molphy, traditional)
82 -order_by => set the order by sort method
83 (see L<Bio::Node::Node::each_Descendent()> )
85 =cut
# Constructor hook: runs the parent initialisation, then applies the
# newick-specific named arguments.
#
#   -print_count     => stored through print_tree_count(); false becomes 0
#   -bootstrap_style => stored through bootstrap_style(); falls back to
#                       $DefaultBootstrapStyle when not supplied
#   -order_by        => stored through order_by() only when defined
#
# Returns nothing.
sub _initialize {
    my $self = shift;
    $self->SUPER::_initialize(@_);

    my @arg_names = qw(PRINT_COUNT BOOTSTRAP_STYLE ORDER_BY);
    my ($count_flag, $bs_style, $sort_order) =
        $self->_rearrange(\@arg_names, @_);

    # Normalise a missing/false count flag to an explicit 0.
    $self->print_tree_count($count_flag ? $count_flag : 0);

    # A missing/false style means "use the package default".
    $self->bootstrap_style($bs_style ? $bs_style : $DefaultBootstrapStyle);

    if ( defined $sort_order ) {
        $self->order_by($sort_order);
    }
    return;
}
101 =head2 next_tree
103 Title : next_tree
104 Usage : my $tree = $treeio->next_tree
105 Function: Gets the next tree in the stream
106 Returns : L<Bio::Tree::TreeI>
107 Args : none
110 =cut
# Read the next tree record from the stream and turn it into a series of
# events on the object returned by $self->_eventHandler.
#
# Returns whatever the handler's end_document() returns once the
# terminating ';' is reached, or nothing when there is no further input
# or the record has no ';'.
#
# The record is held in a lexical variable instead of the global $_, so
# the caller's $_ is left untouched.
sub next_tree {
    my ($self) = @_;

    # A record ends with ';' followed by a newline.
    local $/ = ";\n";

    my $entry = $self->_readline;
    return unless $entry;
    $entry =~ s/[\r\n]//gs;

    my $score;
    my $despace = sub {my $dirty = shift; $dirty =~ s/\s+//gs; return $dirty};
    my $dequote = sub {my $dirty = shift; $dirty =~ s/^"?\s*(.+?)\s*"?$/$1/; return $dirty};

    # Strip whitespace outside double-quoted labels and drop the quotes
    # themselves; text inside the quotes keeps its internal spacing.
    $entry =~ s/([^"]*)(".+?")([^"]*)/$despace->($1) . $dequote->($2) . $despace->($3)/egsx;

    # An optional leading [ ... ] block carries the tree score, possibly
    # prefixed with "lh=".
    if( $entry =~ s/^\s*\[([^\]]+)\]// ) {
        my $match = $1;
        $match =~ s/\s//g;
        $match =~ s/lh\=//;
        if( $match =~ /([-\d\.+]+)/ ) {
            $score = $1;
        }
    }

    $self->debug("entry is $entry\n");

    # Emit <name>text</name> as three consecutive handler events.
    my $emit_text = sub {
        my ($name, $text) = @_;
        $self->_eventHandler->start_element( { 'Name' => $name } );
        $self->_eventHandler->characters($text);
        $self->_eventHandler->end_element( { 'Name' => $name } );
        return;
    };

    my $chars = '';    # text collected since the last structural character
    $self->_eventHandler->start_document;

    # $lastevent is the most recent structural character seen and
    # $prev_event the one before it.
    my ($prev_event, $lastevent) = ('', '');

    foreach my $ch ( split(//, $entry) ) {
        if( $ch eq ';' ) {
            my $tree = $self->_eventHandler->end_document($chars);
            $tree->score($score) if defined $score;
            if( $self->internal_node_id eq 'bootstrap' ) {
                $tree->move_id_to_bootstrap;
            }
            return $tree;
        } elsif( $ch eq '(' ) {
            $chars = '';
            $self->_eventHandler->start_element( { 'Name' => 'tree' } );
        } elsif( $ch eq ')' ) {
            if( length($chars) ) {
                if( $lastevent eq ':' ) {
                    # The node was already opened at the ':'; the
                    # pending text is its branch length.
                    $emit_text->('branch_length', $chars);
                    # Step back over the ':' so the leaf test below sees
                    # what preceded the label.
                    $lastevent = $prev_event;
                } else {
                    $self->debug("internal node, id with no branchlength is $chars\n");
                    $self->_eventHandler->start_element( { 'Name' => 'node' } );
                    $emit_text->('id', $chars);
                }
                # A label directly after ')' belongs to an internal node.
                my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0;
                $emit_text->('leaf', $leafstatus);
            } else {
                $self->_eventHandler->start_element( { 'Name' => 'node' } );
            }

            $self->_eventHandler->end_element( { 'Name' => 'node' } );
            $self->_eventHandler->end_element( { 'Name' => 'tree' } );
            $chars = '';
        } elsif( $ch eq ',' ) {
            if( length($chars) ) {
                if( $lastevent eq ':' ) {
                    $emit_text->('branch_length', $chars);
                    $lastevent = $prev_event;
                    $chars = '';
                } else {
                    $self->debug("leaf id with no branchlength is $chars\n");
                    $self->_eventHandler->start_element( { 'Name' => 'node' } );
                    $emit_text->('id', $chars);
                }
            } else {
                $self->_eventHandler->start_element( { 'Name' => 'node' } );
            }
            my $leafstatus = ( $lastevent ne ')' ) ? 1 : 0;
            $emit_text->('leaf', $leafstatus);
            $self->_eventHandler->end_element( { 'Name' => 'node' } );
            $chars = '';
        } elsif( $ch eq ':' ) {
            $self->debug("id with a branchlength coming is $chars\n");
            $self->_eventHandler->start_element( { 'Name' => 'node' } );
            $emit_text->('id', $chars);
            $chars = '';
        } else {
            # Ordinary character: keep collecting and leave the
            # structural-event history unchanged.
            $chars .= $ch;
            next;
        }
        $prev_event = $lastevent;
        $lastevent  = $ch;
    }
    return;
}
232 =head2 write_tree
234 Title : write_tree
235 Usage : $treeio->write_tree($tree);
236 Function: Write a tree out to data stream in newick/phylip format
237 Returns : none
238 Args : L<Bio::Tree::TreeI> object
240 =cut
# Write one or more trees to the output stream in newick/phylip format.
#
# Args    : a list of Bio::Tree::TreeI objects
# Returns : nothing
# Throws  : via $self->throw when an argument is not a blessed object
#           that isa Bio::Tree::TreeI
#
# When print_tree_count() is true, the number of trees is printed first.
# When newline_each_node() is true, the pieces are joined with ",\n"
# instead of ",".
sub write_tree {
    my ($self, @trees) = @_;

    # Core module; loaded here so the block does not depend on another
    # part of the file having imported it.
    require Scalar::Util;

    my $orderby         = $self->order_by;
    my $bootstrap_style = $self->bootstrap_style;
    if( $self->print_tree_count ) {
        $self->_print(sprintf(" %d\n", scalar @trees));
    }
    my $nl = $self->newline_each_node;

    foreach my $tree ( @trees ) {
        # blessed() is false for undef, plain strings and unblessed
        # references, so ->isa is only called on real objects. Testing
        # blessedness, not the text of ref(), also avoids rejecting tree
        # classes whose name happens to contain "array".
        unless( Scalar::Util::blessed($tree)
                && $tree->isa('Bio::Tree::TreeI') ) {
            $self->throw("Calling write_tree with non Bio::Tree::TreeI object\n");
        }

        my @data = _write_tree_Helper($tree->get_root_node,
                                      $bootstrap_style,
                                      $orderby,
                                      $nl);
        if( $nl ) {
            # Remove the last newline. Guarded because the helper returns
            # an empty list when there is no root node, and $data[-1] on
            # an empty array cannot be modified.
            chomp($data[-1]) if @data;
            $self->_print(join(",\n", @data), ";\n");
        } else {
            $self->_print(join(',', @data), ";\n");
        }
    }

    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return;
}
# Recursively serialise a node and everything below it.
#
# Args    : $node    - node object; undef yields an empty list
#           $style   - bootstrap style string; matched case-insensitively
#                      against 'nobranchlength' and 'molphy', anything
#                      else is treated as the traditional layout
#           $orderby - passed straight through to each_Descendent()
#           $nl      - true to put "\n" after '(' and ')'
# Returns : list of string fragments, one per child subtree, with this
#           node's parentheses and label attached to the first and last;
#           the caller joins them with ','
#
# A label is only replaced by '' when it is undefined, so a node whose
# id is the string "0" keeps its name.
sub _write_tree_Helper {
    my ($node, $style, $orderby, $nl) = @_;
    $style = '' unless defined $style;
    return () if !defined $node;

    my @data;
    foreach my $n ( $node->each_Descendent($orderby) ) {
        push @data, _write_tree_Helper($n, $style, $orderby, $nl);
    }

    # let's explicitly write out the bootstrap if we've got it
    my $id = $node->id_output;
    my $bs = $node->bootstrap;
    $bs =~ s/\s+//g if defined $bs;    # written without any whitespace
    my $bl = $node->branch_length;

    # No branch length is written when there is none, or when the style
    # asks for lengths to be dropped.
    my $omit_length = ( !defined $bl
                        || !length($bl)
                        || $style =~ /nobranchlength/i );

    if( @data ) {
        # Wrap the children in parentheses.
        if( $nl ) {
            $data[0]   = "(\n" . $data[0];
            $data[-1] .= ")\n";
        } else {
            $data[0]   = "(" . $data[0];
            $data[-1] .= ")";
        }

        if( $node->is_Leaf ) {
            $node->debug("node is a leaf! This is unexpected...");

            my $label = defined $id ? $id : '';
            $data[-1] .= $omit_length ? $label : "$label:$bl";
        } elsif( $omit_length ) {
            # The bootstrap takes precedence over the id as the label.
            if( defined $id || defined $bs ) {
                $data[-1] .= defined $bs ? $bs : $id;
            }
        } elsif( $style =~ /molphy/i ) {
            # molphy layout: id, then branch length, then [bootstrap]
            if( defined $id ) {
                $data[-1] .= $id;
            }
            # A branch length containing '#' is appended without ':'.
            if( $bl =~ /\#/ ) {
                $data[-1] .= $bl;
            } else {
                $data[-1] .= ":$bl";
            }
            if( defined $bs ) {
                $data[-1] .= "[$bs]";
            }
        } else {
            # traditional style of
            # ((A:1,B:2)81:3); where 3 is internal node branch length
            # and 81 is bootstrap/node label
            if( defined $bs || defined $id ) {
                $data[-1] .= defined $bs ? "$bs:$bl" : "$id:$bl";
            } elsif( $bl =~ /\#/ ) {
                $data[-1] .= $bl;
            } else {
                $data[-1] .= ":$bl";
            }
        }
    } elsif( defined $id || defined $bl ) {
        # Node without any serialised children: "id" or "id:length".
        my $label = defined $id ? $id : '';
        push @data, $omit_length ? $label : "$label:$bl";
    }

    return @data;
}
355 =head2 print_tree_count
357 Title : print_tree_count
358 Usage : $obj->print_tree_count($newval)
359 Function: Get/Set flag for printing out the tree count (paml,protml way)
360 Returns : value of print_tree_count (a scalar)
361 Args : on set, new value (a scalar or undef, optional)
364 =cut
# Get/set the flag that makes write_tree() print the number of trees
# before the trees themselves.
#
# With an argument: stores it and returns the stored value.
# Without one: returns the stored value, or 0 when it is unset or false.
sub print_tree_count {
    my $self = shift;

    if( @_ ) {
        my $new_flag = shift;
        $self->{'_print_tree_count'} = $new_flag;
        return $self->{'_print_tree_count'};
    }

    my $current = $self->{'_print_tree_count'};
    return $current ? $current : 0;
}
372 =head2 bootstrap_style
374 Title : bootstrap_style
375 Usage : $obj->bootstrap_style($newval)
376 Function: A description of how bootstraps and branch lengths are
377 written, as the ID part of the internal node or else in []
378 in the branch length (Molphy-like; I am sure there is a
379 better name for this but am not sure where to go for some
380 sort of format documentation)
382 If no branch lengths are requested then no bootstraps are usually
383 written (unless someone REALLY wants this functionality...)
385 Can take on strings which contain the possible values of
386 'nobranchlength' --> don't draw any branch lengths - this
387 is helpful if you don't want to have to
388 go through and delete branch len on all nodes
389 'molphy' --> draw bootstraps (100) like
390 (A:0.11,B:0.22):0.33[100];
391 'traditional' --> draw bootstraps (100) like
392 (A:0.11,B:0.22)100:0.33;
393 Returns : value of bootstrap_style (a scalar)
394 Args : on set, new value (a scalar or undef, optional)
397 =cut
# Get/set the bootstrap style used when writing trees.
#
# Args    : optional new value; accepted when it contains one of
#           'nobranchlength', 'molphy' or 'traditional'
#           (case-insensitive)
# Returns : the stored style, or $DefaultBootstrapStyle when none is
#           stored
#
# An unrecognised value produces a warning through $self->warn and the
# stored style is left unchanged.
sub bootstrap_style {
    my $self = shift;
    my $val  = shift;

    if( defined $val ) {
        # The alternatives are grouped so that all three are tested the
        # same way. The match is a containment test, which is also how
        # the style is interpreted when a tree is written.
        if( $val !~ /(?:nobranchlength|molphy|traditional)/i ) {
            $self->warn("requested an unknown bootstrap style $val, expect one of nobranchlength,molphy,traditional, not updating value. Default is $DefaultBootstrapStyle\n");
        } else {
            $self->{'_bootstrap_style'} = $val;
        }
    }

    return $self->{'_bootstrap_style'} || $DefaultBootstrapStyle;
}
413 =head2 order_by
415 Title : order_by
416 Usage : $obj->order_by($newval)
417 Function: Allow node order to be specified (typically "alpha")
418 See L<Bio::Node::Node::each_Descendent()>
419 Returns : value of order_by (a scalar)
420 Args : on set, new value (a scalar or undef, optional)
423 =cut
# Get/set the node ordering that write_tree() hands to
# each_Descendent().
#
# With an argument (including undef): stores it and returns it.
# Without one: returns the stored value, which may be undef.
sub order_by {
    my $self = shift;

    if( @_ ) {
        my $new_order = shift;
        $self->{'order_by'} = $new_order;
    }
    return $self->{'order_by'};
}