Simplify some code
[bioperl-live.git] / Bio / TreeIO / nexus.pm
blob6f497fc3698a57623722ad0207e9e2df8abe5ff4
2 # BioPerl module for Bio::TreeIO::nexus
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-open-bio-dot-org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::TreeIO::nexus - A TreeIO driver module for parsing Nexus tree output from PAUP
18 =head1 SYNOPSIS
20 use Bio::TreeIO;
21 my $in = Bio::TreeIO->new(-file => 't/data/cat_tre.tre');
22 while( my $tree = $in->next_tree ) {
25 =head1 DESCRIPTION
27 This is a driver module for parsing PAUP Nexus tree format which
28 basically is just a remapping of trees.
30 =head2 Comments
32 The nexus format allows node comments that are placed inside square
33 brackets. Usually the comments (implemented as tags for nodes) are
34 used to give a name for an internal node or record the bootstap value,
35 but other uses are possible.
37 The FigTree program by Andrew Rambaut adds various rendering
38 parameters inside comments and flags these comments by starting them
39 with '&!'. The parameters implemented here are 'label' and 'color'.
41 =head1 FEEDBACK
43 =head2 Mailing Lists
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 =head2 Support
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 reponsive experts will be able look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
63 =head2 Reporting Bugs
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via
67 the web:
69 https://github.com/bioperl/bioperl-live/issues
71 =head1 AUTHOR - Jason Stajich
73 Email jason-at-open-bio-dot-org
75 =head1 APPENDIX
77 The rest of the documentation details each of the object methods.
78 Internal methods are usually preceded with a _
80 =cut
82 # Let the code begin...
84 package Bio::TreeIO::nexus;
85 use strict;
87 use Bio::Event::EventGeneratorI;
88 use IO::String;
90 use base qw(Bio::TreeIO);
92 =head2 new
94 Title : new
95 Args : -header => boolean default is true
96 print/do not print #NEXUS header
97 -translate => boolean default is true
98 print/do not print Node Id translation to a number
100 =cut
102 sub _initialize {
103 my $self = shift;
104 $self->SUPER::_initialize(@_);
105 my ( $hdr, $trans ) = $self->_rearrange(
107 qw(HEADER
108 TRANSLATE)
112 $self->header( defined $hdr ? $hdr : 1 );
113 $self->translate_node( defined $trans ? $trans : 1 );
116 =head2 next_tree
118 Title : next_tree
119 Usage : my $tree = $treeio->next_tree
120 Function: Gets the next tree in the stream
121 Returns : Bio::Tree::TreeI
122 Args : none
125 =cut
127 sub next_tree {
128 my ($self) = @_;
129 unless ( $self->{'_parsed'} ) {
130 $self->_parse;
132 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
135 sub rewind {
136 shift->{'_treeiter'} = 0;
139 sub _parse {
140 my ($self) = @_;
142 $self->{'_parsed'} = 1;
143 $self->{'_treeiter'} = 0;
145 while ( defined( $_ = $self->_readline ) ) {
146 next if /^\s+$/;
147 last;
149 return unless ( defined $_ );
151 unless (/^\#NEXUS/i) {
152 $self->warn("File does not start with #NEXUS"); #'
153 return;
156 my $line;
157 while ( defined( $_ = $self->_readline ) ) {
158 $line .= $_;
160 my @sections = split( /#NEXUS/i, $line );
161 for my $s (@sections) {
162 my %translate;
163 if ( $self->verbose > 0 ) {
164 while ( $s =~ s/(\[[^\]]+\])// ) {
165 $self->debug("removing comment $1\n");
168 else {
169 $s =~ s/(\[[^\]]+\])//g;
172 if ( $s =~ /begin trees;(.+)(end;)?/si ) {
173 my $trees = $1;
174 if ( $trees =~ s/\s+translate\s+([^;]+);//i ) {
175 my @trans;
176 my $tr = $1;
178 while ($tr =~ m{\s*([^,\s]+?\s+(?:'[^']+'|[^'\s]+)),?}gc) {
179 push @trans, $1;
181 for my $n ( @trans ) {
182 if ($n =~ /^\s*(\S+)\s+(.+)$/) {
183 my ($id,$tag) = ($1,$2);
184 $tag =~ s/[\s,]+$//; # remove the extra spaces of the last taxon
185 $translate{$id} = $tag;
189 else {
190 $self->debug("no translate in: $trees\n");
192 while ($trees =~ /\s+tree\s+\*?\s*(\S+)\s*\=
193 \s*(?:\[\S+\])?\s*([^\;]+;)/igx)
195 my ( $tree_name, $tree_str ) = ( $1, $2 );
197 # MrBayes does not print colons for node label
198 # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g;
199 my $buf = IO::String->new($tree_str);
200 my $treeio = Bio::TreeIO->new(
201 -format => 'newick',
202 -fh => $buf
204 my $tree = $treeio->next_tree;
205 foreach my $node ( grep { $_->is_Leaf } $tree->get_nodes ) {
206 my $id = $node->id;
207 my $lookup = $translate{$id};
208 $node->id( $lookup || $id );
210 $tree->id($tree_name) if defined $tree_name;
211 push @{ $self->{'_trees'} }, $tree;
214 else {
215 $self->debug("begin_trees failed: $s\n");
218 if ( !@sections ) {
219 $self->debug("warn no sections: $line\n");
223 =head2 write_tree
225 Title : write_tree
226 Usage : $treeio->write_tree($tree);
227 Function: Writes a tree onto the stream
228 Returns : none
229 Args : Bio::Tree::TreeI
232 =cut
234 sub write_tree {
235 my ( $self, @trees ) = @_;
236 if ( $self->header ) {
237 $self->_print("#NEXUS\n\n");
239 my $translate = $self->translate_node;
240 my $time = localtime();
241 $self->_print( sprintf( "Begin trees; [Treefile created %s]\n", $time ) );
243 my ( $first, $nodecter, %node2num ) = ( 0, 1 );
244 foreach my $tree (@trees) {
246 if ( $first == 0
247 && $translate )
249 $self->_print("\tTranslate\n");
250 $self->_print(
251 join(
252 ",\n",
253 map {
254 $node2num{ $_->id } = $nodecter;
255 sprintf( "\t\t%d %s", $nodecter++, $_->id )
257 grep { $_->is_Leaf } $tree->get_nodes
259 "\n;\n"
262 my @data = _write_tree_Helper( $tree->get_root_node, \%node2num );
263 if ( $data[-1] !~ /\)$/ ) {
264 $data[0] = "(" . $data[0];
265 $data[-1] .= ")";
268 # by default all trees in bioperl are currently rooted
269 # something we'll try and fix one day....
270 $self->_print(
271 sprintf(
272 "\t tree %s = [&%s] %s;\n",
273 ( $tree->id || sprintf( "Bioperl_%d", $first + 1 ) ),
274 ( $tree->get_root_node ) ? 'R' : 'U',
275 join( ',', @data )
278 $first++;
280 $self->_print("End;\n");
281 $self->flush if $self->_flush_on_write && defined $self->_fh;
282 return;
285 sub _write_tree_Helper {
286 my ( $node, $node2num ) = @_;
287 return () if ( !defined $node );
288 my @data;
290 foreach my $n ( $node->each_Descendent() ) {
291 push @data, _write_tree_Helper( $n, $node2num );
293 if ( @data > 1 ) { # internal node
294 $data[0] = "(" . $data[0];
295 $data[-1] .= ")";
297 # FigTree comments start
298 my $comment_flag;
299 $comment_flag = 0
300 if ( $node->has_tag('color') or $node->has_tag('label') );
302 $data[-1] .= '[&!' if defined $comment_flag;
304 if ( $node->has_tag('color')) {
305 my $color = $node->get_tag_values('color');
306 $data[-1] .= "color=$color";
307 $comment_flag++;
309 if ( $node->has_tag('label')) {
310 my $label = $node->get_tag_values('label');
311 $data[-1] .= ',' if $comment_flag;
312 $data[-1] .= 'label="'. $label. '"';
314 $data[-1] .= ']' if defined $comment_flag;
315 # FigTree comments end
317 # let's explicitly write out the bootstrap if we've got it
318 my $b;
320 my $bl = $node->branch_length;
321 if ( !defined $bl ) {
323 elsif ( $bl =~ /\#/ ) {
324 $data[-1] .= $bl;
326 else {
327 $data[-1] .= ":$bl";
329 if ( defined( $b = $node->bootstrap ) ) {
330 $data[-1] .= sprintf( "[%s]", $b );
332 elsif ( defined( $b = $node->id ) ) {
333 $b = $node2num->{$b} if ( $node2num->{$b} ); # translate node2num
334 $data[-1] .= sprintf( "[%s]", $b ) if defined $b;
338 else { # leaf node
339 if ( defined $node->id || defined $node->branch_length ) {
340 my $id = defined $node->id ? $node->id : '';
341 if ( length($id) && $node2num->{$id} ) {
342 $id = $node2num->{$id};
344 if ( $node->has_tag('color')) {
345 my ($color) = $node->get_tag_values('color');
346 $id .= "[&!color=$color\]";
348 push @data,
349 sprintf( "%s%s",
350 $id,
351 defined $node->branch_length
352 ? ":" . $node->branch_length
353 : '' );
356 return @data;
359 =head2 header
361 Title : header
362 Usage : $obj->header($newval)
363 Function:
364 Example :
365 Returns : value of header (a scalar)
366 Args : on set, new value (a scalar or undef, optional)
369 =cut
371 sub header {
372 my $self = shift;
374 return $self->{'header'} = shift if @_;
375 return $self->{'header'};
378 =head2 translate_node
380 Title : translate_node
381 Usage : $obj->translate_node($newval)
382 Function:
383 Example :
384 Returns : value of translate_node (a scalar)
385 Args : on set, new value (a scalar or undef, optional)
388 =cut
390 sub translate_node {
391 my $self = shift;
393 return $self->{'translate_node'} = shift if @_;
394 return $self->{'translate_node'};