tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / TreeIO / nexus.pm
blob44044315322a0825fb05dc01f88d2e60d1a64ae9
1 # $Id$
3 # BioPerl module for Bio::TreeIO::nexus
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-open-bio-dot-org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::TreeIO::nexus - A TreeIO driver module for parsing Nexus tree output from PAUP
19 =head1 SYNOPSIS
21 use Bio::TreeIO;
22 my $in = Bio::TreeIO->new(-file => 't/data/cat_tre.tre');
23 while( my $tree = $in->next_tree ) {
26 =head1 DESCRIPTION
28 This is a driver module for parsing PAUP Nexus tree format which
29 basically is just a remapping of trees.
31 =head2 Comments
33 The nexus format allows node comments that are placed inside square
34 brackets. Usually the comments (implemented as tags for nodes) are
35 used to give a name for an internal node or record the bootstap value,
36 but other uses are possible.
38 The FigTree program by Andrew Rambaut adds various rendering
39 parameters inside comments and flags these comments by starting them
40 with '&!'. The parameters implemented here are 'label' and 'color'.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via
68 the web:
70 http://bugzilla.open-bio.org/
72 =head1 AUTHOR - Jason Stajich
74 Email jason-at-open-bio-dot-org
76 =head1 APPENDIX
78 The rest of the documentation details each of the object methods.
79 Internal methods are usually preceded with a _
81 =cut
83 # Let the code begin...
85 package Bio::TreeIO::nexus;
86 use strict;
88 use Bio::Event::EventGeneratorI;
89 use IO::String;
91 use base qw(Bio::TreeIO);
93 =head2 new
95 Title : new
96 Args : -header => boolean default is true
97 print/do not print #NEXUS header
98 -translate => boolean default is true
99 print/do not print Node Id translation to a number
101 =cut
103 sub _initialize {
104 my $self = shift;
105 $self->SUPER::_initialize(@_);
106 my ( $hdr, $trans ) = $self->_rearrange(
108 qw(HEADER
109 TRANSLATE)
113 $self->header( defined $hdr ? $hdr : 1 );
114 $self->translate_node( defined $trans ? $trans : 1 );
117 =head2 next_tree
119 Title : next_tree
120 Usage : my $tree = $treeio->next_tree
121 Function: Gets the next tree in the stream
122 Returns : Bio::Tree::TreeI
123 Args : none
126 =cut
128 sub next_tree {
129 my ($self) = @_;
130 unless ( $self->{'_parsed'} ) {
131 $self->_parse;
133 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
136 sub rewind {
137 shift->{'_treeiter'} = 0;
140 sub _parse {
141 my ($self) = @_;
143 $self->{'_parsed'} = 1;
144 $self->{'_treeiter'} = 0;
146 while ( defined( $_ = $self->_readline ) ) {
147 next if /^\s+$/;
148 last;
150 return unless ( defined $_ );
152 unless (/^\#NEXUS/i) {
153 $self->warn("File does not start with #NEXUS"); #'
154 return;
157 my $line;
158 while ( defined( $_ = $self->_readline ) ) {
159 $line .= $_;
161 my @sections = split( /#NEXUS/i, $line );
162 for my $s (@sections) {
163 my %translate;
164 if ( $self->verbose > 0 ) {
165 while ( $s =~ s/(\[[^\]]+\])// ) {
166 $self->debug("removing comment $1\n");
169 else {
170 $s =~ s/(\[[^\]]+\])//g;
173 if ( $s =~ /begin trees;(.+)(end;)?/si ) {
174 my $trees = $1;
175 if ( $trees =~ s/\s+translate\s+([^;]+);//i ) {
176 my @trans;
177 my $tr = $1;
179 while ($tr =~ m{\s*([^,\s]+?\s+(?:'[^']+'|[^'\s]+)),?}gc) {
180 push @trans, $1;
182 for my $n ( @trans ) {
183 if ($n =~ /^\s*(\S+)\s+(.+)$/) {
184 my ($id,$tag) = ($1,$2);
185 $tag =~ s/[\s,]+$//; # remove the extra spaces of the last taxon
186 $translate{$id} = $tag;
190 else {
191 $self->debug("no translate in: $trees\n");
193 while ($trees =~ /\s+tree\s+\*?\s*(\S+)\s*\=
194 \s*(?:\[\S+\])?\s*([^\;]+;)/igx)
196 my ( $tree_name, $tree_str ) = ( $1, $2 );
198 # MrBayes does not print colons for node label
199 # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g;
200 my $buf = IO::String->new($tree_str);
201 my $treeio = Bio::TreeIO->new(
202 -format => 'newick',
203 -fh => $buf
205 my $tree = $treeio->next_tree;
206 foreach my $node ( grep { $_->is_Leaf } $tree->get_nodes ) {
207 my $id = $node->id;
208 my $lookup = $translate{$id};
209 $node->id( $lookup || $id );
211 $tree->id($tree_name) if defined $tree_name;
212 push @{ $self->{'_trees'} }, $tree;
215 else {
216 $self->debug("begin_trees failed: $s\n");
219 if ( !@sections ) {
220 $self->debug("warn no sections: $line\n");
224 =head2 write_tree
226 Title : write_tree
227 Usage : $treeio->write_tree($tree);
228 Function: Writes a tree onto the stream
229 Returns : none
230 Args : Bio::Tree::TreeI
233 =cut
235 sub write_tree {
236 my ( $self, @trees ) = @_;
237 if ( $self->header ) {
238 $self->_print("#NEXUS\n\n");
240 my $translate = $self->translate_node;
241 my $time = localtime();
242 $self->_print( sprintf( "Begin trees; [Treefile created %s]\n", $time ) );
244 my ( $first, $nodecter, %node2num ) = ( 0, 1 );
245 foreach my $tree (@trees) {
247 if ( $first == 0
248 && $translate )
250 $self->_print("\tTranslate\n");
251 $self->_print(
252 join(
253 ",\n",
254 map {
255 $node2num{ $_->id } = $nodecter;
256 sprintf( "\t\t%d %s", $nodecter++, $_->id )
258 grep { $_->is_Leaf } $tree->get_nodes
260 "\n;\n"
263 my @data = _write_tree_Helper( $tree->get_root_node, \%node2num );
264 if ( $data[-1] !~ /\)$/ ) {
265 $data[0] = "(" . $data[0];
266 $data[-1] .= ")";
269 # by default all trees in bioperl are currently rooted
270 # something we'll try and fix one day....
271 $self->_print(
272 sprintf(
273 "\t tree %s = [&%s] %s;\n",
274 ( $tree->id || sprintf( "Bioperl_%d", $first + 1 ) ),
275 ( $tree->get_root_node ) ? 'R' : 'U',
276 join( ',', @data )
279 $first++;
281 $self->_print("End;\n");
282 $self->flush if $self->_flush_on_write && defined $self->_fh;
283 return;
286 sub _write_tree_Helper {
287 my ( $node, $node2num ) = @_;
288 return () if ( !defined $node );
289 my @data;
291 foreach my $n ( $node->each_Descendent() ) {
292 push @data, _write_tree_Helper( $n, $node2num );
294 if ( @data > 1 ) { # internal node
295 $data[0] = "(" . $data[0];
296 $data[-1] .= ")";
298 # let's explicitly write out the bootstrap if we've got it
299 my $b;
301 my $bl = $node->branch_length;
302 if ( !defined $bl ) {
304 elsif ( $bl =~ /\#/ ) {
305 $data[-1] .= $bl;
307 else {
308 $data[-1] .= ":$bl";
310 if ( defined( $b = $node->bootstrap ) ) {
311 $data[-1] .= sprintf( "[%s]", $b );
313 elsif ( defined( $b = $node->id ) ) {
314 $b = $node2num->{$b} if ( $node2num->{$b} ); # translate node2num
315 $data[-1] .= sprintf( "[%s]", $b ) if defined $b;
318 # FigTree comments start
319 my $comment_flag;
320 $comment_flag = 0
321 if ( $node->has_tag('color') or $node->has_tag('label') );
323 $data[-1] .= '[&!' if defined $comment_flag;
325 if ( $node->has_tag('color')) {
326 my $color = $node->get_tag_values('color');
327 $data[-1] .= "color=$color";
328 $comment_flag++;
330 if ( $node->has_tag('label')) {
331 my $label = $node->get_tag_values('label');
332 $data[-1] .= ',' if $comment_flag;
333 $data[-1] .= 'label="'. $label. '"';
335 $data[-1] .= ']' if defined $comment_flag;
336 # FigTree comments end
340 else { # leaf node
341 if ( defined $node->id || defined $node->branch_length ) {
342 my $id = defined $node->id ? $node->id : '';
343 if ( length($id) && $node2num->{$id} ) {
344 $id = $node2num->{$id};
346 if ( $node->has_tag('color')) {
347 my ($color) = $node->get_tag_values('color');
348 $id .= "[&!color=$color\]";
350 push @data,
351 sprintf( "%s%s",
352 $id,
353 defined $node->branch_length
354 ? ":" . $node->branch_length
355 : '' );
358 return @data;
361 =head2 header
363 Title : header
364 Usage : $obj->header($newval)
365 Function:
366 Example :
367 Returns : value of header (a scalar)
368 Args : on set, new value (a scalar or undef, optional)
371 =cut
373 sub header {
374 my $self = shift;
376 return $self->{'header'} = shift if @_;
377 return $self->{'header'};
380 =head2 translate_node
382 Title : translate_node
383 Usage : $obj->translate_node($newval)
384 Function:
385 Example :
386 Returns : value of translate_node (a scalar)
387 Args : on set, new value (a scalar or undef, optional)
390 =cut
392 sub translate_node {
393 my $self = shift;
395 return $self->{'translate_node'} = shift if @_;
396 return $self->{'translate_node'};