bug 2356
[bioperl-live.git] / Bio / TreeIO / nexus.pm
blob280d29cf0625a34ce1238332cf3ed4de86b579e1
1 # $Id$
3 # BioPerl module for Bio::TreeIO::nexus
5 # Cared for by Jason Stajich <jason-at-open-bio-dot-org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::TreeIO::nexus - A TreeIO driver module for parsing Nexus tree output from PAUP
17 =head1 SYNOPSIS
19 use Bio::TreeIO;
20 my $in = Bio::TreeIO->new(-file => 't/data/cat_tre.tre');
21 while( my $tree = $in->next_tree ) {
24 =head1 DESCRIPTION
26 This is a driver module for parsing PAUP Nexus tree format which
27 basically is just a remapping of trees.
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to
35 the Bioperl mailing list. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40 =head2 Reporting Bugs
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted via
44 the web:
46 http://bugzilla.open-bio.org/
48 =head1 AUTHOR - Jason Stajich
50 Email jason-at-open-bio-dot-org
52 =head1 APPENDIX
54 The rest of the documentation details each of the object methods.
55 Internal methods are usually preceded with a _
57 =cut
59 # Let the code begin...
61 package Bio::TreeIO::nexus;
62 use strict;
64 use Bio::Event::EventGeneratorI;
65 use IO::String;
67 use base qw(Bio::TreeIO);
69 =head2 new
71 Title : new
72 Args : -header => boolean default is true
73 print/do not print #NEXUS header
74 -translate => boolean default is true
75 print/do not print Node Id translation to a number
77 =cut
79 sub _initialize {
80 my $self = shift;
81 $self->SUPER::_initialize(@_);
82 my ( $hdr, $trans ) = $self->_rearrange(
84 qw(HEADER
85 TRANSLATE)
89 $self->header( defined $hdr ? $hdr : 1 );
90 $self->translate_node( defined $trans ? $trans : 1 );
93 =head2 next_tree
95 Title : next_tree
96 Usage : my $tree = $treeio->next_tree
97 Function: Gets the next tree in the stream
98 Returns : Bio::Tree::TreeI
99 Args : none
102 =cut
104 sub next_tree {
105 my ($self) = @_;
106 unless ( $self->{'_parsed'} ) {
107 $self->_parse;
109 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
112 sub rewind {
113 shift->{'_treeiter'} = 0;
116 sub _parse {
117 my ($self) = @_;
119 $self->{'_parsed'} = 1;
120 $self->{'_treeiter'} = 0;
122 while ( defined( $_ = $self->_readline ) ) {
123 next if /^\s+$/;
124 last;
126 return unless ( defined $_ );
128 unless (/^\#NEXUS/i) {
129 $self->warn("File does not start with #NEXUS"); #'
130 return;
133 my $line;
134 while ( defined( $_ = $self->_readline ) ) {
135 $line .= $_;
137 my @sections = split( /#NEXUS/i, $line );
138 for my $s (@sections) {
139 my %translate;
140 if ( $self->verbose > 0 ) {
141 while ( $s =~ s/(\[[^\]]+\])// ) {
142 $self->debug("removing comment $1\n");
145 else {
146 $s =~ s/(\[[^\]]+\])//g;
149 if ( $s =~ /begin trees;(.+)(end;)?/si ) {
150 my $trees = $1;
151 if ( $trees =~ s/\s+translate\s+([^;]+);//i ) {
152 my @trans;
153 my $tr = $1;
155 while ($tr =~ m{\s*([^,\s]+?\s+(?:'[^']+'|[^'\s]+)),?}gc) {
156 push @trans, $1;
158 for my $n ( @trans ) {
159 if ($n =~ /^\s*(\S+)\s+(.+)$/) {
160 my ($id,$tag) = ($1,$2);
161 $tag =~ s/[\s,]+$//; # remove the extra spaces of the last taxon
162 $translate{$id} = $tag;
166 else {
167 $self->debug("no translate in: $trees\n");
169 while ($trees =~ /\s+tree\s+\*?\s*(\S+)\s*\=
170 \s*(?:\[\S+\])?\s*([^\;]+;)/igx)
172 my ( $tree_name, $tree_str ) = ( $1, $2 );
174 # MrBayes does not print colons for node label
175 # $tree_str =~ s/\)(\d*\.\d+)\)/:$1/g;
176 my $buf = IO::String->new($tree_str);
177 my $treeio = Bio::TreeIO->new(
178 -format => 'newick',
179 -fh => $buf
181 my $tree = $treeio->next_tree;
182 foreach my $node ( grep { $_->is_Leaf } $tree->get_nodes ) {
183 my $id = $node->id;
184 my $lookup = $translate{$id};
185 $node->id( $lookup || $id );
187 $tree->id($tree_name) if defined $tree_name;
188 push @{ $self->{'_trees'} }, $tree;
191 else {
192 $self->debug("begin_trees failed: $s\n");
195 if ( !@sections ) {
196 $self->debug("warn no sections: $line\n");
200 =head2 write_tree
202 Title : write_tree
203 Usage : $treeio->write_tree($tree);
204 Function: Writes a tree onto the stream
205 Returns : none
206 Args : Bio::Tree::TreeI
209 =cut
211 sub write_tree {
212 my ( $self, @trees ) = @_;
213 if ( $self->header ) {
214 $self->_print("#NEXUS\n\n");
216 my $translate = $self->translate_node;
217 my $time = localtime();
218 $self->_print( sprintf( "Begin trees; [Treefile created %s]\n", $time ) );
220 my ( $first, $nodecter, %node2num ) = ( 0, 1 );
221 foreach my $tree (@trees) {
223 if ( $first == 0
224 && $translate )
226 $self->_print("\tTranslate\n");
227 $self->_print(
228 join(
229 ",\n",
230 map {
231 $node2num{ $_->id } = $nodecter;
232 sprintf( "\t\t%d %s", $nodecter++, $_->id )
234 grep { $_->is_Leaf } $tree->get_nodes
236 "\n;\n"
239 my @data = _write_tree_Helper( $tree->get_root_node, \%node2num );
240 if ( $data[-1] !~ /\)$/ ) {
241 $data[0] = "(" . $data[0];
242 $data[-1] .= ")";
245 # by default all trees in bioperl are currently rooted
246 # something we'll try and fix one day....
247 $self->_print(
248 sprintf(
249 "\t tree %s = [&%s] %s;\n",
250 ( $tree->id || sprintf( "Bioperl_%d", $first + 1 ) ),
251 ( $tree->get_root_node ) ? 'R' : 'U',
252 join( ',', @data )
255 $first++;
257 $self->_print("End;\n");
258 $self->flush if $self->_flush_on_write && defined $self->_fh;
259 return;
262 sub _write_tree_Helper {
263 my ( $node, $node2num ) = @_;
264 return () if ( !defined $node );
265 my @data;
267 foreach my $n ( $node->each_Descendent() ) {
268 push @data, _write_tree_Helper( $n, $node2num );
270 if ( @data > 1 ) {
271 $data[0] = "(" . $data[0];
272 $data[-1] .= ")";
274 # let's explicitly write out the bootstrap if we've got it
275 my $b;
277 my $bl = $node->branch_length;
278 if ( !defined $bl ) {
280 elsif ( $bl =~ /\#/ ) {
281 $data[-1] .= $bl;
283 else {
284 $data[-1] .= ":$bl";
286 if ( defined( $b = $node->bootstrap ) ) {
287 $data[-1] .= sprintf( "[%s]", $b );
289 elsif ( defined( $b = $node->id ) ) {
290 $b = $node2num->{$b} if ( $node2num->{$b} ); # translate node2num
291 $data[-1] .= sprintf( "[%s]", $b );
295 else {
296 if ( defined $node->id || defined $node->branch_length ) {
297 my $id = defined $node->id ? $node->id : '';
298 if ( length($id) && $node2num->{$id} ) {
299 $id = $node2num->{$id};
301 push @data,
302 sprintf( "%s%s",
303 $id,
304 defined $node->branch_length
305 ? ":" . $node->branch_length
306 : '' );
309 return @data;
312 =head2 header
314 Title : header
315 Usage : $obj->header($newval)
316 Function:
317 Example :
318 Returns : value of header (a scalar)
319 Args : on set, new value (a scalar or undef, optional)
322 =cut
324 sub header {
325 my $self = shift;
327 return $self->{'header'} = shift if @_;
328 return $self->{'header'};
331 =head2 translate_node
333 Title : translate_node
334 Usage : $obj->translate_node($newval)
335 Function:
336 Example :
337 Returns : value of translate_node (a scalar)
338 Args : on set, new value (a scalar or undef, optional)
341 =cut
343 sub translate_node {
344 my $self = shift;
346 return $self->{'translate_node'} = shift if @_;
347 return $self->{'translate_node'};