reimplement various methods in terms of get_dbxrefs, for consistency
[bioperl-live.git] / Bio / TreeIO / nhx.pm
blobcfb89931e0f1585db8cfa632079cd35c33331fa4
1 # $Id$
3 # BioPerl module for Bio::TreeIO::nhx
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Aaron Mackey <amackey@virginia.edu>
9 # Copyright Aaron Mackey
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::TreeIO::nhx - TreeIO implementation for parsing
18 Newick/New Hampshire eXtendend (NHX) format.
20 =head1 SYNOPSIS
22 # do not use this module directly
23 use Bio::TreeIO;
24 my $treeio = Bio::TreeIO->new(-format => 'nhx', -file => 'tree.dnd');
25 my $tree = $treeio->next_tree;
27 =head1 DESCRIPTION
29 This module handles parsing and writing of Newick/New Hampshire eXtended (NHX) format.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 of the bugs and their resolution. Bug reports can be submitted viax the
57 web:
59 http://bugzilla.open-bio.org/
61 =head1 AUTHOR - Aaron Mackey
63 Email amackey-at-virginia.edu
65 =head1 CONTRIBUTORS
67 Email jason-at-bioperl-dot-org
69 =head1 APPENDIX
71 The rest of the documentation details each of the object methods.
72 Internal methods are usually preceded with a _
74 =cut
77 # Let the code begin...
80 package Bio::TreeIO::nhx;
81 use strict;
83 # Object preamble - inherits from Bio::Root::Root
85 use Bio::Tree::NodeNHX;
86 use Bio::Event::EventGeneratorI;
87 #use XML::Handler::Subs;
90 use base qw(Bio::TreeIO);
92 sub _initialize {
93 my($self, %args) = @_;
94 $args{-nodetype} ||= 'Bio::Tree::NodeNHX';
95 $self->SUPER::_initialize(%args);
98 =head2 next_tree
100 Title : next_tree
101 Usage : my $tree = $treeio->next_tree
102 Function: Gets the next tree in the stream
103 Returns : Bio::Tree::TreeI
104 Args : none
107 =cut
109 sub next_tree{
110 my ($self) = @_;
111 local $/ = ";\n";
112 return unless $_ = $self->_readline;
113 s/\s+//g;
114 $self->debug("entry is $_\n");
115 my $chars = '';
116 $self->_eventHandler->start_document;
117 my ($prev_event,$lastevent,$last_leaf_event) = ('','','');
118 my @ch = split(//, $_);
119 foreach my $ch (@ch) {
120 if( $ch eq ';' ) {
121 $self->_eventHandler->in_element('node') &&
122 $self->_eventHandler->end_element( {'Name' => 'node'});
123 return $self->_eventHandler->end_document;
124 } elsif ($ch eq '[') {
125 if ( length $chars ) {
126 if ( $lastevent eq ':' ) {
127 $self->_eventHandler->start_element( { Name => 'branch_length' } );
128 $self->_eventHandler->characters($chars);
129 $self->_eventHandler->end_element( { Name => 'branch_length' });
130 $lastevent = $prev_event;
131 } else {
132 $self->debug("id with no branchlength is $chars\n");
133 $self->_eventHandler->start_element( { 'Name' => 'node' } );
134 $self->_eventHandler->start_element( { 'Name' => 'id' } );
135 $self->_eventHandler->characters($chars);
136 $self->_eventHandler->end_element( { 'Name' => 'id' } );
138 } else {
139 $self->_eventHandler->start_element( { Name => 'node' } );
141 my $leafstatus = ( $last_leaf_event ne ')' ) ? 1 : 0;
142 $self->_eventHandler->start_element({'Name' => 'leaf'});
143 $self->_eventHandler->characters($leafstatus);
144 $self->_eventHandler->end_element({'Name' => 'leaf'});
145 $chars = '';
147 $self->_eventHandler->start_element( { Name => 'nhx_tag' });
148 } elsif( $ch eq '(' ) {
149 $chars = '';
150 $self->_eventHandler->start_element( {'Name' => 'tree'} );
151 } elsif($ch eq ')' ) {
152 if( length $chars ) {
153 if( $lastevent eq ':') {
154 unless ($self->_eventHandler->within_element('nhx_tag')) {
155 $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
156 $self->_eventHandler->characters($chars);
157 $self->_eventHandler->end_element( {'Name' => 'branch_length'});
158 } else {
159 $self->throw("malformed input; end of node ) before ] found");
161 } else {
162 $self->debug("id with no branchlength is '$chars'\n");
163 $self->_eventHandler->start_element( { 'Name' => 'node' } );
164 $self->_eventHandler->start_element( { 'Name' => 'id' } );
165 $self->_eventHandler->characters($chars);
166 $self->_eventHandler->end_element( { 'Name' => 'id' } );
168 } elsif ( $lastevent ne ']' ) {
169 $self->_eventHandler->start_element( {'Name' => 'node'} );
171 # problem here is that we need to detect if we coming up on
172 # the end of a leaf node or a labeled internal node
173 # each can have [] and each can have :, but only leaves are
174 # NOT proceeded by a ')'
175 # the [] events throw us off
176 my $leafstatus = ( $last_leaf_event ne ')' ) ? 1 : 0;
177 $self->_eventHandler->start_element({'Name' => 'leaf'});
178 $self->_eventHandler->characters($leafstatus);
179 $self->_eventHandler->end_element({'Name' => 'leaf'});
181 $self->_eventHandler->end_element( {'Name' => 'node'} );
182 $self->_eventHandler->end_element( {'Name' => 'tree'} );
183 $chars = '';
184 $last_leaf_event = $ch;
186 } elsif ( $ch eq ',' ) {
187 if( length $chars ) {
188 if( $lastevent eq ':' ) {
189 $self->_eventHandler->start_element( { 'Name' => 'branch_length'});
190 $self->_eventHandler->characters($chars);
191 $self->_eventHandler->end_element( {'Name' => 'branch_length'});
192 $lastevent = $prev_event;
193 } else {
194 $self->debug("id with no branchlength is $chars, last event was $lastevent\n");
195 $self->_eventHandler->start_element( { 'Name' => 'node' } );
196 $self->_eventHandler->start_element( { 'Name' => 'id' } );
197 $self->_eventHandler->characters($chars);
198 $self->_eventHandler->end_element( { 'Name' => 'id' } );
200 } elsif ( $lastevent ne ']' ) {
201 $self->_eventHandler->start_element( { 'Name' => 'node' } );
203 $self->_eventHandler->end_element( {'Name' => 'node'} );
204 $chars = '';
205 $last_leaf_event = $ch;
206 } elsif( $ch eq ':' ) {
207 if ($self->_eventHandler->within_element('nhx_tag')) {
208 if ($lastevent eq '=') {
209 $self->_eventHandler->start_element( { Name => 'tag_value' } );
210 $self->_eventHandler->characters($chars);
211 $self->_eventHandler->end_element( { Name => 'tag_value' } );
212 $chars = '';
213 } else {
214 if ($chars eq '&&NHX') {
215 $chars = ''; # get rid of &&NHX:
216 } else {
217 $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<; lastevent is $lastevent");
220 } elsif ($lastevent ne ']') {
221 $self->debug("id with a branchlength coming is $chars\n");
222 $self->_eventHandler->start_element( { 'Name' => 'node' } );
223 $self->_eventHandler->start_element( { 'Name' => 'id' } );
224 $self->_eventHandler->characters($chars);
225 $self->_eventHandler->end_element( { 'Name' => 'id' } );
226 $chars = '';
228 } elsif ( $ch eq '=' ) {
229 if ($self->_eventHandler->within_element('nhx_tag')) {
230 $self->_eventHandler->start_element( { Name => 'tag_name' } );
231 $self->_eventHandler->characters($chars);
232 $self->_eventHandler->end_element( { Name => 'tag_name' } );
233 $chars = '';
234 } else {
235 $chars .= $ch;
237 } elsif ( $ch eq ']' ) {
238 if ($self->_eventHandler->within_element('nhx_tag') ) {
239 if( $lastevent eq '=' ) {
240 $self->_eventHandler->start_element( { Name => 'tag_value' } );
241 $self->_eventHandler->characters($chars);
242 $self->_eventHandler->end_element( { Name => 'tag_value' } );
243 $chars = '';
244 $self->_eventHandler->end_element( { Name => 'nhx_tag' } );
245 } else {
246 if ($chars ne '&&NHX') {
247 $self->throw("Unrecognized, non \&\&NHX string: >>$chars<<; lastevent is $lastevent");
249 $chars = '';
250 $self->_eventHandler->end_element( { Name => 'nhx_tag' } );
252 } else {
253 $chars .= $ch;
254 next;
256 } else {
257 $chars .= $ch;
258 next;
260 $prev_event = $lastevent;
261 $lastevent = $ch;
263 return;
266 =head2 write_tree
268 Title : write_tree
269 Usage : $treeio->write_tree($tree);
270 Function: Write a tree out to data stream in nhx format
271 Returns : none
272 Args : Bio::Tree::TreeI object
274 =cut
276 sub write_tree{
277 my ($self,@trees) = @_;
278 my $nl = $self->newline_each_node;
279 foreach my $tree ( @trees ) {
280 my @data = _write_tree_Helper($tree->get_root_node,$nl);
281 # per bug # 1471 do not include enclosing brackets.
282 # this is sort of cheating but it should work
283 # remove first and last paren if the set ends in a paren
284 if($data[-1] =~ s/\)$// ) {
285 $data[0] =~ s/^\(//;
287 if( $nl ) {
288 chomp($data[-1]);# remove last newline
289 $self->_print(join(",\n", @data), ";\n");
290 } else {
291 $self->_print(join(',', @data), ";\n");
294 $self->flush if $self->_flush_on_write && defined $self->_fh;
295 return;
298 sub _write_tree_Helper {
299 my ($node,$nl) = @_;
300 return () unless defined $node;
301 # rebless
302 $node = bless $node,'Bio::Tree::NodeNHX';
303 my @data;
305 foreach my $n ( $node->each_Descendent() ) {
306 push @data, _write_tree_Helper($n,$nl);
309 if( @data > 1 ) {
310 if( $nl ) {
311 $data[0] = "(\n" . $data[0];
312 $data[-1] .= ")\n";
313 } else {
314 $data[0] = "(" . $data[0];
315 $data[-1] .= ")";
318 my $id = $node->id;
319 $data[-1] .= $id if( defined $id );
320 my $blen = $node->branch_length;
321 $data[-1] .= ":". $blen if $blen;
322 # this is to not print out an empty NHX for the root node which is
323 # a convience for how we get a handle to the whole tree
324 my @tags = $node->get_all_tags;
325 if( $node->ancestor || @tags ) {
326 $data[-1] .= '[' .
327 join(":", "&&NHX",
328 map { "$_=" .join(',',$node->get_tag_values($_)) }
329 @tags ) . ']';
331 } else {
332 if( $nl ) {
333 $data[0] = "(\n" . $data[0];
334 $data[-1] .= ")\n";
335 } else {
336 $data[0] = "(" . $data[0];
337 $data[-1] .= ")";
340 } else {
341 push @data, $node->to_string; # a leaf
343 return @data;