maint: fix multiple typos identified by lintian
[bioperl-live.git] / Bio / NexmlIO.pm
blob269a3b8aeaac0e4bb79f67034dd79b91de3fedd2
1 # $Id: Nexml.pm 15889 2009-07-29 13:35:29Z chmille4 $
2 # BioPerl module for Bio::NexmlIO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chase Miller <chmille4@gmail.com>
8 # Copyright Chase Miller
10 # You may distribute this module under the same terms as perl itself
12 # _history
13 # June 16, 2009 Largely rewritten by Chase Miller
15 # POD documentation - main docs before the code
17 =head1 NAME
19 Bio::NexmlIO - stream handler for NeXML documents
21 =head1 SYNOPSIS
23 #Instantiate a Bio::Nexml object and link it to a file
24 my $in_nexml = Bio::Nexml->new(-file => 'nexml_doc.xml', -format => 'Nexml');
26 #Read in some data
27 my $bptree1 = $in_nexml->next_tree();
28 my $bpaln1 = $in_nexml->next_aln();
29 my $bpseq1 = $in_nexml->next_seq();
31 #Use/manipulate data
32 ...
34 #Write data to nexml file
35 my $out_nexml = Bio::Nexml->new(-file => '>new_nexml_doc.xml', -format => 'Nexml');
36 $out_nexml->to_xml();
40 =head1 DESCRIPTION
42 Bio::NexmlIO is an I/O handler for a NeXML document. A NeXML document can
43 represent three different data types: simple sequences, alignments,
44 and trees. NexmlIO has four main methods next_tree, next_seq,
45 next_aln, and write. NexmlIO returns bioperl seq, tree, and aln
46 objects which can be manipulated then passed to the write method of a
47 new NexmlIO instance to allow the creation of a NeXML document.
49 Each bioperl object contains all the information necessary to recreate
50 a Bio::Phylo::Taxa object, so each time a bioperl object is converted
51 to a biophylo object, the bioperl object is checked to see if its
52 associated taxa has already been created (against a hash using the
53 NexmlIO_ID and Taxa_ID to create a unique string). If not, it is
54 created; if so, that taxa object is used to link the Bio::Phylo tree
55 or matrix.
57 For more information on the NeXML format, see L<http://www.nexml.org>.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists.
67 Your participation is much appreciated.
69 bioperl-l@bioperl.org - General discussion
70 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 =head2 Support
74 Please direct usage questions or support issues to the mailing list:
76 I<bioperl-l@bioperl.org>
78 rather than to the module maintainer directly. Many experienced and
79 reponsive experts will be able look at the problem and quickly
80 address it. Please include a thorough description of the problem
81 with code and data examples if at all possible.
83 =head2 Reporting Bugs
85 Report bugs to the Bioperl bug tracking system to help us keep track
86 the bugs and their resolution. Bug reports can be submitted via the
87 web:
89 https://github.com/bioperl/bioperl-live/issues
91 =head1 AUTHOR - Chase Miller
93 Email chmille4@gmail.com
95 =head1 CONTRIBUTORS
97 Mark A. Jensen, maj -at- fortinbras -dot- com
99 =head1 APPENDIX
101 The rest of the documentation details each of the object
102 methods. Internal methods are usually preceded with a _
104 =cut
106 # Let the code begin...
109 package Bio::NexmlIO;
110 use strict;
111 #TODO Change this
112 use lib '..';
114 use Bio::SeqIO::nexml;
115 use Bio::AlignIO::nexml;
116 use Bio::TreeIO::nexml;
117 use Bio::Nexml::Factory;
119 use base qw(Bio::Root::IO);
121 my $nexml_fac = Bio::Nexml::Factory->new();
123 =head1 CONSTRUCTOR
125 =head2 new
127 Title : new
128 Usage : my $in_nexmlIO = Bio::NexmlIO->new(-file => 'data.nexml.xml');
129 Function: Creates a L<Bio::NexmlIO> object linked to a stream
130 Returns : a L<Bio::NexmlIO> object
131 Args : file name
133 See L<Bio::Root::IO>
135 =cut
137 sub new {
138 my($class,@args) = @_;
139 my $self = $class->SUPER::new(@args);
141 my %params = @args;
142 my $file_string = $params{'-file'};
144 #create unique ID by creating a scalar and using the memory address
145 my $ID = bless \(my $dummy), "UniqueID";
146 ($self->{'_ID'}) = sprintf("%s",\$ID) =~ /(0x[0-9a-fA-F]+)/;
148 unless ($file_string =~ m/^\>/) {
149 # Only pass filename if filehandle is not available,
150 # or "Bio::Phylo" will create a new filehandle that ends
151 # out of scope and can't be closed directly, leaving 2 open
152 # filehandles for the same file (so file can't be deleted)
153 my $file_arg;
154 my $file_value;
155 if ( exists $self->{'_filehandle'}
156 and defined $self->{'_filehandle'}
158 $file_arg = '-handle';
159 $file_value = $self->{'_filehandle'};
161 else {
162 $file_arg = '-file';
163 $file_value = $self->{'_file'};
166 $self->{'_doc'} = Bio::Phylo::IO->parse($file_arg => $file_value,,
167 '-format' => 'nexml',
168 '-as_project' => '1');
171 return $self;
174 =head2 doc
176 Title : doc
177 Usage : my $nexml_doc = $in_nexmlIO->doc();
178 Function: returns a L<Bio::Phylo::Project> object that contains all the Bio::Phylo data objects parsed from the stream
179 Returns : a L<Bio::Phylo::Project> object
180 Args : none
182 =cut
184 sub doc {
185 my $self = shift;
186 return $self->{'_doc'};
189 # Takes the Bio::Phylo::Project object and creats BioPerl trees, alns, and seqs from it
190 sub _parse {
191 my ($self) = @_;
193 $self->{'_treeiter'} = 0;
194 $self->{'_seqiter'} = 0;
195 $self->{'_alniter'} = 0;
197 $self->{_trees} = $nexml_fac->create_bperl_tree($self);
198 $self->{_alns} = $nexml_fac->create_bperl_aln($self);
199 $self->{_seqs} = $nexml_fac->create_bperl_seq($self);
200 my $taxa_array = $self->doc->get_taxa();
202 $self->{'_parsed'} = 1; #success
205 =head1 ITERATORS
207 =head2 next_tree
209 Title : next_tree
210 Usage : $tree = $stream->next_tree
211 Function: Reads the next tree object from the stream and returns it.
212 Returns : a L<Bio::Tree::Tree> object
213 Args : none
215 See L<Bio::Root::IO>, L<Bio::Tree::Tree>
217 =cut
219 sub next_tree {
220 my $self = shift;
221 $self->_parse unless $self->{'_parsed'};
223 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
226 =head2 next_seq
228 Title : next_seq
229 Usage : $seq = $stream->next_seq
230 Function: Reads the next seq object from the stream and returns it.
231 Returns : a L<Bio::Seq> object
232 Args : none
234 See L<Bio::Root::IO>, L<Bio::Seq>
236 =cut
238 sub next_seq {
239 my $self = shift;
240 unless ( $self->{'_parsed'} ) {
241 $self->_parse;
243 return $self->{'_seqs'}->[ $self->{'_seqiter'}++ ];
246 =head2 next_aln
248 Title : next_aln
249 Usage : $aln = $stream->next_aln
250 Function: Reads the next aln object from the stream and returns it.
251 Returns : a L<Bio::SimpleAlign> object
252 Args : none
254 See L<Bio::Root::IO>, L<Bio::SimpleAlign>
256 =cut
258 sub next_aln {
259 my $self = shift;
260 unless ( $self->{'_parsed'} ) {
261 $self->_parse;
263 return $self->{'_alns'}->[ $self->{'_alniter'}++ ];
266 sub _rewind {
267 my $self = shift;
268 my $elt = shift;
269 $self->{"_${elt}iter"} = 0 if defined $self->{"_${elt}iter"};
270 return 1;
273 =head2 rewind_seq
275 Title : rewind_seq
276 Usage : $stream->rewind_seq
277 Function: Resets the stream for seqs
278 Returns : none
279 Args : none
281 See L<Bio::Root::IO>, L<Bio::Seq>
283 =cut
285 sub rewind_seq { shift->_rewind('seq'); }
287 =head2 rewind_aln
289 Title : rewind_aln
290 Usage : $stream->rewind_aln
291 Function: Resets the stream for alns
292 Returns : none
293 Args : none
295 See L<Bio::Root::IO>, L<Bio::Simple::Align>
297 =cut
299 sub rewind_aln { shift->_rewind('aln'); }
301 =head2 rewind_tree
303 Title : rewind_tree
304 Usage : $stream->rewind_tree
305 Function: Resets the stream for trees
306 Returns : none
307 Args : none
309 See L<Bio::Root::IO>, L<Bio::tree::tree>
311 =cut
313 sub rewind_tree { shift->_rewind('tree'); }
315 =head2 write
317 Title : write
318 Usage : $stream->write(-alns => $alns,-seqs => $seqs,-trees => $trees)
319 Function: converts BioPerl seq, tree, and aln objects into Bio::Phylo
320 seq, tree, and aln objects, constructs a Bio::Phylo::Project
321 object made up of the newly created Bio::Phylo objects, and
322 writes the Bio::Phylo:Project object to the stream as a valid
323 nexml document
324 Returns : none
325 Args : \@L<Bio::Seq>, \@L<Bio::SimpleAlign>, \@L<Bio::Tree::Tree>
327 See L<Bio::Root::IO>, L<Bio::tree::tree>, L<Bio::Seq>, L<Bio::SimpleAlign>
329 =cut
331 sub write {
332 my ($self, @args) = @_;
334 my %params = @args;
336 my ($trees, $alns, $seqs) = @params{qw( -trees -alns -seqs )};
337 my %taxa_hash = ();
338 my %seq_matrices = ();
340 my $proj_doc = Bio::Phylo::Factory->create_project();
342 #convert trees to bio::Phylo objects
343 my $forest = Bio::Phylo::Factory->create_forest();
344 my @forests;
345 my @taxa_array;
346 my $ent;
347 my $taxa_o;
348 my $phylo_tree_o;
350 foreach my $tree (@$trees) {
351 my $nexml_id = $tree->get_tag_values('_NexmlIO_ID');
352 $taxa_o = undef;
353 if ( defined $taxa_hash{$nexml_id} ) {
354 $taxa_o = $taxa_hash{$nexml_id};
356 else {
357 ($taxa_o) = $nexml_fac->create_bphylo_taxa($tree);
358 $forest->set_taxa($taxa_o) if defined $taxa_o;
359 $taxa_hash{$nexml_id} = $taxa_o;
362 ($phylo_tree_o) = $nexml_fac->create_bphylo_tree($tree, $taxa_o);
364 $forest->insert($phylo_tree_o);
367 #convert matrices to Bio::Phylo objects
368 my $matrices = Bio::Phylo::Matrices->new();
369 my $phylo_matrix_o;
371 foreach my $aln (@$alns)
373 $taxa_o = undef;
374 if (defined $taxa_hash{ $aln->{_Nexml_ID} }) {
375 $taxa_o = $taxa_hash{$aln->{_Nexml_ID}};
377 else {
378 ($taxa_o) = $nexml_fac->create_bphylo_taxa($aln);
379 $taxa_hash{$aln->{_Nexml_ID}} = $taxa_o;
382 ($phylo_matrix_o) = $nexml_fac->create_bphylo_aln($aln, $taxa_o);
384 $phylo_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
385 $matrices->insert($phylo_matrix_o);
388 my $seq_matrix_o;
389 my $datum;
390 #convert sequences to Bio::Phylo objects
391 foreach my $seq (@$seqs)
393 $taxa_o = undef;
394 #check if this Bio::Phylo::Taxa obj has already been created
395 if (defined $taxa_hash{ $seq->{_Nexml_ID} }) {
396 $taxa_o = $taxa_hash{$seq->{_Nexml_ID}};
398 else {
399 ($taxa_o) = $nexml_fac->create_bphylo_taxa($seq);
400 $taxa_hash{$seq->{_Nexml_ID}} = $taxa_o;
402 $datum = $nexml_fac->create_bphylo_seq($seq, $taxa_o);
403 #check if this Bio::Phylo::Matrices::Matrix obj has already been created
404 if (defined $seq_matrices{ $seq->{_Nexml_matrix_ID} }) {
405 $seq_matrix_o = $seq_matrices{$seq->{_Nexml_matrix_ID}};
406 my $taxon_name = $datum->get_taxon()->get_name();
407 $datum->unset_taxon();
408 $seq_matrix_o->insert($datum);
409 $datum->set_taxon($seq_matrix_o->get_taxa()->get_by_name($taxon_name));
411 else {
412 $seq_matrix_o = Bio::Phylo::Factory->create_matrix('-type' => $datum->moltype);
413 $seq_matrices{$seq->{_Nexml_matrix_ID}} = $seq_matrix_o;
414 $seq_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
415 $seq_matrix_o->insert($datum);
417 #get matrix label
418 my $feat = ($seq->get_SeqFeatures())[0];
419 my $matrix_label = ($feat->get_tag_values('matrix_label'))[0] if $feat->has_tag('id');
420 $seq_matrix_o->set_name($matrix_label);
422 $matrices->insert($seq_matrix_o);
426 #Add matrices and forest objects to project object which represents a complete nexml document
427 if($forest->first) {
428 $proj_doc->insert($forest);
430 while(my $curr_matrix = $matrices->next) {
431 $proj_doc->insert($curr_matrix);
434 #write nexml document to stream
435 my $ret = $self->_print($proj_doc->to_xml(-compact=>1));
436 $self->flush;
437 return($ret);
440 =head2 extract_seqs
442 Title : extract_seqs
443 Usage : $nexmlIO->extract_seqs(-file => ">$outfile", -format => $format)
444 Function: converts BioPerl seqs stored in the NexmlIO object into the provided
445 format and writes it to the provided file. Uses L<Bio::SeqIO> to do
446 the conversion and writing.
447 Returns : none
448 Args : file to write to, format to be converted to
450 See L<Bio::Seq>, L<Bio::SeqIO>
452 =cut
454 sub extract_seqs {
455 my $self = shift;
456 unless ( $self->{'_parsed'} ) {
457 $self->_parse;
460 my %params = @_;
461 my $remove_spaces = 0;
462 my $ret = 0;
463 my ($format, $file) = @params{qw( -format -file)};
465 for ($format) {
466 /^fasta$/i && do {
467 # this is ok, flag so that the nexmlid gets converted;
468 $remove_spaces = 1;
469 last;
471 # default
472 do {
473 $self->throw("Format '$format' not yet supported for extraction");
477 my $seqIO = Bio::SeqIO->new(-format => $format, -file => $file);
478 my $seqs = $self->{_seqs};
479 foreach my $seq (@$seqs) {
480 if ($remove_spaces) {
481 my $id = $seq->id;
482 $id =~ s/ /_/;
483 $seq->id($id);
485 $ret = $seqIO->write_seq($seq);
487 return $ret;
490 =head2 extract_alns
492 Title : extract_alns
493 Usage : $nexmlIO->extract_alns(-file => ">$outfile", -format => $format)
494 Function: converts BioPerl alns stored in the NexmlIO object into the provided
495 format and writes it to the provided file. Uses L<Bio::AlignIO> to do
496 the conversion and writing.
497 Returns : none
498 Args : file to write to, format to be converted to
500 See L<Bio::SimpleAlign>, L<Bio::AlignIO>
502 =cut
504 sub extract_alns {
505 my $self = shift;
506 unless ( $self->{'_parsed'} ) {
507 $self->_parse;
510 my $ret = 0;
511 my %params = @_;
512 my ($format, $file) = @params{qw( -format -file)};
514 my $alignIO = Bio::AlignIO->new(-format => $format, -file => $file);
515 my $alns = $self->{_alns};
516 foreach my $aln (@$alns) {
517 $ret = $alignIO->write_aln($aln);
519 return $ret;
522 =head2 extract_trees
524 Title : extract_trees
525 Usage : $nexmlIO->extract_trees(-file => ">$outfile", -format => $format)
526 Function: converts BioPerl trees stored in the NexmlIO object into the provided
527 format and writes it to the provided file. Uses L<Bio::TreeIO> to do
528 the conversion and writing.
529 Returns : none
530 Args : file to write to, format to be converted to
532 See L<Bio::Tree::Tree>, L<Bio::TreeIO>
534 =cut
536 sub extract_trees {
537 my $self = shift;
538 unless ( $self->{'_parsed'} ) {
539 $self->_parse;
542 my $ret = 0;
543 my %params = @_;
544 my ($format, $file) = @params{qw( -format -file)};
546 my $treeIO = Bio::TreeIO->new(-format => $format, -file => $file);
547 my $trees = $self->{_trees};
548 foreach my $tree (@$trees) {
549 $treeIO->write_tree($tree);
550 $ret = 1;
552 return $ret;