1 # $Id: Nexml.pm 15889 2009-07-29 13:35:29Z chmille4 $
2 # BioPerl module for Bio::NexmlIO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chase Miller <chmille4@gmail.com>
8 # Copyright Chase Miller
10 # You may distribute this module under the same terms as perl itself
13 # June 16, 2009 Largely rewritten by Chase Miller
15 # POD documentation - main docs before the code
19 Bio::NexmlIO - stream handler for NeXML documents
23 #Instantiate a Bio::Nexml object and link it to a file
24 my $in_nexml = Bio::Nexml->new(-file => 'nexml_doc.xml', -format => 'Nexml');
27 my $bptree1 = $in_nexml->next_tree();
28 my $bpaln1 = $in_nexml->next_aln();
29 my $bpseq1 = $in_nexml->next_seq();
34 #Write data to nexml file
35 my $out_nexml = Bio::Nexml->new(-file => '>new_nexml_doc.xml', -format => 'Nexml');
42 Bio::NexmlIO is an I/O handler for a NeXML document. A NeXML document can
43 represent three different data types: simple sequences, alignments,
44 and trees. NexmlIO has four main methods next_tree, next_seq,
45 next_aln, and write. NexmlIO returns bioperl seq, tree, and aln
46 objects which can be manipulated then passed to the write method of a
47 new NexmlIO instance to allow the creation of a NeXML document.
49 Each bioperl object contains all the information necessary to recreate
50 a Bio::Phylo::Taxa object, so each time a bioperl object is converted
51 to a biophylo object, the bioperl object is checked to see if its
52 associated taxa has already been created (against a hash using the
53 NexmlIO_ID and Taxa_ID to create a unique string). If not, it is
54 created; if so, that taxa object is used to link the Bio::Phylo tree
57 For more information on the NeXML format, see L<http://www.nexml.org>.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists.
67 Your participation is much appreciated.
69 bioperl-l@bioperl.org - General discussion
70 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
74 Please direct usage questions or support issues to the mailing list:
76 I<bioperl-l@bioperl.org>
78 rather than to the module maintainer directly. Many experienced and
79 reponsive experts will be able look at the problem and quickly
80 address it. Please include a thorough description of the problem
81 with code and data examples if at all possible.
85 Report bugs to the Bioperl bug tracking system to help us keep track
86 the bugs and their resolution. Bug reports can be submitted via the
89 https://github.com/bioperl/bioperl-live/issues
91 =head1 AUTHOR - Chase Miller
93 Email chmille4@gmail.com
97 Mark A. Jensen, maj -at- fortinbras -dot- com
101 The rest of the documentation details each of the object
102 methods. Internal methods are usually preceded with a _
106 # Let the code begin...
109 package Bio
::NexmlIO
;
114 use Bio
::SeqIO
::nexml
;
115 use Bio
::AlignIO
::nexml
;
116 use Bio
::TreeIO
::nexml
;
117 use Bio
::Nexml
::Factory
;
119 use base
qw(Bio::Root::IO);
121 my $nexml_fac = Bio
::Nexml
::Factory
->new();
128 Usage : my $in_nexmlIO = Bio::NexmlIO->new(-file => 'data.nexml.xml');
129 Function: Creates a L<Bio::NexmlIO> object linked to a stream
130 Returns : a L<Bio::NexmlIO> object
138 my($class,@args) = @_;
139 my $self = $class->SUPER::new
(@args);
142 my $file_string = $params{'-file'};
144 #create unique ID by creating a scalar and using the memory address
145 my $ID = bless \
(my $dummy), "UniqueID";
146 ($self->{'_ID'}) = sprintf("%s",\
$ID) =~ /(0x[0-9a-fA-F]+)/;
148 unless ($file_string =~ m/^\>/) {
149 # Only pass filename if filehandle is not available,
150 # or "Bio::Phylo" will create a new filehandle that ends
151 # out of scope and can't be closed directly, leaving 2 open
152 # filehandles for the same file (so file can't be deleted)
155 if ( exists $self->{'_filehandle'}
156 and defined $self->{'_filehandle'}
158 $file_arg = '-handle';
159 $file_value = $self->{'_filehandle'};
163 $file_value = $self->{'_file'};
166 $self->{'_doc'} = Bio
::Phylo
::IO
->parse($file_arg => $file_value,,
167 '-format' => 'nexml',
168 '-as_project' => '1');
177 Usage : my $nexml_doc = $in_nexmlIO->doc();
178 Function: returns a L<Bio::Phylo::Project> object that contains all the Bio::Phylo data objects parsed from the stream
179 Returns : a L<Bio::Phylo::Project> object
186 return $self->{'_doc'};
189 # Takes the Bio::Phylo::Project object and creats BioPerl trees, alns, and seqs from it
193 $self->{'_treeiter'} = 0;
194 $self->{'_seqiter'} = 0;
195 $self->{'_alniter'} = 0;
197 $self->{_trees
} = $nexml_fac->create_bperl_tree($self);
198 $self->{_alns
} = $nexml_fac->create_bperl_aln($self);
199 $self->{_seqs
} = $nexml_fac->create_bperl_seq($self);
200 my $taxa_array = $self->doc->get_taxa();
202 $self->{'_parsed'} = 1; #success
210 Usage : $tree = $stream->next_tree
211 Function: Reads the next tree object from the stream and returns it.
212 Returns : a L<Bio::Tree::Tree> object
215 See L<Bio::Root::IO>, L<Bio::Tree::Tree>
221 $self->_parse unless $self->{'_parsed'};
223 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
229 Usage : $seq = $stream->next_seq
230 Function: Reads the next seq object from the stream and returns it.
231 Returns : a L<Bio::Seq> object
234 See L<Bio::Root::IO>, L<Bio::Seq>
240 unless ( $self->{'_parsed'} ) {
243 return $self->{'_seqs'}->[ $self->{'_seqiter'}++ ];
249 Usage : $aln = $stream->next_aln
250 Function: Reads the next aln object from the stream and returns it.
251 Returns : a L<Bio::SimpleAlign> object
254 See L<Bio::Root::IO>, L<Bio::SimpleAlign>
260 unless ( $self->{'_parsed'} ) {
263 return $self->{'_alns'}->[ $self->{'_alniter'}++ ];
269 $self->{"_${elt}iter"} = 0 if defined $self->{"_${elt}iter"};
276 Usage : $stream->rewind_seq
277 Function: Resets the stream for seqs
281 See L<Bio::Root::IO>, L<Bio::Seq>
285 sub rewind_seq
{ shift->_rewind('seq'); }
290 Usage : $stream->rewind_aln
291 Function: Resets the stream for alns
295 See L<Bio::Root::IO>, L<Bio::Simple::Align>
299 sub rewind_aln
{ shift->_rewind('aln'); }
304 Usage : $stream->rewind_tree
305 Function: Resets the stream for trees
309 See L<Bio::Root::IO>, L<Bio::tree::tree>
313 sub rewind_tree
{ shift->_rewind('tree'); }
318 Usage : $stream->write(-alns => $alns,-seqs => $seqs,-trees => $trees)
319 Function: converts BioPerl seq, tree, and aln objects into Bio::Phylo
320 seq, tree, and aln objects, constructs a Bio::Phylo::Project
321 object made up of the newly created Bio::Phylo objects, and
322 writes the Bio::Phylo:Project object to the stream as a valid
325 Args : \@L<Bio::Seq>, \@L<Bio::SimpleAlign>, \@L<Bio::Tree::Tree>
327 See L<Bio::Root::IO>, L<Bio::tree::tree>, L<Bio::Seq>, L<Bio::SimpleAlign>
332 my ($self, @args) = @_;
336 my ($trees, $alns, $seqs) = @params{qw( -trees -alns -seqs )};
338 my %seq_matrices = ();
340 my $proj_doc = Bio
::Phylo
::Factory
->create_project();
342 #convert trees to bio::Phylo objects
343 my $forest = Bio
::Phylo
::Factory
->create_forest();
350 foreach my $tree (@
$trees) {
351 my $nexml_id = $tree->get_tag_values('_NexmlIO_ID');
353 if ( defined $taxa_hash{$nexml_id} ) {
354 $taxa_o = $taxa_hash{$nexml_id};
357 ($taxa_o) = $nexml_fac->create_bphylo_taxa($tree);
358 $forest->set_taxa($taxa_o) if defined $taxa_o;
359 $taxa_hash{$nexml_id} = $taxa_o;
362 ($phylo_tree_o) = $nexml_fac->create_bphylo_tree($tree, $taxa_o);
364 $forest->insert($phylo_tree_o);
367 #convert matrices to Bio::Phylo objects
368 my $matrices = Bio
::Phylo
::Matrices
->new();
371 foreach my $aln (@
$alns)
374 if (defined $taxa_hash{ $aln->{_Nexml_ID
} }) {
375 $taxa_o = $taxa_hash{$aln->{_Nexml_ID
}};
378 ($taxa_o) = $nexml_fac->create_bphylo_taxa($aln);
379 $taxa_hash{$aln->{_Nexml_ID
}} = $taxa_o;
382 ($phylo_matrix_o) = $nexml_fac->create_bphylo_aln($aln, $taxa_o);
384 $phylo_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
385 $matrices->insert($phylo_matrix_o);
390 #convert sequences to Bio::Phylo objects
391 foreach my $seq (@
$seqs)
394 #check if this Bio::Phylo::Taxa obj has already been created
395 if (defined $taxa_hash{ $seq->{_Nexml_ID
} }) {
396 $taxa_o = $taxa_hash{$seq->{_Nexml_ID
}};
399 ($taxa_o) = $nexml_fac->create_bphylo_taxa($seq);
400 $taxa_hash{$seq->{_Nexml_ID
}} = $taxa_o;
402 $datum = $nexml_fac->create_bphylo_seq($seq, $taxa_o);
403 #check if this Bio::Phylo::Matrices::Matrix obj has already been created
404 if (defined $seq_matrices{ $seq->{_Nexml_matrix_ID
} }) {
405 $seq_matrix_o = $seq_matrices{$seq->{_Nexml_matrix_ID
}};
406 my $taxon_name = $datum->get_taxon()->get_name();
407 $datum->unset_taxon();
408 $seq_matrix_o->insert($datum);
409 $datum->set_taxon($seq_matrix_o->get_taxa()->get_by_name($taxon_name));
412 $seq_matrix_o = Bio
::Phylo
::Factory
->create_matrix('-type' => $datum->moltype);
413 $seq_matrices{$seq->{_Nexml_matrix_ID
}} = $seq_matrix_o;
414 $seq_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
415 $seq_matrix_o->insert($datum);
418 my $feat = ($seq->get_SeqFeatures())[0];
419 my $matrix_label = ($feat->get_tag_values('matrix_label'))[0] if $feat->has_tag('id');
420 $seq_matrix_o->set_name($matrix_label);
422 $matrices->insert($seq_matrix_o);
426 #Add matrices and forest objects to project object which represents a complete nexml document
428 $proj_doc->insert($forest);
430 while(my $curr_matrix = $matrices->next) {
431 $proj_doc->insert($curr_matrix);
434 #write nexml document to stream
435 my $ret = $self->_print($proj_doc->to_xml(-compact
=>1));
443 Usage : $nexmlIO->extract_seqs(-file => ">$outfile", -format => $format)
444 Function: converts BioPerl seqs stored in the NexmlIO object into the provided
445 format and writes it to the provided file. Uses L<Bio::SeqIO> to do
446 the conversion and writing.
448 Args : file to write to, format to be converted to
450 See L<Bio::Seq>, L<Bio::SeqIO>
456 unless ( $self->{'_parsed'} ) {
461 my $remove_spaces = 0;
463 my ($format, $file) = @params{qw( -format -file)};
467 # this is ok, flag so that the nexmlid gets converted;
473 $self->throw("Format '$format' not yet supported for extraction");
477 my $seqIO = Bio
::SeqIO
->new(-format
=> $format, -file
=> $file);
478 my $seqs = $self->{_seqs
};
479 foreach my $seq (@
$seqs) {
480 if ($remove_spaces) {
485 $ret = $seqIO->write_seq($seq);
493 Usage : $nexmlIO->extract_alns(-file => ">$outfile", -format => $format)
494 Function: converts BioPerl alns stored in the NexmlIO object into the provided
495 format and writes it to the provided file. Uses L<Bio::AlignIO> to do
496 the conversion and writing.
498 Args : file to write to, format to be converted to
500 See L<Bio::SimpleAlign>, L<Bio::AlignIO>
506 unless ( $self->{'_parsed'} ) {
512 my ($format, $file) = @params{qw( -format -file)};
514 my $alignIO = Bio
::AlignIO
->new(-format
=> $format, -file
=> $file);
515 my $alns = $self->{_alns
};
516 foreach my $aln (@
$alns) {
517 $ret = $alignIO->write_aln($aln);
524 Title : extract_trees
525 Usage : $nexmlIO->extract_trees(-file => ">$outfile", -format => $format)
526 Function: converts BioPerl trees stored in the NexmlIO object into the provided
527 format and writes it to the provided file. Uses L<Bio::TreeIO> to do
528 the conversion and writing.
530 Args : file to write to, format to be converted to
532 See L<Bio::Tree::Tree>, L<Bio::TreeIO>
538 unless ( $self->{'_parsed'} ) {
544 my ($format, $file) = @params{qw( -format -file)};
546 my $treeIO = Bio
::TreeIO
->new(-format
=> $format, -file
=> $file);
547 my $trees = $self->{_trees
};
548 foreach my $tree (@
$trees) {
549 $treeIO->write_tree($tree);