1 # $Id: Nexml.pm 15889 2009-07-29 13:35:29Z chmille4 $
2 # BioPerl module for Bio::NexmlIO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chase Miller <chmille4@gmail.com>
8 # Copyright Chase Miller
10 # You may distribute this module under the same terms as perl itself
13 # June 16, 2009 Largely rewritten by Chase Miller
15 # POD documentation - main docs before the code
19 Bio::NexmlIO - stream handler for NeXML documents
23 #Instantiate a Bio::Nexml object and link it to a file
24 my $in_nexml = Bio::Nexml->new(-file => 'nexml_doc.xml', -format => 'Nexml');
27 my $bptree1 = $in_nexml->next_tree();
28 my $bpaln1 = $in_nexml->next_aln();
29 my $bpseq1 = $in_nexml->next_seq();
34 #Write data to nexml file
35 my $out_nexml = Bio::Nexml->new(-file => '>new_nexml_doc.xml', -format => 'Nexml');
42 Bio::NexmlIO is an I/O handler for a NeXML document. A NeXML document can
43 represent three different data types: simple sequences, alignments,
44 and trees. NexmlIO has four main methods next_tree, next_seq,
45 next_aln, and write. NexmlIO returns bioperl seq, tree, and aln
46 objects which can be manipulated then passed to the write method of a
47 new NexmlIO instance to allow the creation of a NeXML document.
49 Each bioperl object contains all the information necessary to recreate
50 a Bio::Phylo::Taxa object, so each time a bioperl object is converted
51 to a biophylo object, the bioperl object is checked to see if its
52 associated taxa has already been created (against a hash using the
53 NexmlIO_ID and Taxa_ID to create a unique string). If not, it is
54 created; if so, that taxa object is used to link the Bio::Phylo tree
57 For more information on the NeXML format, see L<http://www.nexml.org>.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists.
67 Your participation is much appreciated.
69 bioperl-l@bioperl.org - General discussion
70 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
74 Please direct usage questions or support issues to the mailing list:
76 I<bioperl-l@bioperl.org>
78 rather than to the module maintainer directly. Many experienced and
79 reponsive experts will be able look at the problem and quickly
80 address it. Please include a thorough description of the problem
81 with code and data examples if at all possible.
85 Report bugs to the Bioperl bug tracking system to help us keep track
86 the bugs and their resolution. Bug reports can be submitted via the
89 https://redmine.open-bio.org/projects/bioperl/
91 =head1 AUTHOR - Chase Miller
93 Email chmille4@gmail.com
97 Mark A. Jensen, maj -at- fortinbras -dot- com
101 The rest of the documentation details each of the object
102 methods. Internal methods are usually preceded with a _
106 # Let the code begin...
109 package Bio
::NexmlIO
;
114 use Bio
::SeqIO
::nexml
;
115 use Bio
::AlignIO
::nexml
;
116 use Bio
::TreeIO
::nexml
;
117 use Bio
::Nexml
::Factory
;
119 use base
qw(Bio::Root::IO);
121 my $nexml_fac = Bio
::Nexml
::Factory
->new();
128 Usage : my $in_nexmlIO = Bio::NexmlIO->new(-file => 'data.nexml.xml');
129 Function: Creates a L<Bio::NexmlIO> object linked to a stream
130 Returns : a L<Bio::NexmlIO> object
138 my($class,@args) = @_;
139 my $self = $class->SUPER::new
(@args);
142 my $file_string = $params{'-file'};
144 #create unique ID by creating a scalar and using the memory address
145 my $ID = bless \
(my $dummy), "UniqueID";
146 ($self->{'_ID'}) = sprintf("%s",\
$ID) =~ /(0x[0-9a-fA-F]+)/;
148 unless ($file_string =~ m/^\>/) {
149 $self->{'_doc'} = Bio
::Phylo
::IO
->parse('-file' => $params{'-file'}, '-format' => 'nexml', '-as_project' => '1');
159 Usage : my $nexml_doc = $in_nexmlIO->doc();
160 Function: returns a L<Bio::Phylo::Project> object that contains all the Bio::Phylo data objects parsed from the stream
161 Returns : a L<Bio::Phylo::Project> object
168 return $self->{'_doc'};
171 # Takes the Bio::Phylo::Project object and creats BioPerl trees, alns, and seqs from it
175 $self->{'_treeiter'} = 0;
176 $self->{'_seqiter'} = 0;
177 $self->{'_alniter'} = 0;
179 $self->{_trees
} = $nexml_fac->create_bperl_tree($self);
180 $self->{_alns
} = $nexml_fac->create_bperl_aln($self);
181 $self->{_seqs
} = $nexml_fac->create_bperl_seq($self);
182 my $taxa_array = $self->doc->get_taxa();
184 $self->{'_parsed'} = 1; #success
192 Usage : $tree = $stream->next_tree
193 Function: Reads the next tree object from the stream and returns it.
194 Returns : a L<Bio::Tree::Tree> object
197 See L<Bio::Root::IO>, L<Bio::Tree::Tree>
203 $self->_parse unless $self->{'_parsed'};
205 return $self->{'_trees'}->[ $self->{'_treeiter'}++ ];
211 Usage : $seq = $stream->next_seq
212 Function: Reads the next seq object from the stream and returns it.
213 Returns : a L<Bio::Seq> object
216 See L<Bio::Root::IO>, L<Bio::Seq>
222 unless ( $self->{'_parsed'} ) {
225 return $self->{'_seqs'}->[ $self->{'_seqiter'}++ ];
231 Usage : $aln = $stream->next_aln
232 Function: Reads the next aln object from the stream and returns it.
233 Returns : a L<Bio::SimpleAlign> object
236 See L<Bio::Root::IO>, L<Bio::SimpleAlign>
242 unless ( $self->{'_parsed'} ) {
245 return $self->{'_alns'}->[ $self->{'_alniter'}++ ];
251 $self->{"_${elt}iter"} = 0 if defined $self->{"_${elt}iter"};
258 Usage : $stream->rewind_seq
259 Function: Resets the stream for seqs
263 See L<Bio::Root::IO>, L<Bio::Seq>
267 sub rewind_seq
{ shift->_rewind('seq'); }
272 Usage : $stream->rewind_aln
273 Function: Resets the stream for alns
277 See L<Bio::Root::IO>, L<Bio::Simple::Align>
281 sub rewind_aln
{ shift->_rewind('aln'); }
286 Usage : $stream->rewind_tree
287 Function: Resets the stream for trees
291 See L<Bio::Root::IO>, L<Bio::tree::tree>
295 sub rewind_tree
{ shift->_rewind('tree'); }
300 Usage : $stream->write(-alns => $alns,-seqs => $seqs,-trees => $trees)
301 Function: converts BioPerl seq, tree, and aln objects into Bio::Phylo
302 seq, tree, and aln objects, constructs a Bio::Phylo::Project
303 object made up of the newly created Bio::Phylo objects, and
304 writes the Bio::Phylo:Project object to the stream as a valid
307 Args : \@L<Bio::Seq>, \@L<Bio::SimpleAlign>, \@L<Bio::Tree::Tree>
309 See L<Bio::Root::IO>, L<Bio::tree::tree>, L<Bio::Seq>, L<Bio::SimpleAlign>
314 my ($self, @args) = @_;
318 my ($trees, $alns, $seqs) = @params{qw( -trees -alns -seqs )};
320 my %seq_matrices = ();
322 my $proj_doc = Bio
::Phylo
::Factory
->create_project();
324 #convert trees to bio::Phylo objects
325 my $forest = Bio
::Phylo
::Factory
->create_forest();
332 foreach my $tree (@
$trees) {
333 my $nexml_id = $tree->get_tag_values('_NexmlIO_ID');
335 if ( defined $taxa_hash{$nexml_id} ) {
336 $taxa_o = $taxa_hash{$nexml_id};
339 ($taxa_o) = $nexml_fac->create_bphylo_taxa($tree);
340 $forest->set_taxa($taxa_o) if defined $taxa_o;
341 $taxa_hash{$nexml_id} = $taxa_o;
344 ($phylo_tree_o) = $nexml_fac->create_bphylo_tree($tree, $taxa_o);
346 $forest->insert($phylo_tree_o);
349 #convert matrices to Bio::Phylo objects
350 my $matrices = Bio
::Phylo
::Matrices
->new();
353 foreach my $aln (@
$alns)
356 if (defined $taxa_hash{ $aln->{_Nexml_ID
} }) {
357 $taxa_o = $taxa_hash{$aln->{_Nexml_ID
}};
360 ($taxa_o) = $nexml_fac->create_bphylo_taxa($aln);
361 $taxa_hash{$aln->{_Nexml_ID
}} = $taxa_o;
364 ($phylo_matrix_o) = $nexml_fac->create_bphylo_aln($aln, $taxa_o);
366 $phylo_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
367 $matrices->insert($phylo_matrix_o);
372 #convert sequences to Bio::Phylo objects
373 foreach my $seq (@
$seqs)
376 #check if this Bio::Phylo::Taxa obj has already been created
377 if (defined $taxa_hash{ $seq->{_Nexml_ID
} }) {
378 $taxa_o = $taxa_hash{$seq->{_Nexml_ID
}};
381 ($taxa_o) = $nexml_fac->create_bphylo_taxa($seq);
382 $taxa_hash{$seq->{_Nexml_ID
}} = $taxa_o;
384 $datum = $nexml_fac->create_bphylo_seq($seq, $taxa_o);
385 #check if this Bio::Phylo::Matrices::Matrix obj has already been created
386 if (defined $seq_matrices{ $seq->{_Nexml_matrix_ID
} }) {
387 $seq_matrix_o = $seq_matrices{$seq->{_Nexml_matrix_ID
}};
388 my $taxon_name = $datum->get_taxon()->get_name();
389 $datum->unset_taxon();
390 $seq_matrix_o->insert($datum);
391 $datum->set_taxon($seq_matrix_o->get_taxa()->get_by_name($taxon_name));
394 $seq_matrix_o = Bio
::Phylo
::Factory
->create_matrix('-type' => $datum->moltype);
395 $seq_matrices{$seq->{_Nexml_matrix_ID
}} = $seq_matrix_o;
396 $seq_matrix_o->set_taxa($taxa_o) if defined $taxa_o;
397 $seq_matrix_o->insert($datum);
400 my $feat = ($seq->get_SeqFeatures())[0];
401 my $matrix_label = ($feat->get_tag_values('matrix_label'))[0] if $feat->has_tag('id');
402 $seq_matrix_o->set_name($matrix_label);
404 $matrices->insert($seq_matrix_o);
408 #Add matrices and forest objects to project object which represents a complete nexml document
410 $proj_doc->insert($forest);
412 while(my $curr_matrix = $matrices->next) {
413 $proj_doc->insert($curr_matrix);
416 #write nexml document to stream
417 my $ret = $self->_print($proj_doc->to_xml(-compact
=>1));
425 Usage : $nexmlIO->extract_seqs(-file => ">$outfile", -format => $format)
426 Function: converts BioPerl seqs stored in the NexmlIO object into the provided
427 format and writes it to the provided file. Uses L<Bio::SeqIO> to do
428 the conversion and writing.
430 Args : file to write to, format to be converted to
432 See L<Bio::Seq>, L<Bio::SeqIO>
438 unless ( $self->{'_parsed'} ) {
443 my $remove_spaces = 0;
445 my ($format, $file) = @params{qw( -format -file)};
449 # this is ok, flag so that the nexmlid gets converted;
455 $self->throw("Format '$format' not yet supported for extraction");
459 my $seqIO = Bio
::SeqIO
->new(-format
=> $format, -file
=> $file);
460 my $seqs = $self->{_seqs
};
461 foreach my $seq (@
$seqs) {
462 if ($remove_spaces) {
467 $ret = $seqIO->write_seq($seq);
475 Usage : $nexmlIO->extract_alns(-file => ">$outfile", -format => $format)
476 Function: converts BioPerl alns stored in the NexmlIO object into the provided
477 format and writes it to the provided file. Uses L<Bio::AlignIO> to do
478 the conversion and writing.
480 Args : file to write to, format to be converted to
482 See L<Bio::SimpleAlign>, L<Bio::AlignIO>
488 unless ( $self->{'_parsed'} ) {
494 my ($format, $file) = @params{qw( -format -file)};
496 my $alignIO = Bio
::AlignIO
->new(-format
=> $format, -file
=> $file);
497 my $alns = $self->{_alns
};
498 foreach my $aln (@
$alns) {
499 $ret = $alignIO->write_aln($aln);
506 Title : extract_trees
507 Usage : $nexmlIO->extract_trees(-file => ">$outfile", -format => $format)
508 Function: converts BioPerl trees stored in the NexmlIO object into the provided
509 format and writes it to the provided file. Uses L<Bio::TreeIO> to do
510 the conversion and writing.
512 Args : file to write to, format to be converted to
514 See L<Bio::Tree::Tree>, L<Bio::TreeIO>
520 unless ( $self->{'_parsed'} ) {
526 my ($format, $file) = @params{qw( -format -file)};
528 my $treeIO = Bio
::TreeIO
->new(-format
=> $format, -file
=> $file);
529 my $trees = $self->{_trees
};
530 foreach my $tree (@
$trees) {
531 $treeIO->write_tree($tree);