1 # BioPerl module for Bio::SeqIO::metafasta
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqIO::metafasta - metafasta sequence input/output stream
19 Do not use this module directly. Use it via the Bio::SeqIO class.
23 # read the metafasta file
24 $io = Bio::SeqIO->new(-file => "test.metafasta",
25 -format => "metafasta" );
31 This object can transform Bio::Seq::Meta objects to and from metafasta
34 For sequence part the code is an exact copy of Bio::SeqIO::fasta
35 module. The only added bits deal with meta data IO.
37 The format of a metafasta file is
40 ABCDEFHIJKLMNOPQRSTUVWXYZ
42 NBNAANCNJCNNNONNCNNUNNXNZ
44 LBSAARCLJCLSMOIMCHHULRXRZ
46 where the sequence block is followed by one or several meta blocks.
47 Each meta block starts with the ampersand character '&' in the first
48 column and is immediately followed by the name of the meta data which
49 continues until the new line. The meta data follows it. All
50 characters, except new line, are important in meta data.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to one
58 of the Bioperl mailing lists. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Heikki Lehvaslaiho
84 Email heikki-at-bioperl-dot-org
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
93 # Let the code begin...
95 package Bio
::SeqIO
::metafasta
;
99 use Bio::Seq::SeqFactory;
100 use Bio::Seq::SeqFastaSpeedFactory;
103 use base qw(Bio::SeqIO);
108 my($self,@args) = @_;
109 $self->SUPER::_initialize
(@args);
110 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
111 $width && $self->width($width);
112 unless ( defined $self->sequence_factory ) {
113 $self->sequence_factory(Bio
::Seq
::SeqFastaSpeedFactory
->new());
120 Usage : $seq = $stream->next_seq()
121 Function: returns the next sequence in the stream
122 Returns : Bio::Seq object
132 return unless my $entry = $self->_readline;
135 if ($entry =~ m/\A\s*\Z/s) { # very first one
136 return unless $entry = $self->_readline;
141 my ($top,$sequence) = split(/\n/,$entry,2);
142 defined $sequence && $sequence =~ s/>//g;
145 ($sequence, @metas) = split /\n&/, $sequence;
148 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
149 ($id,$fulldesc) = ($1,$2);
152 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
153 # between > and name \AE
154 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
156 # for empty sequences we need to know the mol.type
157 $alphabet = $self->alphabet();
158 if(defined $sequence && length($sequence) == 0) {
159 if(! defined($alphabet)) {
160 # let's default to dna
164 # we don't need it really, so disable
168 $seq = $self->sequence_factory->create(
171 # Ewan's note - I don't think this healthy
172 # but obviously to taste.
175 -alphabet
=> $alphabet,
179 $seq = $seq->primary_seq;
180 bless $seq, 'Bio::Seq::Meta';
182 foreach my $meta (@metas) {
183 my ($name,$string) = split /\n/, $meta;
185 $string =~ s/\n//g; # Remove newlines, spaces are important
186 $seq->named_meta($name, $string);
189 # if there wasn't one before, set the guessed type
190 unless ( defined $alphabet ) {
191 $self->alphabet($seq->alphabet());
199 Usage : $stream->write_seq(@seq)
200 Function: writes the $seq object into the stream
201 Returns : 1 for success and 0 for error
202 Args : array of 1 to n Bio::PrimarySeqI objects
207 my ($self,@seq) = @_;
208 my $width = $self->width;
209 foreach my $seq (@seq) {
210 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
211 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
214 my $top = $seq->display_id();
215 if ($seq->can('desc') and my $desc = $seq->desc()) {
219 if(length($str) > 0) {
220 $str =~ s/(.{1,$width})/$1\n/g;
224 $self->_print (">",$top,"\n",$str) or return;
225 if ($seq->isa('Bio::Seq::MetaI')) {
226 foreach my $meta ($seq->meta_names) {
227 my $str = $seq->named_meta($meta);
228 $str =~ s/(.{1,$width})/$1\n/g;
229 $self->_print ("&",$meta,"\n",$str);
234 $self->flush if $self->_flush_on_write && defined $self->_fh;
241 Usage : $obj->width($newval)
242 Function: Get/Set the line width for METAFASTA output
243 Returns : value of width
244 Args : newvalue (optional)
250 my ($self,$value) = @_;
251 if( defined $value) {
252 $self->{'width'} = $value;
254 return $self->{'width'} || $WIDTH;