maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / SeqIO / metafasta.pm
blobd90a1e6a26e3d9a659eab2c5ad41316ba96c715a
1 # BioPerl module for Bio::SeqIO::metafasta
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SeqIO::metafasta - metafasta sequence input/output stream
17 =head1 SYNOPSIS
19 Do not use this module directly. Use it via the Bio::SeqIO class.
21 use Bio::SeqIO;
23 # read the metafasta file
24 $io = Bio::SeqIO->new(-file => "test.metafasta",
25 -format => "metafasta" );
27 $seq = $io->next_seq;
29 =head1 DESCRIPTION
31 This object can transform Bio::Seq::Meta objects to and from metafasta
32 flat file databases.
34 For sequence part the code is an exact copy of Bio::SeqIO::fasta
35 module. The only added bits deal with meta data IO.
37 The format of a metafasta file is
39 >test
40 ABCDEFHIJKLMNOPQRSTUVWXYZ
41 &charge
42 NBNAANCNJCNNNONNCNNUNNXNZ
43 &chemical
44 LBSAARCLJCLSMOIMCHHULRXRZ
46 where the sequence block is followed by one or several meta blocks.
47 Each meta block starts with the ampersand character '&' in the first
48 column and is immediately followed by the name of the meta data which
49 continues until the new line. The meta data follows it. All
50 characters, except new line, are important in meta data.
52 =head1 FEEDBACK
54 =head2 Mailing Lists
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to one
58 of the Bioperl mailing lists. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 =head2 Support
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
74 =head2 Reporting Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
78 web:
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Heikki Lehvaslaiho
84 Email heikki-at-bioperl-dot-org
86 =head1 APPENDIX
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
91 =cut
93 # Let the code begin...
95 package Bio::SeqIO::metafasta;
96 use vars qw($WIDTH);
97 use strict;
99 use Bio::Seq::SeqFactory;
100 use Bio::Seq::SeqFastaSpeedFactory;
101 use Bio::Seq::Meta;
103 use base qw(Bio::SeqIO);
105 BEGIN { $WIDTH = 60}
107 sub _initialize {
108 my($self,@args) = @_;
109 $self->SUPER::_initialize(@args);
110 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
111 $width && $self->width($width);
112 unless ( defined $self->sequence_factory ) {
113 $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
117 =head2 next_seq
119 Title : next_seq
120 Usage : $seq = $stream->next_seq()
121 Function: returns the next sequence in the stream
122 Returns : Bio::Seq object
123 Args : NONE
125 =cut
127 sub next_seq {
128 my( $self ) = @_;
129 my $seq;
130 my $alphabet;
131 local $/ = "\n>";
132 return unless my $entry = $self->_readline;
134 chomp($entry);
135 if ($entry =~ m/\A\s*\Z/s) { # very first one
136 return unless $entry = $self->_readline;
137 chomp($entry);
139 $entry =~ s/^>//;
141 my ($top,$sequence) = split(/\n/,$entry,2);
142 defined $sequence && $sequence =~ s/>//g;
144 my @metas;
145 ($sequence, @metas) = split /\n&/, $sequence;
147 my ($id,$fulldesc);
148 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
149 ($id,$fulldesc) = ($1,$2);
152 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
153 # between > and name \AE
154 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
156 # for empty sequences we need to know the mol.type
157 $alphabet = $self->alphabet();
158 if(defined $sequence && length($sequence) == 0) {
159 if(! defined($alphabet)) {
160 # let's default to dna
161 $alphabet = "dna";
163 } else {
164 # we don't need it really, so disable
165 $alphabet = undef;
168 $seq = $self->sequence_factory->create(
169 -seq => $sequence,
170 -id => $id,
171 # Ewan's note - I don't think this healthy
172 # but obviously to taste.
173 #-primary_id => $id,
174 -desc => $fulldesc,
175 -alphabet => $alphabet,
176 -direct => 1,
179 $seq = $seq->primary_seq;
180 bless $seq, 'Bio::Seq::Meta';
182 foreach my $meta (@metas) {
183 my ($name,$string) = split /\n/, $meta;
184 # $split ||= '';
185 $string =~ s/\n//g; # Remove newlines, spaces are important
186 $seq->named_meta($name, $string);
189 # if there wasn't one before, set the guessed type
190 unless ( defined $alphabet ) {
191 $self->alphabet($seq->alphabet());
193 return $seq;
196 =head2 write_seq
198 Title : write_seq
199 Usage : $stream->write_seq(@seq)
200 Function: writes the $seq object into the stream
201 Returns : 1 for success and 0 for error
202 Args : array of 1 to n Bio::PrimarySeqI objects
204 =cut
206 sub write_seq {
207 my ($self,@seq) = @_;
208 my $width = $self->width;
209 foreach my $seq (@seq) {
210 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
211 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
213 my $str = $seq->seq;
214 my $top = $seq->display_id();
215 if ($seq->can('desc') and my $desc = $seq->desc()) {
216 $desc =~ s/\n//g;
217 $top .= " $desc";
219 if(length($str) > 0) {
220 $str =~ s/(.{1,$width})/$1\n/g;
221 } else {
222 $str = "\n";
224 $self->_print (">",$top,"\n",$str) or return;
225 if ($seq->isa('Bio::Seq::MetaI')) {
226 foreach my $meta ($seq->meta_names) {
227 my $str = $seq->named_meta($meta);
228 $str =~ s/(.{1,$width})/$1\n/g;
229 $self->_print ("&",$meta,"\n",$str);
234 $self->flush if $self->_flush_on_write && defined $self->_fh;
235 return 1;
238 =head2 width
240 Title : width
241 Usage : $obj->width($newval)
242 Function: Get/Set the line width for METAFASTA output
243 Returns : value of width
244 Args : newvalue (optional)
247 =cut
249 sub width{
250 my ($self,$value) = @_;
251 if( defined $value) {
252 $self->{'width'} = $value;
254 return $self->{'width'} || $WIDTH;