1 # BioPerl module for Bio::SeqIO::fastq
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Tony Cox <avc@sanger.ac.uk>
9 # You may distribute this module under the same terms as perl itself
11 # October 29, 2001 incept data
13 # POD documentation - main docs before the code
17 Bio::SeqIO::fastq - fastq sequence input/output stream
21 Do not use this module directly. Use it via the Bio::SeqIO class.
25 This object can transform Bio::Seq and Bio::Seq::Quality
26 objects to and from fastq flat file databases.
28 Fastq is a file format used frequently at the Sanger Centre to bundle
29 a fasta sequence and its quality data. A typical fastaq entry takes
33 GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT.....
35 !''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65.....
37 Fastq files have sequence and quality data on a single line and the
38 quality values are single-byte encoded. To retrieve the decimal values
39 for qualities you need to subtract 33 (or Octal 41) from each byte and
40 then convert to a '2 digit + 1 space' integer.
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to one
48 of the Bioperl mailing lists. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 Please direct usage questions or support issues to the mailing list:
57 L<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 the bugs and their resolution. Bug reports can be submitted via the
70 http://bugzilla.open-bio.org/
72 =head1 AUTHORS - Tony Cox
74 Email: avc@sanger.ac.uk
79 The rest of the documentation details each of the object
80 methods. Internal methods are usually preceded with a _
84 # Let the code begin...
86 package Bio
::SeqIO
::fastq
;
89 use Bio
::Seq
::SeqFactory
;
91 use base
qw(Bio::SeqIO);
95 $self->SUPER::_initialize
(@args);
96 if( ! defined $self->sequence_factory ) {
97 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new(-verbose
=> $self->verbose(), -type
=> 'Bio::Seq::Quality'));
105 Usage : $seq = $stream->next_seq()
106 Function: returns the next sequence in the stream
107 Returns : Bio::Seq::Quality object
118 my @datatype = qw(seqdesc seq qualdesc qual);
120 return unless my $line = $self->_readline; # bail if any data is incomplete
122 my $type = shift @datatype;
123 if ($type eq 'seqdesc' || $type eq 'qualdesc') {
124 $self->throw("$line doesn't match fastq descriptor line type") unless
125 $line =~ m{^[\+@](.*)$};
128 $seqdata->{$type} = $line;
130 $self->warn("Seq/Qual descriptions don't match; using sequence description\n")
131 unless $seqdata->{qualdesc
} eq '' || $seqdata->{seqdesc
} eq $seqdata->{qualdesc
};
132 my ($id,$fulldesc) = $seqdata->{seqdesc
} =~ /^\s*(\S+)\s*(.*)/
133 or $self->throw("Can't parse fastq header");
134 if ($id eq '') {$id=$fulldesc;} # FIX incase no space between \@ and name
135 $seqdata->{seq
} =~ s/\s//g; # Remove whitespace
136 $seqdata->{qual
} =~ s/\s//g;
138 if(length($seqdata->{seq
}) != length($seqdata->{qual
})){
139 $self->warn("Fastq sequence/quality data length mismatch error\n",
140 "Sequence: ",$seqdata->{seqdesc
},", seq length: ",length($seqdata->{seq
}), " Qual length: ", length($seqdata->{qual
}), " \n",
141 $seqdata->{seq
},"\n",$seqdata->{qual
},"\n");
144 my $seq_data_qual = $seqdata->{qual
};
145 my @qual = unpack("A1" x
length($seq_data_qual), $seq_data_qual);
146 # my @qual = split('', $seqdata->{qual}); # Speed up above
149 foreach (@qual) {$qual .= (unpack("C",$_) - 33) ." "};
151 # for empty sequences we need to know the mol.type
152 $alphabet = $self->alphabet();
153 if(length($seqdata->{seq
}) == 0) {
154 if(! defined($alphabet)) {
155 # let's default to dna
159 # we don't need it really, so disable
163 # create the Quality object
164 $seq = $self->sequence_factory->create(
166 -seq
=> $seqdata->{seq
},
170 -alphabet
=> $alphabet
173 # if there wasn't one before, set the guessed type
174 $self->alphabet($seq->alphabet());
182 Usage : $stream->write_seq(@seq)
183 Function: writes the $seq object into the stream
184 Returns : 1 for success and 0 for error
185 Args : Bio::Seq::Quality or Bio::Seq object
191 my ($self,@seq) = @_;
192 foreach my $seq (@seq) {
194 my $top = $seq->display_id();
195 if ($seq->can('desc') and my $desc = $seq->desc()) {
199 if(length($str) > 0) {
200 $str =~ s/(.{1,60})/$1\n/g;
205 $self->_print (">",$top,"\n",$str) or return;
208 $self->flush if $self->_flush_on_write && defined $self->_fh;
215 Usage : $stream->write_qual(@seq)
216 Function: writes the $seq object into the stream
217 Returns : 1 for success and 0 for error
218 Args : Bio::Seq::Quality object
224 my ($self,@seq) = @_;
225 foreach my $seq (@seq) {
226 unless ($seq->isa("Bio::Seq::Quality")){
227 $self->warn("You can write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n");
230 my @qual = @
{$seq->qual};
231 my $top = $seq->display_id();
232 if ($seq->can('desc') and my $desc = $seq->desc()) {
237 if(scalar(@qual) > 0) {
239 for (my $q = 0;$q<scalar(@qual);$q++){
240 $qual .= $qual[$q] . " ";
241 if(length($qual) > $max){
250 $self->_print (">",$top,"\n",$qual,"\n") or return;
258 Usage : $stream->write_fastq(@seq)
259 Function: writes the $seq object into the stream
260 Returns : 1 for success and 0 for error
261 Args : Bio::Seq::Quality object
267 my ($self,@seq) = @_;
268 foreach my $seq (@seq) {
269 unless ($seq->isa("Bio::Seq::Quality")){
270 $self->warn("You can't write FASTQ without supplying a Bio::Seq::Quality object! ", ref($seq), "\n");
274 my @qual = @
{$seq->qual};
275 my $top = $seq->display_id();
276 if ($seq->can('desc') and my $desc = $seq->desc()) {
280 if(length($str) == 0) {
284 if(scalar(@qual) > 0) {
285 for (my $q = 0;$q<scalar(@qual);$q++){
286 $qual .= chr($qual[$q] + 33);
292 $self->_print ("\@",$top,"\n",$str,"\n") or return;
293 $self->_print ("+",$top,"\n",$qual,"\n") or return;