3 # BioPerl module for Bio::Index::Abstract
5 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
13 Bio::Index::Fasta - Interface for indexing (multiple) fasta files
17 # Complete code for making an index for several
19 use Bio::Index::Fasta;
21 my $Index_File_Name = shift;
22 my $inx = Bio::Index::Fasta->new($Index_File_Name, 'WRITE');
23 $inx->make_index(@ARGV);
25 # Print out several sequences present in the index
27 use Bio::Index::Fasta;
29 my $Index_File_Name = shift;
30 my $inx = Bio::Index::Fasta->new($Index_File_Name);
32 foreach my $id (@ARGV) {
33 my $seq = $inx->fetch($id); # Returns Bio::Seq object
34 print $seq->layout('GCG');
39 my $seq = $inx->get_Seq_by_id($id); #identical to fetch
43 Inherits functions for managing dbm files from Bio::Index::Abstract.pm,
44 and provides the basic funtionallity for indexing fasta files, and
45 retrieving the sequence from them.
47 Bio::Index::Fasta supports the Bio::DB::BioSeqI interface, meaning
48 it can be used a a Sequence database for other parts of bioperl
54 User feedback is an integral part of the evolution of this and other
55 Bioperl modules. Send your comments and suggestions preferably to one
56 of the Bioperl mailing lists. Your participation is much appreciated.
58 vsns-bcd-perl@lists.uni-bielefeld.de - General discussion
59 vsns-bcd-perl-guts@lists.uni-bielefeld.de - Technically-oriented discussion
60 http://bio.perl.org/MailList.html - About the mailing lists
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 the bugs and their resolution. Bug reports can be submitted via
68 bioperl-bugs@bio.perl.org
69 http://bio.perl.org/bioperl-bugs/
71 =head1 AUTHOR - James Gilbert
73 Email - jgrg@sanger.ac.uk
77 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
82 # Let the code begin...
85 package Bio
::Index
::Fasta
;
87 use vars
qw($VERSION @ISA @EXPORT_OK);
90 use Bio::Index::Abstract;
93 @ISA = qw(Bio::Index::Abstract Bio::DB::BioSeqI Exporter);
97 return '__FASTA__'; # What kind of index are we?
101 # Suggested fix by Michael G Schwern <schwern@pobox.com> to
102 # get around a clash with CPAN shell...
117 Usage : $index->_initialize
118 Function: Calls $index->SUPER::_initialize(), and then adds
119 the default id parser for fasta files.
127 my($self, $index_file, $write_flag) = @_;
129 $self->SUPER::_initialize
($index_file, $write_flag);
130 $self->id_parser( \
&default_id_parser
);
137 Usage : $index->_index_file( $file_name, $i )
138 Function: Specialist function to index FASTA format files.
139 Is provided with a filename and an integer
140 by make_index in its SUPER class.
150 $i # Index-number of file being indexed
153 my( $begin, # Offset from start of file of the start
154 # of the last found record.
155 $end, # Offset from start of file of the end
156 # of the last found record.
157 $id, # ID of last found record.
163 open FASTA
, $file or $self->throw("Can't open file for read : $file");
168 my $new_begin = tell(FASTA
) - length( $_ );
169 $end = $new_begin - 1;
171 $self->add_record($id, $i, $begin, $end) if $id;
174 ($id) = $self->record_id( $_ );
177 # Don't forget to add the last record
179 $self->add_record($id, $i, $begin, $end) if $id;
186 # Should there be a prototype for this method in Index::Abstract.pm?
190 Usage : $index->record_id( STRING );
191 Function: Parses the ID for an entry from the string
192 supplied, using the code in $index->{'_id_parser'}
194 Returns : scalar or exception
201 my ($self, $line) = @_;
203 if (my $id = $self->{'_id_parser'}->( $line )) {
206 $self->throw("Can't parse ID from line : $line");
214 Usage : $index->id_parser( CODE )
215 Function: Stores or returns the code used by record_id
216 to parse the ID for record from a string. Useful
217 for (for instance) specifying a different parser
218 for different flavours of FASTA file.
219 Example : $index->id_parser( \&my_id_parser )
220 Returns : ref to CODE if called without arguments
226 my( $self, $code ) = @_;
229 $self->{'_id_parser'} = $code;
231 return $self->{'_id_parser'};
237 =head2 default_id_parser
239 Title : default_id_parser
240 Usage : $id = default_id_parser( $header )
241 Function: The default Fasta ID parser for Fasta.pm
242 Returns $1 from applying the regexp /^>\s*(\S+)/
246 Args : a fasta header line string
250 sub default_id_parser
{
252 $line =~ /^>\s*(\S+)/;
260 Usage : $index->fetch( $id )
261 Function: Returns a Bio::Seq object from the index
262 Example : $seq = $index->fetch( 'dJ67B12' )
263 Returns : Bio::Seq object
269 my( $self, $id ) = @_;
271 my $db = $self->db();
272 if (my $rec = $db->{ $id }) {
275 my ($file, $begin, $end) = $self->unpack_record( $rec );
277 # Get the (possibly cached) filehandle
278 my $fh = $self->_file_handle( $file );
280 # Accumulate lines in @record until beyond end
281 seek($fh, $begin, 0);
282 while (defined(my $line = <$fh>)) {
283 push(@record, $line);
284 last if tell($fh) > $end;
287 $self->throw("Can't fetch sequence for record : $id")
291 my $firstLine = shift @record;
292 my ($name, $desc) = $firstLine =~ /^>\s*(\S+)\s*(.*?)\s*$/;
295 # Return a shiny Bio::Seq object
296 return Bio
::Seq
->new( -ID
=> $name,
298 -SEQ
=> uc(join('', @record)) );
300 $self->throw("Unable to find a record for $id in Fasta index");
307 Title : get_Seq_by_id
308 Usage : $seq = $db->get_Seq_by_id()
309 Function: retrieves a sequence object, identically to
310 ->fetch, but here behaving as a Bio::DB::BioSeqI
311 Returns : new Bio::Seq object
312 Args : string represents the id
320 return $self->fetch($id);