Removing from HEAD (deprecated for 1.6)
[bioperl-live.git] / Bio / Index / Hmmer.pm
bloba12e3f15fd836d10e4abccb878a1cd56f6ef8708
2 # BioPerl module for Bio::Index::Hmmer
3 #
4 # Cared for by Josh Lauricha <laurichj@bioinfo.ucr.edu>
6 # Copyright Josh Lauricha
7 # Unless otherwise noted, this was shamelessly ripped from
8 # Bio::Index::Blast
10 # You may distribute this module under the terms of perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Index::Hmmer - Indexes HMMER reports and supports retrieval based on query
18 =head1 SYNOPSIS
20 # Complete code for indexing a set of report files
21 #!/usr/bin/perl -w
22 use strict;
23 use Bio::Index::Hmmer;
24 my $indexfile = shift;
25 my $index = Bio::Index::Hmmer->new(
26 -filename => $indexfile,
27 -write_flag => 1
29 $index->make_index(@ARGV);
32 # Complete code for fetching a report
33 use strict;
34 use Bio::Index::Hmmer;
35 my $indexfile = shift;
36 my $index = Bio::Index::Hmmer->new(
37 -filename => $indexfile,
38 -write_flag => 0
41 foreach my $id (@ARGV) {
42 my $report = $index->fetch_report($id);
43 print "Query: ", $report->query_name(), "\n";
44 while( my $hit = $report->next_hit() ) {
45 print "\tHit Name: ", $hit->name(), "\n";
46 while( my $hsp = $hit->next_domain() ) {
47 print "\t\tE-Value: ", $hsp->evalue(), "\n";
52 =head1 DESCRIPTION
54 This object allows one to build an index on a HMMER file (or files)
55 and provide quick access to the HMMER report for that accession.
56 For best results 'use strict'.
58 You can also set or customize the unique key used to retrieve by
59 writing your own function and calling the id_parser() method.
60 For example:
62 $inx->id_parser(\&get_id);
63 # make the index
64 $inx->make_index($file_name);
66 # here is where the retrieval key is specified
67 sub get_id {
68 my $line = shift;
69 $line =~ /^KW\s+([A-Z]+)/i;
70 $1;
74 =head1 FEEDBACK
76 =head2 Mailing Lists
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to
80 the Bioperl mailing list. Your participation is much appreciated.
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85 =head2 Reporting Bugs
87 Report bugs to the Bioperl bug tracking system to help us keep track
88 of the bugs and their resolution. Bug reports can be submitted via the
89 web:
91 http://bugzilla.open-bio.org/
93 =head1 AUTHOR - Josh Lauricha
95 Email laurichj@bioinfo.ucr.edu
97 =head1 APPENDIX
99 The rest of the documentation details each of the object methods.
100 Internal methods are usually preceded with a _
102 =cut
104 # Let the code begin...
106 package Bio::Index::Hmmer;
107 use strict;
109 use Bio::SearchIO;
110 use IO::String;
111 use Bio::Root::Version;
113 use base qw(Bio::Index::Abstract Bio::Root::Root);
115 sub _version
117 return ${Bio::Root::Version::VERSION};
120 =head2 new
122 Usage : $index = Bio::Index::Hmmer->new(
123 -filename => $dbm_file,
124 -write_flag => 0,
125 -dbm_package => 'DB_File',
126 -verbose => 0
128 Function: Returns a new index object. If filename is
129 specified, then open_dbm() is immediately called.
130 Returns : A new index object
131 Args : -filename The name of the dbm index file.
132 -write_flag TRUE if write access to the dbm file is
133 needed.
134 -dbm_package The Perl dbm module to use for the
135 index.
136 -verbose Print debugging output to STDERR if
137 TRUE.
139 =cut
141 sub new
143 my($class, @args) = @_;
144 my $self = $class->SUPER::new(@args);
147 =head2 Bio::Index::Hmmer implemented methods
149 =cut
151 =head2 fetch_report
153 Title : fetch_report
154 Usage : my $report = $idx->fetch_report($id);
155 Function: Returns a Bio::Search::Result::HMMERResult report object
156 for a specific HMMER report
157 Returns : Bio::Search::Result::HMMERResult
158 Args : valid id
160 =cut
162 sub fetch_report
164 my ($self, $id) = @_;
165 my (@header, @data, $line);
166 my $fh = $self->get_stream($id);
167 my $pos = tell($fh);
169 seek($fh, 0, 0); # The HMMER SearchIO wants the header, so we fetch it
170 while($line = <$fh>) {
171 push @header, $line;
172 last if $line =~ /Query sequence:/o;
174 seek($fh, $pos, 0);
176 # Then the data
177 while(<$fh>) {
178 push @data, $_ if defined;
179 last if m{//}o;
182 # Then join them and send
183 my $rfh = new IO::String(join('', @header, @data));
184 my $report = Bio::SearchIO->new(
185 -noclose => 1,
186 -format => 'hmmer',
187 -fh => $rfh
189 return $report->next_result();
192 # shamelessly stolen from Bio::Index::Fasta
194 =head2 id_parser
196 Title : id_parser
197 Usage : $index->id_parser( CODE )
198 Function: Stores or returns the code used by record_id to
199 parse the ID for record from a string. Useful
200 for (for instance) specifying a different
201 parser for different flavours of HMMER reports.
202 Returns \&default_id_parser (see below) if not
203 set. If you supply your own id_parser
204 subroutine, then it should expect the query name
205 captured from a "Query sequence:" line. An entry will be
206 added to the index for each string in the list returned.
207 Example : $index->id_parser( \&my_id_parser )
208 Returns : ref to CODE if called without arguments
209 Args : CODE
211 =cut
213 sub id_parser
215 my( $self, $code ) =@_;
217 if ($code) {
218 $self->{'_id_parser'} = $code;
220 return $self->{'_id_parser'} || \&default_id_parser;
223 =head2 default_id_parser
225 Title : default_id_parser
226 Usage : $id = default_id_parser( $header )
227 Function: The default HMMER query ID parser for Bio::Index::Hmmer.
228 Returns $1 from applying the regexp /^\s*(\S+)/
229 to $header.
230 Returns : ID string
231 Args : a header line string
233 =cut
235 sub default_id_parser
237 if ($_[0] =~ /^\s*(\S+)/) {
238 return $1;
239 } else {
240 return;
244 =head2 Require methods from Bio::Index::Abstract
246 =cut
248 =head2 _index_file
250 Title : _index_file
251 Usage : $index->_index_file( $file_name, $i )
252 Function: Specialist function to index HMMER report file(s).
253 Is provided with a filename and an integer
254 by make_index in its SUPER class.
255 Example :
256 Returns :
257 Args :
259 =cut
262 sub _index_file {
263 my($self, $file, $i) = @_;
264 my($begin);
266 open(my $HMMER, '<', $file) or $self->throw("cannot open file $file");
268 my $id;
269 my $indexpoint = 0;
271 while(<$HMMER>) {
272 if( /Query sequence: ([^\s]+)/o ) {
273 $indexpoint = tell($HMMER);
274 foreach my $id ($self->id_parser()->($1)) {
275 print "id is $id, begin is $indexpoint\n" if $self->verbose() > 0;
276 $self->add_record($id, $i, $indexpoint);
280 close $HMMER;
281 return 1;
284 =head2 Bio::Index::Abstract methods
286 =cut
288 =head2 filename
290 Title : filename
291 Usage : $value = $self->filename();
292 $self->filename($value);
293 Function: Gets or sets the name of the dbm index file.
294 Returns : The current value of filename
295 Args : Value of filename if setting, or none if
296 getting the value.
298 =head2 write_flag
300 Title : write_flag
301 Usage : $value = $self->write_flag();
302 $self->write_flag($value);
303 Function: Gets or sets the value of write_flag, which
304 is whether the dbm file should be opened with
305 write access.
306 Returns : The current value of write_flag (default 0)
307 Args : Value of write_flag if setting, or none if
308 getting the value.
310 =head2 dbm_package
312 Usage : $value = $self->dbm_package();
313 $self->dbm_package($value);
315 Function: Gets or sets the name of the Perl dbm module used.
316 If the value is unset, then it returns the value of
317 the package variable $USE_DBM_TYPE or if that is
318 unset, then it chooses the best available dbm type,
319 choosing 'DB_File' in preference to 'SDBM_File'.
320 Bio::Index::Abstract may work with other dbm file
321 types.
323 Returns : The current value of dbm_package
324 Args : Value of dbm_package if setting, or none if
325 getting the value.
328 =head2 get_stream
330 Title : get_stream
331 Usage : $stream = $index->get_stream( $id );
332 Function: Returns a file handle with the file pointer
333 at the appropriate place
335 This provides for a way to get the actual
336 file contents and not an object
338 WARNING: you must parse the record delimiter
339 *yourself*. Abstract won't do this for you
340 So this code
342 $fh = $index->get_stream($myid);
343 while( <$fh> ) {
344 # do something
346 will parse the entire file if you don't put in
347 a last statement in, like
349 while( <$fh> ) {
350 /^\/\// && last; # end of record
351 # do something
354 Returns : A filehandle object
355 Args : string represents the accession number
356 Notes : This method should not be used without forethought
359 =head2 open_dbm
361 Usage : $index->open_dbm()
362 Function: Opens the dbm file associated with the index
363 object. Write access is only given if explicitly
364 asked for by calling new(-write_flag => 1) or having set
365 the write_flag(1) on the index object. The type of
366 dbm file opened is that returned by dbm_package().
367 The name of the file to be opened is obtained by
368 calling the filename() method.
370 Example : $index->_open_dbm()
371 Returns : 1 on success
374 =head2 _version
376 Title : _version
377 Usage : $type = $index->_version()
378 Function: Returns a string which identifies the version of an
379 index module. Used to permanently identify an index
380 file as having been created by a particular version
381 of the index module. Must be provided by the sub class
382 Example :
383 Returns :
384 Args : none
386 =head2 _filename
388 Title : _filename
389 Usage : $index->_filename( FILE INT )
390 Function: Indexes the file
391 Example :
392 Returns :
393 Args :
395 =head2 _file_handle
397 Title : _file_handle
398 Usage : $fh = $index->_file_handle( INT )
399 Function: Returns an open filehandle for the file
400 index INT. On opening a new filehandle it
401 caches it in the @{$index->_filehandle} array.
402 If the requested filehandle is already open,
403 it simply returns it from the array.
404 Example : $first_file_indexed = $index->_file_handle( 0 );
405 Returns : ref to a filehandle
406 Args : INT
408 =head2 _file_count
410 Title : _file_count
411 Usage : $index->_file_count( INT )
412 Function: Used by the index building sub in a sub class to
413 track the number of files indexed. Sets or gets
414 the number of files indexed when called with or
415 without an argument.
416 Example :
417 Returns : INT
418 Args : INT
421 =head2 add_record
423 Title : add_record
424 Usage : $index->add_record( $id, @stuff );
425 Function: Calls pack_record on @stuff, and adds the result
426 of pack_record to the index database under key $id.
427 If $id is a reference to an array, then a new entry
428 is added under a key corresponding to each element
429 of the array.
430 Example : $index->add_record( $id, $fileNumber, $begin, $end )
431 Returns : TRUE on success or FALSE on failure
432 Args : ID LIST
434 =head2 pack_record
436 Title : pack_record
437 Usage : $packed_string = $index->pack_record( LIST )
438 Function: Packs an array of scalars into a single string
439 joined by ASCII 034 (which is unlikely to be used
440 in any of the strings), and returns it.
441 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
442 Returns : STRING or undef
443 Args : LIST
445 =head2 unpack_record
447 Title : unpack_record
448 Usage : $index->unpack_record( STRING )
449 Function: Splits the string provided into an array,
450 splitting on ASCII 034.
451 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
452 Returns : A 3 element ARRAY
453 Args : STRING containing ASCII 034
455 =head2 DESTROY
457 Title : DESTROY
458 Usage : Called automatically when index goes out of scope
459 Function: Closes connection to database and handles to
460 sequence files
461 Returns : NEVER
462 Args : NONE
465 =cut