3 # BioPerl module for Bio::Index::Stockholm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chris Fields <cjfields@uiuc.edu>
9 # Copyright Chris Fields
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Index::Stockholm - Indexes Stockholm format alignments (such as those from
18 Pfam and Rfam). Retrieves raw stream data using the ID or a Bio::SimpleAlign
24 use Bio::Index::Stockholm;
25 my ($indexfile,$file1,$file2,$query);
26 my $index = Bio::Index::Stockholm->new(-filename => $indexfile,
28 $index->make_index($file1,$file2);
30 # get raw data stream starting at alignment position
31 my $fh = $index->get_stream($query);
33 # fetch individual alignment
34 my $align = $index->fetch_aln($query); # alias for fetch_report
35 my $align = $index->fetch_report($query); # same as above
36 print "query is ", $align->display_id, "\n";
40 This object allows one to build an index for any file (or files)
41 containing Stockholm alignment format (such as Rfam and Pfam) and provides
42 quick access to the alignment based on the alignment ID.
44 This also allows for ID parsing using a callback:
46 $inx->id_parser(\&get_id);
48 $inx->make_index($file_name);
50 # here is where the retrieval key is specified
53 $line =~ /^>.+gi\|(\d+)/;
57 The indexer is capable of indexing based on multiple IDs passed back from the
58 callback; this is assuming of course all IDs are unique. The default is to use
59 the alignment ID provided for Rfam/Pfam output.
61 Note: for best results 'use strict'.
65 - allow using an alternative regex for indexing (for instance, the ID instead of AC)
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to
73 the Bioperl mailing list. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
80 Please direct usage questions or support issues to the mailing list:
82 L<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 responsive experts will be able to look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
95 http://bugzilla.open-bio.org/
97 =head1 AUTHOR - Chris Fields
99 Email cjfields-at-bioperl-dot-org
103 The rest of the documentation details each of the object methods.
104 Internal methods are usually preceded with a _
108 # Let the code begin...
110 package Bio
::Index
::Stockholm
;
114 use base
qw(Bio::Index::Abstract Bio::Root::Root);
117 return ${Bio
::Root
::Version
::VERSION
};
122 Usage : $index = Bio::Index::Abstract->new(
123 -filename => $dbm_file,
125 -dbm_package => 'DB_File',
128 Function: Returns a new index object. If filename is
129 specified, then open_dbm() is immediately called.
130 Bio::Index::Abstract->new() will usually be called
131 directly only when opening an existing index.
132 Returns : A new index object
133 Args : -filename The name of the dbm index file.
134 -write_flag TRUE if write access to the dbm file is
136 -dbm_package The Perl dbm module to use for the
138 -verbose Print debugging output to STDERR if
145 my($class,@args) = @_;
147 my $self = $class->SUPER::new
(@args);
151 =head2 Bio::Index::Stockholm implemented methods
158 Usage : my $align = $idx->fetch_report($id);
159 Function: Returns a Bio::SimpleAlign object
160 for a specific alignment
161 Returns : Bio::SimpleAlign
168 my $fh = $self->get_stream($id);
169 my $report = Bio
::AlignIO
->new(-noclose
=> 1,
170 -format
=> 'stockholm',
172 return $report->next_aln;
178 Usage : my $align = $idx->fetch_aln($id);
179 Function: Returns a Bio::SimpleAlign object
180 for a specific alignment
181 Returns : Bio::SimpleAlign
183 Note : alias for fetch_report
187 *fetch_aln
= \
&fetch_report
;
189 =head2 Require methods from Bio::Index::Abstract
196 Usage : $index->_index_file( $file_name, $i )
197 Function: Specialist function to index report file(s).
198 Is provided with a filename and an integer
199 by make_index in its SUPER class.
209 $i, # Index-number of file being indexed
212 my( $begin, # Offset from start of file of the start
213 # of the last found record.
216 open(my $BLAST, '<', $file) or $self->throw("cannot open file $file\n");
220 if(m{^#\sSTOCKHOLM} ) {
221 $indexpoint = tell($BLAST)-length $_;
222 $self->debug("Index:$indexpoint\n")
224 if(m{^#=GF\s+AC\s+(\S[^\n]+)}) {
225 foreach my $id ($self->id_parser()->($1)) {
226 $self->debug("id is $id, begin is $indexpoint\n");
227 #$self->add_record($id, $i, $indexpoint);
233 # shamelessly stolen from Bio::Index::Fasta
238 Usage : $index->id_parser( CODE )
239 Function: Stores or returns the code used by record_id to
240 parse the ID for record from a string. Useful
241 for (for instance) specifying a different
242 parser for different flavours of IDs (for instance,
243 custom stockholm-formatted files).
244 Returns \&default_id_parser (see below) if not
245 set. If you supply your own id_parser
246 subroutine, then it should expect the accession
247 string captured from a '#=GF AC' line. An entry will be added to
248 the index for each string in the list returned.
249 Example : $index->id_parser( \&my_id_parser )
250 Returns : ref to CODE if called without arguments
256 my( $self, $code ) =@_;
259 $self->{'_id_parser'} = $code;
261 return $self->{'_id_parser'} || \
&default_id_parser
;
264 =head2 default_id_parser
266 Title : default_id_parser
267 Usage : $id = default_id_parser( $header )
268 Function: The default ID parser for Bio::Index::Stockholm.pm
269 Returns $1 from applying the regexp /^\s*(\S+)/
272 Args : a header line string
276 sub default_id_parser
278 if ($_[0] =~ /^\s*(\S+)/) {
285 =head2 Bio::Index::Abstract methods
292 Usage : $value = $self->filename();
293 $self->filename($value);
294 Function: Gets or sets the name of the dbm index file.
295 Returns : The current value of filename
296 Args : Value of filename if setting, or none if
302 Usage : $value = $self->write_flag();
303 $self->write_flag($value);
304 Function: Gets or sets the value of write_flag, which
305 is whether the dbm file should be opened with
307 Returns : The current value of write_flag (default 0)
308 Args : Value of write_flag if setting, or none if
313 Usage : $value = $self->dbm_package();
314 $self->dbm_package($value);
316 Function: Gets or sets the name of the Perl dbm module used.
317 If the value is unset, then it returns the value of
318 the package variable $USE_DBM_TYPE or if that is
319 unset, then it chooses the best available dbm type,
320 choosing 'DB_File' in preference to 'SDBM_File'.
321 Bio::Index::Abstract may work with other dbm file
324 Returns : The current value of dbm_package
325 Args : Value of dbm_package if setting, or none if
332 Usage : $stream = $index->get_stream( $id );
333 Function: Returns a file handle with the file pointer
334 at the appropriate place
336 This provides for a way to get the actual
337 file contents and not an object
339 WARNING: you must parse the record delimiter
340 *yourself*. Abstract won't do this for you
343 $fh = $index->get_stream($myid);
347 will parse the entire file if you do not put in
348 a last statement in, like
351 /^\/\// && last; # end of record
355 Returns : A filehandle object
356 Args : string represents the accession number
357 Notes : This method should not be used without forethought
362 Usage : $index->open_dbm()
363 Function: Opens the dbm file associated with the index
364 object. Write access is only given if explicitly
365 asked for by calling new(-write_flag => 1) or having set
366 the write_flag(1) on the index object. The type of
367 dbm file opened is that returned by dbm_package().
368 The name of the file to be opened is obtained by
369 calling the filename() method.
371 Example : $index->_open_dbm()
372 Returns : 1 on success
378 Usage : $type = $index->_version()
379 Function: Returns a string which identifies the version of an
380 index module. Used to permanently identify an index
381 file as having been created by a particular version
382 of the index module. Must be provided by the sub class
390 Usage : $index->_filename( FILE INT )
391 Function: Indexes the file
399 Usage : $fh = $index->_file_handle( INT )
400 Function: Returns an open filehandle for the file
401 index INT. On opening a new filehandle it
402 caches it in the @{$index->_filehandle} array.
403 If the requested filehandle is already open,
404 it simply returns it from the array.
405 Example : $first_file_indexed = $index->_file_handle( 0 );
406 Returns : ref to a filehandle
412 Usage : $index->_file_count( INT )
413 Function: Used by the index building sub in a sub class to
414 track the number of files indexed. Sets or gets
415 the number of files indexed when called with or
425 Usage : $index->add_record( $id, @stuff );
426 Function: Calls pack_record on @stuff, and adds the result
427 of pack_record to the index database under key $id.
428 If $id is a reference to an array, then a new entry
429 is added under a key corresponding to each element
431 Example : $index->add_record( $id, $fileNumber, $begin, $end )
432 Returns : TRUE on success or FALSE on failure
438 Usage : $packed_string = $index->pack_record( LIST )
439 Function: Packs an array of scalars into a single string
440 joined by ASCII 034 (which is unlikely to be used
441 in any of the strings), and returns it.
442 Example : $packed_string = $index->pack_record( $fileNumber, $begin, $end )
443 Returns : STRING or undef
448 Title : unpack_record
449 Usage : $index->unpack_record( STRING )
450 Function: Splits the string provided into an array,
451 splitting on ASCII 034.
452 Example : ( $fileNumber, $begin, $end ) = $index->unpack_record( $self->db->{$id} )
453 Returns : A 3 element ARRAY
454 Args : STRING containing ASCII 034
459 Usage : Called automatically when index goes out of scope
460 Function: Closes connection to database and handles to