strip keyword substitution from perl modules
[bioperl-run.git] / Bio / Tools / Run / StandAloneNCBIBlast.pm
blob1c34e2a9f957ef02e05090c2395d6da531901e30
2 # BioPerl module for Bio::Tools::Run::StandAloneBlast
4 # Copyright Peter Schattner
6 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
10 =head1 NAME
12 Bio::Tools::Run::StandAloneNCBIBlast - Object for the local execution
13 of the NCBI BLAST program suite (blastall, blastpgp, bl2seq). With
14 experimental support for NCBI rpsblast.
16 =head1 SYNOPSIS
18 # Do not use directly; see Bio::Tools::Run::StandAloneBlast
20 =head1 DESCRIPTION
22 See Bio::Tools::Run::StandAloneBlast
24 =head1 FEEDBACK
26 =head2 Mailing Lists
28 User feedback is an integral part of the evolution of this and other
29 Bioperl modules. Send your comments and suggestions preferably to one
30 of the Bioperl mailing lists. Your participation is much appreciated.
32 bioperl-l@bioperl.org - General discussion
33 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
35 =head2 Support
37 Please direct usage questions or support issues to the mailing list:
39 I<bioperl-l@bioperl.org>
41 rather than to the module maintainer directly. Many experienced and
42 reponsive experts will be able look at the problem and quickly
43 address it. Please include a thorough description of the problem
44 with code and data examples if at all possible.
46 =head2 Reporting Bugs
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via
50 the web:
52 http://bugzilla.open-bio.org/
54 =head1 AUTHOR - Peter Schattner
56 Email schattner at alum.mit.edu
58 =head1 MAINTAINER - Torsten Seemann
60 Email torsten at infotech.monash.edu.au
62 =head1 CONTRIBUTORS
64 Sendu Bala bix@sendu.me.uk (reimplementation)
66 =head1 APPENDIX
68 The rest of the documentation details each of the object
69 methods. Internal methods are usually preceded with a _
71 =cut
73 package Bio::Tools::Run::StandAloneNCBIBlast;
75 use strict;
77 use base qw(Bio::Tools::Run::StandAloneBlast);
79 our $AUTOLOAD;
80 our $DEFAULTREADMETHOD = 'BLAST';
82 # If local BLAST databases are not stored in the standard
83 # /data directory, the variable BLASTDATADIR will need to be
84 # set explicitly
85 our $DATADIR = $Bio::Tools::Run::StandAloneBlast::DATADIR;
87 our %GENERAL_PARAMS = (i => 'input',
88 o => 'outfile',
89 p => 'program',
90 d => 'database');
91 our @BLASTALL_PARAMS = qw(A B C D E F G K L M O P Q R S W X Y Z a b e f l m q r t v w y z n);
92 our @BLASTALL_SWITCH = qw(I g J T U n V s);
93 our @BLASTPGP_PARAMS = qw(A B C E F G H I J K L M N O P Q R S T U W X Y Z a b c e f h j k l m q s t u v y z);
94 our @RPSBLAST_PARAMS = qw(F I J L N O P T U V X Y Z a b e l m v y z);
95 our @BL2SEQ_PARAMS = qw(A D E F G I J M S T U V W X Y a e g j m q r t);
97 our @OTHER_PARAMS = qw(_READMETHOD);
100 =head2 new
102 Title : new
103 Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new();
104 Function: Builds a newBio::Tools::Run::StandAloneBlast object
105 Returns : Bio::Tools::Run::StandAloneBlast
106 Args : -quiet => boolean # make program execution quiet
107 -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull'
108 # the parsing method, case insensitive
110 Essentially all BLAST parameters can be set via StandAloneBlast.pm.
111 Some of the most commonly used parameters are listed below. All
112 parameters have defaults and are optional except for -p in those programs that
113 have it. For a complete listing of settable parameters, run the relevant
114 executable BLAST program with the option "-" as in blastall -
115 Note that the input paramters (-i, -j, -input) should not be set directly by
116 you: this module sets them when you call one of the executable methods.
118 Blastall
120 -p Program Name [String]
121 Input should be one of "blastp", "blastn", "blastx",
122 "tblastn", or "tblastx".
123 -d Database [String] default = nr
124 The database specified must first be formatted with formatdb.
125 Multiple database names (bracketed by quotations) will be accepted.
126 An example would be -d "nr est"
127 -e Expectation value (E) [Real] default = 10.0
128 -o BLAST report Output File [File Out] Optional,
129 default = ./blastreport.out ; set by StandAloneBlast.pm
130 -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer]
131 default = 3
133 Blastpgp (including Psiblast)
135 -j is the maximum number of rounds (default 1; i.e., regular BLAST)
136 -h is the e-value threshold for including sequences in the
137 score matrix model (default 0.001)
138 -c is the "constant" used in the pseudocount formula specified in the paper (default 10)
139 -B Multiple alignment file for PSI-BLAST "jump start mode" Optional
140 -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional
142 rpsblast
144 -d Database [String] default = (none - you must specify a database)
145 The database specified must first be formatted with formatdb.
146 Multiple database names (bracketed by quotations) will be accepted.
147 An example would be -d "Cog Smart"
148 -e Expectation value (E) [Real] default = 10.0
149 -o BLAST report Output File [File Out] Optional,
150 default = ./blastreport.out ; set by StandAloneBlast.pm
152 Bl2seq
154 -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String]
155 default = blastp
156 -o alignment output file [File Out] default = stdout
157 -e Expectation value (E) [Real] default = 10.0
158 -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer]
159 default = 3
161 =cut
163 sub new {
164 my ($caller, @args) = @_;
165 my $self = $caller->SUPER::new(@args);
167 # StandAloneBlast is special in that "one can modify the name of
168 # the (ncbi) BLAST parameters as desired as long as the initial letter (and
169 # case) of the parameter are preserved". We handle this by truncating input
170 # args to their first char
171 my %args = @args;
172 @args = ();
173 while (my ($attr, $value) = each %args) {
174 $attr =~ s/^-//;
175 $attr = substr($attr, 0, 1) unless $attr =~ /^_/;
176 push(@args, $attr, $value);
179 $self->_set_from_args(\@args, -methods => {(map { $_ => $GENERAL_PARAMS{$_} } keys %GENERAL_PARAMS),
180 (map { $_ => $_ } (@OTHER_PARAMS,
181 @BLASTALL_PARAMS,
182 @BLASTALL_SWITCH,
183 @BLASTPGP_PARAMS,
184 @RPSBLAST_PARAMS,
185 @BL2SEQ_PARAMS))},
186 -code => { map { $_ => 'my $self = shift;
187 if (@_) {
188 my $value = shift;
189 if ($value && $value ne \'F\') {
190 $value = \'T\';
192 else {
193 $value = \'F\';
195 $self->{\'_\'.$method} = $value;
197 return $self->{\'_\'.$method} || return;' } @BLASTALL_SWITCH }, # these methods can take boolean or 'T' and 'F'
198 -create => 1,
199 -force => 1,
200 -case_sensitive => 1);
202 my ($tfh, $tempfile) = $self->io->tempfile();
203 my $outfile = $self->o || $self->outfile || $tempfile;
204 $self->o($outfile);
205 close($tfh);
207 $self->_READMETHOD($DEFAULTREADMETHOD) unless $self->_READMETHOD;
209 return $self;
212 # StandAloneBlast is special in that "one can modify the name of
213 # the (ncbi) BLAST parameters as desired as long as the initial letter (and
214 # case) of the parameter are preserved". We handle this with AUTOLOAD
215 # redirecting to the automatically created methods from _set_from_args() !
216 sub AUTOLOAD {
217 my $self = shift;
218 my $attr = $AUTOLOAD;
219 $attr =~ s/.*:://;
221 my $orig = $attr;
223 $attr = substr($attr, 0, 1);
225 $self->can($attr) || $self->throw("Unallowed parameter: $orig !");
227 return $self->$attr(@_);
230 =head2 blastall
232 Title : blastall
233 Usage : $blast_report = $factory->blastall('t/testquery.fa');
235 $input = Bio::Seq->new(-id=>"test query",
236 -seq=>"ACTACCCTTTAAATCAGTGGGGG");
237 $blast_report = $factory->blastall($input);
239 $seq_array_ref = \@seq_array;
240 # where @seq_array is an array of Bio::Seq objects
241 $blast_report = $factory->blastall($seq_array_ref);
242 Returns : Reference to a Blast object containing the blast report.
243 Args : Name of a file or Bio::Seq object or an array of
244 Bio::Seq object containing the query sequence(s).
245 Throws an exception if argument is not either a string
246 (eg a filename) or a reference to a Bio::Seq object
247 (or to an array of Seq objects). If argument is string,
248 throws exception if file corresponding to string name can
249 not be found.
251 =cut
253 sub blastall {
254 my ($self, $input1) = @_;
255 $self->io->_io_cleanup();
256 my $executable = 'blastall';
258 # Create input file pointer
259 my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");
260 $self->i($infilename1);
262 my $blast_report = $self->_generic_local_blast($executable);
265 =head2 blastpgp
267 Title : blastpgp
268 Usage : $blast_report = $factory-> blastpgp('t/testquery.fa');
270 $input = Bio::Seq->new(-id=>"test query",
271 -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
272 $blast_report = $factory->blastpgp ($input);
274 $seq_array_ref = \@seq_array;
275 # where @seq_array is an array of Bio::Seq objects
276 $blast_report = $factory-> blastpgp(\@seq_array);
277 Returns : Reference to a Bio::SearchIO object containing the blast report
278 Args : Name of a file or Bio::Seq object. In psiblast jumpstart
279 mode two additional arguments are required: a SimpleAlign
280 object one of whose elements is the query and a "mask" to
281 determine how BLAST should select scoring matrices see
282 DESCRIPTION above for more details.
284 Throws an exception if argument is not either a string
285 (eg a filename) or a reference to a Bio::Seq object
286 (or to an array of Seq objects). If argument is string,
287 throws exception if file corresponding to string name can
288 not be found.
289 Returns : Reference to Bio::SearchIO object containing the blast report.
291 =cut
293 sub blastpgp {
294 my $self = shift;
295 my $executable = 'blastpgp';
296 my $input1 = shift;
297 my $input2 = shift;
298 # used by blastpgp's -B option to specify which
299 # residues are position aligned
300 my $mask = shift;
302 my ($infilename1, $infilename2 ) = $self->_setinput($executable,
303 $input1, $input2,
304 $mask);
305 if (!$infilename1) {$self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");}
306 $self->i($infilename1); # set file name of sequence to be blasted to inputfilename1 (-i param of blastpgp)
307 if ($input2) {
308 unless ($infilename2) {$self->throw("$input2 not SimpleAlign Object in pre-aligned psiblast\n");}
309 $self->B($infilename2); # set file name of partial alignment to inputfilename2 (-B param of blastpgp)
312 my $blast_report = $self->_generic_local_blast($executable);
315 =head2 rpsblast
317 Title : rpsblast
318 Usage : $blast_report = $factory->rpsblast('t/testquery.fa');
320 $input = Bio::Seq->new(-id=>"test query",
321 -seq=>"MVVLCRADDEEQQPPTCADEEQQQVVGG");
322 $blast_report = $factory->rpsblast($input);
324 $seq_array_ref = \@seq_array;
325 # where @seq_array is an array of Bio::Seq objects
326 $blast_report = $factory->rpsblast(\@seq_array);
327 Args : Name of a file or Bio::Seq object or an array of
328 Bio::Seq object containing the query sequence(s).
329 Throws an exception if argument is not either a string
330 (eg a filename) or a reference to a Bio::Seq object
331 (or to an array of Seq objects). If argument is string,
332 throws exception if file corresponding to string name can
333 not be found.
334 Returns : Reference to a Bio::SearchIO object containing the blast report
336 =cut
338 sub rpsblast {
339 my ($self, $input1) = @_;
340 $self->io->_io_cleanup();
341 my $executable = 'rpsblast';
343 # Create input file pointer
344 my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");
345 $self->i($infilename1);
347 my $blast_report = $self->_generic_local_blast($executable);
350 =head2 bl2seq
352 Title : bl2seq
353 Usage : $factory-> bl2seq('t/seq1.fa', 't/seq2.fa');
355 $input1 = Bio::Seq->new(-id=>"test query1",
356 -seq=>"ACTADDEEQQPPTCADEEQQQVVGG");
357 $input2 = Bio::Seq->new(-id=>"test query2",
358 -seq=>"ACTADDEMMMMMMMDEEQQQVVGG");
359 $blast_report = $factory->bl2seq ($input1, $input2);
360 Returns : Reference to a BPbl2seq object containing the blast report.
361 Args : Names of 2 files or 2 Bio::Seq objects containing the
362 sequences to be aligned by bl2seq.
364 Throws an exception if argument is not either a pair of
365 strings (eg filenames) or references to Bio::Seq objects.
366 If arguments are strings, throws exception if files
367 corresponding to string names can not be found.
369 =cut
371 sub bl2seq {
372 my $self = shift;
373 my $executable = 'bl2seq';
374 my $input1 = shift;
375 my $input2 = shift;
377 # Create input file pointer
378 my ($infilename1, $infilename2 ) = $self->_setinput($executable,
379 $input1, $input2);
380 if (!$infilename1){$self->throw(" $input1 not Seq Object or file name!");}
381 if (!$infilename2){$self->throw("$input2 not Seq Object or file name!");}
383 $self->i($infilename1); # set file name of first sequence to
384 # be aligned to inputfilename1
385 # (-i param of bl2seq)
386 $self->j($infilename2); # set file name of first sequence to
387 # be aligned to inputfilename2
388 # (-j param of bl2seq)
390 my $blast_report = $self->_generic_local_blast($executable);
393 =head2 _generic_local_blast
395 Title : _generic_local_blast
396 Usage : internal function not called directly
397 Returns : Bio::SearchIO
398 Args : Reference to calling object and name of BLAST executable
400 =cut
402 sub _generic_local_blast {
403 my $self = shift;
404 my $executable = shift;
406 # Create parameter string to pass to Blast program
407 my $param_string = $self->_setparams($executable);
409 # run Blast
410 my $blast_report = $self->_runblast($executable, $param_string);
413 =head2 _runblast
415 Title : _runblast
416 Usage : Internal function, not to be called directly
417 Function: makes actual system call to Blast program
418 Example :
419 Returns : Report Bio::SearchIO object in the appropriate format
420 Args : Reference to calling object, name of BLAST executable,
421 and parameter string for executable
423 =cut
425 sub _runblast {
426 my ($self, $executable, $param_string) = @_;
427 my ($blast_obj, $exe);
428 if (! ($exe = $self->executable($executable)) ) {
429 $self->warn("cannot find path to $executable");
430 return;
433 my $commandstring = $exe.$param_string;
435 $self->debug("$commandstring\n");
436 system($commandstring) && $self->throw("$executable call crashed: $? | $! | $commandstring\n");
438 # set significance cutoff to set expectation value or default value
439 # (may want to make this value vary for different executables)
440 my $signif = $self->e() || 1e-5;
442 # get outputfilename
443 my $outfile = $self->o();
445 # this should allow any blast SearchIO parser (not just 'blast_pull' or 'blast',
446 # but 'blastxml' and 'blasttable'). Fall back to 'blast' if not stipulated.
447 my $method = $self->_READMETHOD;
448 if ($method =~ /^(?:blast|SearchIO)/i ) {
449 $method = 'blast' if $method =~ m{SearchIO}i;
450 $blast_obj = Bio::SearchIO->new(-file => $outfile,
451 -format => $method);
453 # should these be here? They have been deprecated...
454 elsif ($method =~ /BPlite/i ) {
455 if ($executable =~ /bl2seq/i) {
456 # Added program info so BPbl2seq can compute strand info
457 $self->throw("Use of Bio::Tools::BPbl2seq is deprecated; use Bio::SearchIO modules instead");
459 elsif ($executable =~ /blastpgp/i && defined $self->j() && $self->j() > 1) {
460 $self->throw("Use of Bio::Tools::BPpsilite is deprecated; use Bio::SearchIO modules instead");
462 elsif ($executable =~ /blastall|rpsblast/i) {
463 $self->throw("Use of Bio::Tools::BPlite is deprecated; use Bio::SearchIO modules instead");
465 else {
466 $self->warn("Unrecognized executable $executable");
469 else {
470 $self->warn("Unrecognized readmethod $method");
473 return $blast_obj;
476 =head2 _setparams
478 Title : _setparams
479 Usage : Internal function, not to be called directly
480 Function: Create parameter inputs for Blast program
481 Example :
482 Returns : parameter string to be passed to Blast
483 Args : Reference to calling object and name of BLAST executable
485 =cut
487 sub _setparams {
488 my ($self, $executable) = @_;
489 my ($attr, $value, @execparams);
491 if ($executable eq 'blastall') { @execparams = (@BLASTALL_PARAMS,
492 @BLASTALL_SWITCH); }
493 elsif ($executable eq 'blastpgp') { @execparams = @BLASTPGP_PARAMS; }
494 elsif ($executable eq 'rpsblast') { @execparams = @RPSBLAST_PARAMS; }
495 elsif ($executable eq 'bl2seq' ) { @execparams = @BL2SEQ_PARAMS; }
497 # we also have all the general params
498 push(@execparams, keys %GENERAL_PARAMS);
500 my $database = $self->d;
501 if ($database && $executable ne 'bl2seq') {
502 # Need to prepend datadirectory to database name
503 my @dbs = split(/ /, $database);
504 for my $i (0..$#dbs) {
505 # (works with multiple databases)
506 if (! (-e $dbs[$i].".nin" || -e $dbs[$i].".pin") &&
507 ! (-e $dbs[$i].".nal" || -e $dbs[$i].".pal") ) {
508 $dbs[$i] = File::Spec->catdir($DATADIR, $dbs[$i]);
511 $self->d('"'.join(" ", @dbs).'"');
514 # workaround for problems with shell metacharacters [bug 2707]
515 # simply quoting does not always work!
516 my $tmp = $self->o;
517 $self->o(quotemeta($tmp)) if ($tmp && $^O !~ /^MSWin/);
519 my $param_string = $self->SUPER::_setparams(-params => [@execparams],
520 -dash => 1);
522 $self->o($tmp) if ($tmp && $^O !~ /^MSWin/);
524 $self->d($database) if $database;
526 if ($self->quiet()) {
527 $param_string .= ' 2> '.File::Spec->devnull;
530 return $param_string;