2 # BioPerl module for Bio::Tools::Run::RemoteBlast
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # FORMERLY Cared for by Jason Stajich, Mat Wiepert
8 # Somewhat cared for by Roger Hall, Chris Fields (when they have time)
10 # Copyright Jason Stajich, Bioperl
12 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
18 Bio::Tools::Run::RemoteBlast - Object for remote execution of the NCBI Blast
23 #Remote-blast "factory object" creation and blast-parameter initialization
25 use Bio::Tools::Run::RemoteBlast;
31 my @params = ( '-prog' => $prog,
34 '-readmethod' => 'SearchIO' );
36 my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
38 #change a query parameter
39 $Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
41 #change a retrieval parameter
42 $Bio::Tools::Run::RemoteBlast::RETRIEVALHEADER{'DESCRIPTIONS'} = 1000;
45 delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
47 #$v is just to turn on and off the messages
50 my $str = Bio::SeqIO->new(-file=>'amino.fa' , -format => 'fasta' );
52 while (my $input = $str->next_seq()){
53 #Blast a sequence against a database:
55 #Alternatively, you could pass in a file with many
56 #sequences rather than loop through sequence one at a time
57 #Remove the loop starting 'while (my $input = $str->next_seq())'
58 #and swap the two lines below for an example of that.
59 my $r = $factory->submit_blast($input);
60 #my $r = $factory->submit_blast('amino.fa');
62 print STDERR "waiting..." if( $v > 0 );
63 while ( my @rids = $factory->each_rid ) {
64 foreach my $rid ( @rids ) {
65 my $rc = $factory->retrieve_blast($rid);
68 $factory->remove_rid($rid);
70 print STDERR "." if ( $v > 0 );
73 my $result = $rc->next_result();
75 my $filename = $result->query_name()."\.out";
76 $factory->save_output($filename);
77 $factory->remove_rid($rid);
78 print "\nQuery Name: ", $result->query_name(), "\n";
79 while ( my $hit = $result->next_hit ) {
80 next unless ( $v > 0);
81 print "\thit name is ", $hit->name, "\n";
82 while( my $hsp = $hit->next_hsp ) {
83 print "\t\tscore is ", $hsp->score, "\n";
91 # This example shows how to change a CGI parameter:
92 $Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'BLOSUM45';
93 $Bio::Tools::Run::RemoteBlast::HEADER{'GAPCOSTS'} = '15 2';
95 # And this is how to delete a CGI parameter:
96 delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
101 Class for remote execution of the NCBI Blast via HTTP.
103 For a description of the many CGI parameters see:
104 http://www.ncbi.nlm.nih.gov/BLAST/Doc/urlapi.html
106 Various additional options and input formats are available.
112 User feedback is an integral part of the evolution of this and other
113 Bioperl modules. Send your comments and suggestions preferably to one
114 of the Bioperl mailing lists. Your participation is much appreciated.
116 bioperl-l@bioperl.org - General discussion
117 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
121 Please direct usage questions or support issues to the mailing list:
123 I<bioperl-l@bioperl.org>
125 rather than to the module maintainer directly. Many experienced and
126 responsive experts will be able to look at the problem and quickly
127 address it. Please include a thorough description of the problem
128 with code and data examples if at all possible.
130 =head2 Reporting Bugs
132 Report bugs to the Bioperl bug tracking system to help us keep track
133 of the bugs and their resolution. Bug reports can be submitted via the
136 https://github.com/bioperl/bioperl-live/issues
140 Please do NOT contact Jason directly about this module. Please post to
141 the bioperl mailing list (L<FEEDBACK>). If you would like to be the
142 official maintainer of this module, please volunteer on the list and
143 we will make it official in this POD.
145 First written by Jason Stajich, many others have helped keep it running.
149 The rest of the documentation details each of the object
150 methods. Internal methods are usually preceded with a _
154 package Bio
::Tools
::Run
::RemoteBlast
;
163 use HTTP
::Request
::Common
;
173 # Bio::Root::IO is-a Bio::Root::Root
174 use base
qw(Bio::Root::IO Exporter);
176 our @EXPORT = qw( NOT_FINISHED ERR_QBSTATUS ERR_NOCONTENT ERR_HTTPFAIL ERR_QBNONSPEC );
177 our $MODVERSION = $Bio::Root
::Version
::VERSION
;
178 our $URLBASE = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
180 # In GET/PUTPARAMS the values are regexes which validate the input.
182 'AUTO_FORMAT' => '(Off|(Semi|Full)auto)', # Off, Semiauto, Fullauto
183 'COMPOSITION_BASED_STATISTICS' => '(0|1)', # yes, no on NCBI's site, but actually binary 0/1
185 'DB_GENETIC_CODE' => '([1-9]|1[1-6]|2(1|2))', # 1..16,21,22
186 'DISPLAY_SORT' => '\d',
187 'ENDPOINTS' => '(yes|no)', # yes,no
188 'ENTREZ_QUERY' => '.*',
189 'EXPECT' => '\d+(\.\d+)?([eE]-\d+)?', # Positive double
190 'FILTER' => '[LRm]', # L or R or m
191 'GAPCOSTS' => '-?\d+(\.\d+)\s+-?\d+(\.\d+)',
192 # Two space separated float values
193 'GENETIC_CODE' => '([1-9]|1[1-6]|2(1|2))', # 1..16,21,22
194 'HITLIST_SIZE' => '\d+', # Positive integer
195 'I_THRESH' => '-?\d+(\.\d+)([eE]-\d+)?', # float
196 'LAYOUT' => '(One|Two)Windows?', # onewindow, twowindows
197 'LCASE_MASK' => '(yes|no)', # yes, no
198 'MATRIX_NAME' => '.*',
199 'NUCL_PENALTY' => '-\d+', # Negative integer
200 'NUCL_REWARD' => '-?\d+', # Integer
201 'OTHER_ADVANCED' => '.*',
202 'PERC_IDENT' => '\d\d+', # Integer, 0-99 inclusive
203 'PHI_PATTERN' => '.*',
204 'PROGRAM' => 't?blast[pnx]',
205 # tblastp, tblastn, tblastx, blastp, blastn, blastx
207 'QUERY_FILE' => '.*',
208 'QUERY_BELIEVE_DEFLINE' => '(yes|no)', # yes, no
209 'QUERY_FROM' => '\d+', # Positive integer
210 'QUERY_TO' => '\d+', # Positive integer
211 'SEARCHSP_EFF' => '\d+', # Positive integer
212 'SERVICE' => '(plain|p[sh]i|(rps|mega)blast)',
213 # plain,psi,phi,rpsblast,megablast
214 'SHORT_QUERY_ADJUST' => '(true|false)',
215 'THRESHOLD' => '-?\d+', # Integer
216 'UNGAPPED_ALIGNMENT' => '(yes|no)', # yes, no
217 'WORD_SIZE' => '\d+' # Positive integer
220 'ALIGNMENTS' => '\d+', # Positive integer
222 '(Pairwise|(Flat)?QueryAnchored(NoIdentities)?|Tabular)',
223 # Pairwise, QueryAnchored, QueryAnchoredNoIdentities,
224 # FlatQueryAnchored, FlatQueryAnchoredNoIdentities, Tabular
225 'DATABASE_SORT' => '\d',
226 'DESCRIPTIONS' => '\d+', # Positive integer
227 'ENTREZ_LINKS_NEW_WINDOW' => '(yes|no)', # yes, no
228 'EXPECT_LOW' => '\d+(\.\d+)?([eE]-\d+)?', # Positive double
229 'EXPECT_HIGH' => '\d+(\.\d+)?([eE]-\d+)?', # Positive double
230 'FORMAT_ENTREZ_QUERY' => '',
232 '(Alignment|Neighbors|PSSM|SearchInfo|TaxBlast(Parent|MultiFrame)?)',
233 # Alignment, Neighbors, PSSM, SearchInfo
234 # TaxBlast, TaxblastParent, TaxBlastMultiFrame
235 'FORMAT_TYPE' => '((HT|X)ML|ASN\.1|Text)',
236 # HTML, Text, ASN.1, XML
237 'NCBI_GI' => '(yes|no)', # yes, no
238 'NEW_VIEW' => '(true|false)',
240 'RESULTS_FILE' => '(yes|no)', # yes, no
241 'SERVICE' => '(plain|p[sh]i|(rps|mega)blast)',
242 # plain,psi,phi,rpsblast,megablast
243 'SHOW_OVERVIEW' => '(yes|no)' # yes, no
246 # Default values go in here for PUT
249 'FORMAT_OBJECT' => 'Alignment',
250 'COMPOSITION_BASED_STATISTICS' => 'off',
254 'PROGRAM' => 'blastp',
258 # Default values go in here for GET
259 our %RETRIEVALHEADER = (
261 'ALIGNMENTS' => '50',
262 'ALIGNMENT_VIEW' => 'Pairwise',
263 'DESCRIPTIONS' => '100',
264 'FORMAT_TYPE' => 'Text',
268 my ($caller, @args) = @_;
270 my $self = $caller->SUPER::new
(@args);
271 # so that tempfiles are cleaned up
272 $self->_initialize_io();
273 my ($prog, $data, $readmethod, $url_base) =
274 $self->_rearrange([qw(PROG DATA READMETHOD URL_BASE)],
276 # Use these two parameters for backward-compatibility.
277 # Overridden by PROGRAM and DATABASE if supplied.
278 $self->submit_parameter('PROGRAM',$prog) if $prog;
279 $self->submit_parameter('DATABASE',$data) if $data;
281 $readmethod = 'SearchIO' unless defined $readmethod;
282 $self->readmethod($readmethod);
284 # Now read the rest of the parameters and set them all
286 # PUT parameters first
287 my @putValues = $self->_rearrange([keys %PUTPARAMS],@args);
289 @putNames{keys %PUTPARAMS} = @putValues;
290 foreach my $putName (keys %putNames) {
291 $self->submit_parameter($putName,$putNames{$putName});
293 # GET parameters second
294 my @getValues = $self->_rearrange([keys %GETPARAMS],@args);
296 @getNames{keys %GETPARAMS} = @getValues;
297 foreach my $getName (keys %getNames) {
298 $self->retrieve_parameter($getName,$getNames{$getName});
300 # private variable to keep track of total rids
301 $self->{'_total_rids'} = 0;
302 $url_base ||= $URLBASE; # default to regular NCBI BLAST URL
303 $self->set_url_base($url_base);
307 =head2 retrieve_parameter
309 Title : retrieve_parameter
310 Usage : my $db = $self->retrieve_parameter
311 Function: Get/Set the named parameter for the retrieve_blast operation.
313 Args : $name : name of GET parameter
314 $val : optional value to set the parameter to
318 sub retrieve_parameter
{
319 my ($self, $name, $val) = @_;
321 $self->throw($name." is not a valid GET parameter.") unless
322 exists $GETPARAMS{$name};
324 my $regex = $GETPARAMS{$name};
325 $val =~ m/^$regex$/i or
326 $self->throw("Value ".$val." for GET parameter ".$name." does not match expression ".$regex.". Rejecting.");
327 $RETRIEVALHEADER{$name} = $val;
329 return $RETRIEVALHEADER{$name};
332 =head2 submit_parameter
334 Title : submit_parameter
335 Usage : my $db = $self->submit_parameter
336 Function: Get/Set the named parameter for the submit_blast operation.
338 Args : $name : name of PUT parameter
339 $val : optional value to set the parameter to
343 sub submit_parameter
{
344 my ($self, $name, $val) = @_;
346 $self->throw($name." is not a valid PUT parameter.") unless
347 exists $PUTPARAMS{$name};
349 my $regex = $PUTPARAMS{$name};
350 $val =~ m/^$regex$/i or
351 $self->throw("Value ".$val." for PUT parameter ".$name." does not match expression ".$regex.". Rejecting.");
352 $HEADER{$name} = $val;
354 return $HEADER{$name};
360 Usage : my $header = $self->header
361 Function: Get HTTP header for blast query
375 Usage : my $readmethod = $self->readmethod
376 Function: Get/Set the method to read the blast report
378 Args : string [ blast, blasttable, xml ]
383 my ($self, $val) = @_;
385 if ($val =~ /bplite/i) {
386 $self->throw("Use of Bio::Tools::BPlite is deprecated; use Bio::SearchIO modules instead");
388 $self->{'_readmethod'} = $val;
390 return $self->{'_readmethod'};
397 Usage : my $prog = $self->program
398 Function: Get/Set the program to run. Retained for backwards-compatibility.
400 Args : string [ blastp, blastn, blastx, tblastn, tblastx ]
405 my ($self, $val) = @_;
406 return $self->submit_parameter('PROGRAM',$val);
413 Usage : my $db = $self->database
414 Function: Get/Set the database to search. Retained for backwards-compatibility.
416 Args : string [ swissprot, nr, nt, etc... ]
421 my ($self, $val) = @_;
422 return $self->submit_parameter('DATABASE',$val);
429 Usage : my $expect = $self->expect
430 Function: Get/Set the E value cutoff. Retained for backwards-compatibility.
432 Args : string [ '1e-4' ]
437 my ($self, $val) = @_;
438 return $self->submit_parameter('EXPECT',$val);
444 Usage : my $ua = $self->ua or
446 Function: Get/Set a LWP::UserAgent for use
447 Returns : reference to LWP::UserAgent Object
449 Comments: Will create a UserAgent if none has been requested before.
454 my ($self, $value) = @_;
455 if( ! defined $self->{'_ua'} ) {
456 $self->{'_ua'} = LWP
::UserAgent
->new(env_proxy
=> 1, parse_head
=> 0);
459 $self->{'_ua'}->agent("bioperl-$nm/$MODVERSION");
461 return $self->{'_ua'};
467 Usage : $httpproxy = $db->proxy('http') or
468 $db->proxy(['http','ftp'], 'http://myproxy' )
469 Function: Get/Set the proxy to use for the given protocol(s)
470 Returns : a string indicating the proxy
471 Args : $protocol : an array ref of the protocol(s) to set/get
472 $proxyurl : url of the proxy to use for the specified protocol
477 my ($self,$protocol,$proxy) = @_;
478 return if ( !defined $self->ua || !defined $protocol
479 || !defined $proxy );
480 return $self->ua->proxy($protocol,$proxy);
484 my ($self, @vals) = @_;
486 $self->{'_rids'}->{$_} = $self->{'_total_rids'};
487 $self->{'_total_rids'}++;
489 return scalar keys %{$self->{'_rids'}};
493 my ($self, @vals) = @_;
495 delete $self->{'_rids'}->{$_};
497 return scalar keys %{$self->{'_rids'}};
502 # sort on key value, a little tricky...
503 my @sort_rids = sort {$self->{'_rids'}->{$a} <=> $self->{'_rids'}->{$b}} keys %{$self->{'_rids'}};
510 Usage : $self->submit_blast([$seq1,$seq2]);
511 Function: Submit blast jobs to ncbi blast queue on sequence(s)
512 Returns : Blast report object as defined by $self->readmethod
515 * array ref of sequence objects
516 * filename of file containing fasta formatted sequences
521 my ($self, $input) = @_;
522 my @seqs = $self->_load_input($input);
523 my $url_base = $self->get_url_base;
524 return 0 unless ( @seqs );
526 my %header = $self->header;
527 $header{$_} ||= $RETRIEVALHEADER{$_} foreach (keys %RETRIEVALHEADER);
528 foreach my $seq ( @seqs ) {
529 #If query has a fasta header, the output has the query line.
530 $header{'QUERY'} = ">".(defined $seq->display_id() ?
$seq->display_id() : "").
531 " ".(defined $seq->desc() ?
$seq->desc() : "")."\n".$seq->seq();
532 my $request = POST
$url_base, [%header];
533 $self->debug($request->as_string) if ( $self->verbose > 1);
534 my $response = $self->ua->request( $request);
536 if( $response->is_success ) {
537 my @subdata = split(/\n/, $response->content );
539 foreach ( @subdata ) {
540 if( /^\s+RID\s+=\s+(\S+)/ ) {
542 #$self->debug("RID: $1\n");
544 } elsif (/^\s+RTOE\s+=\s+(.*$)/) {
551 $self->warn("req was ". $request->as_string() . "\n");
552 $self->warn(join('', @subdata));
556 # should try and be a little more verbose here
557 $self->warn("req was ". $request->as_string() . "\n" .
558 $response->error_as_HTML);
565 =head2 retrieve_blast
567 Title : retrieve_blast
568 Usage : my $blastreport = $blastfactory->retrieve_blast($rid);
569 Function: Attempts to retrieve a blast report from remote blast queue
570 Returns : scalar int (constant) or Bio::SearchIO object
572 NOT_FINISHED (= 0) : 'job not finished'
574 ERR_QBSTATUS (= 1) : return line matches 'Status=ERROR'
575 ERR_NOCONTENT (= 2): HTTP request successful, but no content
577 ERR_HTTPFAIL (= 4) : HTTP request failed
578 ERR_QBNONSPEC (= 8): return line matches 'ERROR' (not status line)
579 Args : Remote Blast ID (RID)
584 my($self, $rid) = @_;
585 my $url_base = $self->get_url_base;
586 my %hdr = %RETRIEVALHEADER;
589 my $req = HTTP
::Request
->new(
590 GET
=> $url_base."?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=$rid",
592 #$self->debug("SearchInfo request is " . $req->as_string());
593 my $response = $self->ua->request($req);
594 if( $response->is_success ) {
596 if($response->content =~ /Status=(WAITING|ERROR|FAILED|UNKNOWN|READY)/i ) {
598 if( $status eq 'ERROR' ) {
599 $self->warn("Server Error");
601 } elsif( $status eq 'FAILED' ) {
602 $self->warn("Request Failed");
606 $self->warn("Error: No status reported\n");
608 if ( $status ne 'READY' ) {
611 my ($fh,$tempfile) = $self->tempfile();
614 my $req = POST
$url_base, [%hdr];
615 $self->debug("retrieve request is " . $req->as_string());
616 my $response = $self->ua->request($req, $tempfile);
619 my $mthd = $self->readmethod;
620 $mthd = ($mthd =~ /blasttable/i) ?
'blasttable' :
621 ($mthd =~ /xml/i) ?
'blastxml' :
622 ($mthd =~ /pull/i) ?
'blast_pull' :
624 $blastobj = Bio
::SearchIO
->new(
627 ## store filename in object ##
628 $self->file($tempfile);
632 $self->warn($response->error_as_HTML);
640 Usage : my $saveoutput = $self->save_output($filename)
641 Function: Method to save the blast report
642 Returns : 1 (throws error otherwise)
643 Args : string [filename]
648 my ($self, $filename) = @_;
649 if( not defined $filename ) {
650 $self->throw("Can't save blast output. You must specify a filename to save to.");
652 my $blastfile = $self->file;
653 #open temp file and output file, have to filter out some HTML
654 open my $TMP, '<', $blastfile or $self->throw("Could not read file '$blastfile': $!");
656 open my $SAVEOUT, '>', $filename or $self->throw("Could not write file '$filename': $!");
658 while (my $line = <$TMP>) {
659 next if ($line =~ /<pre>/);
661 if ( $line =~ /^(?:[T]?BLAST[NPX])\s*.+$/i
662 or $line =~ /^RPS-BLAST\s*.+$/i
663 or $line =~ /<\?xml\sversion=/
664 or $line =~ /^#\s+(?:[T]?BLAST[NPX])\s*.+$/
670 print $SAVEOUT $line;
677 my ($self, $input) = @_;
679 if( ! defined $input ) {
680 $self->throw("Calling remote blast with no input");
685 my $seqio = Bio
::SeqIO
->new(-format
=> 'fasta',
687 while( my $seq = $seqio->next_seq ) {
691 $self->throw("Input $input was not a valid filename");
693 } elsif( ref($input) =~ /ARRAY/i ) {
694 foreach ( @
$input ) {
695 if( ref($_) && $_->isa('Bio::PrimarySeqI') ) {
698 $self->warn("Trying to add a " . ref($_) .
699 " but expected a Bio::PrimarySeqI");
703 $self->throw("Did not pass in valid input -- no sequence objects found");
705 } elsif( $input->isa('Bio::PrimarySeqI') ) {
714 Usage : $self->set_url_base($url)
715 Function: Method to override the default NCBI BLAST database
717 Args : string (database url like
718 NOTE : This is highly experimental; we cannot maintain support on
719 databases other than the default NCBI database at this time
725 $self->{'_urlbase'} = shift if @_;
731 Usage : my $url = $self->get_url_base
732 Function: Get the current URL for BLAST database searching
733 Returns : string (URL used for remote blast searches)
740 return $self->{'_urlbase'};
746 Usage : my $rtoe = $self->rtoe
747 Function: Retrieve the RTOE value (the server's estimated time until the search completes; defined after submit_blast())
755 return $self->{rtoe
};