3 # BioPerl module for Bio::Tools::Run::StandAloneBlastPlus
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mark A. Jensen <maj -at- fortinbras -dot- us>
9 # Copyright Mark A. Jensen
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Tools::Run::StandAloneBlastPlus - Compute with NCBI's blast+ suite *ALPHA*
21 B<NOTE>: This module is related to the
22 L<Bio::Tools::Run::StandAloneBlast> system in name (and inspiration)
23 only. You must use this module directly.
26 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
30 # create blastdb from fasta file and attach
31 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
33 -db_data => 'myseqs.fas',
37 # create blastdb from BioPerl sequence collection objects
38 $alnio = Bio::AlignIO->new( -file => 'alignment.msf' );
39 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
45 @seqs = $alnio->next_aln->each_seq;
46 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
52 # create database with masks
54 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
55 -db_name => 'my_masked_db',
56 -db_data => 'myseqs.fas',
57 -masker => 'dustmasker',
58 -mask_data => 'maskseqs.fas',
62 # create a mask datafile separately
63 $mask_file = $fac->make_mask(
64 -data => 'maskseqs.fas',
65 -masker => 'dustmasker'
68 # query database for metadata
69 $info_hash = $fac->db_info;
70 $num_seq = $fac->db_num_sequences;
71 @mask_metadata = @{ $fac->db_filter_algorithms };
73 # perform blast methods
74 $result = $fac->tblastn( -query => $seqio );
75 # see Bio::Tools::Run::StandAloneBlastPlus::BlastMethods
76 # for many more details
80 B<NOTE:> This module requires BLAST+ v. 2.2.24+ and higher. Until the API
81 stabilizes for BLAST+, consider this module highly experimental.
83 This module along with
84 L<Bio::Tools::Run::StandAloneBlastPlus::BlastMethods> allows the user
85 to perform BLAST functions using the external program suite C<blast+>
87 L<ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/>), using
88 BioPerl objects and L<Bio::SearchIO> facilities. This wrapper can
89 prepare BLAST databases as well as run BLAST searches. It can also be
90 used to run C<blast+> programs independently.
92 This module encapsulates object construction and production of
93 databases and masks. Blast analysis methods (C<blastp, psiblast>,
94 etc>) are contained in
95 L<Bio::Tools::Run::StandAloneBlastPlus::BlastMethods>.
99 The basic mantra is to (1) create a BlastPlus factory using the
100 C<new()> constructor, and (2) perform BLAST analyses by calling the
101 desired BLAST program by name off the factory object. The blast
102 database itself and any masking data are attached to the factory
103 object (step 1). Query sequences and any parameters associated with
104 particular programs are provided to the blast method call (step 2),
105 and are run against the attached database.
107 =head2 Factory construction/initialization
109 The factory needs to be told where the blast+ programs live. The
110 C<BLASTPLUSDIR> environment variable will be checked for the default
111 executable directory. The program directory can be set for individual
112 factory instances with the C<PROG_DIR> parameter. All the blast+
113 programs must be accessible from that directory (i.e., as executable
116 Either the database or BLAST subject data must be specified at object
117 construction. Databases can be pre-existing formatted BLAST dbs, or
118 can be built directly from fasta sequence files or BioPerl sequence
119 object collections of several kinds. The key constructor parameters
120 are C<DB_NAME>, C<DB_DATA>, C<DB_DIR>.
122 To specify a pre-existing BLAST database, use C<DB_NAME> alone:
124 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
128 The directory can be specified along with the basename, or separately
131 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
132 -DB_NAME => '~/home/blast/mydb'
137 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
138 -DB_NAME => 'mydb', -DB_DIR => '~/home/blast'
141 To create a BLAST database de novo, see L</Creating a BLAST database>.
143 If you wish to apply pre-existing mask data (i.e., the final ASN1
144 output from one of the blast+ masker programs), to the database before
145 querying, specify it with C<MASK_FILE>:
147 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
148 -DB_NAME => 'mydb', -MASK_FILE => 'mymaskdata.asn'
151 =head2 Creating a BLAST database
153 There are several options for creating the database de novo using
154 attached data, both before and after factory construction. If a
155 temporary database (one that can be deleted by the C<cleanup()>
156 method) is desired, leave out the C<-db_name> parameter. If
157 C<-db_name> is specified, the database will be preserved with the
160 Use C<-create => 1> to create a new database (otherwise the factory
161 will look for an existing database). Use C<-overwrite => 1> to create
162 and overwrite an existing database.
164 Note that the database is not created immediately on factory
165 construction. It will be created if necessary on the first use of a
166 factory BLAST method, or you can force database creation by executing
172 =item * Specify data during construction
176 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
178 -db_data => 'myseqs.fas',
182 With another BioPerl object collection:
184 $alnio = Bio::AlignIO->new( -file => 'alignment.msf' );
185 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
190 @seqs = $alnio->next_aln->each_seq;
191 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
197 Other collections (e.g., L<Bio::SeqIO>) are valid. If a certain type
198 does not work, please submit an enhancement request.
200 To create temporary databases, leave out the C<-db_name>, e.g.
202 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
203 -db_data => 'myseqs.fas',
207 To get the tempfile basename, do:
211 =item * Specify data post-construction
213 Use the explicit attribute setters:
215 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
219 $fac->set_db_data('myseqs.fas');
224 =head2 Creating and using mask data
226 The blast+ mask utilities C<windowmasker>, C<segmasker>, and
227 C<dustmasker> are available. Masking can be rolled into database
228 creation, or can be executed later. If your mask data is already
229 created and in ASN1 format, set the C<-mask_file> attribute on
230 construction (see L</Factory constuction/initialization>).
232 To create a mask from raw data or an existing database and apply the
233 mask upon database creation, construct the factory like so:
235 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
236 -db_name => 'my_masked_db',
237 -db_data => 'myseqs.fas',
238 -masker => 'dustmasker',
239 -mask_data => 'maskseqs.fas',
243 The masked database will be created during C<make_db>.
245 The C<-mask_data> parameter can be a FASTA filename or any BioPerl
246 sequence object collection. If the datatype ('nucl' or 'prot') of the
247 mask data is not compatible with the selected masker, an exception
248 will be thrown with a message to that effect.
250 To create a mask ASN1 file that can be used in the C<-mask_file>
251 parameter separately from the attached database, use the
252 C<make_mask()> method directly:
254 $mask_file = $fac->make_mask(-data => 'maskseqs.fas',
255 -masker => 'dustmasker');
256 # segmasker can use a blastdb as input
257 $mask_file = $fac->make_mask(-mask_db => 'mydb',
258 -masker => 'segmasker')
260 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
261 -db_name => 'my_masked_db',
262 -db_data => 'myseqs.fas',
263 -mask_file => $mask_file
267 =head2 Getting database information
269 To get a hash containing useful metadata on an existing database
270 (obtained by running C<blastdbcmd -info>, use C<db_info()>:
272 # get info on the attached database..
273 $info = $fac->db_info;
274 # get info on another database
275 $info = $fac->db_info('~/home/blastdbs/another');
277 To get a particular info element for the attached database, just call
278 the element name off the factory:
280 $num_seqs = $fac->db_num_sequences;
281 # info on all the masks applied to the db, if any:
282 @masking_info = @{ $fac->db_filter_algorithms };
284 =head2 Accessing the L<Bio::Tools::Run::BlastPlus> factory
286 The blast+ programs are actually executed by a
287 L<Bio::Tools::Run::BlastPlus> wrapper instance. This instance is
288 available for peeking and poking in the L<StandAloneBlastPlus>
289 C<factory()> attribute. For convenience, C<BlastPlus> methods can be
290 run from the C<StandAloneBlastPlus> object, and are delegated to the
291 C<factory()> attribute. For example, to get the blast+ program to be
292 executed, examine either
294 $fac->factory->command
300 Similarly, the current parameters for the C<BlastPlus> factory are
302 @parameters = $fac->get_parameters
304 =head2 Cleaning up temp files
306 Temporary analysis files produced under a single factory instances can
307 be unlinked by running
311 Tempfiles are generally not removed unless this method is explicitly
312 called. C<cleanup()> only unlinks "registered" files and
313 databases. All temporary files are automatically registered; in
314 particular, "anonymous" databases (such as
316 $fac->Bio::Tools::Run::StandAloneBlastPlus->new(
317 -db_data => 'myseqs.fas',
321 without a C<-db_name> specification) are registered for cleanup. Any
322 file or database can be registered with an internal method:
324 $fac->_register_temp_for_cleanup('testdb');
332 You can check whether a given basename points to a properly formatted
333 BLAST database by doing
335 $is_good = $fac->check_db('putative_db');
339 User parameters can be passed to the underlying blast+ programs (if
340 you know what you're doing) with C<db_make_args> and C<mask_make_args>:
342 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
343 -db_name => 'customdb',
344 -db_data => 'myseqs.fas',
345 -db_make_args => [ '-taxid_map' => 'seq_to_taxa.txt' ],
346 -masker => 'windowmasker',
347 -mask_data => 'myseqs.fas',
348 -mask_make_args => [ '-dust' => 'T' ],
354 You can prevent exceptions from being thrown by failed blast+ program
355 executions by setting C<no_throw_on_crash>. Examine the error with
358 $fac->no_throw_on_crash(1);
360 if ($fac->stderr =~ /Error:/) {
369 L<Bio::Tools::Run::StandAloneBlastPlus::BlastMethods>,
370 L<Bio::Tools::Run::BlastPlus>
376 User feedback is an integral part of the evolution of this and other
377 Bioperl modules. Send your comments and suggestions preferably to
378 the Bioperl mailing list. Your participation is much appreciated.
380 bioperl-l@bioperl.org - General discussion
381 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
385 Please direct usage questions or support issues to the mailing list:
387 L<bioperl-l@bioperl.org>
389 rather than to the module maintainer directly. Many experienced and
390 reponsive experts will be able look at the problem and quickly
391 address it. Please include a thorough description of the problem
392 with code and data examples if at all possible.
394 =head2 Reporting Bugs
396 Report bugs to the Bioperl bug tracking system to help us keep track
397 of the bugs and their resolution. Bug reports can be submitted via
400 http://redmine.open-bio.org/projects/bioperl/
402 =head1 AUTHOR - Mark A. Jensen
404 Email maj -at- fortinbras -dot- us
410 The rest of the documentation details each of the object methods.
411 Internal methods are usually preceded with a _
415 # Let the code begin...
418 package Bio
::Tools
::Run
::StandAloneBlastPlus
;
422 # Object preamble - inherits from Bio::Root::Root
427 use Bio
::Tools
::GuessSeqFormat
;
428 use Bio
::Tools
::Run
::StandAloneBlastPlus
::BlastMethods
;
432 use base
qw(Bio::Root::Root);
433 unless ( eval "require Bio::Tools::Run::BlastPlus" ) {
434 Bio
::Root
::Root
->throw("This module requires 'Bio::Tools::Run::BlastPlus'");
437 my %AVAILABLE_MASKERS = (
438 'windowmasker' => 'nucl',
439 'dustmasker' => 'nucl',
440 'segmasker' => 'prot'
443 my %MASKER_ENCODING = (
444 'windowmasker' => 'maskinfo_asn1_text',
445 'dustmasker' => 'maskinfo_asn1_text',
446 'segmasker' => 'maskinfo_asn1_text'
449 my $bp_class = 'Bio::Tools::Run::BlastPlus';
451 # what's the desire here?
453 # * factory object (created by new())
454 # - points to some blast db entity, so all functions run off the
455 # the factory (except bl2seq?) use the associated db
457 # * create a blast formatted database:
458 # - specify a file, or an AlignI object
459 # - store for later, or store in a tempfile to throw away
460 # - object should store its own database pointer
461 # - provide masking options based on the maskers provided
463 # * perform database actions via db-oriented blast+ commands
466 # * perform blast searches against the database
467 # - blastx, blastp, blastn, tblastx, tblastn
468 # - specify Bio::Seq objects or files as queries
469 # - output the results as a file or as a Bio::Search::Result::BlastResult
470 # * perform 'special' (i.e., ones I don't know) searches
471 # - psiblast, megablast, rpsblast, rpstblastn
472 # some of these are "tasks" under particular programs
473 # check out psiblast, why special (special 'iteration' handling in
475 # check out rpsblast, megablast
478 # - return the alignment directly as a convenience, using Bio::Search
481 # lazy db formatting: makeblastdb only on first blast request...
482 # ParameterBaseI delegation : use AUTOLOAD
489 Usage : my $obj = new Bio::Tools::Run::StandAloneBlastPlus();
490 Function: Builds a new Bio::Tools::Run::StandAloneBlastPlus object
491 Returns : an instance of Bio::Tools::Run::StandAloneBlastPlus
492 Args : named argument (key => value) pairs:
498 my ($class,@args) = @_;
499 my $self = $class->SUPER::new
(@args);
500 my ($db_name, $db_data, $db_dir, $db_make_args,
501 $mask_file, $mask_data, $mask_make_args, $masker,
502 $create, $overwrite, $is_remote, $prog_dir, $program_dir)
503 = $self->_rearrange([qw(
521 $self->throw("DB name contains invalid characters") unless $db_name =~ m{^[a-z0-9_/:.+-]+$}i;
525 $self->throw("DB directory (DB_DIR) not found") unless (-d
$db_dir);
526 $self->{'_db_dir'} = $db_dir;
529 $self->{'_db_dir'} = '.';
531 $program_dir ||= $prog_dir; # alias
532 # now handle these systematically (bug #3003)
533 # allow db_name to include path info
534 # let db_dir act as root if present and db_name is a relative path
535 # db property contains the pathless name only
537 my ($v,$d,$f) = File
::Spec
->splitpath($db_name);
538 $self->throw("No DB name at the end of path '$db_name'") unless $f;
539 $f =~ s/\..*$//; # tolerant of extensions, but ignore them
541 # now establish db_path property as the internal authority on
543 if ( File
::Spec
->file_name_is_absolute($db_name) ) {
544 $self->throw("Path specified in DB name ('$d') does not exist") unless !$d || (-d
$d);
545 $self->{_db_path
} = File
::Spec
->catfile($d,$f);
546 $self->{_db_dir
} = $d;
547 # ignore $db_dir, give heads-up
548 $self->warn("DB name is an absolute path; setting db_dir to '".$self->db_dir."'") if $db_dir;
551 $d = File
::Spec
->catdir($self->db_dir, $d);
552 $self->throw("Path specified by DB_DIR+DB_NAME ('$d') does not exist") unless !$d || (-d
$d);
553 $self->{_db_path
} = File
::Spec
->catfile($d,$f);
558 $self->throw("Masker '$masker' not available") unless
559 grep /^$masker$/, keys %AVAILABLE_MASKERS;
560 $self->{_masker
} = $masker;
564 $self->throw("Can't find program directory '$program_dir'") unless
566 $self->{_program_dir
} = $program_dir;
568 elsif ($ENV{BLASTPLUSDIR
}) {
569 $self->{_program_dir
} = $ENV{BLASTPLUSDIR
};
571 $Bio::Tools
::Run
::BlastPlus
::program_dir
= $self->{_program_dir
} ||
572 $Bio::Tools
::Run
::BlastPlus
::program_dir
;
575 $self->set_db_make_args( $db_make_args) if ( $db_make_args );
576 $self->set_mask_make_args( $mask_make_args) if ($mask_make_args);
577 $self->{'_create'} = $create;
578 $self->{'_overwrite'} = $overwrite;
579 $self->{'_is_remote'} = $is_remote;
580 $self->{'_db_data'} = $db_data;
582 $self->{'_mask_file'} = $mask_file;
583 $self->{'_mask_data'} = $mask_data;
587 if (defined $self->check_db and $self->check_db == 0 and !$self->is_remote) {
588 $self->throw("DB '".$self->db."' can't be found. To create, set -create => 1.") unless ($create || $overwrite);
591 # allow this to pass; catch lazily at make_db...
592 if (!$self->db_data) {
593 $self->debug('No database or db data specified. '.
594 'To create a new database, provide '.
595 '-db_data => [fasta|\@seqs|$seqio_object]')
598 # no db specified; create temp db
599 $self->{_create
} = 1;
601 my $fh = File
::Temp
->new(TEMPLATE
=> 'DBXXXXX',
602 DIR
=> $self->db_dir,
604 my ($v,$d,$f) = File
::Spec
->splitpath($fh->filename);
606 $self->{_db_path
} = $fh->filename;
607 $self->_register_temp_for_cleanup($self->db_path);
611 $self->{_db_dir
} = File
::Temp
->newdir('DBDXXXXX');
612 $self->{_db
} = 'DBTEMP';
613 $self->{_db_path
} = File
::Spec
->catfile($self->db_dir,
624 Usage : $obj->db($newval)
625 Function: contains the basename of the local blast database
627 Returns : value of db (a scalar string)
632 sub db
{ shift->{_db
} }
633 sub db_name
{ shift->{_db
} }
634 sub set_db_name
{ shift->{_db
} = shift }
635 sub db_dir
{ shift->{_db_dir
} }
636 sub set_db_dir
{ shift->{_db_dir
} = shift }
637 sub db_path
{ shift->{_db_path
} }
638 sub db_data
{ shift->{_db_data
} }
639 sub set_db_data
{ shift->{_db_data
} = shift }
640 sub db_type
{ shift->{_db_type
} }
641 sub masker
{ shift->{_masker
} }
642 sub set_masker
{ shift->{_masker
} = shift }
643 sub mask_file
{ shift->{_mask_file
} }
644 sub set_mask_file
{ shift->{_mask_file
} = shift }
645 sub mask_data
{ shift->{_mask_data
} }
646 sub set_mask_data
{ shift->{_mask_data
} = shift }
651 Usage : $obj->factory($newval)
652 Function: attribute containing the Bio::Tools::Run::BlastPlus
655 Returns : value of factory (Bio::Tools::Run::BlastPlus object)
660 sub factory
{ shift->{_factory
} }
661 sub create
{ shift->{_create
} }
662 sub overwrite
{ shift->{_overwrite
} }
663 sub is_remote
{ shift->{_is_remote
} }
665 =head2 program_version()
667 Title : program_version
668 Usage : $version = $bedtools_fac->program_version()
669 Function: Returns the program version (if available)
670 Returns : string representing location and version of the program
671 Note : this works around the WrapperBase::version() method conflicting with
672 the -version parameter for SABlast (good argument for not having
673 getter/setters for these)
677 =head2 package_version()
679 Title : package_version
680 Usage : $version = $bedtools_fac->package_version()
681 Function: Returns the BLAST+ package version (if available)
682 Returns : string representing BLAST+ package version (may differ from version())
686 sub program_version
{
688 my $fac = $self->factory;
689 $fac->program_version(@_) if $fac;
692 sub package_version
{
694 my $fac = $self->factory;
695 $fac->package_version(@_) if $fac;
704 Function: create the blast database (if necessary),
705 imposing masking if specified
706 Returns : true on success
711 # should also provide facility for creating subdatabases from
712 # existing databases (i.e., another format for $data: the name of an
713 # existing blastdb...)
717 return 1 if ( $self->check_db && !$self->overwrite ); # already there or force make
719 $self->throw('No database or db data specified. '.
720 'To create a new database, provide '.
721 '-db_data => [fasta|\@seqs|$seqio_object]')
722 unless $self->db_data;
723 # db_data can be: fasta file, array of seqs, Bio::SeqIO object
724 my $data = $self->db_data;
725 $data = $self->_fastize($data);
726 my $testio = Bio
::SeqIO
->new(-file
=>$data, -format
=>'fasta');
727 $self->{_db_type
} = ($testio->next_seq->alphabet =~ /.na/) ?
'nucl' : 'prot';
730 my ($v,$d,$name) = File
::Spec
->splitpath($data);
732 $self->{_db
} ||= $name;
733 $self->{_db_path
} = File
::Spec
->catfile($self->db_dir,$self->db);
735 # deal with creating masks here,
736 # and provide correct parameters to the
739 # accomodate $self->db_make_args here -- allow them
740 # to override defaults, or allow only those args
741 # that are not specified here?
742 my $usr_db_args ||= $self->db_make_args;
743 my %usr_args = @
$usr_db_args if $usr_db_args;
747 -dbtype
=> $self->db_type,
748 -out
=> $self->db_path,
750 -parse_seqids
=> 1 # necessary for masking
754 $db_args{$_} = $usr_args{$_} for keys %usr_args;
757 # do masking if requested
758 # if the (masker and mask_data) OR mask_file attributes of this
759 # object are set, assume that masking is desired
761 if ($self->mask_file) { # the actual masking data is provided
762 $db_args{'-mask_data'} = $self->mask_file;
764 elsif ($self->masker && $self->mask_data) { # build the mask
765 $db_args{'-mask_data'} = $self->make_mask(-data
=> $self->mask_data);
766 $self->throw("Masker error: message is '".$self->stderr."'") unless
767 $db_args{'-mask_data'};
768 $self->{_mask_data
} = $db_args{'-mask_data'};
771 $self->{_factory
} = $bp_class->new(
772 -command
=> 'makeblastdb',
775 $self->factory->no_throw_on_crash($self->no_throw_on_crash);
776 return $self->factory->_run;
783 Function: create masking data based on specified parameters
784 Returns : mask data filename (scalar string)
789 # mask program usage (based on blast+ manual)
792 # windowmasker nucl mask overrep data, low-complexity (optional)
793 # dustmasker nucl mask low-complexity
799 my ($data, $mask_db, $make_args, $masker) = $self->_rearrange([qw(
804 my (%mask_args,%usr_args,$db_type);
806 $self->throw("make_mask requires -data argument") unless $data;
807 $masker ||= $self->masker;
808 $self->throw("no masker specified and no masker default set in object")
810 my $usr_make_args ||= $self->mask_make_args;
811 %usr_args = @
$usr_make_args if $usr_make_args;
812 unless (grep /^$masker$/, keys %AVAILABLE_MASKERS) {
813 $self->throw("Masker '$masker' not available");
815 if ($self->check_db($data)) {
816 unless ($masker eq 'segmasker') {
817 $self->throw("Masker '$masker' can't use a blastdb as primary input");
819 unless ($self->db_info($data)->{_db_type
} eq
820 $AVAILABLE_MASKERS{$masker}) {
821 $self->throw("Masker '$masker' is incompatible with input db sequence type");
826 $data = $self->_fastize($data);
827 my $sio = Bio
::SeqIO
->new(-file
=>$data);
828 my $s = $sio->next_seq;
830 if ($s->alphabet =~ /.na/) {
833 elsif ($s->alphabet =~ /protein/) {
839 unless ($type eq $AVAILABLE_MASKERS{$masker}) {
840 $self->throw("Masker '$masker' is incompatible with sequence type '$type'");
844 # check that sequence type and masker program match:
846 # now, need to provide reasonable default masker arg settings,
847 # and override these with $usr_make_args as necessary and appropriate
848 my $mh = File
::Temp
->new(TEMPLATE
=>'MSKXXXXX',
850 DIR
=> $self->db_dir);
851 my $mask_outfile = $mh->filename;
853 $self->_register_temp_for_cleanup(File
::Spec
->catfile($self->db_dir,$mask_outfile));
858 #-outfmt => $MASKER_ENCODING{$masker}
862 $mask_args{$_} = $usr_args{$_} for keys %usr_args;
864 # masker-specific pipelines
867 m/dustmasker/ && do {
868 $mask_args{'-out'} = $mask_outfile;
869 $self->{_factory
} = $bp_class->new(-command
=> $masker,
871 $self->factory->no_throw_on_crash($self->no_throw_on_crash);
872 $status = $self->factory->_run;
875 m/windowmasker/ && do {
876 # check mask_db if present
878 unless ($self->check_db($mask_db)) {
879 $self->throw("Mask database '$mask_db' is not present or valid");
882 my $cth = File
::Temp
->new(TEMPLATE
=>'MCTXXXXX',
883 DIR
=> $self->db_dir);
884 my $ct_file = $cth->filename;
886 $mask_args{'-out'} = $ct_file;
887 $mask_args{'-mk_counts'} = 'true';
888 $self->{_factory
} = $bp_class->new(-command
=> $masker,
890 $self->factory->no_throw_on_crash($self->no_throw_on_crash);
891 $status = $self->factory->_run;
893 delete $mask_args{'-mk_counts'};
894 $mask_args{'-ustat'} = $ct_file;
895 $mask_args{'-out'} = $mask_outfile;
897 $mask_args{'-in'} = $mask_db;
898 $mask_args{'-infmt'} = 'blastdb';
900 $self->factory->reset_parameters(%mask_args);
901 $self->factory->no_throw_on_crash($self->no_throw_on_crash);
902 $status = $self->factory->_run;
906 $mask_args{'-infmt'} = $infmt;
907 $mask_args{'-out'} = $mask_outfile;
908 $self->{_factory
} = $bp_class->new(-command
=> $masker,
910 $self->factory->no_throw_on_crash($self->no_throw_on_crash);
911 $status = $self->factory->_run;
915 $self->throw("Masker program '$masker' not recognized");
918 return $status ?
$mask_outfile : $status;
925 Function: get info for database
926 (via blastdbcmd -info); add factory attributes
927 Returns : hash of database attributes
928 Args : [optional] db name (scalar string) (default: currently attached db)
935 $db ||= $self->db_path;
937 $self->warn("db_info: db not specified and no db attached");
940 if ($self->is_remote) {
941 $self->warn("db_info: sorry, can't get info for remote database (complain to NCBI)");
944 if ($db eq $self->db and $self->{_db_info
}) {
945 return $self->{_db_info
}; # memoized
948 $self->{_factory
} = $bp_class->new( -command
=> 'blastdbcmd',
951 $self->factory->no_throw_on_crash(1);
952 $self->factory->_run();
953 $self->factory->no_throw_on_crash(0);
954 if ($self->factory->stderr =~ /No alias or index file found/) {
955 $self->warn("db_info: Couldn't find database ".$self->db."; make with make_db()");
958 $db_info_text = $self->factory->stdout;
959 # parse info into attributes
960 my $infh = IO
::String
->new($db_info_text);
963 /Database: (.*)/ && do {
964 $attr{db_info_name
} = $1;
967 /([0-9,]+) sequences; ([0-9,]+) total/ && do {
968 $attr{db_num_sequences
} = $1;
969 $attr{db_total_bases
} = $2;
970 $attr{db_num_sequences
} =~ s/,//g;
971 $attr{db_total_bases
} =~ s/,//g;
974 /Date: (.*?)\s+Longest sequence: ([0-9,]+)/ && do {
975 $attr{db_date
} = $1; # convert to more usable date object
976 $attr{db_longest_sequence
} = $2;
977 $attr{db_longest_sequence
} =~ s/,//g;
980 /Algorithm ID/ && do {
981 my $alg = $attr{db_filter_algorithms
} = [];
983 if (/\s+([0-9]+)\s+([a-z0-9_]+)\s+(.*)/i) {
984 push @
$alg, { algorithm_id
=> $1,
985 algorithm_name
=> $2,
986 algorithm_opts
=> $3 };
996 if ( -e
$db.'.psq' ) {
997 $attr{_db_type
} = 'prot';
999 elsif (-e
$db.'.nsq') {
1000 $attr{_db_type
} = 'nucl';
1003 $attr{_db_type
} = 'UNK'; # bork
1005 if ($db eq $self->db) {
1006 $self->{_db_type
} = $attr{_db_type
};
1007 $self->{_db_info_text
} = $db_info_text;
1008 $self->{_db_info
} = \
%attr;
1013 =head2 set_db_make_args()
1015 Title : set_db_make_args
1017 Function: set the DB make arguments attribute
1019 Returns : true on success
1020 Args : arrayref or hashref of named arguments
1024 sub set_db_make_args
{
1027 $self->throw("Arrayref or hashref required at DB_MAKE_ARGS") unless
1028 ref($args) =~ /^ARRAY|HASH$/;
1029 if (ref($args) eq 'HASH') {
1033 $self->throw("Named args required for DB_MAKE_ARGS") unless !(@
$args % 2);
1034 $self->{'_db_make_args'} = $args;
1038 sub db_make_args
{ shift->{_db_make_args
} }
1040 =head2 set_mask_make_args()
1042 Title : set_mask_make_args
1044 Function: set the masker make arguments attribute
1046 Returns : true on success
1047 Args : arrayref or hasref of named arguments
1051 sub set_mask_make_args
{
1054 $self->throw("Arrayref or hashref required at MASK_MAKE_ARGS") unless
1055 ref($args) =~ /^ARRAY|HASH$/;
1056 if (ref($args) eq 'HASH') {
1060 $self->throw("Named args required at MASK_MAKE_ARGS") unless !(@
$args % 2);
1061 $self->{'_mask_make_args'} = $args;
1065 sub mask_make_args
{ shift->{_mask_make_args
} }
1071 Function: determine if database with registered name and dir
1073 Returns : 1 if db present, 0 if not present, undef if name/dir not
1075 Args : [optional] db name (default is 'registered' name in $self->db)
1076 [optional] db directory (default is 'registered' dir in
1086 my ($v,$d,$f) = File
::Spec
->splitpath($db);
1087 $f =~ s/\..*$//; # ignore extensions
1088 $db_path = File
::Spec
->catfile($d||'.',$f);
1091 $db_path = $self->db_path;
1094 $self->{_factory
} = $bp_class->new( -command
=> 'blastdbcmd',
1098 $self->factory->no_throw_on_crash(1);
1099 $self->factory->_run();
1100 $self->factory->no_throw_on_crash(0);
1101 return 0 if ($self->factory->stderr =~ /No alias or index file found/);
1107 =head2 no_throw_on_crash()
1109 Title : no_throw_on_crash
1110 Usage : $fac->no_throw_on_crash($newval)
1111 Function: set to prevent an exeception throw on a failed
1112 blast program execution
1114 Returns : value of no_throw_on_crash (boolean)
1115 Args : on set, new value (boolean)
1119 sub no_throw_on_crash
{
1122 return $self->{'no_throw_on_crash'} = shift if @_;
1123 return $self->{'no_throw_on_crash'};
1133 Function: convert a sequence collection to a temporary
1134 fasta file (sans gaps)
1135 Returns : fasta filename (scalar string)
1136 Args : sequence collection
1145 # suppose a fasta file name
1146 $self->throw('Sequence file not found') unless -e
$data;
1147 my $guesser = Bio
::Tools
::GuessSeqFormat
->new(-file
=> $data);
1148 $self->throw('Sequence file not in FASTA format') unless
1149 $guesser->guess eq 'fasta';
1152 (ref eq 'ARRAY') && (ref $$data[0]) &&
1153 ($$data[0]->isa('Bio::Seq') || $$data[0]->isa('Bio::PrimarySeq'))
1155 my $fh = File
::Temp
->new(TEMPLATE
=> 'DBDXXXXX',
1157 DIR
=> $self->db_dir,
1159 my $fname = $fh->filename;
1161 $self->_register_temp_for_cleanup($fname);
1162 my $fasio = Bio
::SeqIO
->new(-file
=>">$fname", -format
=>"fasta")
1163 or $self->throw("Can't create temp fasta file");
1166 my $a = $_->alphabet;
1167 $s =~ s/[$Bio::PrimarySeq::GAP_SYMBOLS]//g;
1170 $fasio->write_seq($_);
1176 ref && do { # some kind of object
1177 my ($fmt) = ref($data) =~ /.*::(.*)/;
1178 if ($fmt eq 'fasta') {
1179 $data = $data->file; # use the fasta file directly
1183 my $fh = File
::Temp
->new(TEMPLATE
=> 'DBDXXXXX',
1185 DIR
=> $self->db_dir,
1187 my $fname = $fh->filename;
1189 $self->_register_temp_for_cleanup($fname);
1190 my $fasio = Bio
::SeqIO
->new(-file
=>">$fname", -format
=>"fasta")
1191 or $self->throw("Can't create temp fasta file");
1192 require Bio
::PrimarySeq
;
1193 if ($data->isa('Bio::AlignIO')) {
1194 my $aln = $data->next_aln;
1195 for ($aln->each_seq) {
1198 my $a = $_->alphabet;
1199 $s =~ s/[$Bio::PrimarySeq::GAP_SYMBOLS]//g;
1202 $fasio->write_seq($_)
1205 elsif ($data->isa('Bio::SeqIO')) {
1206 while (local $_ = $data->next_seq) {
1208 my $a = $_->alphabet;
1209 $s =~ s/[$Bio::PrimarySeq::GAP_SYMBOLS]//g;
1212 $fasio->write_seq($_);
1215 elsif ($data->isa('Bio::Align::AlignI')) {
1216 for( $data->each_seq) {
1218 my $a = $_->alphabet;
1219 $s =~ s/[$Bio::PrimarySeq::GAP_SYMBOLS]//g;
1222 $fasio->write_seq($_)
1225 elsif ($data->isa('Bio::Seq') || $data->isa('Bio::PrimarySeq')) {
1227 my $a = $data->alphabet;
1228 $s =~ s/[$Bio::PrimarySeq::GAP_SYMBOLS]//g;
1230 $data->alphabet($a);
1231 $fasio->write_seq($data);
1234 $self->throw("Can't handle sequence container object ".
1235 "of type '".ref($data)."'");
1246 =head2 _register_temp_for_cleanup()
1248 Title : _register_temp_for_cleanup
1250 Function: register a file for cleanup with
1252 Returns : true on success
1253 Args : a file name or a blastdb basename
1258 sub _register_temp_for_cleanup
{
1263 my ($v, $d, $n) = File
::Spec
->splitpath($_);
1264 $_ = File
::Spec
->catfile($self->db_dir, $n) unless length($d);
1265 push @
{$self->{_cleanup_list
}}, File
::Spec
->rel2abs($_);
1274 Function: unlink files registered for cleanup
1275 Returns : true on success
1282 return unless $self->{_cleanup_list
};
1283 for (@
{$self->{_cleanup_list
}}) {
1284 m/(\.[a-z0-9_]+)+$/i && do {
1288 do { # catch all index files
1289 if ( -e
$_.".psq" ) {
1290 unlink glob($_.".p*");
1291 unlink glob($_.".??.p*");
1293 elsif ( -e
$_.".nsq" ) {
1294 unlink glob($_.".n*");
1295 unlink glob($_.".??.n*");
1308 In this module, C<AUTOLOAD()> delegates L<Bio::Tools::Run::WrapperBase> and
1309 L<Bio::Tools::Run::WrapperBase::CommandExts> methods (including those
1310 of L<Bio::ParamterBaseI>) to the C<factory()> attribute:
1316 $fac->factory->stderr
1318 If $AUTOLOAD isn't pointing to a WrapperBase method, then AUTOLOAD attempts to return a C<db_info> attribute: e.g.
1320 $fac->db_num_sequences
1322 works by looking in the $fac->db_info() hash.
1324 Finally, if $AUTOLOAD is pointing to a blast query method, AUTOLOAD
1325 runs C<run> with the C<-method> parameter appropriately set.
1332 my $method = $AUTOLOAD;
1333 $method =~ s/.*:://;
1335 if (grep /^$method$/, @Bio::Tools
::Run
::StandAloneBlastPlus
::BlastMethods
) {
1336 push @args, ('-method_args' => ['-remote' => 1] ) if ($self->is_remote);
1337 return $self->run( -method
=> $method, @args );
1339 if ($self->factory and $self->factory->can($method)) { # factory method
1340 return $self->factory->$method(@args);
1342 if ($self->db_info and grep /^$method$/, keys %{$self->db_info}) {
1343 return $self->db_info->{$method};
1346 $self->throw("Can't locate method '$method' in class ".ref($self));