From c609db6e2ab09f45600399fc0650581d5e035d7f Mon Sep 17 00:00:00 2001 From: "Mark A. Jensen" Date: Mon, 25 Aug 2014 22:34:43 -0400 Subject: [PATCH] Remove modules and tests (StandAloneBlast and WrapperBase) that now reside in bioperl-run --- Bio/Tools/Run/StandAloneBlast.pm | 634 -------------- Bio/Tools/Run/StandAloneNCBIBlast.pm | 538 ------------ Bio/Tools/Run/StandAloneWUBlast.pm | 299 ------- Bio/Tools/Run/WrapperBase.pm | 511 ----------- Bio/Tools/Run/WrapperBase/CommandExts.pm | 1405 ------------------------------ t/Tools/Run/Dummy.pm | 21 - t/Tools/Run/Dummy/Config.pm | 75 -- t/Tools/Run/StandAloneBlast.t | 185 ---- t/Tools/Run/WBCommandExts.t | 66 -- t/Tools/Run/WrapperBase.t | 129 --- 10 files changed, 3863 deletions(-) delete mode 100644 Bio/Tools/Run/StandAloneBlast.pm delete mode 100644 Bio/Tools/Run/StandAloneNCBIBlast.pm delete mode 100644 Bio/Tools/Run/StandAloneWUBlast.pm delete mode 100644 Bio/Tools/Run/WrapperBase.pm delete mode 100644 Bio/Tools/Run/WrapperBase/CommandExts.pm delete mode 100755 t/Tools/Run/Dummy.pm delete mode 100755 t/Tools/Run/Dummy/Config.pm delete mode 100644 t/Tools/Run/StandAloneBlast.t delete mode 100755 t/Tools/Run/WBCommandExts.t delete mode 100755 t/Tools/Run/WrapperBase.t diff --git a/Bio/Tools/Run/StandAloneBlast.pm b/Bio/Tools/Run/StandAloneBlast.pm deleted file mode 100644 index ecd349596..000000000 --- a/Bio/Tools/Run/StandAloneBlast.pm +++ /dev/null @@ -1,634 +0,0 @@ -# -# BioPerl module for Bio::Tools::Run::StandAloneBlast -# -# Copyright Peter Schattner -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::Run::StandAloneBlast - Object for the local execution -of the NCBI BLAST program suite (blastall, blastpgp, bl2seq). -There is experimental support for WU-Blast and NCBI rpsblast. - -=head1 SYNOPSIS - - # Local-blast "factory object" creation and blast-parameter - # initialization: - @params = (-database => 'swissprot', -outfile => 'blast1.out'); - $factory = Bio::Tools::Run::StandAloneBlast->new(@params); - - # Blast a sequence against a database: - $str = Bio::SeqIO->new(-file=>'t/amino.fa', -format => 'Fasta'); - $input = $str->next_seq(); - $input2 = $str->next_seq(); - $blast_report = $factory->blastall($input); - - # Run an iterated Blast (psiblast) of a sequence against a database: - $factory->j(3); # 'j' is blast parameter for # of iterations - $factory->outfile('psiblast1.out'); - $factory = Bio::Tools::Run::StandAloneBlast->new(@params); - $blast_report = $factory->blastpgp($input); - - # Use blast to align 2 sequences against each other: - $factory = Bio::Tools::Run::StandAloneBlast->new(-outfile => 'bl2seq.out'); - $factory->bl2seq($input, $input2); - - # Experimental support for WU-Blast 2.0 - my $factory = Bio::Tools::Run::StandAloneBlast->new(-program =>"wublastp", - -database =>"swissprot", - -e => 1e-20); - my $blast_report = $factory->wublast($seq); - - # Experimental support for NCBI rpsblast - my $factory = Bio::Tools::Run::StandAloneBlast->new(-db => 'CDD/Cog', - -expect => 0.001); - $factory->F('T'); # turn on SEG filtering of query sequence - my $blast_report = $factory->rpsblast($seq); - - # Use the experimental fast Blast parser, 'blast_pull' - my $factory = Bio::Tools::Run::StandAloneBlast->new(-_READMETHOD =>'blast_pull', - @other_params); - - # Various additional options and input formats are available, - # see the DESCRIPTION section for details. - -=head1 DESCRIPTION - -This DESCRIPTION only documents Bio::Tools::Run::StandAloneBlast, a -Bioperl object for running the NCBI standAlone BLAST package. Blast -itself is a large & complex program - for more information regarding -BLAST, please see the BLAST documentation which accompanies the BLAST -distribution. BLAST is available from ftp://ncbi.nlm.nih.gov/blast/. - -A source of confusion in documenting a BLAST interface is that the -term "program" is used in - at least - three different ways in the -BLAST documentation. In this DESCRIPTION, "program" will refer to the -BLAST routine set by the BLAST C<-p> parameter that can be set to blastn, -blastp, tblastx etc. We will use the term Blast "executable" to refer -to the various different executable files that may be called - ie. -blastall, blastpgp or bl2seq. In addition, there are several BLAST -capabilities, which are also referred to as "programs", and are -implemented by using specific combinations of BLAST executables, -programs and parameters. They will be referred by their specific -names - eg PSIBLAST and PHIBLAST. - -Before running StandAloneBlast it is necessary: to install BLAST -on your system, to edit set the environmental variable $BLASTDIR -or your $PATH variable to point to the BLAST directory, and to -ensure that users have execute privileges for the BLAST program. - -If the databases which will be searched by BLAST are located in the -data subdirectory of the blast program directory (the default -installation location), StandAloneBlast will find them; however, -if the database files are located in any other location, environmental -variable $BLASTDATADIR will need to be set to point to that directory. - -The use of the StandAloneBlast module is as follows: Initially, a -local blast "factory object" is created. The constructor may be passed -an optional array of (non-default) parameters to be used by the -factory, eg: - - @params = (-program => 'blastn', -database => 'ecoli.nt'); - $factory = Bio::Tools::Run::StandAloneBlast->new(@params); - -Any parameters not explicitly set will remain as the defaults of the -BLAST executable. Note each BLAST executable has somewhat different -parameters and options. See the BLAST Documentation for a description -or run the BLAST executable from the command line followed solely with -a "-" to see a list of options and default values for that executable; -eg Eblastall -. - -BLAST parameters can be changed and/or examined at any time after the -factory has been created. The program checks that any -parameter/switch being set/read is valid. Except where specifically -noted, StandAloneBlast uses the same single-letter, case-sensitive -parameter names as the actual blast program. Currently no checks are -included to verify that parameters are of the proper type (e.g. string -or numeric) or that their values are within the proper range. - -As an example, to change the value of the Blast parameter 'e' ('e' is -the parameter for expectation-value cutoff) - - $expectvalue = 0.01; - $factory->e($expectvalue); - -Note that for improved script readibility one can modify the name of -the (ncbi) BLAST parameters as desired as long as the initial letter (and -case) of the parameter are preserved, e.g.: - - $factory->expectvalue($expectvalue); - -Unfortunately, some of the BLAST parameters are not the single -letter one might expect (eg "iteration round" in blastpgp is 'j'). -Again one can check by using, for example: - - > blastpgp - - -Wublast parameters need to be complete (ie. don't truncate them to their -first letter), but are case-insensitive. - -Once the factory has been created and the appropriate parameters set, -one can call one of the supported blast executables. The input -sequence(s) to these executables may be fasta file(s) as described in -the BLAST documentation. - - $inputfilename = 't/testquery.fa'; - $blast_report = $factory->blastall($inputfilename); - -In addition, sequence input may be in the form of either a Bio::Seq -object or (a reference to) an array of Bio::Seq objects, e.g.: - - $input = Bio::Seq->new(-id => "test query", - -seq => "ACTACCCTTTAAATCAGTGGGGG"); - $blast_report = $factory->blastall($input); - -NOTE: Use of the BPlite method has been deprecated and is no longer supported. - -For blastall and non-psiblast blastpgp runs, report object is a L -object, selected by the user with the parameter _READMETHOD. The leading -underscore is needed to distinguish this option from options which are passed to -the BLAST executable. The default parser is Bio::SearchIO::blast. In any case, -the "raw" blast report is also available. The filename is set by the 'outfile' -parameter and has the default value of "blastreport.out". - -For psiblast execution in the BLAST "jumpstart" mode, the program must -be passed (in addition to the query sequence itself) an alignment -containing the query sequence (in the form of a SimpleAlign object) as -well as a "mask" specifying at what residues position-specific scoring -matrices (PSSMs) are to used and at what residues default scoring -matrices (eg BLOSUM) are to be used. See psiblast documentation for -more details. The mask itself is a string of 0's and 1's which is the -same length as each sequence in the alignment and has a "1" at -locations where (PSSMs) are to be used and a "0" at all other -locations. So for example: - - $str = Bio::AlignIO->new(-file => "cysprot.msf", - -format => 'msf'); - $aln = $str->next_aln(); - $len = $aln->length_aln(); - $mask = '1' x $len; - # simple case where PSSM's to be used at all residues - $report = $factory->blastpgp("cysprot1.fa", $aln, $mask); - -For bl2seq execution, StandAloneBlast.pm can be combined with -AlignIO.pm to directly produce a SimpleAlign object from the alignment -of the two sequences produced by bl2seq as in: - - # Get 2 sequences - $str = Bio::SeqIO->new(-file=>'t/amino.fa' , -format => 'Fasta'); - my $seq3 = $str->next_seq(); - my $seq4 = $str->next_seq(); - - # Run bl2seq on them - $factory = Bio::Tools::Run::StandAloneBlast->new(-program => 'blastp', - -outfile => 'bl2seq.out'); - my $bl2seq_report = $factory->bl2seq($seq3, $seq4); - - # Use AlignIO.pm to create a SimpleAlign object from the bl2seq report - $str = Bio::AlignIO->new(-file=> 'bl2seq.out',-format => 'bl2seq'); - $aln = $str->next_aln(); - -For more examples of syntax and use of StandAloneBlast.pm, the user is -encouraged to run the scripts standaloneblast.pl in the bioperl -examples/tools directory and StandAloneBlast.t in the bioperl t/ -directory. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Peter Schattner - -Email schattner at alum.mit.edu - -=head1 MAINTAINER - Torsten Seemann - -Email torsten at infotech.monash.edu.au - -=head1 CONTRIBUTORS - -Sendu Bala bix@sendu.me.uk (reimplementation) - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -package Bio::Tools::Run::StandAloneBlast; - -use strict; -use warnings; - -use Bio::Root::IO; -use Bio::Seq; -use Bio::SeqIO; -use Bio::SearchIO; -use File::Spec; - -use base qw(Bio::Tools::Run::WrapperBase Bio::Factory::ApplicationFactoryI); - -our $AUTOLOAD; -our $DEFAULTBLASTTYPE = 'NCBI'; -our $DEFAULTREADMETHOD = 'BLAST'; - -# If local BLAST databases are not stored in the standard -# /data directory, the variable BLASTDATADIR will need to be -# set explicitly -our $DATADIR = $ENV{'BLASTDATADIR'} || $ENV{'BLASTDB'}; -if (! defined $DATADIR && defined $ENV{'BLASTDIR'}) { - my $dir = Bio::Root::IO->catfile($ENV{'BLASTDIR'}, 'data'); - if (-d $dir) { - $DATADIR = $dir; - } - elsif ($ENV{'BLASTDIR'} =~ /bin/) { - $dir = $ENV{'BLASTDIR'}; - $dir =~ s/bin/data/; - $DATADIR = $dir if -d $dir; - } -} - -=head2 new - - Title : new - Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new(); - Function: Builds a newBio::Tools::Run::StandAloneBlast object - Returns : Bio::Tools::Run::StandAloneNCBIBlast or StandAloneWUBlast - Args : -quiet => boolean # make program execution quiet - -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull' - # the parsing method, case insensitive - -Essentially all BLAST parameters can be set via StandAloneBlast.pm. -Some of the most commonly used parameters are listed below. All -parameters have defaults and are optional except for -p in those programs that -have it. For a complete listing of settable parameters, run the relevant -executable BLAST program with the option "-" as in blastall - -Note that the input parameters (-i, -j, -input) should not be set directly by -you: this module sets them when you call one of the executable methods. - -Blastall - - -p Program Name [String] - Input should be one of "blastp", "blastn", "blastx", - "tblastn", or "tblastx". - -d Database [String] default = nr - The database specified must first be formatted with formatdb. - Multiple database names (bracketed by quotations) will be accepted. - An example would be -d "nr est" - -e Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer] - default = 3 - -Blastpgp (including Psiblast) - - -j is the maximum number of rounds (default 1; i.e., regular BLAST) - -h is the e-value threshold for including sequences in the - score matrix model (default 0.001) - -c is the "constant" used in the pseudocount formula specified in the paper (default 10) - -B Multiple alignment file for PSI-BLAST "jump start mode" Optional - -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional - -rpsblast - - -d Database [String] default = (none - you must specify a database) - The database specified must first be formatted with formatdb. - Multiple database names (bracketed by quotations) will be accepted. - An example would be -d "Cog Smart" - -e Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -Bl2seq - - -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String] - default = blastp - -o alignment output file [File Out] default = stdout - -e Expectation value (E) [Real] default = 10.0 - -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer] - default = 3 - -WU-Blast - - -p Program Name [String] - Input should be one of "wublastp", "wublastn", "wublastx", - "wutblastn", or "wutblastx". - -d Database [String] default = nr - The database specified must first be formatted with xdformat. - -E Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -=cut - -sub new { - my ($caller, @args) = @_; - my $class = ref($caller) || $caller; - - # Because of case-sensitivity issues, ncbi and wublast methods are - # mutually exclusive. We can't load ncbi methods if we start with wublast - # (and vice versa) since wublast e() and E() should be the same thing, - # whilst they must be different things in ncbi blast. - # - # Solution: split StandAloneBlast out into two more modules for NCBI and WU - - if ($class =~ /NCBI|WU/) { - return $class->SUPER::new(@args); - } - - my %args = @args; - my $blasttype = $DEFAULTBLASTTYPE; - while (my ($attr, $value) = each %args) { - if ($attr =~/^-?\s*program\s*$|^-?p$/) { - if ($value =~ /^wu*/) { - $blasttype = 'WU'; - } - } - } - - my $module = "Bio::Tools::Run::StandAlone${blasttype}Blast"; - Bio::Root::Root->_load_module($module); - return $module->new(@args); -} - -=head2 executable - - Title : executable - Usage : my $exe = $blastfactory->executable('blastall'); - Function: Finds the full path to the executable - Returns : string representing the full path to the exe - Args : [optional] name of executable to set path to - [optional] boolean flag whether or not warn when exe is not found - -=cut - -sub executable { - my ($self, $exename, $exe, $warn) = @_; - $exename = 'blastall' unless (defined $exename || $self =~ /WUBlast/); - $self->program_name($exename); - - if( defined $exe && -x $exe ) { - $self->{'_pathtoexe'}->{$exename} = $exe; - } - unless( defined $self->{'_pathtoexe'}->{$exename} ) { - my $f = $self->program_path($exename); - $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e $f && -x $f ); - - # This is how I meant to split up these conditionals --jason - # if exe is null we will execute this (handle the case where - # PROGRAMDIR pointed to something invalid) - unless( $exe ) { # we didn't find it in that last conditional - if( ($exe = $self->io->exists_exe($exename)) && -x $exe ) { - $self->{'_pathtoexe'}->{$exename} = $exe; - } - else { - $self->warn("Cannot find executable for $exename") if $warn; - $self->{'_pathtoexe'}->{$exename} = undef; - } - } - } - return $self->{'_pathtoexe'}->{$exename}; -} - -=head2 program_dir - - Title : program_dir - Usage : my $dir = $factory->program_dir(); - Function: Abstract get method for dir of program. - Returns : string representing program directory - Args : none - -=cut - -sub program_dir { - my $self = shift; - $self =~ /NCBIBlast/? $ENV{'BLASTDIR'}: $ENV{'WUBLASTDIR'}; -} - -sub program_name { - my $self = shift; - if (@_) { $self->{program_name} = shift } - return $self->{program_name} || ''; -} - -sub program { - my $self = shift; - if( wantarray ) { - return ($self->executable, $self->p()); - } else { - return $self->executable(@_); - } -} - -=head2 _setinput - - Title : _setinput - Usage : Internal function, not to be called directly - Function: Create input file(s) for Blast executable - Example : - Returns : name of file containing Blast data input - Args : Seq object reference or input file name - -=cut - -sub _setinput { - my ($self, $executable, $input1, $input2) = @_; - my ($seq, $temp, $infilename1, $infilename2,$fh ) ; - # If $input1 is not a reference it better be the name of a file with - # the sequence/ alignment data... - $self->io->_io_cleanup(); - - SWITCH: { - unless (ref $input1) { - $infilename1 = (-e $input1) ? $input1 : 0 ; - last SWITCH; - } - - # $input may be an array of BioSeq objects... - if (ref($input1) =~ /ARRAY/i ) { - ($fh,$infilename1) = $self->io->tempfile(); - $temp = Bio::SeqIO->new(-fh=> $fh, -format => 'fasta'); - foreach $seq (@$input1) { - unless ($seq->isa("Bio::PrimarySeqI")) {return 0;} - $seq->display_id($seq->display_id); - $temp->write_seq($seq); - } - close $fh; - $fh = undef; - last SWITCH; - } - - # $input may be a single BioSeq object... - elsif ($input1->isa("Bio::PrimarySeqI")) { - ($fh,$infilename1) = $self->io->tempfile(); - - # just in case $input1 is taken from an alignment and has spaces (ie - # deletions) indicated within it, we have to remove them - otherwise - # the BLAST programs will be unhappy - my $seq_string = $input1->seq(); - $seq_string =~ s/\W+//g; # get rid of spaces in sequence - $input1->seq($seq_string); - $temp = Bio::SeqIO->new(-fh=> $fh, '-format' => 'fasta'); - $temp->write_seq($input1); - close $fh; - undef $fh; - last SWITCH; - } - - $infilename1 = 0; # Set error flag if you get here - } - - unless ($input2) { return $infilename1; } - - SWITCH2: { - unless (ref $input2) { - $infilename2 = (-e $input2) ? $input2 : 0 ; - last SWITCH2; - } - if ($input2->isa("Bio::PrimarySeqI") && $executable eq 'bl2seq' ) { - ($fh,$infilename2) = $self->io->tempfile(); - - $temp = Bio::SeqIO->new(-fh=> $fh, '-format' => 'Fasta'); - $temp->write_seq($input2); - close $fh; - undef $fh; - last SWITCH2; - } - - # Option for using psiblast's pre-alignment "jumpstart" feature - elsif ($input2->isa("Bio::SimpleAlign") && $executable eq 'blastpgp' ) { - # a bit of a lie since it won't be a fasta file - ($fh,$infilename2) = $self->io->tempfile(); - - # first we retrieve the "mask" that determines which residues should - # by scored according to their position and which should be scored - # using the non-position-specific matrices - my @mask = split("", shift ); # get mask - - # then we have to convert all the residues in every sequence to upper - # case at the positions that we want psiblast to use position specific - # scoring - foreach $seq ( $input2->each_seq() ) { - my @seqstringlist = split("",$seq->seq()); - for (my $i = 0; $i < scalar(@mask); $i++) { - unless ( $seqstringlist[$i] =~ /[a-zA-Z]/ ) {next} - $seqstringlist[$i] = $mask[$i] ? uc $seqstringlist[$i]: lc $seqstringlist[$i] ; - } - my $newseqstring = join("", @seqstringlist); - $seq->seq($newseqstring); - } - - # Now we need to write out the alignment to a file - # in the "psi format" which psiblast is expecting - $input2->map_chars('\.','-'); - $temp = Bio::AlignIO->new(-fh=> $fh, '-format' => 'psi'); - $temp->write_aln($input2); - close $fh; - undef $fh; - last SWITCH2; - } - - $infilename2 = 0; # Set error flag if you get here - } - - return ($infilename1, $infilename2); -} - -=head1 Bio::Tools::Run::WrapperBase methods - -=cut - -=head2 no_param_checks - - Title : no_param_checks - Usage : $obj->no_param_checks($newval) - Function: Boolean flag as to whether or not we should - trust the sanity checks for parameter values - Returns : value of no_param_checks - Args : newvalue (optional) - -=cut - -=head2 save_tempfiles - - Title : save_tempfiles - Usage : $obj->save_tempfiles($newval) - Function: - Returns : value of save_tempfiles - Args : newvalue (optional) - -=cut - -=head2 outfile_name - - Title : outfile_name - Usage : my $outfile = $tcoffee->outfile_name(); - Function: Get/Set the name of the output file for this run - (if you wanted to do something special) - Returns : string - Args : [optional] string to set value to - -=cut - -=head2 tempdir - - Title : tempdir - Usage : my $tmpdir = $self->tempdir(); - Function: Retrieve a temporary directory name (which is created) - Returns : string which is the name of the temporary directory - Args : none - -=cut - -=head2 cleanup - - Title : cleanup - Usage : $tcoffee->cleanup(); - Function: Will cleanup the tempdir directory after a PAML run - Returns : none - Args : none - -=cut - -=head2 io - - Title : io - Usage : $obj->io($newval) - Function: Gets a Bio::Root::IO object - Returns : Bio::Root::IO - Args : none - -=cut - -1; diff --git a/Bio/Tools/Run/StandAloneNCBIBlast.pm b/Bio/Tools/Run/StandAloneNCBIBlast.pm deleted file mode 100644 index 7dcdef9c6..000000000 --- a/Bio/Tools/Run/StandAloneNCBIBlast.pm +++ /dev/null @@ -1,538 +0,0 @@ -# -# BioPerl module for Bio::Tools::Run::StandAloneBlast -# -# Copyright Peter Schattner -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::Run::StandAloneNCBIBlast - Object for the local execution -of the NCBI BLAST program suite (blastall, blastpgp, bl2seq). With -experimental support for NCBI rpsblast. - -=head1 SYNOPSIS - - # Do not use directly; see Bio::Tools::Run::StandAloneBlast - -=head1 DESCRIPTION - -See Bio::Tools::Run::StandAloneBlast - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Peter Schattner - -Email schattner at alum.mit.edu - -=head1 MAINTAINER - Torsten Seemann - -Email torsten at infotech.monash.edu.au - -=head1 CONTRIBUTORS - -Sendu Bala bix@sendu.me.uk (reimplementation) - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -package Bio::Tools::Run::StandAloneNCBIBlast; - -use strict; -use warnings; - -use base qw(Bio::Tools::Run::StandAloneBlast); - -our $AUTOLOAD; -our $DEFAULTREADMETHOD = 'BLAST'; - -# If local BLAST databases are not stored in the standard -# /data directory, the variable BLASTDATADIR will need to be -# set explicitly -our $DATADIR = $Bio::Tools::Run::StandAloneBlast::DATADIR; - -our %GENERAL_PARAMS = (i => 'input', - o => 'outfile', - p => 'program', - d => 'database'); -our @BLASTALL_PARAMS = qw(A B C D E F G K L M O P Q R S W X Y Z a b e f l m q r t v w y z n); -our @BLASTALL_SWITCH = qw(I g J T U n V s); -our @BLASTPGP_PARAMS = qw(A B C E F G H I J K L M N O P Q R S T U W X Y Z a b c e f h j k l m q s t u v y z); -our @RPSBLAST_PARAMS = qw(F I J L N O P T U V X Y Z a b e l m v y z); -our @BL2SEQ_PARAMS = qw(A D E F G I J M S T U V W X Y a e g j m q r t); - -our @OTHER_PARAMS = qw(_READMETHOD); - - -=head2 new - - Title : new - Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new(); - Function: Builds a newBio::Tools::Run::StandAloneBlast object - Returns : Bio::Tools::Run::StandAloneBlast - Args : -quiet => boolean # make program execution quiet - -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull' - # the parsing method, case insensitive - -Essentially all BLAST parameters can be set via StandAloneBlast.pm. -Some of the most commonly used parameters are listed below. All -parameters have defaults and are optional except for -p in those programs that -have it. For a complete listing of settable parameters, run the relevant -executable BLAST program with the option "-" as in blastall - -Note that the input parameters (-i, -j, -input) should not be set directly by -you: this module sets them when you call one of the executable methods. - -Blastall - - -p Program Name [String] - Input should be one of "blastp", "blastn", "blastx", - "tblastn", or "tblastx". - -d Database [String] default = nr - The database specified must first be formatted with formatdb. - Multiple database names (bracketed by quotations) will be accepted. - An example would be -d "nr est" - -e Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -S Query strands to search against database (for blast[nx], and tblastx). 3 is both, 1 is top, 2 is bottom [Integer] - default = 3 - -Blastpgp (including Psiblast) - - -j is the maximum number of rounds (default 1; i.e., regular BLAST) - -h is the e-value threshold for including sequences in the - score matrix model (default 0.001) - -c is the "constant" used in the pseudocount formula specified in the paper (default 10) - -B Multiple alignment file for PSI-BLAST "jump start mode" Optional - -Q Output File for PSI-BLAST Matrix in ASCII [File Out] Optional - -rpsblast - - -d Database [String] default = (none - you must specify a database) - The database specified must first be formatted with formatdb. - Multiple database names (bracketed by quotations) will be accepted. - An example would be -d "Cog Smart" - -e Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -Bl2seq - - -p Program name: blastp, blastn, blastx. For blastx 1st argument should be nucleotide [String] - default = blastp - -o alignment output file [File Out] default = stdout - -e Expectation value (E) [Real] default = 10.0 - -S Query strands to search against database (blastn only). 3 is both, 1 is top, 2 is bottom [Integer] - default = 3 - -=cut - -sub new { - my ($caller, @args) = @_; - my $self = $caller->SUPER::new(@args); - - # StandAloneBlast is special in that "one can modify the name of - # the (ncbi) BLAST parameters as desired as long as the initial letter (and - # case) of the parameter are preserved". We handle this by truncating input - # args to their first char - my %args = @args; - @args = (); - while (my ($attr, $value) = each %args) { - $attr =~ s/^-//; - $attr = substr($attr, 0, 1) unless $attr =~ /^_/; - push(@args, $attr, $value); - } - - $self->_set_from_args(\@args, -methods => {(map { $_ => $GENERAL_PARAMS{$_} } keys %GENERAL_PARAMS), - (map { $_ => $_ } (@OTHER_PARAMS, - @BLASTALL_PARAMS, - @BLASTALL_SWITCH, - @BLASTPGP_PARAMS, - @RPSBLAST_PARAMS, - @BL2SEQ_PARAMS))}, - -code => { map { $_ => 'my $self = shift; - if (@_) { - my $value = shift; - if ($value && $value ne \'F\') { - $value = \'T\'; - } - else { - $value = \'F\'; - } - $self->{\'_\'.$method} = $value; - } - return $self->{\'_\'.$method} || return;' } @BLASTALL_SWITCH }, # these methods can take boolean or 'T' and 'F' - -create => 1, - -force => 1, - -case_sensitive => 1); - - my ($tfh, $tempfile) = $self->io->tempfile(); - my $outfile = $self->o || $self->outfile || $tempfile; - $self->o($outfile); - close($tfh); - - $self->_READMETHOD($DEFAULTREADMETHOD) unless $self->_READMETHOD; - - return $self; -} - -# StandAloneBlast is special in that "one can modify the name of -# the (ncbi) BLAST parameters as desired as long as the initial letter (and -# case) of the parameter are preserved". We handle this with AUTOLOAD -# redirecting to the automatically created methods from _set_from_args() ! -sub AUTOLOAD { - my $self = shift; - my $attr = $AUTOLOAD; - $attr =~ s/.*:://; - - my $orig = $attr; - - $attr = substr($attr, 0, 1); - - $self->can($attr) || $self->throw("Unallowed parameter: $orig !"); - - return $self->$attr(@_); -} - -=head2 blastall - - Title : blastall - Usage : $blast_report = $factory->blastall('t/testquery.fa'); - or - $input = Bio::Seq->new(-id=>"test query", - -seq=>"ACTACCCTTTAAATCAGTGGGGG"); - $blast_report = $factory->blastall($input); - or - $seq_array_ref = \@seq_array; - # where @seq_array is an array of Bio::Seq objects - $blast_report = $factory->blastall($seq_array_ref); - Returns : Reference to a Blast object containing the blast report. - Args : Name of a file or Bio::Seq object or an array of - Bio::Seq object containing the query sequence(s). - Throws an exception if argument is not either a string - (eg a filename) or a reference to a Bio::Seq object - (or to an array of Seq objects). If argument is string, - throws exception if file corresponding to string name can - not be found. - -=cut - -sub blastall { - my ($self, $input1) = @_; - $self->io->_io_cleanup(); - my $executable = 'blastall'; - - # Create input file pointer - my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!"); - $self->i($infilename1); - - my $blast_report = $self->_generic_local_blast($executable); -} - -=head2 blastpgp - - Title : blastpgp - Usage : $blast_report = $factory-> blastpgp('t/testquery.fa'); - or - $input = Bio::Seq->new(-id=>"test query", - -seq=>"ACTADDEEQQPPTCADEEQQQVVGG"); - $blast_report = $factory->blastpgp ($input); - or - $seq_array_ref = \@seq_array; - # where @seq_array is an array of Bio::Seq objects - $blast_report = $factory-> blastpgp(\@seq_array); - Returns : Reference to a Bio::SearchIO object containing the blast report - Args : Name of a file or Bio::Seq object. In psiblast jumpstart - mode two additional arguments are required: a SimpleAlign - object one of whose elements is the query and a "mask" to - determine how BLAST should select scoring matrices see - DESCRIPTION above for more details. - - Throws an exception if argument is not either a string - (eg a filename) or a reference to a Bio::Seq object - (or to an array of Seq objects). If argument is string, - throws exception if file corresponding to string name can - not be found. - Returns : Reference to Bio::SearchIO object containing the blast report. - -=cut - -sub blastpgp { - my $self = shift; - my $executable = 'blastpgp'; - my $input1 = shift; - my $input2 = shift; - # used by blastpgp's -B option to specify which - # residues are position aligned - my $mask = shift; - - my ($infilename1, $infilename2 ) = $self->_setinput($executable, - $input1, $input2, - $mask); - if (!$infilename1) {$self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!");} - $self->i($infilename1); # set file name of sequence to be blasted to inputfilename1 (-i param of blastpgp) - if ($input2) { - unless ($infilename2) {$self->throw("$input2 not SimpleAlign Object in pre-aligned psiblast\n");} - $self->B($infilename2); # set file name of partial alignment to inputfilename2 (-B param of blastpgp) - } - - my $blast_report = $self->_generic_local_blast($executable); -} - -=head2 rpsblast - - Title : rpsblast - Usage : $blast_report = $factory->rpsblast('t/testquery.fa'); - or - $input = Bio::Seq->new(-id=>"test query", - -seq=>"MVVLCRADDEEQQPPTCADEEQQQVVGG"); - $blast_report = $factory->rpsblast($input); - or - $seq_array_ref = \@seq_array; - # where @seq_array is an array of Bio::Seq objects - $blast_report = $factory->rpsblast(\@seq_array); - Args : Name of a file or Bio::Seq object or an array of - Bio::Seq object containing the query sequence(s). - Throws an exception if argument is not either a string - (eg a filename) or a reference to a Bio::Seq object - (or to an array of Seq objects). If argument is string, - throws exception if file corresponding to string name can - not be found. - Returns : Reference to a Bio::SearchIO object containing the blast report - -=cut - -sub rpsblast { - my ($self, $input1) = @_; - $self->io->_io_cleanup(); - my $executable = 'rpsblast'; - - # Create input file pointer - my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!"); - $self->i($infilename1); - - my $blast_report = $self->_generic_local_blast($executable); -} - -=head2 bl2seq - - Title : bl2seq - Usage : $factory-> bl2seq('t/seq1.fa', 't/seq2.fa'); - or - $input1 = Bio::Seq->new(-id=>"test query1", - -seq=>"ACTADDEEQQPPTCADEEQQQVVGG"); - $input2 = Bio::Seq->new(-id=>"test query2", - -seq=>"ACTADDEMMMMMMMDEEQQQVVGG"); - $blast_report = $factory->bl2seq ($input1, $input2); - Returns : Reference to a BPbl2seq object containing the blast report. - Args : Names of 2 files or 2 Bio::Seq objects containing the - sequences to be aligned by bl2seq. - - Throws an exception if argument is not either a pair of - strings (eg filenames) or references to Bio::Seq objects. - If arguments are strings, throws exception if files - corresponding to string names can not be found. - -=cut - -sub bl2seq { - my $self = shift; - my $executable = 'bl2seq'; - my $input1 = shift; - my $input2 = shift; - - # Create input file pointer - my ($infilename1, $infilename2 ) = $self->_setinput($executable, - $input1, $input2); - if (!$infilename1){$self->throw(" $input1 not Seq Object or file name!");} - if (!$infilename2){$self->throw("$input2 not Seq Object or file name!");} - - $self->i($infilename1); # set file name of first sequence to - # be aligned to inputfilename1 - # (-i param of bl2seq) - $self->j($infilename2); # set file name of first sequence to - # be aligned to inputfilename2 - # (-j param of bl2seq) - - my $blast_report = $self->_generic_local_blast($executable); -} - -=head2 _generic_local_blast - - Title : _generic_local_blast - Usage : internal function not called directly - Returns : Bio::SearchIO - Args : Reference to calling object and name of BLAST executable - -=cut - -sub _generic_local_blast { - my $self = shift; - my $executable = shift; - - # Create parameter string to pass to Blast program - my $param_string = $self->_setparams($executable); - - # run Blast - my $blast_report = $self->_runblast($executable, $param_string); -} - -=head2 _runblast - - Title : _runblast - Usage : Internal function, not to be called directly - Function: makes actual system call to Blast program - Example : - Returns : Report Bio::SearchIO object in the appropriate format - Args : Reference to calling object, name of BLAST executable, - and parameter string for executable - -=cut - -sub _runblast { - my ($self, $executable, $param_string) = @_; - my ($blast_obj, $exe); - if (! ($exe = $self->executable($executable)) ) { - $self->warn("cannot find path to $executable"); - return; - } - - # Use double quotes if executable path have empty spaces - if ($exe =~ m/ /) { - $exe = "\"$exe\""; - } - my $commandstring = $exe.$param_string; - - $self->debug("$commandstring\n"); - system($commandstring) && $self->throw("$executable call crashed: $? | $! | $commandstring\n"); - - # set significance cutoff to set expectation value or default value - # (may want to make this value vary for different executables) - my $signif = $self->e() || 1e-5; - - # get outputfilename - my $outfile = $self->o(); - - # this should allow any blast SearchIO parser (not just 'blast_pull' or 'blast', - # but 'blastxml' and 'blasttable'). Fall back to 'blast' if not stipulated. - my $method = $self->_READMETHOD; - if ($method =~ /^(?:blast|SearchIO)/i ) { - $method = 'blast' if $method =~ m{SearchIO}i; - $blast_obj = Bio::SearchIO->new(-file => $outfile, - -format => $method); - } - # should these be here? They have been deprecated... - elsif ($method =~ /BPlite/i ) { - if ($executable =~ /bl2seq/i) { - # Added program info so BPbl2seq can compute strand info - $self->throw("Use of Bio::Tools::BPbl2seq is deprecated; use Bio::SearchIO modules instead"); - } - elsif ($executable =~ /blastpgp/i && defined $self->j() && $self->j() > 1) { - $self->throw("Use of Bio::Tools::BPpsilite is deprecated; use Bio::SearchIO modules instead"); - } - elsif ($executable =~ /blastall|rpsblast/i) { - $self->throw("Use of Bio::Tools::BPlite is deprecated; use Bio::SearchIO modules instead"); - } - else { - $self->warn("Unrecognized executable $executable"); - } - } - else { - $self->warn("Unrecognized readmethod $method"); - } - - return $blast_obj; -} - -=head2 _setparams - - Title : _setparams - Usage : Internal function, not to be called directly - Function: Create parameter inputs for Blast program - Example : - Returns : parameter string to be passed to Blast - Args : Reference to calling object and name of BLAST executable - -=cut - -sub _setparams { - my ($self, $executable) = @_; - my ($attr, $value, @execparams); - - if ($executable eq 'blastall') { @execparams = (@BLASTALL_PARAMS, - @BLASTALL_SWITCH); } - elsif ($executable eq 'blastpgp') { @execparams = @BLASTPGP_PARAMS; } - elsif ($executable eq 'rpsblast') { @execparams = @RPSBLAST_PARAMS; } - elsif ($executable eq 'bl2seq' ) { @execparams = @BL2SEQ_PARAMS; } - - # we also have all the general params - push(@execparams, keys %GENERAL_PARAMS); - - my $database = $self->d; - if ($database && $executable ne 'bl2seq') { - # Need to prepend datadirectory to database name - my @dbs = split(/ /, $database); - for my $i (0..$#dbs) { - # (works with multiple databases) - if (! (-e $dbs[$i].".nin" || -e $dbs[$i].".pin") && - ! (-e $dbs[$i].".nal" || -e $dbs[$i].".pal") ) { - $dbs[$i] = File::Spec->catdir($DATADIR, $dbs[$i]); - } - } - $self->d('"'.join(" ", @dbs).'"'); - } - - # workaround for problems with shell metacharacters [bug 2707] - # simply quoting does not always work! - my $tmp = $self->o; - $self->o(quotemeta($tmp)) if ($tmp && $^O !~ /^MSWin/); - - my $param_string = $self->SUPER::_setparams(-params => [@execparams], - -dash => 1); - - $self->o($tmp) if ($tmp && $^O !~ /^MSWin/); - - $self->d($database) if $database; - - if ($self->quiet()) { - $param_string .= ' 2> '.File::Spec->devnull; - } - - return $param_string; -} - -1; diff --git a/Bio/Tools/Run/StandAloneWUBlast.pm b/Bio/Tools/Run/StandAloneWUBlast.pm deleted file mode 100644 index 273239998..000000000 --- a/Bio/Tools/Run/StandAloneWUBlast.pm +++ /dev/null @@ -1,299 +0,0 @@ -# -# BioPerl module for Bio::Tools::Run::StandAloneBlast -# -# Copyright Peter Schattner -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::Run::StandAloneWUBlast - Object for the local execution -of WU-Blast. - -=head1 SYNOPSIS - - # Do not use directly; use Bio::Tools::Run::StandAloneBlast - -=head1 DESCRIPTION - -See Bio::Tools::Run::StandAloneBlast - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to one -of the Bioperl mailing lists. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Peter Schattner - -Email schattner at alum.mit.edu - -=head1 MAINTAINER - Torsten Seemann - -Email torsten at infotech.monash.edu.au - -=head1 CONTRIBUTORS - -Sendu Bala bix@sendu.me.uk (reimplementation) - -=head1 APPENDIX - -The rest of the documentation details each of the object -methods. Internal methods are usually preceded with a _ - -=cut - -package Bio::Tools::Run::StandAloneWUBlast; - -use strict; - -use base qw(Bio::Tools::Run::StandAloneBlast); - -our $AUTOLOAD; -our $DEFAULTREADMETHOD = 'BLAST'; - -# If local BLAST databases are not stored in the standard -# /data directory, the variable BLASTDATADIR will need to be -# set explicitly -our $DATADIR = $Bio::Tools::Run::StandAloneBlast::DATADIR; - -our %GENERAL_PARAMS = (i => 'input', - o => 'outfile', - p => 'program', - d => 'database'); -our @WUBLAST_PARAMS = qw(e s e2 s2 w t x m y z l k h v b q r - matrix filter wordmask filter maskextra hitdist wink ctxfactor gape - gaps gape2 gaps2 gapw gapx olf golf olmax golmax gapdecayrate - topcombon topcomboe sumstatsmethod hspsepqmax hspsepsmax gapsepqmax - gapsepsmax altscore hspmax gspmax qoffset nwstart nwlen qrecmin qrecmax - dbrecmin dbrecmax vdbdescmax dbchunks sort_by_pvalue cpus putenv - getenv progress); -our @WUBLAST_SWITCH = qw(kap sump poissonp lcfilter lcmask echofilter - stats nogap gapall pingpong nosegs postsw span2 span1 span prune - consistency links ucdb gi noseqs qtype qres sort_by_pvalue - sort_by_count sort_by_highscore sort_by_totalscore - sort_by_subjectlength mmio nonnegok novalidctxok shortqueryok notes - warnings errors endputenv getenv endgetenv abortonerror abortonfatal); - -our @OTHER_PARAMS = qw(_READMETHOD); - - -=head2 new - - Title : new - Usage : my $obj = Bio::Tools::Run::StandAloneBlast->new(); - Function: Builds a newBio::Tools::Run::StandAloneBlast object - Returns : Bio::Tools::Run::StandAloneBlast - Args : -quiet => boolean # make program execution quiet - -_READMETHOD => 'BLAST' (default, synonym 'SearchIO') || 'blast_pull' - # the parsing method, case insensitive - -Essentially all BLAST parameters can be set via StandAloneBlast.pm. -Some of the most commonly used parameters are listed below. All -parameters have defaults and are optional except for -p. - - -p Program Name [String] - Input should be one of "wublastp", "wublastn", "wublastx", - "wutblastn", or "wutblastx". - -d Database [String] default = nr - The database specified must first be formatted with xdformat. - -E Expectation value (E) [Real] default = 10.0 - -o BLAST report Output File [File Out] Optional, - default = ./blastreport.out ; set by StandAloneBlast.pm - -=cut - -sub new { - my ($caller, @args) = @_; - my $self = $caller->SUPER::new(@args); - - $self->_set_from_args(\@args, -methods => {(map { $_ => $GENERAL_PARAMS{$_} } keys %GENERAL_PARAMS), - (map { $_ => $_ } (@OTHER_PARAMS, - @WUBLAST_PARAMS, - @WUBLAST_SWITCH))}, - -create => 1, - -force => 1); - - my ($tfh, $tempfile) = $self->io->tempfile(); - my $outfile = $self->o || $self->outfile || $tempfile; - $self->o($outfile); - close($tfh); - - $self->_READMETHOD($DEFAULTREADMETHOD) unless $self->_READMETHOD; - - return $self; -} - -# We let get/setter method names be case-insensitve -sub AUTOLOAD { - my $self = shift; - my $attr = $AUTOLOAD; - $attr =~ s/.*:://; - - my $orig = $attr; - - $attr = lc($attr); - - $self->can($attr) || $self->throw("Unallowed parameter: $orig !"); - - return $self->$attr(@_); -} - -=head2 wublast - - Title : wublast - Usage : $blast_report = $factory->wublast('t/testquery.fa'); - or - $input = Bio::Seq->new(-id=>"test query", - -seq=>"ACTACCCTTTAAATCAGTGGGGG"); - $blast_report = $factory->wublast($input); - or - $seq_array_ref = \@seq_array; # where @seq_array is an array of Bio::Seq objects - $blast_report = $factory->wublast(\@seq_array); - Returns : Reference to a Blast object - Args : Name of a file or Bio::Seq object or an array of - Bio::Seq object containing the query sequence(s). - Throws an exception if argument is not either a string - (eg a filename) or a reference to a Bio::Seq object - (or to an array of Seq objects). If argument is string, - throws exception if file corresponding to string name can - not be found. - -=cut - -sub wublast { - my ($self, $input1) = @_; - $self->io->_io_cleanup(); - my $executable = 'wublast'; - - # Create input file pointer - my $infilename1 = $self->_setinput($executable, $input1) || $self->throw("$input1 not Bio::Seq object or array of Bio::Seq objects or file name!"); - $self->i($infilename1); - - my $blast_report = $self->_generic_local_wublast($executable); -} - -=head2 _generic_local_wublast - - Title : _generic_local_wublast - Usage : internal function not called directly - Returns : Blast object - Args : Reference to calling object and name of BLAST executable - -=cut - -sub _generic_local_wublast { - my $self = shift; - my $executable = shift; - - # Create parameter string to pass to Blast program - my $param_string = $self->_setparams($executable); - $param_string = " ".$self->database." ".$self->input." ".$param_string; - - # run Blast - my $blast_report = $self->_runwublast($executable, $param_string); -} - -=head2 _runwublast - - Title : _runwublast - Usage : Internal function, not to be called directly - Function: makes actual system call to WU-Blast program - Example : - Returns : Report Blast object - Args : Reference to calling object, name of BLAST executable, - and parameter string for executable - -=cut - -sub _runwublast { - my ($self, $executable, $param_string) = @_; - my ($blast_obj, $exe); - if (! ($exe = $self->executable($self->p))){ - $self->warn("cannot find path to $executable"); - return; - } - - # Use double quotes if executable path have empty spaces - if ($exe =~ m/ /) { - $exe = "\"$exe\""; - } - my $commandstring = $exe.$param_string; - - $self->debug("$commandstring\n"); - system($commandstring) && $self->throw("$executable call crashed: $? | $! | $commandstring\n"); - - # get outputfilename - my $outfile = $self->o(); - $blast_obj = Bio::SearchIO->new(-file => $outfile, -format => 'blast'); - - return $blast_obj; -} - -=head2 _setparams - - Title : _setparams - Usage : Internal function, not to be called directly - Function: Create parameter inputs for Blast program - Example : - Returns : parameter string to be passed to Blast - Args : Reference to calling object and name of BLAST executable - -=cut - -sub _setparams { - my ($self, $executable) = @_; - my ($attr, $value, @execparams); - - @execparams = @WUBLAST_PARAMS; - - # of the general params, wublast only takes outfile at - # this stage (we add in program, input and database manually elsewhere) - push(@execparams, 'o'); - - # workaround for problems with shell metacharacters [bug 2707] - # simply quoting does not always work! - # Fixed so Windows files are not quotemeta'd - my $tmp = $self->o; - $self->o(quotemeta($tmp)) if ($tmp && $^O !~ /^MSWin/); - - my $param_string = $self->SUPER::_setparams(-params => [@execparams], - -switches => \@WUBLAST_SWITCH, - -dash => 1); - - $self->o($tmp) if ($tmp && $^O !~ /^MSWin/); - - if ($self->quiet()) { - $param_string .= ' 2> '.File::Spec->devnull; - } - - return $param_string; -} - -1; diff --git a/Bio/Tools/Run/WrapperBase.pm b/Bio/Tools/Run/WrapperBase.pm deleted file mode 100644 index 74efe37fe..000000000 --- a/Bio/Tools/Run/WrapperBase.pm +++ /dev/null @@ -1,511 +0,0 @@ -# -# BioPerl module for Bio::Tools::Run::WrapperBase -# -# Please direct questions and support issues to -# -# Cared for by Jason Stajich -# -# Copyright Jason Stajich -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::Run::WrapperBase - A Base object for wrappers around executables - -=head1 SYNOPSIS - - # do not use this object directly, it provides the following methods - # for its subclasses - - my $errstr = $obj->error_string(); - my $exe = $obj->executable(); - $obj->save_tempfiles($booleanflag) - my $outfile= $obj->outfile_name(); - my $tempdir= $obj->tempdir(); # get a temporary dir for executing - my $io = $obj->io; # Bio::Root::IO object - my $cleanup= $obj->cleanup(); # remove tempfiles - - $obj->run({-arg1 => $value}); - -=head1 DESCRIPTION - -This is a basic module from which to build executable wrapper modules. -It has some basic methods to help when implementing new modules. - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion - http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -I - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track of -the bugs and their resolution. Bug reports can be submitted via the -web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Jason Stajich - -Email jason-at-bioperl.org - -=head1 CONTRIBUTORS - -Sendu Bala, bix@sendu.me.uk - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - - -# Let the code begin... - - -package Bio::Tools::Run::WrapperBase; -use strict; - -# Object preamble - inherits from Bio::Root::Root - -use base qw(Bio::Root::Root); - -use File::Spec; -use File::Path qw(); # don't import anything - -=head2 run - - Title : run - Usage : $wrapper->run({ARGS HERE}); - Function: Support generic running with args passed in - as a hashref - Returns : Depends on the implementation, status OR data - Args : hashref of named arguments - - -=cut - -sub run { - my ($self,@args) = @_; - $self->throw_not_implemented(); -} - - -=head2 error_string - - Title : error_string - Usage : $obj->error_string($newval) - Function: Where the output from the last analysis run is stored. - Returns : value of error_string - Args : newvalue (optional) - - -=cut - -sub error_string{ - my ($self,$value) = @_; - if( defined $value) { - $self->{'_error_string'} = $value; - } - return $self->{'_error_string'} || ''; -} - -=head2 arguments - - Title : arguments - Usage : $obj->arguments($newval) - Function: Commandline parameters - Returns : value of arguments - Args : newvalue (optional) - - -=cut - -sub arguments { - my ($self,$value) = @_; - if(defined $value) { - $self->{'_arguments'} = $value; - } - return $self->{'_arguments'} || ''; -} - - -=head2 no_param_checks - - Title : no_param_checks - Usage : $obj->no_param_checks($newval) - Function: Boolean flag as to whether or not we should - trust the sanity checks for parameter values - Returns : value of no_param_checks - Args : newvalue (optional) - - -=cut - -sub no_param_checks{ - my ($self,$value) = @_; - if( defined $value || ! defined $self->{'no_param_checks'} ) { - $value = 0 unless defined $value; - $self->{'no_param_checks'} = $value; - } - return $self->{'no_param_checks'}; -} - -=head2 save_tempfiles - - Title : save_tempfiles - Usage : $obj->save_tempfiles($newval) - Function: Get/set the choice of if tempfiles in the temp dir (see tempdir()) - are kept or cleaned up. Default is '0', ie. delete temp files. - NB: This must be set to the desired value PRIOR to first creating - a temp dir with tempdir(). Any attempt to set this after tempdir creation will get a warning. - Returns : boolean - Args : none to get, boolean to set - -=cut - -sub save_tempfiles{ - my $self = shift; - my @args = @_; - if (($args[0]) && (exists ($self->{'_tmpdir'}))) { - $self->warn ("Tempdir already created; setting save_tempfiles will not affect cleanup behavior."); - } - return $self->io->save_tempfiles(@_); -} - -=head2 outfile_name - - Title : outfile_name - Usage : my $outfile = $wrapper->outfile_name(); - Function: Get/Set the name of the output file for this run - (if you wanted to do something special) - Returns : string - Args : [optional] string to set value to - - -=cut - -sub outfile_name{ - my ($self,$nm) = @_; - if( defined $nm || ! defined $self->{'_outfilename'} ) { - $nm = 'mlc' unless defined $nm; - $self->{'_outfilename'} = $nm; - } - return $self->{'_outfilename'}; -} - - -=head2 tempdir - - Title : tempdir - Usage : my $tmpdir = $self->tempdir(); - Function: Retrieve a temporary directory name (which is created) - Returns : string which is the name of the temporary directory - Args : none - - -=cut - -sub tempdir{ - my ($self) = shift; - - $self->{'_tmpdir'} = shift if @_; - unless( $self->{'_tmpdir'} ) { - $self->{'_tmpdir'} = $self->io->tempdir(CLEANUP => ! $self->save_tempfiles ); - } - unless( -d $self->{'_tmpdir'} ) { - mkdir($self->{'_tmpdir'},0777); - } - return $self->{'_tmpdir'}; -} - -=head2 cleanup - - Title : cleanup - Usage : $wrapper->cleanup(); - Function: Will cleanup the tempdir directory - Returns : none - Args : none - - -=cut - -sub cleanup{ - my ($self) = @_; - $self->io->_io_cleanup(); - if( defined $self->{'_tmpdir'} && -d $self->{'_tmpdir'} ) { - my $verbose = ($self->verbose >= 1) ? 1 : 0; - File::Path::rmtree( $self->{'_tmpdir'}, $verbose); - } -} - -=head2 io - - Title : io - Usage : $obj->io($newval) - Function: Gets a Bio::Root::IO object - Returns : Bio::Root::IO object - Args : none - - -=cut - -sub io{ - my ($self) = @_; - unless( defined $self->{'io'} ) { - $self->{'io'} = Bio::Root::IO->new(-verbose => $self->verbose); - } - return $self->{'io'}; -} - -=head2 version - - Title : version - Usage : $version = $wrapper->version() - Function: Returns the program version (if available) - Returns : string representing version of the program - Args : [Optional] value to (re)set version string - - -=cut - -sub version{ - my ($self,@args) = @_; - return; -} - -=head2 executable - - Title : executable - Usage : my $exe = $factory->executable(); - Function: Finds the full path to the executable - Returns : string representing the full path to the exe - Args : [optional] name of executable to set path to - [optional] boolean flag whether or not warn when exe is not found - -=cut - -sub executable { - my ($self, $exe, $warn) = @_; - - if (defined $exe) { - $self->{'_pathtoexe'} = $exe; - } - - unless( defined $self->{'_pathtoexe'} ) { - my $prog_path = $self->program_path; - - if ($prog_path) { - if (-f $prog_path && -x $prog_path) { - $self->{'_pathtoexe'} = $prog_path; - } - elsif ($self->program_dir) { - $self->warn("executable not found in $prog_path, trying system path...") if $warn; - } - } - unless ($self->{'_pathtoexe'}) { - my $exe; - if ( $exe = $self->io->exists_exe($self->program_name) ) { - $self->{'_pathtoexe'} = $exe; - } - else { - $self->warn("Cannot find executable for ".$self->program_name) if $warn; - $self->{'_pathtoexe'} = undef; - } - } - } - - # bail if we never found the executable - unless ( defined $self->{'_pathtoexe'}) { - $self->throw("Cannot find executable for ".$self->program_name . - ". path=\"".$self->program_path."\""); - } - return $self->{'_pathtoexe'}; -} - -=head2 program_path - - Title : program_path - Usage : my $path = $factory->program_path(); - Function: Builds path for executable - Returns : string representing the full path to the exe - Args : none - -=cut - -sub program_path { - my ($self) = @_; - my @path; - push @path, $self->program_dir if $self->program_dir; - push @path, $self->program_name.($^O =~ /mswin/i ? '.exe' : '') if $self->program_name; - return File::Spec->catfile(@path); -} - -=head2 program_dir - - Title : program_dir - Usage : my $dir = $factory->program_dir(); - Function: Abstract get method for dir of program. To be implemented - by wrapper. - Returns : string representing program directory - Args : none - -=cut - -sub program_dir { - my ($self) = @_; - $self->throw_not_implemented(); -} - -=head2 program_name - - Title : program_name - Usage : my $name = $factory->program_name(); - Function: Abstract get method for name of program. To be implemented - by wrapper. - Returns : string representing program name - Args : none - -=cut - -sub program_name { - my ($self) = @_; - $self->throw_not_implemented(); -} - -=head2 quiet - - Title : quiet - Usage : $factory->quiet(1); - if ($factory->quiet()) { ... } - Function: Get/set the quiet state. Can be used by wrappers to control if - program output is printed to the console or not. - Returns : boolean - Args : none to get, boolean to set - -=cut - -sub quiet { - my $self = shift; - if (@_) { $self->{quiet} = shift } - return $self->{quiet} || 0; -} - -=head2 _setparams() - - Title : _setparams - Usage : $params = $self->_setparams(-params => [qw(window evalue_cutoff)]) - Function: For internal use by wrapper modules to build parameter strings - suitable for sending to the program being wrapped. For each method - name supplied, calls the method and adds the method name (as modified - by optional things) along with its value (unless a switch) to the - parameter string - Example : $params = $self->_setparams(-params => [qw(window evalue_cutoff)], - -switches => [qw(simple large all)], - -double_dash => 1, - -underscore_to_dash => 1); - If window() and simple() had not been previously called, but - evalue_cutoff(0.5), large(1) and all(0) had been called, $params - would be ' --evalue-cutoff 0.5 --large' - Returns : parameter string - Args : -params => [] or {} # array ref of method names to call, - or hash ref where keys are method names and - values are how those names should be output - in the params string - -switches => [] or {}# as for -params, but no value is printed for - these methods - -join => string # define how parameters and their values are - joined, default ' '. (eg. could be '=' for - param=value) - -lc => boolean # lc() method names prior to output in string - -dash => boolean # prefix all method names with a single dash - -double_dash => bool # prefix all method names with a double dash - -mixed_dash => bool # prefix single-character method names with a - # single dash, and multi-character method names - # with a double-dash - -underscore_to_dash => boolean # convert all underscores in method - names to dashes - -=cut - -sub _setparams { - my ($self, @args) = @_; - - my ($params, $switches, $join, $lc, $d, $dd, $md, $utd) = - $self->_rearrange([qw(PARAMS - SWITCHES - JOIN - LC - DASH - DOUBLE_DASH - MIXED_DASH - UNDERSCORE_TO_DASH)], @args); - $self->throw('at least one of -params or -switches is required') unless ($params || $switches); - $self->throw("-dash, -double_dash and -mixed_dash are mutually exclusive") if (defined($d) + defined($dd) + defined($md) > 1); - $join ||= ' '; - - my %params = ref($params) eq 'HASH' ? %{$params} : map { $_ => $_ } @{$params}; - my %switches = ref($switches) eq 'HASH' ? %{$switches} : map { $_ => $_ } @{$switches}; - - my $param_string = ''; - for my $hash_ref (\%params, \%switches) { - while (my ($method, $method_out) = each %{$hash_ref}) { - my $value = $self->$method(); - next unless (defined $value); - next if (exists $switches{$method} && ! $value); - - $method_out = lc($method_out) if $lc; - my $method_length = length($method_out) if $md; - $method_out = '-'.$method_out if ($d || ($md && ($method_length == 1))); - $method_out = '--'.$method_out if ($dd || ($md && ($method_length > 1))); - $method_out =~ s/_/-/g if $utd; - - if ( exists $params{$method} ) { - # if value are quoted with " or ', re-quote it - if ( $value =~ m{^[\'\"]+(.+)[\'\"]+$} ) { - $value = '"'. $1 . '"'; - } - # quote values that contain spaces - elsif ( $value =~ m{\s+} ) { - $value = '"'. $value . '"'; - } - } - - $param_string .= ' '.$method_out.(exists $switches{$method} ? '' : $join.$value); - } - } - - return $param_string; -} - -sub DESTROY { - my $self= shift; - unless ( $self->save_tempfiles ) { - $self->cleanup(); - } - $self->SUPER::DESTROY(); -} - - -1; diff --git a/Bio/Tools/Run/WrapperBase/CommandExts.pm b/Bio/Tools/Run/WrapperBase/CommandExts.pm deleted file mode 100644 index dc5a76151..000000000 --- a/Bio/Tools/Run/WrapperBase/CommandExts.pm +++ /dev/null @@ -1,1405 +0,0 @@ -# -# BioPerl module for Bio::Tools::Run::WrapperBase::CommandExts -# -# Please direct questions and support issues to -# -# Cared for by Mark A. Jensen -# -# Copyright Mark A. Jensen -# -# You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code - -=head1 NAME - -Bio::Tools::Run::WrapperBase::CommandExts - Extensions to WrapperBase for handling programs with commands *ALPHA* - -=head1 SYNOPSIS - -Devs, see L. -Users, see L. - -=head1 DESCRIPTION - -This is a developer-focused experimental module. The main idea is to -extend L to make it relatively easy to -create run wrappers around I of related programs, like -C or C. - -Some definitions: - -=over - -=item * program - -The program is the command-line frontend application. C, for example, is run from the command line as follows: - - $ samtools view -bS in.bam > out.sam - $ samtools faidx - -=item * command - -The command is the specific component of a suite run by executing the -program. In the example above, C and C are commands. - -=item * command prefix - -The command prefix is an abbreviation of the command name used -internally by C method, and sometimes by the user of the -factory for specifying command line parameters to subcommands of -composite commands. - -=item * composite command - -A composite command is a pipeline or script representing a series of -separate executions of different commands. Composite commands can be -specified by configuring C appropriately; the composite -command can be run by the user from a factory in the same way as -ordinary commands. - -=item * options, parameters, switches and filespecs - -An option is any command-line option; i.e., a specification set off by -a command-line by a specifier (like C<-v> or C<--outfile>). Parameters -are command-line options that accept a value (C<-title mydb>); -switches are boolean flags (C<--no-filter>). Filespecs are barewords -at the end of the command line that usually indicate input or output -files. In this module, this includes files that capture STDIN, STDOUT, -or STDERR via redirection. - -=item * pseudo-program - -A "pseudo-program" is a way to refer to a collection of related -applications that are run independently from the command line, rather -than via a frontend program. The C suite of programs is an -example: C, C, etc. C can be -configured to create a single factory for a suite of related, -independent programs that treats each independent program as a -"pseudo-program" command. - -=back - -This module essentially adds the non-assembler-specific wrapper -machinery of fangly's L to the -L namespace, adding the general -command-handling capability of L. It creates run -factories that are automatically Bio::ParameterBaseI compliant, -meaning that C, C, -C, C, and C -are available. - -=head1 DEVELOPER INTERFACE - -C is currently set up to read particular package globals -which define the program, the commands available, command-line options -for those commands, and human-readable aliases for those options. - -The easiest way to use C is probably to create two modules: - - Bio::Tools::Run::YourRunPkg - Bio::Tools::Run::YourRunPkg::Config - -The package globals should be defined in the C module, and the -run package itself should begin with the following mantra: - - use YourRunPkg::Config; - use Bio::Tools::Run::WrapperBase; - use Bio::Tools::Run::WrapperBase::CommandExts; - sub new { - my $class = shift; - my @args = @_; - my $self = $class->SUPER::new(@args); - ... - return $self; - } - -The following globals can/should be defined in the C module: - - $program_name - $program_dir - $use_dash - $join - @program_commands - %command_prefixes - @program_params - @program_switches - %param_translation - %composite_commands - %command_files - -See L for detailed descriptions. - -The work of creating a run wrapper with C lies mainly in -setting up the globals. The key methods for the developer interface are: - -=over - -=item * program_dir($path_to_programs) - -Set this to point the factory to the executables. - -=item * _run(@file_args) - -Runs an instantiated factory with the given file args. Use in the - C method override. - -=item * _create_factory_set() - -Returns a hash of instantiated factories for each true command from a -composite command factory. The hash keys are the true command names, so -you could do - - $cmds = $composite_fac->_create_factory_set; - for (@true_commands) { - $cmds->{$_}->_run(@file_args); - } - -=item * executables($cmd,[$fullpath]) - -For pseudo-programs, this gets/sets the full path to the executable of -the true program corresponding to the command C<$cmd>. - -=back - -=head2 Implementing Composite Commands - -=head2 Implementing Pseudo-programs - -To indicate that a package wraps disparate programs under a single pseudo program, use an asterisk before the program name: - - package Bio::Tools::Run::YourPkg::Config; - ... - our $program_name = '*blast+'; - -and C<_run> will know what to do. Specify the rest of the globals as -if the desired programs were commands. Use the basename of the -programs for the command names. - -If all the programs can be found in a single directory, just specify -that directory in C. If not, use C to set the paths to each program explicitly: - - foreach (keys %cmdpaths) { - $self->executables($_, $cmdpaths{$_}); - } - -=head2 Config Globals - -Here is an example config file. Further details in prose are below. - - package Dummy::Config; - use strict; - use warnings; - no warnings qw(qw); - use Exporter; - our (@ISA, @EXPORT, @EXPORT_OK); - push @ISA, 'Exporter'; - @EXPORT = qw( - $program_name - $program_dir - $use_dash - $join - @program_commands - %command_prefixes - @program_params - @program_switches - %param_translation - %command_files - %composite_commands - ); - - our $program_name = '*flurb'; - our $program_dir = 'C:\cygwin\usr\local\bin'; - our $use_dash = 'mixed'; - our $join = ' '; - - our @program_commands = qw( - rpsblast - find - goob - blorb - multiglob - ); - - our %command_prefixes = ( - blastp => 'blp', - tblastn => 'tbn', - goob => 'g', - blorb => 'b', - multiglob => 'm' - ); - - our @program_params = qw( - command - g|narf - g|schlurb - b|scroob - b|frelb - m|trud - ); - - our @program_switches = qw( - g|freen - b|klep - ); - - our %param_translation = ( - 'g|narf' => 'n', - 'g|schlurb' => 'schlurb', - 'g|freen' => 'f', - 'b|scroob' => 's', - 'b|frelb' => 'frelb' - ); - - our %command_files = ( - 'goob' => [qw( fas faq )], - ); - - our %composite_commands = ( - 'multiglob' => [qw( blorb goob )] - ); - 1; - -C<$use_dash> can be one of C, C, or C. See L. - -There is a syntax for the C<%command_files> specification. The token -matching C<[a-zA-Z0-9_]+> in each element of each arrayref becomes the -named filespec parameter for the C<_run()> method in the wrapper -class. Additional symbols surrounding this token indicate how this -argument should be handled. Some examples: - - >out : stdout is redirected into the file - specified by (..., -out => $file,... ) - $file,... ) - 2>log : stderr is redirected into the file - specified by (..., -log => $file,... ) - #opt : this filespec argument is optional - (no throw if -opt => $option is missing) - 2>#log: if -log is not specified in the arguments, the stderr() - method will capture stderr - *lst : this filespec can take multiple arguments, - specify using an arrayref (..., -lst => [$file1, $file2], ...) - *#lst : an optional list - -The tokens above are examples; they can be anything matching the above regexp. - -=head1 USER INTERFACE - -Using a wrapper created with C: - -=over - -=item * Getting a list of available commands, parameters, and filespecs: - -To get a list of commands, simply: - - @commands = Bio::Tools::Run::ThePkg->available_commands; - -The wrapper will generally have human-readable aliases for each of the -command-line options for the wrapped program and commands. To obtain a -list of the parameters and switches available for a particular -command, do - - $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb' ); - @params = $factory->available_parameters('params'); - @switches = $factory->available_parameters('switches'); - @filespec = $factory->available_parameters('filespec'); - @filespec = $factory->filespec; # alias - -=item * Create factories - -The factory is a handle on the program and command you wish to -run. Create a factory using C to set command-line parameters: - - $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb', - -freen => 1, - -furschlugginer => 'vreeble' ); - -A shorthand for this is: - - $factory = Bio::Tools::Run::ThePkg->new_glurb( - -freen => 1, - -furschlugginer => 'vreeble' ); - -=item * Running programs - -To run the program, use the C method, providing filespecs as arguments - - $factory = Bio::Tools::Run::ThePkg->new_assemble( -min_qual => 63 ); - $factory->run( -faq1 => 'read1.fq', -faq2 => 'read2.fq', - -ref => 'refseq.fas', -out => 'new.sam' ); - # do another - $factory->run( -faq1 => 'read-old1.fq', -faq2 => 'read-old2.fq', - -ref => 'refseq.fas', -out => 'old.sam' ); - -Messages on STDOUT and STDERR are dumped into their respective attributes: - - $stdout = $factory->stdout; - $stderr = $factory->stderr; - -unless STDOUT and/or STDERR are part of the named files in the filespec. - -=item * Setting/getting/resetting/polling parameters. - -A C-based factory is always L -compliant. That means that you may set, get, and reset parameters -using C, C, and -C. You can ask whether parameters have changed since -they were last accessed by using the predicate -C. See L for more details. - -Once set, parameters become attributes of the factory. Thus, you can get their values as follows: - - if ($factory->freen) { - $furs = $factory->furshlugginer; - #... - } - -=back - -=head1 FEEDBACK - -=head2 Mailing Lists - -User feedback is an integral part of the evolution of this and other -Bioperl modules. Send your comments and suggestions preferably to -the Bioperl mailing list. Your participation is much appreciated. - - bioperl-l@bioperl.org - General discussion -http://bioperl.org/wiki/Mailing_lists - About the mailing lists - -=head2 Support - -Please direct usage questions or support issues to the mailing list: - -L - -rather than to the module maintainer directly. Many experienced and -reponsive experts will be able look at the problem and quickly -address it. Please include a thorough description of the problem -with code and data examples if at all possible. - -=head2 Reporting Bugs - -Report bugs to the Bioperl bug tracking system to help us keep track -of the bugs and their resolution. Bug reports can be submitted via -the web: - - https://github.com/bioperl/bioperl-live/issues - -=head1 AUTHOR - Mark A. Jensen - -Email maj -at- fortinbras -dot- us - -Describe contact details here - -=head1 CONTRIBUTORS - -Dan Kortschak ( dan -dot- kortschak -at- adelaide -dot- edu -dot- au ) - -=head1 APPENDIX - -The rest of the documentation details each of the object methods. -Internal methods are usually preceded with a _ - -=cut - -# Let the code begin... - -package Bio::Tools::Run::WrapperBase; # need these methods in WrapperBase/maj -use strict; -use warnings; -no warnings qw(redefine); - -use Bio::Root::Root; -use File::Spec; -use IPC::Run; -use base qw(Bio::Root::Root Bio::ParameterBaseI); - -our $AUTOLOAD; - -=head2 new() - - Title : new - Usage : - Function: constructor for WrapperBase::CommandExts ; - correctly binds configuration variables - to the WrapperBase object - Returns : Bio::Tools::Run::WrapperBase object with command extensions - Args : - Note : this method subsumes the old _register_program_commands and - _set_program_options, leaving out the assembler-specific - parms ($qual_param and out_type()) - -=cut - -sub new { - my ($class, @args) = @_; - my $self = bless ({}, $class); - # pull in *copies* of the Config variables from the caller namespace: - my ($pkg, @goob) = caller(); - my ($commands, - $prefixes, - $params, - $switches, - $translation, - $use_dash, - $join, - $name, - $dir, - $composite_commands, - $files); - for (qw( @program_commands - %command_prefixes - @program_params - @program_switches - %param_translation - $use_dash - $join - $program_name - $program_dir - %composite_commands - %command_files ) ) { - my ($sigil, $var) = m/(.)(.*)/; - my $qualvar = "${sigil}${pkg}::${var}"; - for ($sigil) { - /\@/ && do { $qualvar = "\[$qualvar\]" }; - /\%/ && do { $qualvar = "\{$qualvar\}" }; - } - my $locvar = "\$${var}"; - $locvar =~ s/program_|command_|param_//g; - eval "$locvar = $qualvar"; - } - # set up the info registry hash - my %registry; - if ($composite_commands) { - $self->_register_composite_commands($composite_commands, - $params, - $switches, - $prefixes); - } - @registry{qw( _commands _prefixes _files - _params _switches _translation - _composite_commands )} = - ($commands, $prefixes, $files, - $params, $switches, $translation, - $composite_commands); - $self->{_options} = \%registry; - if (not defined $use_dash) { - $self->{'_options'}->{'_dash'} = 1; - } else { - $self->{'_options'}->{'_dash'} = $use_dash; - } - if (not defined $join) { - $self->{'_options'}->{'_join'} = ' '; - } else { - $self->{'_options'}->{'_join'} = $join; - } - if ($name =~ /^\*/) { - $self->is_pseudo(1); - $name =~ s/^\*//; - } - $self->program_name($name) if not defined $self->program_name(); - $self->program_dir($dir) if not defined $self->program_dir(); - $self->set_parameters(@args); - $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI - return $self; -} - -=head2 program_name - - Title : program_name - Usage : $factory->program_name($name) - Function: get/set the executable name - Returns: string - Args : string - -=cut - -sub program_name { - my ($self, $val) = @_; - $self->{'_program_name'} = $val if $val; - return $self->{'_program_name'}; -} - -=head2 program_dir - - Title : program_dir - Usage : $factory->program_dir($dir) - Function: get/set the program dir - Returns: string - Args : string - -=cut - -sub program_dir { - my ($self, $val) = @_; - $self->{'_program_dir'} = $val if $val; - return $self->{'_program_dir'}; -} - -=head2 _register_program_commands() - - Title : _register_program_commands - Usage : $factory->_register_program_commands( \@commands, \%prefixes ) - Function: Register the commands a program accepts (for programs that act - as frontends for a set of commands, each command having its own - set of params/switches) - Returns : true on success - Args : arrayref to a list of commands (scalar strings), - hashref to a translation table of the form - { $prefix1 => $command1, ... } [optional] - Note : To implement a program with this kind of calling structure, - include a parameter called 'command' in the - @program_params global - Note : The translation table is used to associate parameters and - switches specified in _set_program_options with the correct - program command. In the globals @program_params and - @program_switches, specify elements as 'prefix1|param' and - 'prefix1|switch', etc. - -=cut - -=head2 _set_program_options - - Title : _set_program_options - Usage : $factory->_set_program_options( \@ args ); - Function: Register the parameters and flags that an assembler takes. - Returns : 1 for success - Args : - arguments passed by the user - - parameters that the program accepts, optional (default: none) - - switches that the program accepts, optional (default: none) - - parameter translation, optional (default: no translation occurs) - - dash option for the program parameters, [1|single|double|mixed], - optional (default: yes, use single dashes only) - - join, optional (default: ' ') - -=cut - -=head2 _translate_params - - Title : _translate_params - Usage : @options = @{$assembler->_translate_params( )}; - Function: Translate the Bioperl arguments into the arguments to pass to the - program on the command line - Returns : Arrayref of arguments - Args : none - -=cut - -sub _translate_params { - my ($self) = @_; - # Get option string - my ($params, $switches, $join, $dash, $translat) = - @{$self->{_options}}{qw(_params _switches _join _dash _translation)}; - - # access the multiple dash choices of _setparams... - my @dash_args; - $dash ||= 1; # default as advertised - for ($dash) { - $_ eq '1' && do { - @dash_args = ( -dash => 1 ); - last; - }; - /^s/ && do { #single dash only - @dash_args = ( -dash => 1); - last; - }; - /^d/ && do { # double dash only - @dash_args = ( -double_dash => 1); - last; - }; - /^m/ && do { # mixed dash: one-letter opts get -, - # long opts get -- - @dash_args = ( -mixed_dash => 1); - last; - }; - do { - $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); - @dash_args = ( -dash => 1 ); - }; - } - my $options = $self->_setparams( - -params => $params, - -switches => $switches, - -join => $join, - @dash_args - ); - - # Translate options - # parse more carefully - bioperl-run issue #12 - $options =~ s/^\s+//; - $options =~ s/\s+$//; - my @options; - my $in_quotes; - for (split(/(\s|$join)/, $options)) { - if (/^-/) { - push @options, $_; - } - elsif (s/^"//) { - $in_quotes=1 unless (s/["']$//); - push @options, $_; - } - elsif (s/"$//) { - $options[-1] .= $_; - $in_quotes=0; - } - else { - $in_quotes ? $options[-1] .= $_ : - push(@options, $_); - } - } - $self->throw("Unmatched quote in option value") if $in_quotes; - for (my $i = 0; $i < scalar @options; $i++) { - my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ ); - if (defined $name) { - if ($name =~ /command/i) { - $name = $options[$i+2]; # get the command - splice @options, $i, 4; - $i--; - # don't add the command if this is a pseudo-program - unshift @options, $name unless ($self->is_pseudo); # put command first - } - elsif (defined $$translat{$name}) { - $options[$i] = $prefix.$$translat{$name}; - } - } - else { - splice @options, $i, 1; - $i--; - } - } - - @options = grep (!/^\s*$/,@options); - # this is a kludge for mixed options: the reason mixed doesn't - # work right on the pass through _setparams is that the - # *aliases* and not the actual params are passed to it. - # here we just rejigger the dashes - if ($dash =~ /^m/) { - s/--([a-z0-9](?:\s|$))/-$1/gi for @options; - } - # Now arrayify the options - - return \@options; -} - -=head2 executable() - - Title : executable - Usage : - Function: find the full path to the main executable, - or to the command executable for pseudo-programs - Returns : full path, if found - Args : [optional] explicit path to the executable - (will set the appropriate command exec if - applicable) - [optional] boolean flag whether or not to warn when exe no found - Note : overrides WrapperBase.pm - -=cut - -sub executable { - my $self = shift; - my ($exe, $warn) = @_; - if ($self->is_pseudo) { - return $self->{_pathtoexe} = $self->executables($self->command,$exe); - } - - # otherwise - # setter - if (defined $exe) { - $self->throw("binary '$exe' does not exist") unless -e $exe; - $self->throw("'$exe' is not executable") unless -x $exe; - return $self->{_pathtoexe} = $exe; - } - - # getter - return $self->{_pathtoexe} if defined $self->{_pathstoexe}; - - # finder - return $self->{_pathtoexe} = $self->_find_executable($exe, $warn); -} - -=head2 executables() - - Title : executables - Usage : - Function: find the full path to a command's executable - Returns : full path (scalar string) - Args : command (scalar string), - [optional] explicit path to this command exe - [optional] boolean flag whether or not to warn when exe no found - -=cut - -sub executables { - my $self = shift; - my ($cmd, $exe, $warn) = @_; - # for now, barf if this is not a pseudo program - $self->throw("This wrapper represents a single program with commands, not multiple programs; can't use executables()") unless $self->is_pseudo; - $self->throw("Command name required at arg 1") unless defined $cmd; - $self->throw("The desired executable '$cmd' is not registered as a command") unless grep /^$cmd$/, @{$self->{_options}->{_commands}}; - - # setter - if (defined $exe) { - $self->throw("binary '$exe' does not exist") unless -e $exe; - $self->throw("'$exe' is not executable") unless -x $exe; - $self->{_pathstoexe} = {} unless defined $self->{_pathstoexe}; - return $self->{_pathstoexe}->{$cmd} = $exe; - } - - # getter - return $self->{_pathstoexe}->{$cmd} if defined $self->{_pathstoexe}->{$cmd}; - - $exe ||= $cmd; - # finder - return $self->{_pathstoexe}->{$cmd} = $self->_find_executable($exe, $warn); -} - -=head2 _find_executable() - - Title : _find_executable - Usage : my $exe_path = $fac->_find_executable($exe, $warn); - Function: find the full path to a named executable, - Returns : full path, if found - Args : name of executable to find - [optional] boolean flag whether or not to warn when exe no found - Note : differs from executable and executables in not - setting any object attributes - -=cut - -sub _find_executable { - my $self = shift; - my ($exe, $warn) = @_; - - if ($self->is_pseudo && !$exe) { - if (!$self->command) { - # this throw probably appropriate - # the rest are now warns if $warn.../maj - $self->throw( - "The ".__PACKAGE__." wrapper represents several different programs;". - "arg1 to _find_executable must be specified explicitly,". - "or the command() attribute set"); - } - else { - $exe = $self->command; - } - } - $exe ||= $self->program_path; - - my $path; - if ($self->program_dir) { - $path = File::Spec->catfile($self->program_dir, $exe); - } else { - $path = $exe; - $self->warn('Program directory not specified; use program_dir($path).') if $warn; - } - - # use provided info - we are allowed to follow symlinks, but refuse directories - map { return $path.$_ if ( -x $path.$_ && !(-d $path.$_) ) } ('', '.exe') if defined $path; - - # couldn't get path to executable from provided info, so use system path - $path = $path ? " in $path" : undef; - $self->warn("Executable $exe not found$path, trying system path...") if $warn; - if ($path = $self->io->exists_exe($exe)) { - return $path; - } else { - $self->warn("Cannot find executable for program '".($self->is_pseudo ? $self->command : $self->program_name)."'") if $warn; - return; - } -} - -=head2 _register_composite_commands() - - Title : _register_composite_commands - Usage : - Function: adds subcomand params and switches for composite commands - Returns : true on success - Args : \%composite_commands, - \@program_params, - \@program_switches - -=cut - -sub _register_composite_commands { - my $self = shift; - my ($composite_commands, $program_params, - $program_switches, $command_prefixes) = @_; - my @sub_params; - my @sub_switches; - foreach my $cmd (keys %$composite_commands) { - my $pfx = $command_prefixes->{$cmd} || $cmd; - foreach my $subcmd ( @{$$composite_commands{$cmd}} ) { - my $spfx = $command_prefixes->{$subcmd} || $subcmd; - my @sub_program_params = grep /^$spfx\|/, @$program_params; - my @sub_program_switches = grep /^$spfx\|/, @$program_switches; - for (@sub_program_params) { - m/^$spfx\|(.*)/; - push @sub_params, "$pfx\|${spfx}_".$1; - } - for (@sub_program_switches) { - m/^$spfx\|(.*)/; - push @sub_switches, "$pfx\|${spfx}_".$1; - } - } - } - push @$program_params, @sub_params; - push @$program_switches, @sub_switches; - # translations for subcmd params/switches not necessary - return 1; -} - -=head2 _create_factory_set() - - Title : _create_factory_set - Usage : @facs = $self->_create_factory_set - Function: instantiate a set of individual command factories for - a given composite command - Factories will have the correct parameter fields set for - their own subcommand - Returns : hash of factories: ( $subcmd_prefix => $subcmd_factory, ... ) - Args : none - -=cut - -sub _create_factory_set { - my $self = shift; - $self->throw('command not set') unless $self->command; - my $cmd = $self->command; - $self->throw('_create_factory_set only works on composite commands') - unless grep /^$cmd$/, keys %{$self->{_options}->{_composite_commands}}; - my %ret; - my $class = ref $self; - my $subargs_hash = $self->_collate_subcmd_args($cmd); - for (keys %$subargs_hash) { - $ret{$_} = $class->new( -command => $_, @{$$subargs_hash{$_}} ); - } - return %ret; -} - -=head2 _collate_subcmd_args() - - Title : _collate_subcmd_args - Usage : $args_hash = $self->_collate_subcmd_args - Function: collate parameters and switches into command-specific - arg lists for passing to new() - Returns : hash of named argument lists - Args : [optional] composite cmd prefix (scalar string) - [default is 'run'] - -=cut - -sub _collate_subcmd_args { - my $self = shift; - my $cmd = shift; - my %ret; - # default command is 'run' - $cmd ||= 'run'; - return unless $self->{'_options'}->{'_composite_commands'}; - return unless $self->{'_options'}->{'_composite_commands'}->{$cmd}; - my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}}; - - my $cur_options = $self->{'_options'}; - # collate - foreach my $subcmd (@subcmds) { - # find the composite cmd form of the argument in - # the current params and switches - # e.g., map_max_mismatches - my $pfx = $self->{_options}->{_prefixes}->{$subcmd} || $subcmd; - my @params = grep /^${pfx}_/, @{$$cur_options{'_params'}}; - my @switches = grep /^${pfx}_/, @{$$cur_options{'_switches'}}; - $ret{$subcmd} = []; - # create an argument list suitable for passing to new() of - # the subcommand factory... - foreach my $opt (@params, @switches) { - my $subopt = $opt; - $subopt =~ s/^${pfx}_//; - push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt; - } - } - return \%ret; -} - -=head2 _run - - Title : _run - Usage : $fac->_run( @file_args ) - Function: Run a command as specified during object contruction - Returns : true on success - Args : a specification of the files to operate on according - to the filespec - -=cut - -sub _run { - my ($self, @args) = @_; - # _translate_params will provide an array of command/parameters/switches - # -- these are set at object construction - # to set up the run, need to add the files to the call - # -- provide these as arguments to this function - my $cmd = $self->command if $self->can('command'); - my $opts = $self->{_options}; - my %args; - $self->throw("No command specified for the object") unless $cmd; - # setup files necessary for this command - my $filespec = $opts->{'_files'}->{$cmd}; - my @switches; - my ($in, $out, $err); - # some applications rely completely on switches - if (defined $filespec && @$filespec) { - # parse args based on filespec - # require named args - $self->throw("Named args are required") unless !(@args % 2); - s/^-// for @args; - %args = @args; - # validate - my @req = map { - my $s = $_; - $s =~ s/^-.*\|//; - $s =~ s/^[012]?[<>]//; - $s =~ s/[^a-zA-Z0-9_]//g; - $s - } grep !/[#]/, @$filespec; - !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req; - # set up redirects and file switches - for (@$filespec) { - m/^1?>#?(.*)/ && do { - defined($args{$1}) && ( open $out, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); - next; - }; - m/^2>#?(.*)/ && do { - defined($args{$1}) && ( open $err, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); - next; - }; - m/^<#?(.*)/ && do { - defined($args{$1}) && ( open $in, '<', $args{$1} or $self->throw("Could not read file '$args{$1}': $!") ); - next; - }; - if (m/^-(.*)\|/) { - push @switches, $self->_dash_switch($1); - } else { - push @switches, undef; - } - } - } - my $dum; - $in || ($in = \$dum); - $out || ($out = \$self->{'stdout'}); - $err || ($err = \$self->{'stderr'}); - - # Get program executable - my $exe = $self->executable; - $self->throw("Can't find executable for '".($self->is_pseudo ? $self->command : $self->program_name)."'; can't continue") unless $exe; - - # Get command-line options - my $options = $self->_translate_params(); - # Get file specs sans redirects in correct order - my @specs = map { - my $s = $_; - $s =~ s/^-.*\|//; - $s =~ s/[^a-zA-Z0-9_]//g; - $s - } grep !/[<>]/, @$filespec; - my @files = @args{@specs}; - # expand arrayrefs - my $l = $#files; - - # Note: below code block may be brittle, see link on this: - # http://lists.open-bio.org/pipermail/bioperl-l/2010-June/033439.html - - for (0..$l) { - if (ref($files[$_]) eq 'ARRAY') { - splice(@switches, $_, 1, ($switches[$_]) x @{$files[$_]}); - splice(@files, $_, 1, @{$files[$_]}); - } - } - - - @files = map { - my $s = shift @switches; - defined $_ ? ($s, $_): () - } @files; - @files = map { defined $_ ? $_ : () } @files; # squish undefs - my @ipc_args = ( $exe, @$options, @files ); - $self->{_last_execution} = join( $self->{'_options'}->{'_join'}, @ipc_args ); - eval { - IPC::Run::run(\@ipc_args, $in, $out, $err) or - die ("There was a problem running $exe : ".$$err); - }; - - if ($@) { - $self->throw("$exe call crashed: $@") unless $self->no_throw_on_crash; - return 0; - } - - return 1; -} - - - -=head2 no_throw_on_crash() - - Title : no_throw_on_crash - Usage : - Function: prevent throw on execution error - Returns : - Args : [optional] boolean - -=cut - -sub no_throw_on_crash { - my $self = shift; - return $self->{'_no_throw'} = shift if @_; - return $self->{'_no_throw'}; -} - -=head2 last_execution() - - Title : last_execution - Usage : - Function: return the last executed command with options - Returns : string of command line sent to IPC::Run - Args : - -=cut - -sub last_execution { - my $self = shift; - return $self->{'_last_execution'}; -} - -=head2 _dash_switch() - - Title : _dash_switch - Usage : $version = $fac->_dash_switch( $switch ) - Function: Returns an appropriately dashed switch for the executable - Args : A string containing a switch without dashes - Returns : string containing an appropriately dashed switch for the current executable - -=cut - -sub _dash_switch { - my ($self, $switch) = @_; - - my $dash = $self->{'_options'}->{'_dash'}; - for ($dash) { - $_ eq '1' && do { - $switch = '-'.$switch; - last; - }; - /^s/ && do { #single dash only - $switch = '-'.$switch; - last; - }; - /^d/ && do { # double dash only - $switch = '--'.$switch; - last; - }; - /^m/ && do { # mixed dash: one-letter opts get -, - $switch = '-'.$switch; - $switch =~ s/^(-[a-z0-9](?:\w+))$/-$1/i; - last; - }; - do { - $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); - $switch = '-'.$switch; - }; - } - - return $switch; -} - -=head2 stdout() - - Title : stdout - Usage : $fac->stdout() - Function: store the output from STDOUT for the run, - if no file specified in _run arguments - Example : - Returns : scalar string - Args : on set, new value (a scalar or undef, optional) - -=cut - -sub stdout { - my $self = shift; - return $self->{'stdout'} = shift if @_; - return $self->{'stdout'}; -} - -=head2 stderr() - - Title : stderr - Usage : $fac->stderr() - Function: store the output from STDERR for the run, - if no file is specified in _run arguments - Example : - Returns : scalar string - Args : on set, new value (a scalar or undef, optional) - -=cut - -sub stderr { - my $self = shift; - return $self->{'stderr'} = shift if @_; - return $self->{'stderr'}; -} - -=head2 is_pseudo() - - Title : is_pseudo - Usage : $obj->is_pseudo($newval) - Function: returns true if this factory represents - a pseudo-program - Example : - Returns : value of is_pseudo (boolean) - Args : on set, new value (a scalar or undef, optional) - -=cut - -sub is_pseudo { - my $self = shift; - - return $self->{'is_pseudo'} = shift if @_; - return $self->{'is_pseudo'}; -} - -=head2 AUTOLOAD - -AUTOLOAD permits - - $class->new_yourcommand(@args); - -as an alias for - - $class->new( -command => 'yourcommand', @args ); - -=cut - -sub AUTOLOAD { - my $class = shift; - my $tok = $AUTOLOAD; - my @args = @_; - $tok =~ s/.*:://; - unless ($tok =~ /^new_/) { - $class->throw("Can't locate object method '$tok' via package '".ref($class)?ref($class):$class); - } - my ($cmd) = $tok =~ m/new_(.*)/; - return $class->new( -command => $cmd, @args ); -} - -=head1 Bio:ParameterBaseI compliance - -=head2 set_parameters() - - Title : set_parameters - Usage : $pobj->set_parameters(%params); - Function: sets the parameters listed in the hash or array - Returns : true on success - Args : [optional] hash or array of parameter/values. - -=cut - -sub set_parameters { - my ($self, @args) = @_; - - # currently stored stuff - my $opts = $self->{'_options'}; - my $params = $opts->{'_params'}; - my $switches = $opts->{'_switches'}; - my $translation = $opts->{'_translation'}; - my $use_dash = $opts->{'_dash'}; - my $join = $opts->{'_join'}; - unless (($self->can('command') && $self->command) - || (grep /command/, @args)) { - push @args, '-command', 'run'; - } - my %args = @args; - my $cmd = $args{'-command'} || $args{'command'} || ($self->can('command') && $self->command); - if ($cmd) { - my (@p,@s, %x); - $self->warn('Command present, but no commands registered') unless $self->{'_options'}->{'_commands'}; - $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}}; - $cmd = $self->{_options}->{_prefixes}->{$cmd} || $cmd; - - @p = (grep(!/^.*?\|/, @$params), grep(/^${cmd}\|/, @$params)); - @s = (grep(!/^.*?\|/, @$switches), grep(/^${cmd}\|/, @$switches)); - s/.*?\|// for @p; - s/.*?\|// for @s; - @x{@p, @s} = @{$translation}{ - grep( !/^.*?\|/, @$params, @$switches), - grep(/^${cmd}\|/, @$params, @$switches) }; - $opts->{_translation} = $translation = \%x; - $opts->{_params} = $params = \@p; - $opts->{_switches} = $switches = \@s; - } - $self->_set_from_args( - \@args, - -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ], - -create => 1, - # when our parms are accessed, signal parameters are unchanged for - # future reads (until set_parameters is called) - -code => - ' my $self = shift; - $self->parameters_changed(0); - return $self->{\'_\'.$method} = shift if @_; - return $self->{\'_\'.$method};' - ); - # the question is, are previously-set parameters left alone when - # not specified in @args? - $self->parameters_changed(1); - return 1; -} - -=head2 reset_parameters() - - Title : reset_parameters - Usage : resets values - Function: resets parameters to either undef or value in passed hash - Returns : none - Args : [optional] hash of parameter-value pairs - -=cut - -sub reset_parameters { - my ($self, @args) = @_; - - my @reset_args; - # currently stored stuff - my $opts = $self->{'_options'}; - my $params = $opts->{'_params'}; - my $switches = $opts->{'_switches'}; - my $translation = $opts->{'_translation'}; - my $qual_param = $opts->{'_qual_param'}; - my $use_dash = $opts->{'_dash'}; - my $join = $opts->{'_join'}; - - # handle command name - my %args = @args; - my $cmd = $args{'-command'} || $args{'command'} || $self->command; - $args{'command'} = $cmd; - delete $args{'-command'}; - @args = %args; - # don't like this, b/c _set_program_args will create a bunch of - # accessors with undef values, but oh well for now /maj - - for my $p (@$params) { - push(@reset_args, $p => undef) unless grep /^[-]?$p$/, @args; - } - for my $s (@$switches) { - push(@reset_args, $s => undef) unless grep /^[-]?$s$/, @args; - } - push @args, @reset_args; - $self->set_parameters(@args); - $self->parameters_changed(1); -} - -=head2 parameters_changed() - - Title : parameters_changed - Usage : if ($pobj->parameters_changed) {...} - Function: Returns boolean true (1) if parameters have changed - Returns : Boolean (0 or 1) - Args : [optional] Boolean - -=cut - -sub parameters_changed { - my $self = shift; - return $self->{'_parameters_changed'} = shift if @_; - return $self->{'_parameters_changed'}; -} - -=head2 available_parameters() - - Title : available_parameters - Usage : @params = $pobj->available_parameters() - Function: Returns a list of the available parameters - Returns : Array of parameters - Args : 'params' for settable program parameters - 'switches' for boolean program switches - default: all - -=cut - -sub available_parameters { - my $self = shift; - my $subset = shift; - my $opts = $self->{'_options'}; - my @ret; - for ($subset) { - (!defined || /^a/) && do { - @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}}); - last; - }; - m/^p/i && do { - @ret = @{$opts->{'_params'}}; - last; - }; - m/^s/i && do { - @ret = @{$opts->{'_switches'}}; - last; - }; - m/^c/i && do { - @ret = @{$opts->{'_commands'}}; - last; - }; - m/^f/i && do { # get file spec - return @{$opts->{'_files'}->{$self->command}}; - }; - do { #fail - $self->throw("available_parameters: unrecognized subset"); - }; - } - return @ret; -} - -sub available_commands { shift->available_parameters('commands') } -sub filespec { shift->available_parameters('filespec') } - -=head2 get_parameters() - - Title : get_parameters - Usage : %params = $pobj->get_parameters; - Function: Returns list of key-value pairs of parameter => value - Returns : List of key-value pairs - Args : [optional] A string is allowed if subsets are wanted or (if a - parameter subset is default) 'all' to return all parameters - -=cut - -sub get_parameters { - my $self = shift; - my $subset = shift; - $subset ||= 'all'; - my @ret; - my $opts = $self->{'_options'}; - for ($subset) { - m/^p/i && do { #params only - for (@{$opts->{'_params'}}) { - push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; - } - last; - }; - m/^s/i && do { #switches only - for (@{$opts->{'_switches'}}) { - push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; - } - last; - }; - m/^a/i && do { # all - for ((@{$opts->{'_params'}},@{$opts->{'_switches'}})) { - push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; - } - last; - }; - do { - $self->throw("get_parameters: unrecognized subset"); - }; - } - return @ret; -} - -1; diff --git a/t/Tools/Run/Dummy.pm b/t/Tools/Run/Dummy.pm deleted file mode 100755 index c34e73b83..000000000 --- a/t/Tools/Run/Dummy.pm +++ /dev/null @@ -1,21 +0,0 @@ -package Dummy; -use strict; -use warnings; - -use lib '.'; -use lib '..'; -use Dummy::Config; - -use Bio::Tools::Run::WrapperBase; -use Bio::Tools::Run::WrapperBase::CommandExts; - -use base qw(Bio::Tools::Run::WrapperBase Bio::Root::Root); - -sub new { - my ($class,@args) = @_; - my $self = $class->SUPER::new(@args); - return $self; -} - -1; - diff --git a/t/Tools/Run/Dummy/Config.pm b/t/Tools/Run/Dummy/Config.pm deleted file mode 100755 index f3c091a71..000000000 --- a/t/Tools/Run/Dummy/Config.pm +++ /dev/null @@ -1,75 +0,0 @@ -package Dummy::Config; -use strict; -use warnings; -no warnings qw(qw); -use Exporter; -our (@ISA, @EXPORT, @EXPORT_OK); -push @ISA, 'Exporter'; -@EXPORT = qw( - $program_name - $program_dir - $use_dash - $join - @program_commands - %command_prefixes - @program_params - @program_switches - %param_translation - %command_files - %composite_commands - ); - -@EXPORT_OK = qw(); - -our $program_name = '*flurb'; -#our $program_dir = 'C:\cygwin\usr\local\bin'; -our $use_dash = 'mixed'; -our $join = ' '; - - -our @program_commands = qw( - rpsblast - find - goob - blorb - multiglob - ); - -our %command_prefixes = ( - blastp => 'blp', - tblastn => 'tbn', - goob => 'g', - blorb => 'b', - multiglob => 'm' - ); - -our @program_params = qw( - command - g|narf - g|schlurb - b|scroob - b|frelb - m|trud -); - -our @program_switches = qw( - g|freen - b|klep -); - -our %param_translation = ( - 'g|narf' => 'n', - 'g|schlurb' => 'schlurb', - 'g|freen' => 'f', - 'b|scroob' => 's', - 'b|frelb' => 'frelb' - ); - -our %command_files = ( - 'goob' => [qw( fas faq )], - ); - -our %composite_commands = ( - 'multiglob' => [qw( blorb goob )] - ); -1; diff --git a/t/Tools/Run/StandAloneBlast.t b/t/Tools/Run/StandAloneBlast.t deleted file mode 100644 index 81a42ac40..000000000 --- a/t/Tools/Run/StandAloneBlast.t +++ /dev/null @@ -1,185 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; -use warnings; -use File::Spec; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 45); - - use_ok('Bio::Tools::Run::StandAloneBlast'); - use_ok('Bio::SeqIO'); -} - -# Note: the swissprot and ecoli.nt data sets may be downloaded from -# ftp://ftp.ncbi.nih.gov/blast/db/FASTA -my $verbose = test_debug() || -1; -my $nt_database = 'ecoli.nt'; -my $amino_database = 'swissprot'; -my $evalue = 0.001; -my ($seq1,$seq2,$seq3,$seq4); - -# Tests to check that "-attr" and "attr" and "a" all do the same thing -# http://bugzilla.open-bio.org/show_bug.cgi?id=1912 -for my $p (qw(database db -d -database d)) { - my $f = Bio::Tools::Run::StandAloneBlast->new($p => $nt_database); - is $f->d(), $nt_database; -} -for my $p (qw(expect evalue -e -expect e)) { - my $f = Bio::Tools::Run::StandAloneBlast->new($p => $evalue); - is $f->e(), $evalue; -} - -# NCBI blast params are case-sensitive, wublast aren't -my $ncbi_factory = Bio::Tools::Run::StandAloneBlast->new(-program => 'blastn'); -isa_ok $ncbi_factory, 'Bio::Tools::Run::StandAloneBlast'; -isa_ok $ncbi_factory, 'Bio::Tools::Run::StandAloneNCBIBlast'; -my $wu_factory = Bio::Tools::Run::StandAloneBlast->new(-program => 'wublastn'); -isa_ok $wu_factory, 'Bio::Tools::Run::StandAloneBlast'; -isa_ok $wu_factory, 'Bio::Tools::Run::StandAloneWUBlast'; -for my $p (qw(e E)) { - $ncbi_factory->$p($p); - $wu_factory->$p($p); -} -is $ncbi_factory->e, 'e'; -is $ncbi_factory->E, 'E'; -is $wu_factory->e, 'E'; -is $wu_factory->E, 'E'; - -# blastall switches like -I should take boolean but return 'T' or 'F' once set -is $ncbi_factory->I, undef; -is $ncbi_factory->I(1), 'T'; -is $ncbi_factory->I(0), 'F'; -is $ncbi_factory->I('T'), 'T'; -is $ncbi_factory->I('F'), 'F'; - -# We should be able to set -F "m D" in an intuitive way, and also by manually -# quoting the value ourselves -$ncbi_factory->F('m D'); -my $param_string = $ncbi_factory->_setparams('blastall'); -like $param_string, qr/-F ['"]m D['"]/; -$ncbi_factory->F('"m S"'); -$param_string = $ncbi_factory->_setparams('blastall'); -like $param_string, qr/-F ["']m S['"]/; -$ncbi_factory->F("'m D'"); -$param_string = $ncbi_factory->_setparams('blastall'); -like $param_string, qr/-F ['"]m D["']/; - -# dashed parameters should work -my $outfile = test_output_file(); -ok my $factory = Bio::Tools::Run::StandAloneBlast->new(-verbose => $verbose, - -program => 'blastn', - -database => $nt_database , - -_READMETHOD => 'SearchIO', - -output => $outfile, - -verbose => 0); -is $factory->database, $nt_database; - -# Setup and then do tests that actually run blast - -my @params = ('program' => 'blastn', - 'database' => $nt_database , - '_READMETHOD' => 'SearchIO', - 'output' => $outfile, - 'verbose' => 0 ); -ok $factory = Bio::Tools::Run::StandAloneBlast->new('-verbose' => $verbose, @params); - -my $inputfilename = test_input_file('test.txt'); - -is $factory->quiet(0), 0; -is $factory->q(-3), -3; - -SKIP: { - skip 'blastall not installed, skipping tests', 12 unless $factory->executable('blastall'); - skip 'must have BLASTDIR, BLASTDB or BLASTDATADIR env variable set, skipping tests', 12 unless defined $Bio::Tools::Run::StandAloneBlast::DATADIR; - - my @testresults = qw(37 182 182 253 167 2); - my $testcount = 0; - - # use ecoli.nt - my $nt_database_file = File::Spec->catfile($Bio::Tools::Run::StandAloneBlast::DATADIR, $nt_database); - like $nt_database_file, qr/$nt_database/; - SKIP: { - skip "Database $nt_database not found, skipping tests on it", 8 unless -e $nt_database_file; - - my $parser = $factory->blastall($inputfilename); - my $blast_report = $parser->next_result; - is $blast_report->num_hits, $testresults[$testcount++]; - - $factory->_READMETHOD('blast_pull'); # Note required leading underscore in _READMETHOD - my $str = Bio::SeqIO->new('-file' => test_input_file('dna2.fa'), - '-format' => 'fasta'); - $seq1 = $str->next_seq(); - $seq2 = $str->next_seq(); - - my $pull_report = $factory->blastall($seq1); - my $sbjct = $pull_report->next_result->next_hit; - my $hsp = $sbjct->next_hsp; - is $hsp->score, $testresults[$testcount]; - - $factory->_READMETHOD('Blast'); - my $searchio_report = $factory->blastall($seq1); - $sbjct = $searchio_report->next_result->next_hit; - $hsp = $sbjct->next_hsp; - is $hsp->score, $testresults[$testcount++]; - - my @seq_array =($seq1,$seq2); - my $seq_array_ref = \@seq_array; - $factory->_READMETHOD('blast_pull'); - $pull_report = $factory->blastall($seq_array_ref); - $sbjct = $pull_report->next_result->next_hit; - $hsp = $sbjct->next_hsp; - is $hsp->score, $testresults[$testcount]; - - $factory->_READMETHOD('Blast'); - $searchio_report = $factory->blastall($seq_array_ref); - $sbjct = $searchio_report->next_result->next_hit; - $hsp = $sbjct->next_hsp; - is $hsp->score, $testresults[$testcount++]; - - ok $sbjct = $searchio_report->next_result->next_hit; - $hsp = $sbjct->next_hsp; - is $hsp->score, $testresults[$testcount++]; - - @params = ('-verbose' => $verbose, 'program' => 'blastp'); - $factory = Bio::Tools::Run::StandAloneBlast->new(@params); - - $str = Bio::SeqIO->new(-file => test_input_file('amino.fa'), - -format => 'Fasta' ); - $seq3 = $str->next_seq(); - $seq4 = $str->next_seq(); - - $factory->_READMETHOD('Blast'); - my $bl2seq_report = $factory->bl2seq($seq3, $seq4); - $hsp = $bl2seq_report->next_result->next_hit->next_hsp; - is $hsp->hit->start, $testresults[$testcount++], "creating/parsing SearchIO bl2seq report object"; - } - - # use swissprot - my $amino_database_file = File::Spec->catfile($Bio::Tools::Run::StandAloneBlast::DATADIR, $amino_database); - SKIP: { - skip "Database $amino_database not found, skipping tests on it", 3 unless -e $amino_database_file; - - @params = ('database' => $amino_database, '-verbose' => $verbose); - $factory = Bio::Tools::Run::StandAloneBlast->new(@params); - - my $iter = 2; - $factory->j($iter); # 'j' is blast parameter for # of iterations - my $new_iter = $factory->j(); - is $new_iter, 2, "set blast parameter"; - - my $blast_report = $factory->blastpgp($seq3)->next_result; - is $blast_report->number_of_iterations, $testresults[$testcount]; - - $factory->_READMETHOD('blast_pull'); - $iter = 2; - $factory->j($iter); # 'j' is blast parameter for # of iterations - $new_iter = $factory->j(); - is $new_iter, $iter, "set blast parameter"; - - } -} diff --git a/t/Tools/Run/WBCommandExts.t b/t/Tools/Run/WBCommandExts.t deleted file mode 100755 index 74f473fcf..000000000 --- a/t/Tools/Run/WBCommandExts.t +++ /dev/null @@ -1,66 +0,0 @@ -#-*-perl-*- -#$Id$ -# testing CommandExts -use strict; -use warnings; -our $home; -BEGIN { - use Bio::Root::Test; - use lib '.'; - use lib 't/Tools/Run'; - $home = '../../..'; # set to '.' for Build use, - # '../../..' for debugging from .t file - unshift @INC, $home; - test_begin(-tests => 25, - -requires_modules => [qw(Bio::Tools::Run::WrapperBase - Bio::Tools::Run::WrapperBase::CommandExts)]); -} - -use_ok( 'Dummy::Config' ); -use_ok( 'Dummy' ); -use_ok('Bio::Tools::Run::WrapperBase'); -use_ok('Bio::Tools::Run::WrapperBase::CommandExts'); - -ok my $fac = Dummy->new( -command => 'goob', - -narf => 42.0, - -schlurb => 'breb', - -freen => 1 ), "make factory"; -ok $fac->parameters_changed, "parm changed flag set"; -is $fac->program_name, 'flurb', "correct prog name"; -ok $fac->is_pseudo, "is pseudo"; -is $fac->narf, 42, "correct parm set"; -ok !$fac->parameters_changed, "parm flag cleared"; -my $param_str = join(' ',@{$fac->_translate_params}); - -like ($param_str, qr/--schlurb breb/, 'translate opts to command line'); -like ($param_str, qr/-n 42/, 'translate opts to command line'); -like ($param_str, qr/-f/, 'translate opts to command line'); - -TODO: { - local $TODO ='Determine whether the order of the parameters should be set somehow; this sporadically breaks hash randomization introduced in perl 5.17+'; - is join(' ',@{$fac->_translate_params}), '--schlurb breb -n 42 -f', "translate opts to command line"; -} - -ok $fac->reset_parameters, "parm reset"; -ok !$fac->narf, "parm cleared after reset"; - -is_deeply( [$fac->available_parameters('parameters')], [qw( command narf schlurb )], "avail parms"); -is_deeply( [$fac->available_parameters('switches')], ['freen'], "avail switches"); -is_deeply( [$fac->available_parameters('commands')], [qw(rpsblast find goob blorb multiglob)], "avail commands"); - -ok $fac = Dummy->new( -command => 'multiglob', - -g_freen => 1, - -b_scroob => 10.5, - -trud => 'sklim' ), "make composite cmd factory"; - -is $fac->trud, 'sklim', "comp cmd parm set"; - -ok my %facs = $fac->_create_factory_set, "make subfactories"; -is $facs{goob}->freen, 1, "subfactory 1 parm correct"; -is $facs{blorb}->scroob, 10.5, "subfactory 2 parm correct"; - -$fac->program_dir('.'); -# ok $fac->executables('rpsblast'), "find in program_dir"; -ok $fac->executables('find'), "find in syspath"; - -1; diff --git a/t/Tools/Run/WrapperBase.t b/t/Tools/Run/WrapperBase.t deleted file mode 100755 index 6d32c6c99..000000000 --- a/t/Tools/Run/WrapperBase.t +++ /dev/null @@ -1,129 +0,0 @@ -# -*-Perl-*- Test Harness script for Bioperl -# $Id$ - -use strict; - -BEGIN { - use lib '.'; - use Bio::Root::Test; - - test_begin(-tests => 27); - - use_ok('Bio::Tools::Run::WrapperBase'); -} - -my @params = qw(test1 test_2); -my @switches = qw(Test3 test_4); -*Bio::Tools::Run::WrapperBase::new = sub { - my ($class, @args) = @_; - my $self = $class->Bio::Tools::Run::WrapperBase::SUPER::new(@args); - - $self->_set_from_args(\@args, -methods => [@params, @switches], - -create => 1); - - return $self; -}; -my $new = *Bio::Tools::Run::WrapperBase::new; # just to avoid warning -my $obj = Bio::Tools::Run::WrapperBase->new(-test_2 => 2, -test3 => 1, -test_4 => 0); -isa_ok($obj, 'Bio::Tools::Run::WrapperBase'); - -# it is interface-like with throw_not_implemented methods; check their -# existance -foreach my $method (qw(run program_dir program_name version)) { - ok $obj->can($method), "$method() exists"; -} - -## most methods are defined; check their function - -# simple get/setters -foreach my $method (qw(error_string arguments no_param_checks save_tempfiles - outfile_name quiet)) { - $obj->$method(1); - is $obj->$method(), 1, "$method could be set"; -} - -# tempdir - -$obj->save_tempfiles(0); -my $tmpdir = $obj->tempdir(); -ok -d $tmpdir, 'tempdir created a directory'; -ok open(my $test, '>', File::Spec->catfile($tmpdir, 'test')), 'could create file in tempdir'; -print $test "test\n"; -close $test; - -# cleanup - -$obj->cleanup(); -ok ! -d $tmpdir, 'following cleanup() with save_tempfiles unset, tempdir was deleted'; - -# io -my $io1 = $obj->io; -my $io2 = $obj->io; -isa_ok($io1, 'Bio::Root::IO'); -is $io1, $io2, 'io() always returns the same instance of IO'; - -# program_dir and program_name need to be defined for program_path and -# executable to work -{ - no warnings 'redefine'; - *Bio::Tools::Run::WrapperBase::program_dir = sub { - my $self = shift; - if (@_) { $self->{pdir} = shift } - return $self->{pdir} || ''; - }; - *Bio::Tools::Run::WrapperBase::program_name = sub { - my $self = shift; - if (@_) { $self->{pname} = shift } - return $self->{pname} || ''; - }; -} -$obj->program_dir('test_dir'); -$obj->program_name('test_name'); - -# program_path -is $obj->program_path, File::Spec->catfile('test_dir', 'test_name'.($^O =~ /mswin/i ?'.exe':'')), 'program_path was correct'; - -# executable -throws_ok { $obj->executable } qr/Cannot find executable/, 'pretend program name not found as executable'; -$obj->program_name('perl'); -ok $obj->executable, 'perl found as executable'; - -# _setparams -my $params = $obj->_setparams(-params => \@params, - -switches => \@switches); -is $params, ' test_2 2 Test3', 'params string correct'; -$params = $obj->_setparams(-params => \@params, - -switches => \@switches, - -join => '='); -is $params, ' test_2=2 Test3', 'params string correct'; -$params = $obj->_setparams(-params => \@params, - -switches => \@switches, - -join => '=', - -lc => 1); -is $params, ' test_2=2 test3', 'params string correct'; -$params = $obj->_setparams(-params => \@params, - -switches => \@switches, - -join => '=', - -lc => 1, - -dash => 1); -is $params, ' -test_2=2 -test3', 'params string correct'; -$params = $obj->_setparams(-params => \@params, - -switches => \@switches, - -join => '=', - -lc => 1, - -double_dash => 1); -is $params, ' --test_2=2 --test3', 'params string correct'; -$params = $obj->_setparams(-params => \@params, - -switches => \@switches, - -join => '=', - -lc => 1, - -double_dash => 1, - -underscore_to_dash => 1); -is $params, ' --test-2=2 --test3', 'params string correct'; -$params = $obj->_setparams(-params => {(test1 => 't1', test_2 => 't_2')}, - -switches => {(Test3 => 'T3', test_4 => 't4')}, - -join => '=', - -lc => 1, - -double_dash => 1, - -underscore_to_dash => 1); -is $params, ' --t-2=2 --t3', 'params string correct'; -- 2.11.4.GIT