3 # Copyright Balamurugan Kumarasamy
5 # You may distribute this module under the same terms as perl itself
6 # POD documentation - main docs before the code
10 Bio::Tools::Run::Alignment::Blat
16 use Bio::Tools::Run::Alignment::Blat;
18 my $factory = Bio::Tools::Run::Alignment::Blat->new();
20 # Pass the factory a Bio::Seq object
21 # @feats is an array of Bio::SeqFeature::Generic objects
22 my @feats = $factory->run($seq,$DB);
26 Wrapper module for Blat program. This newer version allows for all
29 Key bits not implemented yet (TODO):
33 =item * Implement all needed L<Bio::Tools::Run::WrapperBase> methods
35 Missing are a few, including version().
37 =item * Re-implement using L<IPC::Run>
39 Would like to get this running under something less reliant on OS-dependent
42 =item * No .2bit or .nib conversions yet
44 These require callouts to faToNib or faTwoTwoBit, which may or may not be
45 installed on a user's machine. We can possibly add functionality to
46 check for faToTwoBit/faToNib and other UCSC tools in the future.
54 User feedback is an integral part of the evolution of this and other
55 Bioperl modules. Send your comments and suggestions preferably to one
56 of the Bioperl mailing lists. Your participation is much appreciated.
58 bioperl-l@bioperl.org - General discussion
59 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 Please direct usage questions or support issues to the mailing list:
65 I<bioperl-l@bioperl.org>
67 rather than to the module maintainer directly. Many experienced and
68 reponsive experts will be able look at the problem and quickly
69 address it. Please include a thorough description of the problem
70 with code and data examples if at all possible.
74 Report bugs to the Bioperl bug tracking system to help us keep track
75 the bugs and their resolution. Bug reports can be submitted via the
78 http://bugzilla.open-bio.org/
86 The rest of the documentation details each of the object
87 methods. Internal methods are usually preceded with a _
91 package Bio
::Tools
::Run
::Alignment
::Blat
;
95 use base
qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
99 use Bio
::Factory
::ApplicationFactoryI
;
101 use Bio
::Tools
::Run
::WrapperBase
;
103 our ($PROGRAM, $PROGRAMDIR, $PROGRAMNAME);
105 our %BLAT_PARAMS = map {$_ => 1} qw(ooc t q tileSize stepSize oneOff
106 minMatch minScore minIdentity maxGap makeOoc repmatch mask qMask repeats
107 minRepeatsDivergence dots out maxIntron);
108 our %BLAT_SWITCHES = map {$_ => 1} qw(prot noHead trimT noTrimA trimHardA
109 fastMap fine extendThroughN);
111 our %LOCAL_ATTRIBUTES = map {$_ => 1} qw(db DB qsegment hsegment
114 #psl - Default. Tab separated format, no sequence
115 #pslx - Tab separated format with sequence
116 #axt - blastz-associated axt format
117 #maf - multiz-associated maf format
118 #sim4 - similar to sim4 format
119 #wublast - similar to wublast format
120 #blast - similar to NCBI blast format
121 #blast8- NCBI blast tabular format
122 #blast9 - NCBI blast tabular format with comments
125 our %searchio_map = (
127 'pslx' => 'psl', # I don't think there is support for this yet
131 'wublast' => 'blast',
132 'blast8' => 'blasttable',
133 'blast9' => 'blasttable'
139 Usage : $blat->new(@params)
140 Function: creates a new Blat factory
141 Returns : Bio::Tools::Run::Alignment::Blat
147 my ($class,@args) = @_;
148 my $self = $class->SUPER::new
(@args);
149 $self->io->_initialize_io();
150 $self->set_parameters(@args);
157 Usage : $factory->program_name()
158 Function: holds the program name
171 Usage : $factory->program_dir(@params)
172 Function: returns the program directory, obtained from ENV variable.
179 return Bio
::Root
::IO
->catfile($ENV{BLATDIR
}) if $ENV{BLATDIR
};
185 Usage : $obj->run($query)
186 Function: Runs Blat and creates an array of featrues
187 Returns : An array of Bio::SeqFeature::Generic objects
188 Args : A Bio::PrimarySeqI or a file name
193 my ($self,$query) = @_;
196 if (ref($query) ) { # it is an object
197 if (ref($query) =~ /GLOB/) {
198 $self->throw("Cannot use filehandle as argument to run()");
200 my $infile = $self->_writeSeqFile($query);
201 return $self->_run($infile);
203 return $self->_run($query);
210 Usage : $obj->align($query)
211 Function: Alias to run()
216 return shift->run(@_);
225 return $self->{blat_db
} = shift if @_;
226 return $self->{blat_db
};
229 # this is a kludge for tests (so one might expect this to be used elsewhere).
230 # None of the other parameters worked in the past, so not replacing them
237 Usage : $obj->qsegment('sequence_a:0-1000')
238 Function : pass in a B<UCSC-compliant> string for the query sequence(s)
242 Note : Requires the sequence(s) in question be 2bit or nib format
243 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence
244 begins at 0, but start isn't counted with length), whereas BioPerl
245 coordinates are 1-based closed (sequence begins with 1, both start
246 and end are counted in the length of the segment). For example, a
247 segment that is 'sequence_a:0-1000' will have BioPerl coordinates of
248 'sequence_a:1-1000', both with the same length (1000).
254 return $self->{blat_qsegment
} = shift if @_;
255 return $self->{blat_qsegment
};
261 Usage : $obj->tsegment('sequence_a:0-1000')
262 Function : pass in a B<UCSC-compliant> string for the target sequence(s)
266 Note : Requires the sequence(s) in question be 2bit or nib format
267 Reminder : UCSC segment/regions coordinates are 0-based half-open (sequence
268 begins at 0, but start isn't counted with length), whereas BioPerl
269 coordinates are 1-based closed (sequence begins with 1, both start
270 and end are counted in the length of the segment). For example, a
271 segment that is 'sequence_a:0-1000' will have BioPerl coordinates of
272 'sequence_a:1-1000', both with the same length (1000).
278 return $self->{blat_tsegment
} = shift if @_;
279 return $self->{blat_tsegment
};
282 # override this, otherwise one gets a default of 'mlc'
285 return $self->{blat_outfile
} = shift if @_;
286 return $self->{blat_outfile
};
292 Usage : $obj->searchio{-writer => $writer}
293 Function : Pass in additional parameters to the returned Bio::SearchIO parser
294 Returns : Hash reference with Bio::SearchIO parameters
295 Args : Hash reference
297 Note : Currently, this implementation overrides any passed -format
298 parameter based on whether the output is changed ('out'). This
299 may change if requested, but we can't see the utility of doing so,
300 as requesting mismatched output/parser combinations is just a recipe
306 my ($self, $params) = @_;
307 if ($params && ref $params eq 'HASH') {
308 if ($self->{parameters
}->{out
} && exists $searchio_map{$self->{parameters
}->{out
}}) {
309 $params->{-format
} = $searchio_map{$self->{parameters
}->{out
}};
311 $params->{-format
} = 'psl';
313 $self->{blat_searchio
} = $params;
316 return $self->{blat_searchio
} ||
317 {-format
=> exists($self->{parameters
}->{out
}) ?
318 $searchio_map{$self->{parameters
}->{out
}} : 'psl'};
321 =head1 Bio::ParameterBaseI-specific methods
323 These methods are part of the Bio::ParameterBaseI interface
327 =head2 set_parameters
329 Title : set_parameters
330 Usage : $pobj->set_parameters(%params);
331 Function: sets the parameters listed in the hash or array
333 Args : [optional] hash or array of parameter/values. These can optionally
334 be hash or array references
335 Note : This only sets parameters; to set methods use the method name
340 # circumvent any issues arising from passing in refs
341 my %args = (ref($_[0]) eq 'HASH') ?
%{$_[0]} :
342 (ref($_[0]) eq 'ARRAY') ? @
{$_[0]} :
344 # set the parameters passed in, but only ones supported for the program
345 %args = map { my $a = $_;
350 while (my ($key, $val) = each %args) {
351 if (exists $BLAT_PARAMS{$key}) {
352 $self->{parameters
}->{$key} = $val;
353 } elsif (exists $BLAT_SWITCHES{$key}) {
354 $self->{parameters
}->{$key} = $BLAT_SWITCHES{$key} ?
1 : 0;
355 } elsif ($LOCAL_ATTRIBUTES{$key} && $self->can($key)) {
361 =head2 reset_parameters
363 Title : reset_parameters
364 Usage : resets values
365 Function: resets parameters to either undef or value in passed hash
367 Args : [optional] hash of parameter-value pairs
371 sub reset_parameters
{
373 delete $self->{parameters
};
375 $self->set_parameters(@_);
379 =head2 validate_parameters
381 Title : validate_parameters
382 Usage : $pobj->validate_parameters(1);
383 Function: sets a flag indicating whether to validate parameters via
384 set_parameters() or reset_parameters()
386 Args : [optional] value evaluating to True/False
391 sub validate_parameters
{ 0 }
393 =head2 parameters_changed
395 Title : parameters_changed
396 Usage : if ($pobj->parameters_changed) {...}
397 Function: Returns boolean true (1) if parameters have changed
398 Returns : Boolean (0 or 1)
400 Note : This module does not run state checks, so this always returns True
404 sub parameters_changed
{ 1 }
406 =head2 available_parameters
408 Title : available_parameters
409 Usage : @params = $pobj->available_parameters()
410 Function: Returns a list of the available parameters
411 Returns : Array of parameters
412 Args : [optional] name of executable being used; defaults to returning all
417 sub available_parameters
{
418 my ($self, $exec) = @_;
419 my @params = (sort keys %BLAT_PARAMS, sort keys %BLAT_SWITCHES);
423 =head2 get_parameters
425 Title : get_parameters
426 Usage : %params = $pobj->get_parameters;
427 Function: Returns list of set key-value pairs, parameter => value
428 Returns : List of key-value pairs
434 my ($self, $option) = @_;
435 $option ||= ''; # no option
437 if (exists $self->{parameters
}) {
438 %params = map {$_ => $self->{parameters
}->{$_}} sort keys %{$self->{parameters
}};
447 All to_* methods are implementation-specific
453 Title : to_exe_string
454 Usage : $string = $pobj->to_exe_string;
455 Function: Returns string (command line string in this case)
462 my ($self, @passed) = @_;
463 my ($seq) = $self->_rearrange([qw(SEQ_FILE)], @passed);
464 $self->throw("Must provide a seq_file") unless defined $seq;
465 my %params = $self->get_parameters();
467 my ($exe, $db, $qseg, $tseg) = ($self->executable,
472 $self->throw("Executable not found") unless defined($exe);
484 for my $p (sort keys %BLAT_SWITCHES) {
485 if (exists $params{$p}) {
490 for my $p (sort keys %BLAT_PARAMS) {
491 if (exists $params{$p}) {
492 push @params, "-$p=$params{$p}"
496 # this only passes in the first seq file (no globs are allow AFAIK)
498 push @params, ($db, $seq);
500 # quiet! Unfortunately, it is NYI
502 my $string = "$exe ".join(' ',@params);
510 # Usage : obj->_input($seqFile)
511 # Function: Internal (not to be used directly)
518 my ($self,$infile1) = @_;
519 if(defined $infile1){
520 $self->{'input'}=$infile1;
522 return $self->{'input'};
528 # Usage : obj->_database($seqFile)
529 # Function: Internal (not to be used directly)
536 my ($self,$infile1) = @_;
537 $self->{'db'} = $infile1 if(defined $infile1);
538 return $self->{'db'};
545 # Usage : $obj->_run()
546 # Function: Internal (not to be used directly)
547 # Returns : An array of Bio::SeqFeature::Generic objects
554 my $str = $self->to_exe_string(-seq_file
=> shift);
556 my $out = $self->outfile_name || $self->_tempfile;
558 $str .= " $out".$self->_quiet;
559 $self->debug($str."\n") if( $self->verbose > 0 );
561 my $status = system($str);
562 $self->throw( "Blat call ($str) crashed: $? \n") unless $status==0;
565 if (ref ($out) !~ /GLOB/) {
566 $blat_obj = Bio
::SearchIO
->new(%{$self->searchio},
569 $blat_obj = Bio
::SearchIO
->new(%{$self->searchio},
576 #=head2 _writeSeqFile
578 # Title : _writeSeqFile
579 # Usage : obj->_writeSeqFile($seq)
580 # Function: Internal (not to be used directly)
587 my ($self,$seq) = @_;
588 my ($tfh,$inputfile) = $self->io->tempfile(-dir
=>$Bio::Root
::IO
::TEMPDIR
);
589 my $in = Bio
::SeqIO
->new(-fh
=> $tfh , '-format' => 'fasta');
590 $in->write_seq($seq);
597 my ($tfh,$outfile) = $self->io->tempfile(-dir
=>$Bio::Root
::IO
::TEMPDIR
);
598 # this is because we only want a unique filename
606 # BLAT output goes to a file, all other output is STDOUT
608 $q = $^O
=~ /Win/i ?
' 2>&1 NUL' : ' > /dev/null 2>&1';