speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Infernal.pm
blob5fab77e8df96554d204b767fe20703f0262e0abf
1 # $Id$
3 # You may distribute this module under the same terms as perl itself
5 # POD documentation - main docs before the code
7 # _history
9 # March 2007 - first full implementation; needs some file IO tweaking between
10 # runs but works for now
11 # April 2008 - add 0.81 parameters (may be removed in the 1.0 release)
13 # July 2009 - updated for v1.0. No longer supporting pre-1.0 Infernal
15 =head1 NAME
17 Bio::Tools::Run::Infernal - Wrapper for local execution of cmalign, cmbuild,
18 cmsearch, cmscore
20 =head1 SYNOPSIS
22 # parameters which are switches are set with any value that evals TRUE,
23 # others are set to a specific value
25 my $factory = Bio::Tools::Run::Infernal->new(@params);
27 # run cmalign|cmbuild|cmsearch|cmscore|cmemit directly as a wrapper method
28 # this resets the program flag if previously set
30 $factory->cmsearch(@seqs); # searches Bio::PrimarySeqI's based on set cov. model
31 # saves output to optional outfile_name, returns
32 # Bio::SearchIO
34 # only values which are allowed for a program are set, so one can use the same
35 # wrapper for the following...
37 $factory->cmalign(@seqs); # aligns Bio::PrimarySeqI's to a set cov. model,
38 # --merge option allows two alignments generated
39 # from the same CM to be merged.
40 # output to outfile_name, returns Bio::AlignIO
41 $factory->cmscore(); # scores set cov. model against Bio::PrimarySeqI,
42 # output to outfile_name/STDOUT.
43 $factory->cmbuild($aln); # builds covariance model based on alignment
44 # CM to outfile_name or model_file (one is required
45 # here), output to STDOUT.
46 $factory->cmemit(); # emits sequence from specified cov. model;
47 # set one if no file specified. output to
48 # outfile_name, returns Bio::SeqIO or (if -a is set)
49 # Bio::AlignIO
50 $factory->cmcalibrate($file); # calibrates specified cov. model; output to
51 # STDOUT
52 $factory->cmstat($file); # summary stats for cov. model; set one if no file
53 # specified; output to STDOUT
55 # run based on the setting of the program parameter
57 my $factory = Bio::Tools::Run::Infernal->new(-program => 'cmsearch',
58 @params);
59 my $search = $factory->run($seq);
61 # using cmsearch returns a Bio::SearchIO object
63 while (my $result = $searchio->next_result){
64 while(my $hit = $result->next_hit){
65 while (my $hsp = $hit->next_hsp){
66 print join("\t", ( $r->query_name,
67 $hit->name,
68 $hsp->hit->start,
69 $hsp->hit->end,
70 $hsp->meta,
71 $hsp->score,
72 )), "\n";
77 =head1 DESCRIPTION
79 Wrapper module for Sean Eddy's Infernal suite of programs. The current
80 implementation runs cmsearch, cmcalibrate, cmalign, cmemit, cmbuild, cmscore,
81 and cmstat. cmsearch will return a Bio::SearchIO, cmemit a Bio::SeqIO/AlignIO,
82 and cmalign a Bio::AlignIO. All others send output to STDOUT. Optionally,
83 any program's output can be redirected to outfile_name.
85 We HIGHLY suggest upgrading to Infernal 1.0. In that spirit, this wrapper now
86 supports parameters for Infernal 1.0 only; for wrapping older versions of
87 Infernal we suggest using the version of Bio::Tools::Run::Infernal that came
88 with previous versions of BioPerl-run.
90 NOTE: Due to conflicts in the way Infernal parameters are now formatted vs.
91 subroutine naming in Perl (specifically the inclusion of hyphens) and due to the
92 very large number of parameters available, setting and resetting parameters via
93 set_parameters() and reset_parameters() is required. All valid parameters can
94 be set, but only ones valid for the executable set via program()/program_name()
95 are used for calling the executables, the others are silently ignored.
97 =head1 FEEDBACK
99 =head2 Mailing Lists
101 User feedback is an integral part of the evolution of this and other
102 Bioperl modules. Send your comments and suggestions preferably to one
103 of the Bioperl mailing lists. Your participation is much appreciated.
105 bioperl-l@bioperl.org - General discussion
106 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
108 =head2 Support
110 Please direct usage questions or support issues to the mailing list:
112 I<bioperl-l@bioperl.org>
114 rather than to the module maintainer directly. Many experienced and
115 reponsive experts will be able look at the problem and quickly
116 address it. Please include a thorough description of the problem
117 with code and data examples if at all possible.
119 =head2 Reporting Bugs
121 Report bugs to the Bioperl bug tracking system to help us keep track
122 the bugs and their resolution. Bug reports can be submitted via the
123 web:
125 http://redmine.open-bio.org/projects/bioperl/
127 =head1 AUTHOR - Chris Fields
129 Email: cjfields-at-uiuc-dot-edu
131 =head1 CONTRIBUTORS
133 cjfields-at-uiuc-dot-edu
135 =head1 APPENDIX
137 The rest of the documentation details each of the object
138 methods. Internal methods are usually preceded with a _
140 =cut
142 package Bio::Tools::Run::Infernal;
144 use strict;
145 use warnings;
146 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase Bio::ParameterBaseI);
148 use Bio::SeqIO;
149 use Bio::SearchIO;
150 use Bio::AlignIO;
151 use Data::Dumper;
153 # yes, these are the current parameters
154 our %INFERNAL_PARAMS = (
155 'A' => ['switch', '-', qw(cmbuild)],
156 'E' => ['param', '-', qw(cmsearch cmstat)],
157 'F' => ['switch', '-', qw(cmbuild)],
158 'Lmax' => ['param', '--', qw(cmscore)],
159 'Lmin' => ['param', '--', qw(cmscore)],
160 'T' => ['param', '-', qw(cmsearch cmstat)],
161 'Wbeta' => ['param', '--', qw(cmbuild)],
162 'Z' => ['param', '-', qw(cmsearch cmstat)],
163 'a' => ['switch', '-', qw(cmbuild cmemit cmscore)],
164 'afile' => ['param', '--', qw(cmstat)],
165 'ahmm' => ['param', '--', qw(cmemit)],
166 'all' => ['switch', '--', qw(cmstat)],
167 'aln-hbanded' => ['switch', '--', qw(cmsearch)],
168 'aln-optacc' => ['switch', '--', qw(cmsearch)],
169 'aln2bands' => ['switch', '--', qw(cmscore cmsearch)],
170 'banddump' => ['param', '--', qw(cmalign)],
171 'begin' => ['param', '--', qw(cmemit)],
172 'beta' => ['param', '--', qw(cmalign cmscore cmsearch cmstat)],
173 'betae' => ['param', '--', qw(cmscore)],
174 'betas' => ['param', '--', qw(cmscore)],
175 'bfile' => ['param', '--', qw(cmstat)],
176 'binary' => ['switch', '--', qw(cmbuild)],
177 'bits' => ['switch', '--', qw(cmstat)],
178 'bottomonly' => ['switch', '--', qw(cmsearch)],
179 'c' => ['switch', '-', qw(cmemit)],
180 'call' => ['switch', '--', qw(cmbuild)],
181 'cdump' => ['param', '--', qw(cmbuild)],
182 'cfile' => ['param', '--', qw(cmbuild)],
183 'checkfb' => ['switch', '--', qw(cmalign)],
184 'checkpost' => ['switch', '--', qw(cmalign)],
185 'cmL' => ['param', '--', qw(cmstat)],
186 'cmaxid' => ['param', '--', qw(cmbuild)],
187 'cmtbl' => ['param', '--', qw(cmbuild)],
188 'corig' => ['switch', '--', qw(cmbuild)],
189 'ctarget' => ['param', '--', qw(cmbuild)],
190 'cyk' => ['switch', '--', qw(cmalign cmbuild cmsearch)],
191 'devhelp' => ['switch', '--', qw(cmalign cmbuild cmcalibrate cmemit cmscore cmsearch)],
192 'dlev' => ['param', '--', qw(cmalign)],
193 'dna' => ['switch', '--', qw(cmalign cmemit cmsearch)],
194 'eX' => ['param', '--', qw(cmbuild)],
195 'eent' => ['switch', '--', qw(cmbuild)],
196 'efile' => ['param', '--', qw(cmstat)],
197 'ehmmre' => ['param', '--', qw(cmbuild)],
198 'elself' => ['param', '--', qw(cmbuild)],
199 'emap' => ['param', '--', qw(cmbuild)],
200 'emit' => ['switch', '--', qw(cmscore)],
201 'end' => ['param', '--', qw(cmemit)],
202 'enone' => ['switch', '--', qw(cmbuild)],
203 'ere' => ['param', '--', qw(cmbuild)],
204 'exp' => ['param', '--', qw(cmemit)],
205 'exp-T' => ['param', '--', qw(cmcalibrate)],
206 'exp-beta' => ['param', '--', qw(cmcalibrate)],
207 'exp-cmL-glc' => ['param', '--', qw(cmcalibrate)],
208 'exp-cmL-loc' => ['param', '--', qw(cmcalibrate)],
209 'exp-ffile' => ['param', '--', qw(cmcalibrate)],
210 'exp-fract' => ['param', '--', qw(cmcalibrate)],
211 'exp-gc' => ['param', '--', qw(cmcalibrate)],
212 'exp-hfile' => ['param', '--', qw(cmcalibrate)],
213 'exp-hmmLn-glc' => ['param', '--', qw(cmcalibrate)],
214 'exp-hmmLn-loc' => ['param', '--', qw(cmcalibrate)],
215 'exp-hmmLx' => ['param', '--', qw(cmcalibrate)],
216 'exp-no-qdb' => ['switch', '--', qw(cmcalibrate)],
217 'exp-pfile' => ['param', '--', qw(cmcalibrate)],
218 'exp-qqfile' => ['param', '--', qw(cmcalibrate)],
219 'exp-random' => ['switch', '--', qw(cmcalibrate)],
220 'exp-sfile' => ['param', '--', qw(cmcalibrate)],
221 'exp-tailn-cglc' => ['param', '--', qw(cmcalibrate)],
222 'exp-tailn-cloc' => ['param', '--', qw(cmcalibrate)],
223 'exp-tailn-hglc' => ['param', '--', qw(cmcalibrate)],
224 'exp-tailn-hloc' => ['param', '--', qw(cmcalibrate)],
225 'exp-tailp' => ['param', '--', qw(cmcalibrate)],
226 'exp-tailxn' => ['param', '--', qw(cmcalibrate)],
227 'fil-E-hmm' => ['param', '--', qw(cmsearch)],
228 'fil-E-qdb' => ['param', '--', qw(cmsearch)],
229 'fil-F' => ['param', '--', qw(cmcalibrate)],
230 'fil-N' => ['param', '--', qw(cmcalibrate)],
231 'fil-Smax-hmm' => ['param', '--', qw(cmsearch)],
232 'fil-T-hmm' => ['param', '--', qw(cmsearch)],
233 'fil-T-qdb' => ['param', '--', qw(cmsearch)],
234 'fil-aln2bands' => ['switch', '--', qw(cmcalibrate)],
235 'fil-beta' => ['param', '--', qw(cmsearch)],
236 'fil-dfile' => ['param', '--', qw(cmcalibrate)],
237 'fil-gemit' => ['switch', '--', qw(cmcalibrate)],
238 'fil-no-hmm' => ['switch', '--', qw(cmsearch)],
239 'fil-no-qdb' => ['switch', '--', qw(cmsearch)],
240 'fil-nonbanded' => ['switch', '--', qw(cmcalibrate)],
241 'fil-tau' => ['param', '--', qw(cmcalibrate)],
242 'fil-xhmm' => ['param', '--', qw(cmcalibrate)],
243 'fins' => ['switch', '--', qw(cmalign cmbuild)],
244 'forecast' => ['param', '--', qw(cmcalibrate cmsearch)],
245 'forward' => ['switch', '--', qw(cmscore cmsearch)],
246 'g' => ['switch', '-', qw(cmsearch cmstat)],
247 'ga' => ['switch', '--', qw(cmsearch cmstat)],
248 'gapthresh' => ['param', '--', qw(cmalign cmbuild)],
249 'gcfile' => ['param', '--', qw(cmsearch)],
250 'ge' => ['switch', '--', qw(cmstat)],
251 'gfc' => ['switch', '--', qw(cmstat)],
252 'gfi' => ['switch', '--', qw(cmstat)],
253 'gibbs' => ['switch', '--', qw(cmbuild)],
254 'gtbl' => ['param', '--', qw(cmbuild)],
255 'gtree' => ['param', '--', qw(cmbuild)],
256 'h' => ['switch', '-', qw(cmalign cmbuild cmcalibrate cmemit cmscore cmsearch cmstat)],
257 'hbanded' => ['switch', '--', qw(cmalign cmscore cmsearch)],
258 'hmm-W' => ['param', '--', qw(cmsearch)],
259 'hmm-cW' => ['param', '--', qw(cmsearch)],
260 'hmmL' => ['param', '--', qw(cmstat)],
261 'hsafe' => ['switch', '--', qw(cmalign cmscore)],
262 'ignorant' => ['switch', '--', qw(cmbuild)],
263 'iins' => ['switch', '--', qw(cmbuild)],
264 'infile' => ['param', '--', qw(cmscore)],
265 'informat' => ['param', '--', qw(cmalign cmbuild cmsearch)],
266 'inside' => ['switch', '--', qw(cmalign cmscore cmsearch)],
267 'l' => ['switch', '-', qw(cmalign cmbuild cmemit cmscore)],
268 'lambda' => ['param', '--', qw(cmsearch)],
269 'le' => ['switch', '--', qw(cmstat)],
270 'lfc' => ['switch', '--', qw(cmstat)],
271 'lfi' => ['switch', '--', qw(cmstat)],
272 'm' => ['switch', '-', qw(cmstat)],
273 'matchonly' => ['switch', '--', qw(cmalign)],
274 'merge' => ['switch', '--', qw(cmalign)],
275 'mpi' => ['switch', '--', qw(cmalign cmcalibrate cmscore cmsearch)],
276 'mxsize' => ['param', '--', qw(cmalign cmbuild cmcalibrate cmscore cmsearch)],
277 'n' => ['param', '-', qw(cmbuild cmemit cmscore)],
278 'nc' => ['switch', '--', qw(cmsearch cmstat)],
279 'no-null3' => ['switch', '--', qw(cmalign cmcalibrate cmscore cmsearch)],
280 'no-qdb' => ['switch', '--', qw(cmsearch)],
281 'noalign' => ['switch', '--', qw(cmsearch)],
282 'nobalance' => ['switch', '--', qw(cmbuild)],
283 'nodetach' => ['switch', '--', qw(cmbuild)],
284 'nonbanded' => ['switch', '--', qw(cmalign cmbuild cmscore)],
285 'null' => ['param', '--', qw(cmbuild)],
286 'null2' => ['switch', '--', qw(cmsearch)],
287 'o' => ['param', '-', qw(cmalign cmsearch)],
288 'old' => ['switch', '--', qw(cmscore)],
289 'onepost' => ['switch', '--', qw(cmalign)],
290 'optacc' => ['switch', '--', qw(cmalign)],
291 'outfile' => ['param', '--', qw(cmscore)],
292 'p' => ['switch', '-', qw(cmalign cmsearch)],
293 'pad' => ['switch', '--', qw(cmscore)],
294 'pbegin' => ['param', '--', qw(cmalign cmcalibrate cmemit cmscore cmsearch)],
295 'pbswitch' => ['param', '--', qw(cmbuild)],
296 'pebegin' => ['switch', '--', qw(cmalign cmcalibrate cmemit cmscore cmsearch)],
297 'pend' => ['param', '--', qw(cmalign cmcalibrate cmemit cmscore cmsearch)],
298 'pfend' => ['param', '--', qw(cmalign cmcalibrate cmemit cmscore cmsearch)],
299 'prior' => ['param', '--', qw(cmbuild)],
300 'q' => ['switch', '-', qw(cmalign)],
301 'qdb' => ['switch', '--', qw(cmalign cmscore)],
302 'qdbboth' => ['switch', '--', qw(cmscore)],
303 'qdbfile' => ['param', '--', qw(cmstat)],
304 'qdbsmall' => ['switch', '--', qw(cmscore)],
305 'random' => ['switch', '--', qw(cmscore)],
306 'rdump' => ['param', '--', qw(cmbuild)],
307 'refine' => ['param', '--', qw(cmbuild)],
308 'regress' => ['param', '--', qw(cmalign cmbuild cmscore)],
309 'resonly' => ['switch', '--', qw(cmalign)],
310 'rf' => ['switch', '--', qw(cmalign cmbuild)],
311 'rna' => ['switch', '--', qw(cmalign cmemit cmsearch)],
312 'rsearch' => ['param', '--', qw(cmbuild)],
313 'rtrans' => ['switch', '--', qw(cmsearch)],
314 's' => ['param', '-', qw(cmalign cmbuild cmcalibrate cmemit cmscore)],
315 'sample' => ['switch', '--', qw(cmalign)],
316 'scoreonly' => ['switch', '--', qw(cmscore)],
317 'search' => ['switch', '--', qw(cmscore cmstat)],
318 'seqfile' => ['param', '--', qw(cmstat)],
319 'sfile' => ['param', '--', qw(cmstat)],
320 'shmm' => ['param', '--', qw(cmemit)],
321 'small' => ['switch', '--', qw(cmalign)],
322 'stall' => ['switch', '--', qw(cmalign cmscore cmsearch)],
323 'sub' => ['switch', '--', qw(cmalign cmbuild cmscore)],
324 'sums' => ['switch', '--', qw(cmalign cmsearch)],
325 'tabfile' => ['param', '--', qw(cmsearch)],
326 'tau' => ['param', '--', qw(cmalign cmbuild cmscore cmsearch)],
327 'taue' => ['param', '--', qw(cmscore)],
328 'taus' => ['param', '--', qw(cmscore)],
329 'tc' => ['switch', '--', qw(cmsearch cmstat)],
330 'tfile' => ['param', '--', qw(cmalign cmbuild cmemit cmscore)],
331 'toponly' => ['switch', '--', qw(cmsearch cmstat)],
332 'u' => ['switch', '-', qw(cmemit)],
333 'v' => ['switch', '-', qw(cmbuild cmcalibrate)],
334 'viterbi' => ['switch', '--', qw(cmalign cmscore cmsearch)],
335 'wblosum' => ['switch', '--', qw(cmbuild)],
336 'wgiven' => ['switch', '--', qw(cmbuild)],
337 'wgsc' => ['switch', '--', qw(cmbuild)],
338 'wid' => ['param', '--', qw(cmbuild)],
339 'withali' => ['param', '--', qw(cmalign)],
340 'withpknots' => ['switch', '--', qw(cmalign)],
341 'wnone' => ['switch', '--', qw(cmbuild)],
342 'wpb' => ['switch', '--', qw(cmbuild)],
343 'x' => ['switch', '-', qw(cmsearch)],
344 'xfile' => ['param', '--', qw(cmstat)],
347 our %INFERNAL_PROGRAM = (
348 'cmalign' => "cmalign [-options] <cmfile> <sequence file>\n".
349 'cmalign [-options] --merge <cmfile> <msafile1> <msafile2>',
350 'cmbuild' => 'cmbuild [-options] <cmfile output> <alignment file>',
351 'cmcalibrate' => 'cmcalibrate [-options] <cmfile>',
352 'cmemit' => 'cmemit [-options] <cmfile> <sequence output file>',
353 'cmscore' => 'cmscore [-options] <cmfile>',
354 'cmsearch' => 'cmsearch [-options] <cmfile> <sequence file>',
355 'cmstat' => 'cmstat [-options] <cmfile>',
358 # this is a simple lookup for easy validation for passed methods
359 our %LOCAL_PARAMS = map {$_ => 1} qw(program outfile tempfile model);
361 =head2 new
363 Title : new
364 Usage : my $wrapper = Bio::Tools::Run::Infernal->new(@params)
365 Function: creates a new Infernal factory
366 Returns: Bio::Tools::Run::Infernal wrapper
367 Args : list of parameters
369 =cut
371 sub new {
372 my ($class,@args) = @_;
373 my $self = $class->SUPER::new(@args);
375 # these are specific parameters we do not want passed on to set_parameters
376 my ($program, $model, $validate, $q, $o1, $o2) =
377 $self->_rearrange([qw(PROGRAM
378 MODEL_FILE
379 VALIDATE_PARAMETERS
380 QUIET
381 OUTFILE_NAME
382 O)], @args);
384 if ($o1 && $o2) {
385 $self->warn("Only assign to either -outfile_name or -o, not both;");
387 my $out = $o1 || $o2;
388 $self->validate_parameters($validate);
389 $q && $self->quiet($q);
390 $program && $self->program($program);
391 $model && $self->model_file($model);
392 $out ||= '';
393 $self->outfile_name($out);
394 $self->io->_initialize_io();
395 $self->set_parameters(@args);
396 return $self;
399 =head2 program
401 Title : program
402 Usage : $obj->program()
403 Function: Set the program called when run() is used. Synonym of
404 program_name()
405 Returns : String (program name)
406 Args : String (program name)
407 Status : Unstable (may delegate to program_name, which is the interface method)
409 =cut
411 sub program {
412 my $self = shift;
413 return $self->program_name(@_);
416 =head2 program_name
418 Title : program_name
419 Usage : $factory>program_name()
420 Function: holds the program name
421 Returns: string
422 Args : None
424 =cut
426 sub program_name {
427 my ($self) = shift;
428 if (@_) {
429 my $p = shift;
430 $self->throw("Program '$p' not supported")
431 if !exists $INFERNAL_PROGRAM{lc $p};
432 $self->{'_program'} = lc $p;
433 # set up cache of valid parameters
434 while (my ($p, $data) = each %INFERNAL_PARAMS) {
435 my %in_exe = map {$_ => 1} @$data[2..$#{$data}];
436 $self->{valid_params}->{$p} = 1 if exists $in_exe{$self->{'_program'}};
439 return $self->{'_program'};
442 =head2 model_file
444 Title : model_file
445 Usage : $obj->model_file()
446 Function: Set the model file used when run() is called.
447 Returns : String (file location of covariance model)
448 Args : String (file location of covariance model)
450 =cut
452 sub model_file {
453 my $self = shift;
454 return $self->{'_model_file'} = shift if @_;
455 return $self->{'_model_file'};
458 =head2 program_dir
460 Title : program_dir
461 Usage : $factory->program_dir(@params)
462 Function: returns the program directory, obtained from ENV variable.
463 Returns: string
464 Args :
466 =cut
468 sub program_dir {
469 my ($self, $dir) = @_;
470 if ($dir) {
471 $self->{_program_dir} = $dir;
473 return Bio::Root::IO->catfile($ENV{INFERNALDIR}) || '';
476 =head2 version
478 Title : version
479 Usage : $v = $prog->version();
480 Function: Determine the version number of the program (uses cmsearch)
481 Example :
482 Returns : float or undef
483 Args : none
485 =cut
487 sub version {
488 my ($self) = @_;
489 return unless $self->executable;
490 my $exe = $self->executable;
491 my $string = `$exe -h 2>&1`;
492 my $v;
493 if ($string =~ m{Infernal\s([\d.]+)}) {
494 $v = $1;
495 $self->deprecated(-message => "Only Infernal 1.0 and above is supported.",
496 -version => 1.006001) if $v < 1;
498 return $self->{'_progversion'} = $v || undef;
501 =head2 run
503 Title : run
504 Usage : $obj->run($seqFile)
505 Function: Runs Infernal and returns Bio::SearchIO
506 Returns : A Bio::SearchIO
507 Args : A Bio::PrimarySeqI or file name
509 =cut
511 # TODO: update to accept multiple seqs, alignments
512 sub run {
513 my ($self,@seq) = @_;
514 if (ref $seq[0] && $seq[0]->isa("Bio::PrimarySeqI") ){# it is an object
515 my $infile1 = $self->_writeSeqFile(@seq);
516 return $self->_run($infile1);
517 } elsif (ref $seq[0] && $seq[0]->isa("Bio::Align::AlignI") ){ # it is an object
518 my $infile1 = $self->_writeAlignFile(@seq);
519 return $self->_run($infile1);
520 } else {
521 return $self->_run(@seq);
525 =head1 Specific program interface methods
527 =head2 cmsearch
529 Title : cmsearch
530 Usage : $obj->cmsearch($seqFile)
531 Function: Runs Infernal cmsearch and returns Bio::SearchIO
532 Returns : A Bio::SearchIO
533 Args : Bio::PrimarySeqI or file name
535 =cut
537 sub cmsearch {
538 my ($self,@seq) = @_;
539 $self->program('cmsearch');
540 if (ref $seq[0] && $seq[0]->isa("Bio::PrimarySeqI") ){# it is an object
541 my $infile1 = $self->_writeSeqFile(@seq);
542 return $self->_run(-seq_files => [$infile1]);
543 } else {
544 return $self->_run(-seq_files => \@seq);
548 =head2 cmalign
550 Title : cmalign
551 Usage : $obj->cmalign($seqFile)
552 Function: Runs Infernal cmalign and returns Bio::AlignIO
553 Returns : A Bio::AlignIO
554 Args : Bio::PrimarySeqI or file name
556 =cut
558 sub cmalign {
559 my ($self,@seq) = @_;
560 $self->program('cmalign');
561 if (ref $seq[0]) { # it is an object
562 if ($seq[0]->isa("Bio::PrimarySeqI") ){
563 my $infile1 = $self->_writeSeqFile(@seq);
564 return $self->_run(-seq_files => [$infile1]);
565 } elsif ( $seq[0]->isa("Bio::Align::AlignI") ) {
566 if (scalar(@seq) != 2) {
567 $self->throw("")
569 my $infile1 = $self->_writeAlignFile($seq[0]);
570 my $infile2 = $self->_writeAlignFile($seq[1]);
571 return $self->_run(-align_files => [$infile1, $infile2]);
573 } else {
574 # we can maybe add a check for the file extension and try to DTRT
575 my %params = $self->get_parameters('valid');
576 $params{merge} ? return $self->_run(-align_files => \@seq):
577 return $self->_run(-seq_files => \@seq);
578 return $self->_run(-seq_files => \@seq);
582 =head2 cmemit
584 Title : cmemit
585 Usage : $obj->cmemit($modelfile)
586 Function: Runs Infernal cmemit and returns Bio::AlignIO
587 Returns : A Bio::AlignIO
588 Args : None; set model_file() to use a specific model
590 =cut
592 sub cmemit {
593 my ($self) = shift;
594 $self->program('cmemit');
595 return $self->_run(@_);
598 =head2 cmbuild
600 Title : cmbuild
601 Usage : $obj->cmbuild($alignment)
602 Function: Runs Infernal cmbuild and saves covariance model
603 Returns : 1 on success (no object for covariance models)
604 Args : Bio::AlignIO with structural information (such as from Stockholm
605 format source) or alignment file name
607 =cut
609 sub cmbuild {
610 my ($self,@seq) = @_;
611 $self->program('cmbuild');
612 if (ref $seq[0] && $seq[0]->isa("Bio::Align::AlignI") ){# it is an object
613 my $infile1 = $self->_writeAlignFile(@seq);
614 return $self->_run(-align_files => [$infile1]);
615 } else {
616 return $self->_run(-align_files => \@seq);
620 =head2 cmscore
622 Title : cmscore
623 Usage : $obj->cmscore($seq)
624 Function: Runs Infernal cmscore and saves output
625 Returns : None
626 Args : None; set model_file() to use a specific model
628 =cut
630 sub cmscore {
631 my ($self,@seq) = @_;
632 $self->program('cmscore');
633 return $self->_run();
636 =head2 cmcalibrate
638 Title : cmcalibrate
639 Usage : $obj->cmcalibrate('file')
640 Function: Runs Infernal calibrate on specified CM
641 Returns : None
642 Args : None; set model_file() to use a specific model
644 =cut
646 sub cmcalibrate {
647 my ($self,@seq) = @_;
648 $self->program('cmcalibrate');
649 return $self->_run();
652 =head2 cmstat
654 Title : cmstat
655 Usage : $obj->cmstat($seq)
656 Function: Runs Infernal cmstat and saves output
657 Returns : None
658 Args : None; set model_file() to use a specific model
660 =cut
662 sub cmstat {
663 my ($self,@seq) = @_;
664 $self->program('cmstat');
665 return $self->_run();
668 =head1 Bio::ParameterBaseI-specific methods
670 These methods are part of the Bio::ParameterBaseI interface
672 =cut
674 =head2 set_parameters
676 Title : set_parameters
677 Usage : $pobj->set_parameters(%params);
678 Function: sets the parameters listed in the hash or array
679 Returns : None
680 Args : [optional] hash or array of parameter/values. These can optionally
681 be hash or array references
682 Note : This only sets parameters; to set methods use the method name
683 =cut
685 sub set_parameters {
686 my $self = shift;
687 # circumvent any issues arising from passing in refs
688 my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} :
689 (ref($_[0]) eq 'ARRAY') ? @{$_[0]} :
691 # set the parameters passed in, but only ones supported for the program
692 my ($prog, $validate) = ($self->program, $self->validate_parameters);
694 # parameter cleanup
695 %args = map { my $a = $_;
696 $a =~ s{^-}{};
697 lc $a => $args{$_}
698 } sort keys %args;
700 while (my ($key, $val) = each %args) {
701 if (exists $INFERNAL_PARAMS{$key}) {
702 my ($type, $prefix) = @{$INFERNAL_PARAMS{$key}}[0..1];
703 @{$self->{parameters}->{$key}} = ($type, $prefix);
704 unshift @{$self->{parameters}->{$key}},
705 $type eq 'param' ? $val :
706 $type eq 'switch' && $val ? 1 : 0;
707 if ($validate) {
708 my %in_exe = map {$_ => 1} @{$INFERNAL_PARAMS{$key}}[2..$#{$INFERNAL_PARAMS{$key}}];
709 $self->warn("Parameter $key not used for $prog") if !exists $in_exe{$key};
711 } else {
712 $self->warn("Parameter $key does not exist") if ($validate);
717 =head2 reset_parameters
719 Title : reset_parameters
720 Usage : resets values
721 Function: resets parameters to either undef or value in passed hash
722 Returns : none
723 Args : [optional] hash of parameter-value pairs
725 =cut
727 sub reset_parameters {
728 my $self = shift;
729 delete $self->{parameters};
730 if (@_) {
731 $self->set_parameters(@_);
735 =head2 validate_parameters
737 Title : validate_parameters
738 Usage : $pobj->validate_parameters(1);
739 Function: sets a flag indicating whether to validate parameters via
740 set_parameters() or reset_parameters()
741 Returns : Bool
742 Args : [optional] value evaluating to True/False
743 Note : Optionally implemented method; up to the implementation on whether
744 to automatically validate parameters or optionally do so
746 =cut
748 sub validate_parameters {
749 my ($self) = shift;
750 if (@_) {
751 $self->{validate_params} = defined $_[0] ? 1 : 0;
753 return $self->{validate_params};
756 =head2 parameters_changed
758 Title : parameters_changed
759 Usage : if ($pobj->parameters_changed) {...}
760 Function: Returns boolean true (1) if parameters have changed
761 Returns : Boolean (0 or 1)
762 Args : None
763 Note : This module does not run state checks, so this always returns True
765 =cut
767 sub parameters_changed { 1 }
769 =head2 available_parameters
771 Title : available_parameters
772 Usage : @params = $pobj->available_parameters()
773 Function: Returns a list of the available parameters
774 Returns : Array of parameters
775 Args : [optional] name of executable being used; defaults to returning all
776 available parameters
778 =cut
780 sub available_parameters {
781 my ($self, $exec) = @_;
782 my @params;
783 if ($exec) {
784 $self->throw("$exec is not part of the Infernal package") if !exists($INFERNAL_PROGRAM{$exec});
785 for my $p (sort keys %INFERNAL_PARAMS) {
786 if (grep { $exec eq $_ }
787 @{$INFERNAL_PARAMS{$p}}[2..$#{$INFERNAL_PARAMS{$p}}])
789 push @params, $p;
792 } else {
793 @params = (sort keys %INFERNAL_PARAMS, sort keys %LOCAL_PARAMS);
795 return @params;
798 =head2 get_parameters
800 Title : get_parameters
801 Usage : %params = $pobj->get_parameters;
802 Function: Returns list of set key-value pairs, parameter => value
803 Returns : List of key-value pairs
804 Args : [optional]
805 'full' - this option returns everything associated with the parameter
806 as an array ref value; that is, not just the value but also
807 the value, type, and prefix. Default is value only.
808 'valid'- same a 'full', but only returns the grouping valid for the
809 currently set executable
811 =cut
813 sub get_parameters {
814 my ($self, $option) = @_;
815 $option ||= ''; # no option
816 my %params;
817 if (exists $self->{parameters}) {
818 %params = (ref $option eq 'ARRAY') ? (
819 map {$_ => $self->{parameters}{$_}[0]}
820 grep { exists $self->{parameters}{$_} } @$option) :
821 (lc $option eq 'full') ?
822 (%{$self->{parameters}}) :
823 (lc $option eq 'valid') ?
824 (map {$_ => $self->{parameters}{$_}}
825 grep { exists $self->{valid_params}->{$_} } keys %{$self->{parameters}}) :
826 (map {$_ => $self->{parameters}{$_}[0]} keys %{$self->{parameters}});
827 } else {
828 %params = ();
830 return %params;
833 =head1 to_* methods
835 All to_* methods are implementation-specific
837 =cut
839 =head2 to_exe_string
841 Title : to_exe_string
842 Usage : $string = $pobj->to_exe_string;
843 Function: Returns string (command line string in this case)
844 Returns : String
845 Args :
847 =cut
849 sub to_exe_string {
850 my ($self, @passed) = @_;
851 my ($seqs, $aligns) = $self->_rearrange([qw(SEQ_FILES ALIGN_FILES)], @passed);
852 if ($seqs || $aligns) {
853 $self->throw("Seqs or alignments must be an array reference") unless
854 ($seqs && ref($seqs) eq 'ARRAY') || ($aligns && ref($aligns) eq 'ARRAY' );
857 my %args = map {$_ => []} qw(switch param input redirect);
858 my %params = $self->get_parameters('valid');
860 my ($exe, $prog, $model, $outfile) = ($self->executable,
861 $self->program_name,
862 $self->model_file,
863 $self->outfile_name);
865 $self->throw("Executable not found") unless defined($exe);
867 delete $params{o} if exists $params{o};
869 if (!defined($model) && $prog ne 'cmbuild') {
870 $self->throw("model_file() not defined")
873 $outfile ||= '';
875 for my $p (sort keys %params) {
876 if ($params{$p}[0]) {
877 my $val = $params{$p}[1] eq 'param' ? ' '.$params{$p}[0] : '';
878 push @{$args{$params{$p}[1]}}, $params{$p}[2].$p.$val;
882 # TODO: not sure what happens when we pass in multiple seq or alignment
883 # filenames, may need checking
884 if ($prog eq 'cmscore' || $prog eq 'cmstat' || $prog eq 'cmcalibrate') {
885 push @{$args{'redirect'}}, "> $outfile" if $outfile;
886 push @{$args{'input'}}, $model;
887 } elsif ($prog eq 'cmsearch') {
888 if (!defined $seqs) {
889 $self->throw('cmsearch requires a sequence file name');
891 push @{$args{'param'}}, "-o $outfile" if $outfile;
892 push @{$args{'input'}}, ($model, @$seqs);
893 } elsif ($prog eq 'cmalign') {
894 if ($params{'merge'}) {
895 $self->throw('cmalign with --merge option requires two alignment files') if
896 !defined($aligns) || @$aligns < 2;
897 push @{$args{'input'}}, ($model, @$aligns);
898 } else {
899 $self->throw('cmalign requires a sequence file') if
900 !defined $seqs;
901 push @{$args{'input'}}, ($model, @$seqs);
903 push @{$args{'param'}}, "-o $outfile" if $outfile;
904 } elsif ($prog eq 'cmbuild') {
905 $self->throw('cmbuild requires one alignment file') if
906 !defined($aligns);
907 if ($model) {
908 push @{$args{'input'}}, ($model, @$aligns);
909 push @{$args{'redirect'}}, "> $outfile" if $outfile;
910 } else {
911 push @{$args{'input'}}, ($outfile, @$aligns);
913 } elsif ($prog eq 'cmemit') {
914 if (!$outfile) {
915 $self->throw('cmemit requires an outfile_name; tempfile support not implemented yet');
916 } else {
917 push @{$args{'input'}}, ($model, ,$outfile);
921 # quiet!
922 if ($self->quiet && $prog ne 'cmsearch') {
923 if ($prog eq 'cmalign') {
924 push @{$args{switch}}, '-q' if !exists $params{q};
925 } else {
926 push @{$args{redirect}}, '> /dev/null';
930 my $string = "$exe ".join(' ',(@{$args{switch}},
931 @{$args{param}},
932 @{$args{input}},
933 @{$args{redirect}}));
935 $string;
938 ############### PRIVATE ###############
940 #=head2 _run
942 # Title : _run
943 # Usage : $obj->_run()
944 # Function: Internal(not to be used directly)
945 # Returns :
946 # Args :
948 #=cut
951 my %ALLOWED = map {$_ => 1} qw(run cmsearch cmalign cmemit cmbuild
952 cmcalibrate cmstat cmscore);
954 sub _run {
955 my ($self)= shift;
957 my ($prog, $model, $out, $version) = ($self->program,
958 $self->model_file,
959 $self->outfile_name,
960 $self->version);
962 if (my $caller = (caller(1))[3]) {
963 $caller =~ s{.*::(\w+)$}{$1};
964 $self->throw("Calling _run() from disallowed method") unless exists $ALLOWED{$caller};
965 } else {
966 $self->throw("Can't call _run directly");
969 # a model and a file must be defined for all but cmemit; cmemit must have a
970 # file or model defined (using $file if both are defined)
972 # relevant files are passed on to the string builder
973 my $str = $self->to_exe_string(@_);
974 $self->debug("Infernal command: $str\n");
976 my %has = $self->get_parameters('valid');
978 my $obj =
979 ($prog eq 'cmsearch') ? Bio::SearchIO->new(-format => 'infernal',
980 -version => $version,
981 -model => $model) :
982 ($prog eq 'cmalign' ) ? Bio::AlignIO->new(-format => 'stockholm') :
983 ($prog eq 'cmemit' && $has{a}) ? Bio::AlignIO->new(-format => 'stockholm') :
984 ($prog eq 'cmemit') ? Bio::SeqIO->new(-format => 'fasta') :
985 undef;
986 my @args;
987 # file output
988 if ($out) {
989 my $status = system($str);
990 if($status || !-e $out || -z $out ) {
991 my $error = ($!) ? "$! Status: $status" : "Status: $status";
992 $self->throw( "Infernal call crashed: $error \n[command $str]\n");
993 return undef;
995 if ($obj && ref($obj)) {
996 $obj->file($out);
997 @args = (-file => $out);
999 # fh-based (no outfile)
1000 } else {
1001 open(my $fh,"$str |") || $self->throw("Infernal call ($str) crashed: $?\n");
1002 if ($obj && ref($obj)) {
1003 $obj->fh($fh);
1004 @args = (-fh => $fh);
1005 } else {
1006 # dump to debugging
1007 my $io;
1008 while(<$fh>) {$io .= $_;}
1009 close($fh);
1010 $self->debug($io) if $io;
1011 return 1;
1014 $obj->_initialize_io(@args) if $obj && ref($obj);
1015 return $obj || 1;
1020 =head2 _writeSeqFile
1022 Title : _writeSeqFile
1023 Usage : obj->_writeSeqFile($seq)
1024 Function: Internal(not to be used directly)
1025 Returns :
1026 Args :
1028 =cut
1030 sub _writeSeqFile {
1031 my ($self,@seq) = @_;
1032 my ($tfh,$inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
1033 my $in = Bio::SeqIO->new(-fh => $tfh , '-format' => 'Fasta');
1034 foreach my $s(@seq){
1035 $in->write_seq($s);
1037 $in->close();
1038 $in = undef;
1039 close($tfh);
1040 undef $tfh;
1041 return $inputfile;
1044 =head2 _writeAlignFile
1046 Title : _writeAlignFile
1047 Usage : obj->_writeAlignFile($seq)
1048 Function: Internal(not to be used directly)
1049 Returns :
1050 Args :
1052 =cut
1054 sub _writeAlignFile{
1055 my ($self,@align) = @_;
1056 my ($tfh,$inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
1057 my $in = Bio::AlignIO->new('-fh' => $tfh ,
1058 '-format' => 'stockholm');
1059 foreach my $s(@align){
1060 $in->write_aln($s);
1062 $in->close();
1063 $in = undef;
1064 close($tfh);
1065 undef $tfh;
1066 return $inputfile;
1069 # this is a private sub used to regenerate the class data structures,
1070 # dumped to STDOUT
1072 # could probably add in a description field if needed...
1074 sub _dump_params {
1075 my %params;
1076 my %usage;
1077 for my $exec (qw(cmalign cmbuild cmcalibrate cmemit cmscore cmsearch cmstat)) {
1078 my $output = `$exec --devhelp`;
1079 if ($?) {
1080 $output = `$exec -h`;
1082 my @lines = split("\n",$output);
1084 for my $line (@lines) {
1085 next if $line =~ /^#/;
1086 if ($line =~ /^\s*(-{1,2})(\S+)\s+(<\S+>)?/) {
1087 my %data;
1088 ($data{prefix}, my $p, $data{arg}) = ($1, $2, $3 ? 'param' : 'switch');
1089 if (exists $params{$p}) {
1090 if ($data{prefix} ne $params{$p}{prefix}) {
1091 warn("$data{prefix} for $p in $exec doesn't match prefix for same parameter in ".$params{$p}{exec}[-1].":".$params{$p}{prefix});
1093 if ($data{arg} ne $params{$p}{arg}) {
1094 warn("$data{arg} for $p in $exec doesn't match arg for same parameter in ".$params{$p}{exec}[-1].":".$params{$p}{arg});
1098 while (my ($key, $val) = each %data) {
1099 $params{$p}->{$key} = $val;
1101 push @{$params{$p}->{exec}}, $exec;
1102 } elsif ($line =~ /Usage:\s*(.+)$/) {
1103 push @{$usage{$exec}}, $1;
1104 } else {
1105 #print "$line\n";
1110 # generate data structure
1111 print "our %INFERNAL_PARAMS = (\n";
1112 for my $k (sort keys %params) {
1113 printf(" %-17s => [","'$k'");
1114 for my $sub (qw(arg prefix exec)) {
1115 my $str = (ref($params{$k}{$sub}) eq 'ARRAY') ?
1116 "qw(".join(' ', @{$params{$k}{$sub}}).")" :
1117 "'".$params{$k}{$sub}."',";
1118 printf("%-10s", $str);
1120 print "],\n";
1122 print ");\n\n";
1124 # generate usage data structure
1125 print "our %INFERNAL_PROGRAM = (\n";
1126 for my $k (sort keys %usage) {
1127 printf(" %-17s => [\n","'$k'");
1128 print ' '.join(",\n ", map {"'$_'"} @{$usage{$k}})."\n";
1129 print " ],\n";
1131 print ");\n";