speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Hmmer.pm
blob5abebcc5219325e668022e11ab0b094a5566274f
1 # You may distribute this module under the same terms as perl itself
2 # POD documentation - main docs before the code
4 =head1 NAME
6 Bio::Tools::Run::Hmmer - Wrapper for local execution of hmmalign, hmmbuild,
7 hmmcalibrate, hmmemit, hmmpfam, hmmsearch
9 =head1 SYNOPSIS
11 # run hmmsearch (similar for hmmpfam)
12 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
14 # Pass the factory a Bio::Seq object or a file name, returns a Bio::SearchIO
15 my $searchio = $factory->hmmsearch($seq);
17 while (my $result = $searchio->next_result){
18 while(my $hit = $result->next_hit){
19 while (my $hsp = $hit->next_hsp){
20 print join("\t", ( $result->query_name,
21 $hsp->query->start,
22 $hsp->query->end,
23 $hit->name,
24 $hsp->hit->start,
25 $hsp->hit->end,
26 $hsp->score,
27 $hsp->evalue,
28 $hsp->seq_str,
29 )), "\n";
34 # build a hmm using hmmbuild
35 my $aio = Bio::AlignIO->new(-file => "protein.msf", -format => 'msf');
36 my $aln = $aio->next_aln;
37 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
38 $factory->hmmbuild($aln);
40 # calibrate the hmm
41 $factory->calibrate();
43 # emit a sequence stream from the hmm
44 my $seqio = $factory->hmmemit();
46 # align sequences to the hmm
47 my $alnio = $factory->hmmalign(@seqs);
49 =head1 DESCRIPTION
51 Wrapper module for Sean Eddy's HMMER suite of program to allow running of
52 hmmalign, hmmbuild, hmmcalibrate, hmmemit, hmmpfam and hmmsearch. Binaries are
53 available at http://hmmer.janelia.org/
55 You can pass most options understood by the command-line programs to new(), or
56 set the options by calling methods with the same name as the argument. In both
57 instances, case sensitivity matters.
59 Additional methods are hmm() to specifiy the hmm file (needed for all HMMER
60 programs) which you would normally set in the call to new().
62 The HMMER programs must either be in your path, or you must set the environment
63 variable HMMERDIR to point to their location.
65 =head1 FEEDBACK
67 =head2 Mailing Lists
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to one
71 of the Bioperl mailing lists. Your participation is much appreciated.
73 bioperl-l@bioperl.org - General discussion
74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
76 =head2 Support
78 Please direct usage questions or support issues to the mailing list:
80 I<bioperl-l@bioperl.org>
82 rather than to the module maintainer directly. Many experienced and
83 reponsive experts will be able look at the problem and quickly
84 address it. Please include a thorough description of the problem
85 with code and data examples if at all possible.
87 =head2 Reporting Bugs
89 Report bugs to the Bioperl bug tracking system to help us keep track
90 the bugs and their resolution. Bug reports can be submitted via the
91 web:
93 http://redmine.open-bio.org/projects/bioperl/
95 =head1 AUTHOR - Shawn Hoon
97 Email: shawnh-at-gmx.net
99 =head1 CONTRIBUTORS
101 Shawn Hoon shawnh-at-gmx.net
102 Jason Stajich jason -at- bioperl -dot- org
103 Scott Markel scott -at- scitegic -dot com
104 Sendu Bala bix@sendu.me.uk
106 =head1 APPENDIX
108 The rest of the documentation details each of the object
109 methods. Internal methods are usually preceded with a _
111 =cut
113 package Bio::Tools::Run::Hmmer;
115 use strict;
117 use Bio::SeqIO;
118 use Bio::SearchIO;
119 use Bio::AlignIO;
121 use base qw(Bio::Tools::Run::WrapperBase);
123 our $DefaultFormat = 'msf';
124 our $DefaultReadMethod = 'hmmer';
125 our %ALL = (quiet => 'q', o => 'outfile');
126 our @ALIGN_PARAMS = qw(mapali outformat withali o);
127 our @ALIGN_SWITCHES = qw(m oneline q);
128 our @BUILD_PARAMS = qw(n archpri cfile gapmax idlevel null pam pamwgt
129 pbswitch prior swentry swexit o);
130 our @BUILD_SWITCHES = qw(f g s A F amino binary fast hand noeff nucleic
131 wblosum wgsc wme wnone wpb wvoronoi);
132 our @CALIBRATE_PARAMS = qw(fixed histfile mean num sd seed cpu);
133 our @CALIBRATE_SWITCHES = qw();
134 our @EMIT_PARAMS = qw(n seed o);
135 our @EMIT_SWITCHES = qw(c q);
136 our @PFAM_PARAMS = qw(A E T Z domE domT informat cpu);
137 our @PFAM_SWITCHES = qw(n acc cut_ga cut_gc cut_nc forward null2 xnu);
138 our @SEARCH_PARAMS = @PFAM_PARAMS;
139 our @SEARCH_SWITCHES = @PFAM_SWITCHES;
140 our %OTHER = (_READMETHOD => '_readmethod',
141 program_name => [qw(PROGRAM program)],
142 hmm => [qw(HMM db DB)]);
144 # just to be explicit
145 our @UNSUPPORTED = qw(h verbose a compat pvm);
148 =head2 new
150 Title : new
151 Usage : $HMMER->new(@params)
152 Function: Creates a new HMMER factory
153 Returns : Bio::Tools::Run::HMMER
154 Args : -hmm => filename # the hmm, used by all program types; if not set
155 # here, must be set with hmm() method prior to
156 # running anything
157 -_READMETHOD => 'hmmer' (default) || 'hmmer_pull' # the parsing
158 # module to use for
159 # hmmpfam/hmmsearch
161 Any option supported by a Hmmer program, where switches are given
162 a true value, eg. -q => 1, EXCEPT for the following which are handled
163 internally/ incompatible: h verbose a compat pvm
165 WARNING: the default sequence format passed to hmmpfam is msf. If
166 you are using a different format, you need to pass it with informat.
167 e.g.
168 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm',
169 -informat => 'fasta');
171 -q is synonymous with -quiet
172 -o is synonymous with -outfile
174 # may be specified here, allowing run() to be used, or
175 # it can be omitted and the corresponding method (eg.
176 # hmmalign()) used later.
177 -program => hmmalign|hmmbuild|hmmcalibrate|hmmemit|hmmpfam|hmmsearch
179 =cut
181 sub new {
182 my($class, @args) = @_;
183 my $self = $class->SUPER::new(@args);
185 $self->_set_from_args(\@args, -methods => {(map { $_ => $ALL{$_} } keys %ALL),
186 (map { $_ => $OTHER{$_} } keys %OTHER),
187 (map { $_ => $_ }
188 (@ALIGN_PARAMS,
189 @ALIGN_SWITCHES,
190 @BUILD_PARAMS,
191 @BUILD_SWITCHES,
192 @CALIBRATE_PARAMS,
193 @CALIBRATE_SWITCHES,
194 @EMIT_PARAMS,
195 @EMIT_SWITCHES,
196 @PFAM_PARAMS,
197 @PFAM_SWITCHES,
198 @SEARCH_PARAMS,
199 @SEARCH_SWITCHES))},
200 -create => 1,
201 -case_sensitive => 1);
203 $self->informat || $self->informat($DefaultFormat);
204 $self->_READMETHOD || $self->_READMETHOD($DefaultReadMethod);
206 return $self;
209 =head2 run
211 Title : run
212 Usage : $obj->run($seqFile)
213 Function: Runs one of the Hmmer programs, according to the current setting of
214 program() (as typically set during new(-program => 'name')).
215 Returns : A Bio::SearchIO, Bio::AlignIO, Bio::SeqIO or boolean depending on
216 the program being run (see method corresponding to program name for
217 details).
218 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
220 =cut
222 sub run {
223 my $self = shift;
224 my $program = lc($self->program_name || $self->throw("The program must already be specified"));
225 $self->can($program) || $self->throw("'$program' wasn't a valid program");
226 return $self->$program(@_);
229 =head2 hmmalign
231 Title : hmmalign
232 Usage : $obj->hmmalign()
233 Function: Runs hmmalign
234 Returns : A Bio::AlignIO
235 Args : list of Bio::SeqI OR Bio::Align::AlignI OR filename of file with
236 sequences or an alignment
238 =cut
240 sub hmmalign {
241 my $self = shift;
242 $self->program_name('hmmalign');
243 my $input = $self->_setinput(@_);
245 unless (defined $self->o()) {
246 $self->q(1);
248 if (! $self->outformat) {
249 $self->outformat($DefaultFormat);
252 return $self->_run($input);
255 =head2 hmmbuild
257 Title : hmmbuild
258 Usage : $obj->hmmbuild()
259 Function: Runs hmmbuild, outputting an hmm to the file currently set by method
260 hmm() or db(), or failing that, o() or outfile(), or failing that, to
261 a temp location.
262 Returns : true on success
263 Args : Bio::Align::AlignI OR filename of file with an alignment
265 =cut
267 sub hmmbuild {
268 my $self = shift;
269 $self->program_name('hmmbuild');
270 my $input = $self->_setinput(@_);
272 unless (defined $self->hmm()) {
273 $self->hmm($self->o() || $self->io->tempfile(-dir => $self->tempdir));
276 return $self->_run($input);
279 =head2 hmmcalibrate
281 Title : hmmcalibrate
282 Usage : $obj->hmmcalibrate()
283 Function: Runs hmmcalibrate
284 Returns : true on success
285 Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
286 optionally supply an hmm filename to set hmm() and run
288 =cut
290 sub hmmcalibrate {
291 my ($self, $hmm) = @_;
292 $self->program_name('hmmcalibrate');
293 $self->hmm($hmm) if $hmm;
294 $self->hmm || $self->throw("hmm() must be set first");
295 return $self->_run();
298 =head2 hmmemit
300 Title : hmmemit
301 Usage : $obj->hmmemit()
302 Function: Runs hmmemit
303 Returns : A Bio::SeqIO
304 Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
305 optionally supply an hmm filename to set hmm() and run
307 =cut
309 sub hmmemit {
310 my ($self, $hmm) = @_;
311 $self->program_name('hmmemit');
312 $self->hmm($hmm) if $hmm;
313 $self->hmm || $self->throw("hmm() must be set first");
315 unless (defined $self->o()) {
316 $self->q(1);
319 return $self->_run();
322 =head2 hmmpfam
324 Title : hmmpfam
325 Usage : $obj->hmmpfam()
326 Function: Runs hmmpfam
327 Returns : A Bio::SearchIO
328 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
330 =cut
332 sub hmmpfam {
333 my $self = shift;
334 $self->program_name('hmmpfam');
335 my $input = $self->_setinput(@_);
336 return $self->_run($input);
339 =head2 hmmsearch
341 Title : hmmsearch
342 Usage : $obj->hmmsearch()
343 Function: Runs hmmsearch
344 Returns : A Bio::SearchIO
345 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
347 =cut
349 sub hmmsearch {
350 my $self = shift;
351 $self->program_name('hmmsearch');
352 my $input = $self->_setinput(@_);
353 return $self->_run($input);
356 =head2 _setinput
358 Title : _setinput
359 Usage : $obj->_setinput()
360 Function: Internal(not to be used directly)
361 Returns : filename
362 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
364 =cut
366 sub _setinput {
367 my ($self, @things) = @_;
368 @things || $self->throw("At least one input is required");
370 my $infile;
371 if (ref $things[0] && $things[0]->isa("Bio::PrimarySeqI") ){# it is an object
372 $infile = $self->_writeSeqFile(@things);
374 elsif(ref $things[0] && $things[0]->isa("Bio::Align::AlignI")){
375 $infile = $self->_writeAlignFile(@things);
377 elsif (-e $things[0]) {
378 $infile = $things[0];
380 else {
381 $self->throw("Unknown kind of input '@things'");
384 return $infile;
387 =head2 _run
389 Title : _run
390 Usage : $obj->_run()
391 Function: Internal(not to be used directly)
392 Returns : Bio::SearchIO
393 Args : file name
395 =cut
397 sub _run {
398 my ($self, $file) = @_;
400 my $str = $self->executable;
401 $str .= $self->_setparams;
402 $str .= ' '.$file if $file;
403 $self->debug("HMMER command = $str");
405 my $progname = $self->program_name;
407 my @in;
408 my @verbose = (-verbose => $self->verbose);
409 if ($progname =~ /align|build|emit/) {
410 my $outfile = $self->o;
411 if ($outfile || $progname eq 'hmmbuild') {
412 $str .= " > /dev/null" if $self->quiet;
414 if ($progname eq 'hmmbuild') {
415 my $status = system($str);
416 return $status ? 0 : 1;
418 else {
419 system($str) && $self->throw("HMMER call ($str) crashed: $?\n");
420 @in = (-file => $outfile);
423 else {
424 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
425 @in = (-fh => $fh);
428 elsif ($progname =~ /pfam|search/i) {
429 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
431 return Bio::SearchIO->new(-fh => $fh,
432 @verbose,
433 -format => $self->_READMETHOD);
436 if ($progname eq 'hmmalign') {
437 return Bio::AlignIO->new(@in,
438 @verbose,
439 -format => $self->outformat);
441 elsif ($progname eq 'hmmemit') {
442 return Bio::SeqIO->new(@in,
443 @verbose,
444 -format => 'fasta');
446 elsif ($progname =~ /calibrate/) {
447 $str .= " > /dev/null 2> /dev/null" if $self->quiet;
448 my $status = system($str);
449 return $status ? 0 : 1;
453 =head2 _setparams
455 Title : _setparams
456 Usage : Internal function, not to be called directly
457 Function: creates a string of params to be used in the command string
458 Returns : string of params
459 Args : none
461 =cut
463 sub _setparams {
464 my $self = shift;
466 my @execparams;
467 my @execswitches;
468 SWITCH: for ($self->program_name) {
469 /align/ && do { @execparams = @ALIGN_PARAMS;
470 @execswitches = @ALIGN_SWITCHES;
471 last SWITCH; };
472 /build/ && do { @execparams = @BUILD_PARAMS;
473 @execswitches = @BUILD_SWITCHES;
474 last SWITCH; };
475 /calibrate/ && do { @execparams = @CALIBRATE_PARAMS;
476 @execswitches = @CALIBRATE_SWITCHES;
477 last SWITCH; };
478 /emit/ && do { @execparams = @EMIT_PARAMS;
479 @execswitches = @EMIT_SWITCHES;
480 last SWITCH; };
481 /pfam/ && do { @execparams = @PFAM_PARAMS;
482 @execswitches = @PFAM_SWITCHES;
483 last SWITCH; };
484 /search/ && do { @execparams = @SEARCH_PARAMS;
485 @execswitches = @SEARCH_SWITCHES;
486 last SWITCH; };
489 my $param_string = $self->SUPER::_setparams(-params => \@execparams,
490 -switches => \@execswitches,
491 -mixed_dash => 1);
493 my $hmm = $self->hmm || $self->throw("Need to specify either HMM file or Database");
494 $param_string .= ' '.$hmm;
496 return $param_string;
499 =head2 program_name
501 Title : program_name
502 Usage : $factory>program_name()
503 Function: holds the program name
504 Returns : string
505 Args : none
507 =cut
509 sub program_name {
510 my $self = shift;
511 if (@_) {
512 $self->{program_name} = shift;
514 # hack so that when program_name changes, so does executable()
515 delete $self->{'_pathtoexe'};
517 return $self->{program_name} || '';
520 =head2 program_dir
522 Title : program_dir
523 Usage : $factory->program_dir(@params)
524 Function: returns the program directory, obtained from ENV variable.
525 Returns : string
526 Args : none
528 =cut
530 sub program_dir {
531 return $ENV{HMMERDIR} if $ENV{HMMERDIR};
534 =head2 _writeSeqFile
536 Title : _writeSeqFile
537 Usage : obj->_writeSeqFile($seq)
538 Function: Internal(not to be used directly)
539 Returns : filename
540 Args : list of Bio::SeqI
542 =cut
544 sub _writeSeqFile {
545 my ($self, @seq) = @_;
546 my ($tfh, $inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
547 $self->informat('fasta');
548 my $out = Bio::SeqIO->new(-fh => $tfh , '-format' => 'fasta');
549 foreach my $s (@seq) {
550 $out->write_seq($s);
552 $out->close();
553 $out = undef;
554 close($tfh);
555 undef $tfh;
556 return $inputfile;
559 =head2 _writeAlignFile
561 Title : _writeAlignFile
562 Usage : obj->_writeAlignFile($seq)
563 Function: Internal(not to be used directly)
564 Returns : filename
565 Args : list of Bio::Align::AlignI
567 =cut
569 sub _writeAlignFile{
570 my ($self, @align) = @_;
571 my ($tfh, $inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
572 my $out = Bio::AlignIO->new('-fh' => $tfh, '-format' => $self->informat);
573 foreach my $a (@align) {
574 $out->write_aln($a);
576 $out->close();
577 $out = undef;
578 close($tfh);
579 undef $tfh;
580 return $inputfile;