1 # You may distribute this module under the same terms as perl itself
2 # POD documentation - main docs before the code
6 Bio::Tools::Run::Hmmer - Wrapper for local execution of hmmalign, hmmbuild,
7 hmmcalibrate, hmmemit, hmmpfam, hmmsearch
11 # run hmmsearch (similar for hmmpfam)
12 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
14 # Pass the factory a Bio::Seq object or a file name, returns a Bio::SearchIO
15 my $searchio = $factory->hmmsearch($seq);
17 while (my $result = $searchio->next_result){
18 while(my $hit = $result->next_hit){
19 while (my $hsp = $hit->next_hsp){
20 print join("\t", ( $result->query_name,
34 # build a hmm using hmmbuild
35 my $aio = Bio::AlignIO->new(-file => "protein.msf", -format => 'msf');
36 my $aln = $aio->next_aln;
37 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
38 $factory->hmmbuild($aln);
41 $factory->calibrate();
43 # emit a sequence stream from the hmm
44 my $seqio = $factory->hmmemit();
46 # align sequences to the hmm
47 my $alnio = $factory->hmmalign(@seqs);
51 Wrapper module for Sean Eddy's HMMER suite of program to allow running of
52 hmmalign, hmmbuild, hmmcalibrate, hmmemit, hmmpfam and hmmsearch. Binaries are
53 available at http://hmmer.janelia.org/
55 You can pass most options understood by the command-line programs to new(), or
56 set the options by calling methods with the same name as the argument. In both
57 instances, case sensitivity matters.
59 Additional methods are hmm() to specifiy the hmm file (needed for all HMMER
60 programs) which you would normally set in the call to new().
62 The HMMER programs must either be in your path, or you must set the environment
63 variable HMMERDIR to point to their location.
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to one
71 of the Bioperl mailing lists. Your participation is much appreciated.
73 bioperl-l@bioperl.org - General discussion
74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 Please direct usage questions or support issues to the mailing list:
80 I<bioperl-l@bioperl.org>
82 rather than to the module maintainer directly. Many experienced and
83 reponsive experts will be able look at the problem and quickly
84 address it. Please include a thorough description of the problem
85 with code and data examples if at all possible.
89 Report bugs to the Bioperl bug tracking system to help us keep track
90 the bugs and their resolution. Bug reports can be submitted via the
93 http://redmine.open-bio.org/projects/bioperl/
95 =head1 AUTHOR - Shawn Hoon
97 Email: shawnh-at-gmx.net
101 Shawn Hoon shawnh-at-gmx.net
102 Jason Stajich jason -at- bioperl -dot- org
103 Scott Markel scott -at- scitegic -dot com
104 Sendu Bala bix@sendu.me.uk
108 The rest of the documentation details each of the object
109 methods. Internal methods are usually preceded with a _
113 package Bio
::Tools
::Run
::Hmmer
;
121 use base
qw(Bio::Tools::Run::WrapperBase);
123 our $DefaultFormat = 'msf';
124 our $DefaultReadMethod = 'hmmer';
125 our %ALL = (quiet
=> 'q', o
=> 'outfile');
126 our @ALIGN_PARAMS = qw(mapali outformat withali o);
127 our @ALIGN_SWITCHES = qw(m oneline q);
128 our @BUILD_PARAMS = qw(n archpri cfile gapmax idlevel null pam pamwgt
129 pbswitch prior swentry swexit o);
130 our @BUILD_SWITCHES = qw(f g s A F amino binary fast hand noeff nucleic
131 wblosum wgsc wme wnone wpb wvoronoi);
132 our @CALIBRATE_PARAMS = qw(fixed histfile mean num sd seed cpu);
133 our @CALIBRATE_SWITCHES = qw();
134 our @EMIT_PARAMS = qw(n seed o);
135 our @EMIT_SWITCHES = qw(c q);
136 our @PFAM_PARAMS = qw(A E T Z domE domT informat cpu);
137 our @PFAM_SWITCHES = qw(n acc cut_ga cut_gc cut_nc forward null2 xnu);
138 our @SEARCH_PARAMS = @PFAM_PARAMS;
139 our @SEARCH_SWITCHES = @PFAM_SWITCHES;
140 our %OTHER = (_READMETHOD
=> '_readmethod',
141 program_name
=> [qw(PROGRAM program)],
142 hmm
=> [qw(HMM db DB)]);
144 # just to be explicit
145 our @UNSUPPORTED = qw(h verbose a compat pvm);
151 Usage : $HMMER->new(@params)
152 Function: Creates a new HMMER factory
153 Returns : Bio::Tools::Run::HMMER
154 Args : -hmm => filename # the hmm, used by all program types; if not set
155 # here, must be set with hmm() method prior to
157 -_READMETHOD => 'hmmer' (default) || 'hmmer_pull' # the parsing
161 Any option supported by a Hmmer program, where switches are given
162 a true value, eg. -q => 1, EXCEPT for the following which are handled
163 internally/ incompatible: h verbose a compat pvm
165 WARNING: the default sequence format passed to hmmpfam is msf. If
166 you are using a different format, you need to pass it with informat.
168 my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm',
169 -informat => 'fasta');
171 -q is synonymous with -quiet
172 -o is synonymous with -outfile
174 # may be specified here, allowing run() to be used, or
175 # it can be omitted and the corresponding method (eg.
176 # hmmalign()) used later.
177 -program => hmmalign|hmmbuild|hmmcalibrate|hmmemit|hmmpfam|hmmsearch
182 my($class, @args) = @_;
183 my $self = $class->SUPER::new
(@args);
185 $self->_set_from_args(\
@args, -methods
=> {(map { $_ => $ALL{$_} } keys %ALL),
186 (map { $_ => $OTHER{$_} } keys %OTHER),
201 -case_sensitive
=> 1);
203 $self->informat || $self->informat($DefaultFormat);
204 $self->_READMETHOD || $self->_READMETHOD($DefaultReadMethod);
212 Usage : $obj->run($seqFile)
213 Function: Runs one of the Hmmer programs, according to the current setting of
214 program() (as typically set during new(-program => 'name')).
215 Returns : A Bio::SearchIO, Bio::AlignIO, Bio::SeqIO or boolean depending on
216 the program being run (see method corresponding to program name for
218 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
224 my $program = lc($self->program_name || $self->throw("The program must already be specified"));
225 $self->can($program) || $self->throw("'$program' wasn't a valid program");
226 return $self->$program(@_);
232 Usage : $obj->hmmalign()
233 Function: Runs hmmalign
234 Returns : A Bio::AlignIO
235 Args : list of Bio::SeqI OR Bio::Align::AlignI OR filename of file with
236 sequences or an alignment
242 $self->program_name('hmmalign');
243 my $input = $self->_setinput(@_);
245 unless (defined $self->o()) {
248 if (! $self->outformat) {
249 $self->outformat($DefaultFormat);
252 return $self->_run($input);
258 Usage : $obj->hmmbuild()
259 Function: Runs hmmbuild, outputting an hmm to the file currently set by method
260 hmm() or db(), or failing that, o() or outfile(), or failing that, to
262 Returns : true on success
263 Args : Bio::Align::AlignI OR filename of file with an alignment
269 $self->program_name('hmmbuild');
270 my $input = $self->_setinput(@_);
272 unless (defined $self->hmm()) {
273 $self->hmm($self->o() || $self->io->tempfile(-dir
=> $self->tempdir));
276 return $self->_run($input);
282 Usage : $obj->hmmcalibrate()
283 Function: Runs hmmcalibrate
284 Returns : true on success
285 Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
286 optionally supply an hmm filename to set hmm() and run
291 my ($self, $hmm) = @_;
292 $self->program_name('hmmcalibrate');
293 $self->hmm($hmm) if $hmm;
294 $self->hmm || $self->throw("hmm() must be set first");
295 return $self->_run();
301 Usage : $obj->hmmemit()
302 Function: Runs hmmemit
303 Returns : A Bio::SeqIO
304 Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
305 optionally supply an hmm filename to set hmm() and run
310 my ($self, $hmm) = @_;
311 $self->program_name('hmmemit');
312 $self->hmm($hmm) if $hmm;
313 $self->hmm || $self->throw("hmm() must be set first");
315 unless (defined $self->o()) {
319 return $self->_run();
325 Usage : $obj->hmmpfam()
326 Function: Runs hmmpfam
327 Returns : A Bio::SearchIO
328 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
334 $self->program_name('hmmpfam');
335 my $input = $self->_setinput(@_);
336 return $self->_run($input);
342 Usage : $obj->hmmsearch()
343 Function: Runs hmmsearch
344 Returns : A Bio::SearchIO
345 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
351 $self->program_name('hmmsearch');
352 my $input = $self->_setinput(@_);
353 return $self->_run($input);
359 Usage : $obj->_setinput()
360 Function: Internal(not to be used directly)
362 Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
367 my ($self, @things) = @_;
368 @things || $self->throw("At least one input is required");
371 if (ref $things[0] && $things[0]->isa("Bio::PrimarySeqI") ){# it is an object
372 $infile = $self->_writeSeqFile(@things);
374 elsif(ref $things[0] && $things[0]->isa("Bio::Align::AlignI")){
375 $infile = $self->_writeAlignFile(@things);
377 elsif (-e
$things[0]) {
378 $infile = $things[0];
381 $self->throw("Unknown kind of input '@things'");
391 Function: Internal(not to be used directly)
392 Returns : Bio::SearchIO
398 my ($self, $file) = @_;
400 my $str = $self->executable;
401 $str .= $self->_setparams;
402 $str .= ' '.$file if $file;
403 $self->debug("HMMER command = $str");
405 my $progname = $self->program_name;
408 my @verbose = (-verbose
=> $self->verbose);
409 if ($progname =~ /align|build|emit/) {
410 my $outfile = $self->o;
411 if ($outfile || $progname eq 'hmmbuild') {
412 $str .= " > /dev/null" if $self->quiet;
414 if ($progname eq 'hmmbuild') {
415 my $status = system($str);
416 return $status ?
0 : 1;
419 system($str) && $self->throw("HMMER call ($str) crashed: $?\n");
420 @in = (-file
=> $outfile);
424 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
428 elsif ($progname =~ /pfam|search/i) {
429 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
431 return Bio
::SearchIO
->new(-fh
=> $fh,
433 -format
=> $self->_READMETHOD);
436 if ($progname eq 'hmmalign') {
437 return Bio
::AlignIO
->new(@in,
439 -format
=> $self->outformat);
441 elsif ($progname eq 'hmmemit') {
442 return Bio
::SeqIO
->new(@in,
446 elsif ($progname =~ /calibrate/) {
447 $str .= " > /dev/null 2> /dev/null" if $self->quiet;
448 my $status = system($str);
449 return $status ?
0 : 1;
456 Usage : Internal function, not to be called directly
457 Function: creates a string of params to be used in the command string
458 Returns : string of params
468 SWITCH
: for ($self->program_name) {
469 /align/ && do { @execparams = @ALIGN_PARAMS;
470 @execswitches = @ALIGN_SWITCHES;
472 /build/ && do { @execparams = @BUILD_PARAMS;
473 @execswitches = @BUILD_SWITCHES;
475 /calibrate/ && do { @execparams = @CALIBRATE_PARAMS;
476 @execswitches = @CALIBRATE_SWITCHES;
478 /emit/ && do { @execparams = @EMIT_PARAMS;
479 @execswitches = @EMIT_SWITCHES;
481 /pfam/ && do { @execparams = @PFAM_PARAMS;
482 @execswitches = @PFAM_SWITCHES;
484 /search/ && do { @execparams = @SEARCH_PARAMS;
485 @execswitches = @SEARCH_SWITCHES;
489 my $param_string = $self->SUPER::_setparams
(-params
=> \
@execparams,
490 -switches
=> \
@execswitches,
493 my $hmm = $self->hmm || $self->throw("Need to specify either HMM file or Database");
494 $param_string .= ' '.$hmm;
496 return $param_string;
502 Usage : $factory>program_name()
503 Function: holds the program name
512 $self->{program_name
} = shift;
514 # hack so that when program_name changes, so does executable()
515 delete $self->{'_pathtoexe'};
517 return $self->{program_name
} || '';
523 Usage : $factory->program_dir(@params)
524 Function: returns the program directory, obtained from ENV variable.
531 return $ENV{HMMERDIR
} if $ENV{HMMERDIR
};
536 Title : _writeSeqFile
537 Usage : obj->_writeSeqFile($seq)
538 Function: Internal(not to be used directly)
540 Args : list of Bio::SeqI
545 my ($self, @seq) = @_;
546 my ($tfh, $inputfile) = $self->io->tempfile(-dir
=>$self->tempdir);
547 $self->informat('fasta');
548 my $out = Bio
::SeqIO
->new(-fh
=> $tfh , '-format' => 'fasta');
549 foreach my $s (@seq) {
559 =head2 _writeAlignFile
561 Title : _writeAlignFile
562 Usage : obj->_writeAlignFile($seq)
563 Function: Internal(not to be used directly)
565 Args : list of Bio::Align::AlignI
570 my ($self, @align) = @_;
571 my ($tfh, $inputfile) = $self->io->tempfile(-dir
=>$self->tempdir);
572 my $out = Bio
::AlignIO
->new('-fh' => $tfh, '-format' => $self->informat);
573 foreach my $a (@align) {