speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Genemark.pm
blob8d1c33a4ca546d6f37ed72ce387497d8d953305e
1 # $Id$
3 # BioPerl module for Bio::Tools::Run::Genemark
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Bioperl
9 # Copyright Bioperl, Mark Johnson <mjohnson-at-watson-dot-wustl-dot-edu>
11 # Special thanks to Chris Fields, Sendu Bala
13 # You may distribute this module under the same terms as perl itself
15 # POD documentation - main docs before the code
17 =head1 NAME
19 Bio::Tools::Run::Genemark - Wrapper for local execution of the GeneMark
20 family of programs.
22 =head1 SYNOPSIS
24 # GeneMark.hmm (prokaryotic)
25 my $factory =
26 Bio::Tools::Run::Genemark->new('-program' => 'gmhmmp',
27 '-m' => 'model.icm');
29 # Pass the factory Bio::Seq objects
30 # returns a Bio::Tools::Genemark object
31 my $genemark = $factory->run($seq);
33 =head1 DESCRIPTION
35 Wrapper module for the GeneMark family of programs. Should work with
36 all flavors of GeneMark.hmm at least, although only the prokaryotic
37 version has been tested.
39 General information about GeneMark is available at
40 L<http://exon.gatech.edu/GeneMark/>.
42 Contact information for licensing inquiries is available at:
43 L<http://opal.biology.gatech.edu/GeneMark/contact.html>
45 Note that GeneMark.hmm (prokaryotic at least) will only process the
46 first sequence in a fasta file (if you run() more than one sequence
47 at a time, only the first will be processed).
49 =head1 FEEDBACK
51 =head2 Mailing Lists
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation is much appreciated.
57 bioperl-l@bioperl.org - General discussion
58 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
60 =head2 Support
62 Please direct usage questions or support issues to the mailing list:
64 I<bioperl-l@bioperl.org>
66 rather than to the module maintainer directly. Many experienced and
67 reponsive experts will be able look at the problem and quickly
68 address it. Please include a thorough description of the problem
69 with code and data examples if at all possible.
71 =head2 Reporting Bugs
73 Report bugs to the Bioperl bug tracking system to help us keep track
74 the bugs and their resolution. Bug reports can be submitted via the
75 web:
77 http://redmine.open-bio.org/projects/bioperl/
79 =head1 AUTHOR - Mark Johnson
81 Email: mjohnson-at-watson-dot-wustl-dot-edu
83 =head1 APPENDIX
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
88 =cut
90 package Bio::Tools::Run::Genemark;
92 use strict;
93 use warnings;
95 use Bio::SeqIO;
96 use Bio::Root::Root;
97 use Bio::Tools::Run::WrapperBase;
98 use Bio::Tools::Genemark;
99 use English;
100 use IPC::Run; # Should be okay on WIN32 (See IPC::Run Docs)
102 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
104 our @params = (qw(program));
105 our @genemark_params = (qw(i m p));
106 our @genemark_switches = (qw(a n r));
108 =head2 program_name
110 Title : program_name
111 Usage : $factory>program_name()
112 Function: gets/sets the program name
113 Returns: string
114 Args : string
116 =cut
118 sub program_name {
120 my ($self, $val) = @_;
122 $self->program($val) if $val;
124 return $self->program();
128 =head2 program_dir
130 Title : program_dir
131 Usage : $factory->program_dir()
132 Function: gets/sets the program dir
133 Returns: string
134 Args : string
136 =cut
138 sub program_dir {
140 my ($self, $val) = @_;
142 $self->{'_program_dir'} = $val if $val;
144 return $self->{'_program_dir'};
148 =head2 new
150 Title : new
151 Usage : $genemark->new(@params)
152 Function: creates a new Genemark factory
153 Returns: Bio::Tools::Run::Genemark
154 Args :
156 =cut
158 sub new {
160 my ($class,@args) = @_;
161 my $self = $class->SUPER::new(@args);
163 $self->io->_initialize_io();
165 $self->_set_from_args(
166 \@args,
167 -methods => [
168 @params,
169 @genemark_params,
170 @genemark_switches,
172 -create => 1,
175 unless (defined($self->program())) {
176 $self->throw('Must specify program');
179 unless (defined($self->m())) {
180 $self->throw('Must specify model');
183 return $self;
187 =head2 run
189 Title : run
190 Usage : $obj->run($seq_file)
191 Function: Runs Genemark
192 Returns : A Bio::Tools::Genemark object
193 Args : An array of Bio::PrimarySeqI objects
195 =cut
197 sub run {
199 my ($self, @seq) = @_;
201 unless (@seq) {
202 $self->throw("Must supply at least one Bio::PrimarySeqI");
205 foreach my $seq (@seq) {
207 unless ($seq->isa('Bio::PrimarySeqI')) {
208 $self->throw("Object does not implement Bio::PrimarySeqI");
213 my $program_name = $self->program_name();
214 my $file_name = $self->_write_seq_file(@seq);
216 # GeneMark.hmm (prokaryotic version, anyway) ignores sequences after the
217 # first in a fasta file
218 if ($program_name eq 'gmhmmp') {
219 if (@seq > 1) {
220 $self->warn("Program $program_name processes one sequence at a time");
224 return $self->_run($file_name, $seq[0]->display_id());
228 =head2 _run
230 Title : _run
231 Usage : $obj->_run()
232 Function: Internal(not to be used directly)
233 Returns : An instance of Bio::Tools::Genemark
234 Args : file name, sequence identifier (optional)
236 =cut
238 sub _run {
240 my ($self, $seq_file_name, $seq_id) = @_;
242 my ($temp_fh, $temp_file_name) =
243 $self->io->tempfile(-dir=>$self->tempdir());
244 close($temp_fh);
246 # IPC::Run wants an array where the first element is the executable
247 my @cmd = (
248 $self->executable(),
249 split(/\s+/, $self->_setparams()),
250 '-o',
251 $temp_file_name,
252 $seq_file_name,
255 my $cmd = join(' ', @cmd);
256 $self->debug("GeneMark Command = $cmd");
258 # Run the program via IPC::Run so:
259 # 1) The console doesn't get cluttered up with the program's STDERR/STDOUT
260 # 2) We don't have to embed STDERR/STDOUT redirection in $cmd
261 # 3) We don't have to deal with signal handling (IPC::Run should take care
262 # of everything automagically.
263 my ($program_stdout, $program_stderr);
265 eval {
266 IPC::Run::run(
267 \@cmd,
268 \undef,
269 \$program_stdout,
270 \$program_stderr,
271 ) || die $CHILD_ERROR;
275 if ($EVAL_ERROR) {
276 $self->throw("GeneMark call crashed: $EVAL_ERROR");
279 ## The prokaryotic version of GeneMark.HMM, at least, returns
280 ## 0 (success) even when the license has expired.
281 if ((-z $temp_file_name) && ($program_stderr =~ /license period has ended/i)) {
282 $self->throw($program_stderr);
284 elsif ($program_stderr =~ /\d+ days remaining/i) {
285 $self->warn($program_stderr);
288 $self->debug(join("\n", 'GeneMark STDOUT:', $program_stdout)) if $program_stdout;
289 $self->debug(join("\n", 'GeneMark STDERR:', $program_stderr)) if $program_stderr;
291 return Bio::Tools::Genemark->new(-file => $temp_file_name,
292 -seqname => $seq_id);
296 sub _setparams {
298 my ($self) = @_;
300 my $param_string = $self->SUPER::_setparams(
301 -params => [@genemark_params],
302 -switches => [@genemark_switches],
303 -dash => 1,
306 # Kill leading and trailing whitespace
307 $param_string =~ s/^\s+//g;
308 $param_string =~ s/\s+$//g;
310 return $param_string;
314 =head2 _write_seq_file
316 Title : _write_seq_file
317 Usage : obj->_write_seq_file($seq) or obj->_write_seq_file(@seq)
318 Function: Internal(not to be used directly)
319 Returns : Name of a temp file containing program output
320 Args : One or more Bio::PrimarySeqI objects
322 =cut
324 sub _write_seq_file {
326 my ($self, @seq) = @_;
328 my ($fh, $file_name) = $self->io->tempfile(-dir=>$self->tempdir());
329 my $out = Bio::SeqIO->new(-fh => $fh , '-format' => 'Fasta');
331 foreach my $seq (@seq){
332 $out->write_seq($seq);
335 close($fh);
336 $out->close();
338 return $file_name;