3 # BioPerl module for Bio::Tools::Run::Genemark
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
9 # Copyright Bioperl, Mark Johnson <mjohnson-at-watson-dot-wustl-dot-edu>
11 # Special thanks to Chris Fields, Sendu Bala
13 # You may distribute this module under the same terms as perl itself
15 # POD documentation - main docs before the code
19 Bio::Tools::Run::Genemark - Wrapper for local execution of the GeneMark
24 # GeneMark.hmm (prokaryotic)
26 Bio::Tools::Run::Genemark->new('-program' => 'gmhmmp',
29 # Pass the factory Bio::Seq objects
30 # returns a Bio::Tools::Genemark object
31 my $genemark = $factory->run($seq);
35 Wrapper module for the GeneMark family of programs. Should work with
36 all flavors of GeneMark.hmm at least, although only the prokaryotic
37 version has been tested.
39 General information about GeneMark is available at
40 L<http://exon.gatech.edu/GeneMark/>.
42 Contact information for licensing inquiries is available at:
43 L<http://opal.biology.gatech.edu/GeneMark/contact.html>
45 Note that GeneMark.hmm (prokaryotic at least) will only process the
46 first sequence in a fasta file (if you run() more than one sequence
47 at a time, only the first will be processed).
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation is much appreciated.
57 bioperl-l@bioperl.org - General discussion
58 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 Please direct usage questions or support issues to the mailing list:
64 I<bioperl-l@bioperl.org>
66 rather than to the module maintainer directly. Many experienced and
67 responsive experts will be able to look at the problem and quickly
68 address it. Please include a thorough description of the problem
69 with code and data examples if at all possible.
73 Report bugs to the Bioperl bug tracking system to help us keep track
74 of the bugs and their resolution. Bug reports can be submitted via the
77 http://redmine.open-bio.org/projects/bioperl/
79 =head1 AUTHOR - Mark Johnson
81 Email: mjohnson-at-watson-dot-wustl-dot-edu
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
90 package Bio
::Tools
::Run
::Genemark
;
97 use Bio
::Tools
::Run
::WrapperBase
;
98 use Bio
::Tools
::Genemark
;
100 use IPC
::Run
; # Should be okay on WIN32 (See IPC::Run Docs)
102 use base
qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
104 our @params = (qw(program));
105 our @genemark_params = (qw(i m p));
106 our @genemark_switches = (qw(a n r));
111 Usage : $factory->program_name()
112 Function: gets/sets the program name
120 my ($self, $val) = @_;
122 $self->program($val) if $val;
124 return $self->program();
131 Usage : $factory->program_dir()
132 Function: gets/sets the program dir
140 my ($self, $val) = @_;
142 $self->{'_program_dir'} = $val if $val;
144 return $self->{'_program_dir'};
151 Usage : $genemark->new(@params)
152 Function: creates a new Genemark factory
153 Returns: Bio::Tools::Run::Genemark
160 my ($class,@args) = @_;
161 my $self = $class->SUPER::new
(@args);
163 $self->io->_initialize_io();
165 $self->_set_from_args(
175 unless (defined($self->program())) {
176 $self->throw('Must specify program');
179 unless (defined($self->m())) {
180 $self->throw('Must specify model');
190 Usage : $obj->run($seq) or $obj->run(@seq)
191 Function: Runs Genemark
192 Returns : A Bio::Tools::Genemark object
193 Args : An array of Bio::PrimarySeqI objects
199 my ($self, @seq) = @_;
202 $self->throw("Must supply at least one Bio::PrimarySeqI");
205 foreach my $seq (@seq) {
207 unless ($seq->isa('Bio::PrimarySeqI')) {
208 $self->throw("Object does not implement Bio::PrimarySeqI");
213 my $program_name = $self->program_name();
214 my $file_name = $self->_write_seq_file(@seq);
216 # GeneMark.hmm (prokaryotic version, anyway) ignores sequences after the
217 # first in a fasta file
218 if ($program_name eq 'gmhmmp') {
220 $self->warn("Program $program_name processes one sequence at a time");
224 return $self->_run($file_name, $seq[0]->display_id());
232 Function: Internal(not to be used directly)
233 Returns : An instance of Bio::Tools::Genemark
234 Args : file name, sequence identifier (optional)
240 my ($self, $seq_file_name, $seq_id) = @_;
242 my ($temp_fh, $temp_file_name) =
243 $self->io->tempfile(-dir
=>$self->tempdir());
246 # IPC::Run wants an array where the first element is the executable
249 split(/\s+/, $self->_setparams()),
255 my $cmd = join(' ', @cmd);
256 $self->debug("GeneMark Command = $cmd");
258 # Run the program via IPC::Run so:
259 # 1) The console doesn't get cluttered up with the program's STDERR/STDOUT
260 # 2) We don't have to embed STDERR/STDOUT redirection in $cmd
261 # 3) We don't have to deal with signal handling (IPC::Run should take care
262 # of everything automagically.
263 my ($program_stdout, $program_stderr);
271 ) || die $CHILD_ERROR;
276 $self->throw("GeneMark call crashed: $EVAL_ERROR");
279 ## The prokaryotic version of GeneMark.HMM, at least, returns
280 ## 0 (success) even when the license has expired.
281 if ((-z
$temp_file_name) && ($program_stderr =~ /license period has ended/i)) {
282 $self->throw($program_stderr);
284 elsif ($program_stderr =~ /\d+ days remaining/i) {
285 $self->warn($program_stderr);
288 $self->debug(join("\n", 'GeneMark STDOUT:', $program_stdout)) if $program_stdout;
289 $self->debug(join("\n", 'GeneMark STDERR:', $program_stderr)) if $program_stderr;
291 return Bio
::Tools
::Genemark
->new(-file
=> $temp_file_name,
292 -seqname
=> $seq_id);
300 my $param_string = $self->SUPER::_setparams
(
301 -params
=> [@genemark_params],
302 -switches
=> [@genemark_switches],
306 # Kill leading and trailing whitespace
307 $param_string =~ s/^\s+//g;
308 $param_string =~ s/\s+$//g;
310 return $param_string;
314 =head2 _write_seq_file
316 Title : _write_seq_file
317 Usage : obj->_write_seq_file($seq) or obj->_write_seq_file(@seq)
318 Function: Internal(not to be used directly)
319 Returns : Name of a temp file containing the sequences in Fasta format
320 Args : One or more Bio::PrimarySeqI objects
324 sub _write_seq_file
{
326 my ($self, @seq) = @_;
328 my ($fh, $file_name) = $self->io->tempfile(-dir
=>$self->tempdir());
329 my $out = Bio
::SeqIO
->new(-fh
=> $fh , '-format' => 'Fasta');
331 foreach my $seq (@seq){
332 $out->write_seq($seq);