speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / ERPIN.pm
blob779d486cd0f69613f54c8429b4ae2257ebe8ba12
1 # $Id$
3 # BioPerl module for Bio::Tools::Run::ERPIN
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chris Fields
9 # Copyright Chris Fields
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::Run::ERPIN - Wrapper for local execution of the ERPIN suite of
18 programs.
20 =head1 SYNOPSIS
22 #run
24 my @params = (
25 trset => 'BL.erpin',
26 region => [1, 10],
27 # Set up search strategy this way...
28 strategy => [ 'umask' => [1, 2],
29 'umask' => [1, 2, 3, 4],
30 'umask' => [1, 2, 3, 4, 5, 6],
31 'nomask',
32 'cutoff' => [0, 10, 15, 20]
34 # or use a simple string...
35 #strategy => '-umask 4 -add 5 -nomask -cutoff 0 10 15',
36 pcw => 100
39 my $factory = Bio::Tools::Run::ERPIN->new(-program =>'erpin',
40 @params);
42 # Pass the factory a Bio::Seq object or a file name
43 # Returns a Bio::SearchIO object
45 #my $search = $factory->run("B_sub.fas");
46 my $search = $factory->run($seq);
47 my @feat;
48 while (my $result = $search->next_result){
49 while(my $hit = $result->next_hit){
50 while (my $hsp = $hit->next_hsp){
51 print join("\t", ( $result->query_name,
52 $hit->name,
53 $hsp->hit->start,
54 $hsp->hit->end,
55 $hsp->meta,
56 $hsp->score,
57 )), "\n";
62 =head1 DESCRIPTION
64 =cut
66 =head1 FEEDBACK
68 =head2 Mailing Lists
70 User feedback is an integral part of the evolution of this and other
71 Bioperl modules. Send your comments and suggestions preferably to one
72 of the Bioperl mailing lists. Your participation is much appreciated.
74 bioperl-l@bioperl.org - General discussion
75 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77 =head2 Support
79 Please direct usage questions or support issues to the mailing list:
81 I<bioperl-l@bioperl.org>
83 rather than to the module maintainer directly. Many experienced and
84 responsive experts will be able to look at the problem and quickly
85 address it. Please include a thorough description of the problem
86 with code and data examples if at all possible.
88 =head2 Reporting Bugs
90 Report bugs to the Bioperl bug tracking system to help us keep track
91 of the bugs and their resolution. Bug reports can be submitted via the
92 web:
94 http://redmine.open-bio.org/projects/bioperl/
96 =head1 AUTHOR - Chris Fields
98 Email: cjfields-at-uiuc-dot-edu
100 =head1 CONTRIBUTORS
102 cjfields-at-uiuc-dot-edu
104 =head1 APPENDIX
106 The rest of the documentation details each of the object
107 methods. Internal methods are usually preceded with a _
109 =cut
111 package Bio::Tools::Run::ERPIN;
113 use strict;
114 use Bio::SeqIO;
115 use Bio::Root::Root;
116 use Bio::SearchIO;
117 use Bio::AlignIO;
118 use Bio::Tools::Run::WrapperBase;
120 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
# Names of the programs in the ERPIN suite that may be given as 'program'.
# _setparams() throws for any program name that is not a key here.
# TODO (from the original author): move the parameters to each program and
# use this table for _set_params.
my %ERPIN_PROGS = map { $_ => 1 } qw(
    cfgs     erpin  frandseq   mstat  sview  tstrip
    epnstat  ev     mhistview  pview  tstat  tview
);

# Parameters that are emitted as bare command-line switches ("-name") with
# no value attached; everything else is emitted as "-name value".
my %ERPIN_SWITCHES = map { $_ => 1 } qw(
    dmp smp fwd rev fwd+rev long short mute
    warnings globstat locstat unifstat Eon Eoff hist chrono
);

# order is important here: _setparams() walks this list in order when it
# builds the command line, and new() creates one accessor per entry.
my @ERPIN_PARAMS = qw(
    program model file strategy dmp smp fwd rev fwd+rev long
    short mute warnings globstat locstat unifstat Eon Eoff hist seq1 nseq bgn
    len logzero tablen chrono pcw hpcw spcw sumf tset
);
=head2 new

 Title   : new
 Usage   : my $wrapper = Bio::Tools::Run::ERPIN->new(@params)
 Function: creates a new ERPIN factory
 Returns : Bio::Tools::Run::ERPIN
 Args    : list of parameters
           -tempfile        => set tempfile flag (default 0)
           -outfile_name    => set file to send output to (default none)
           Any name listed in @ERPIN_PARAMS (program, tset, strategy, ...)
           is also accepted; an accessor of the same name is created for it.

=cut

sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);
    my ($out, $tf) = $self->_rearrange([qw(OUTFILE_NAME TEMPFILE)], @args);
    $self->io->_initialize_io();

    if ($tf && !$out) {
        # Tempfile requested and no explicit output file: reserve a temp
        # file name now and use it as the output file.
        my ($tfh, $outfile) = $self->io->tempfile(-dir => $self->tempdir());
        close($tfh);
        undef $tfh;
        $self->outfile_name($outfile);
    }
    else {
        # An explicit output file wins over the tempfile flag; with neither
        # given the output file name is the empty string.
        $out ||= '';
        $self->outfile_name($out);
    }
    $self->tempfile($tf) if $tf;

    # Create (-create => 1) and populate one accessor per ERPIN parameter.
    $self->_set_from_args(\@args,
                          -methods => [@ERPIN_PARAMS],
                          -create  => 1
                         );
    return $self;
}
=head2 program_name

 Title   : program_name
 Usage   : $factory->program_name()
 Function: holds the program name
 Returns : string
 Args    : [OPTIONAL] any arguments are passed through unchanged to
           program(), whose return value is returned

=cut

sub program_name {
    my ($self, @args) = @_;
    # Thin alias: the name is actually stored by the program() accessor.
    return $self->program(@args);
}
=head2 program_dir

 Title   : program_dir
 Usage   : $factory->program_dir(@params)
 Function: returns the program directory, obtained from ENV variable.
 Returns : string (the value of $ENV{ERPINDIR} passed through
           Bio::Root::IO->catfile), or undef when ERPINDIR is unset or false
 Args    : none are used

=cut

sub program_dir {
    return Bio::Root::IO->catfile($ENV{ERPINDIR}) if $ENV{ERPINDIR};
    # Explicit single-value return: the original fell off the end of the sub
    # and so also yielded one (false) value, even in list context.
    return undef;
}
=head2 version

 Title   : version
 Usage   : $v = $prog->version();
 Function: Determine the version number of the program
 Example :
 Returns : the version number parsed from the output of 'erpin -h';
           the raw output if no version number could be parsed from it;
           undef if no executable is available
 Args    : none

=cut

sub version {
    my ($self) = @_;
    my $exe = $self->executable;
    # 'return undef' (not a bare return) on purpose: _run() calls this in the
    # middle of a named-argument list, where an empty list would shift the
    # following key/value pairs.
    return undef unless $exe;

    # When the wrapped program is erpin itself, query the executable that was
    # actually resolved (it may live in ERPINDIR rather than on PATH). For the
    # other programs of the suite keep asking 'erpin' found on PATH.
    my $cmd = (($self->program_name || '') eq 'erpin') ? qq{"$exe"} : 'erpin';
    my $string = `$cmd -h 2>&1`;

    my $v;
    if (defined $string && $string =~ m{Version\s([\d.]+)}) {
        $v = $1;
    }
    return $self->{'_progversion'} = $v || $string;
}
=head2 run

 Title   : run
 Usage   : $obj->run($seqFile)
 Function: Runs ERPIN programs and returns Bio::SearchIO
 Returns : whatever _run() returns for the given input
 Args    : Must pass Bio::PrimarySeqI's or file names
 Throws  : if called without arguments, or if the first argument is an
           unblessed reference

=cut

sub run {
    my ($self, @seq) = @_;
    $self->throw("Must define 'db', pass a file name, or a list of Bio::PrimarySeqI objects")
        if (!@seq);

    require Scalar::Util;
    my $first = $seq[0];

    # A plain (unblessed) reference is neither a file name nor a sequence
    # object; calling ->isa on it would die with an unhelpful Perl error.
    if (ref $first && !Scalar::Util::blessed($first)) {
        $self->throw("Must pass a file name or Bio::PrimarySeqI objects, not an unblessed "
                     . ref($first) . " reference");
    }

    # Only the first argument decides which path is taken.
    if (ref $first && $first->isa("Bio::PrimarySeqI")) {
        # Sequence objects: write all of them to one temporary FASTA file.
        my $infile1 = $self->_writeSeqFile(@seq);
        return $self->_run($infile1);
    }

    # Anything else is passed through untouched. NOTE(review): _run() reads
    # its second argument as a program override, so a second file name given
    # here would be used as the command to execute -- confirm intent.
    return $self->_run(@seq);
}
=head2 tempfile

 Title   : tempfile
 Usage   : $obj->tempfile(1)
 Function: Set tempfile flag.  When set, writes output to a tempfile; this
           is overridden by outfile_name() if set
 Returns : Boolean setting (or undef if not set)
 Args    : [OPTIONAL] Boolean

=cut

sub tempfile {
    my ($self, @flag) = @_;
    # Any argument, including a false or undef one, replaces the stored flag.
    $self->{'_tempfile'} = $flag[0] if @flag;
    return $self->{'_tempfile'};
}
=head2 _run

 Title   : _run
 Usage   : $obj->_run()
 Function: Internal(not to be used directly)
           Builds the command line, executes it, and wires the output
           (file or pipe) into a Bio::SearchIO object when the program is
           'erpin'.
 Returns : nothing if no executable is available;
           a Bio::SearchIO object for 'erpin';
           1 for every other program
 Args    : input file name, [OPTIONAL] program to run instead of the
           resolved executable

=cut

sub _run {
    my ($self, $file, $prog) = @_;
    return unless $self->executable;
    $self->io->_io_cleanup();

    my $str      = $prog || $self->executable;
    my $progname = $self->program_name;
    my $outfile  = $self->outfile_name;

    my $param_str = $self->_setparams($file);
    $str .= " $param_str";
    $self->debug("ERPIN command: $str\n");

    # erpin      => SearchIO object
    # all others => output goes to the outfile/tempfile, or is passed to
    #               debug() when there is no output file
    my $obj = ($progname eq 'erpin')
        ? Bio::SearchIO->new(-verbose  => $self->verbose,
                             -format   => "erpin",
                             # scalar(): keep the key/value pairs aligned
                             # even if version() were to return an empty list
                             -version  => scalar($self->version),
                             -database => $file
                            )
        : undef;

    my @args;
    if ($outfile) {
        # file-based: the command line already redirects into $outfile
        local $SIG{CHLD} = 'DEFAULT';
        my $status = system($str);
        if ($status || !-e $outfile || -z $outfile) {
            my $error = ($!) ? "$! Status: $status" : "Status: $status";
            $self->throw("ERPIN call crashed: $error \n[command $str]\n");
        }
        if (ref $obj) {
            $obj->file($outfile);
            @args = (-file => $outfile);
        }
    }
    else {
        # fh-based: read the program's output through a pipe; $! (not $?)
        # carries the reason when the pipe itself cannot be opened
        open(my $fh, '-|', $str)
            or $self->throw("ERPIN call ($str) crashed: $!\n");
        if (ref $obj) {
            $obj->fh($fh);
            @args = (-fh => $fh);
        }
        else {
            # dump to debugging
            my $io = '';
            while (my $line = <$fh>) {
                $io .= $line;
            }
            close($fh);
            $self->debug($io);
            return 1;
        }
    }

    # initialize SearchIO's IO layer here, since file/fh are only known
    # after the object has been constructed
    $obj->_initialize_io(@args) if ref $obj;
    return $obj || 1;
}
=head2 _setparams

 Title   : _setparams
 Usage   : Internal function, not to be called directly
 Function: creates a string of params to be used in the command string
 Example :
 Returns : string of params
 Args    : name of the input file
 Throws  : if the program name is unknown, or if either the training set
           (tset) or the search strategy (strategy) is undefined

=cut

sub _setparams {
    my ($self, $file) = @_;
    my $progname = $self->program_name;
    # small sanity check
    $self->throw("Unknown program: " . (defined $progname ? $progname : '(undefined)'))
        if (!defined $progname || !exists $ERPIN_PROGS{$progname});

    my $outfile = ($self->outfile_name) ? ' > '.$self->outfile_name : '';
    my $tset = $self->tset;
    my $st   = $self->strategy;

    # Validate before anything is interpolated into the command string.
    $self->throw("Must have both a training set and search strategy defined!")
        if (!defined($tset) || !defined($st));

    # The strategy may be given as an array ref, e.g.
    #   [ 'umask' => [1, 2], 'nomask', 'cutoff' => [0, 10, 15] ]
    # which is flattened to "-umask 1 2 -nomask -cutoff 0 10 15".
    # A plain string is used as is.
    if (ref($st) eq 'ARRAY') {
        my @parts;
        for my $item (@$st) {
            if (ref($item) eq 'ARRAY') {
                push @parts, @$item;                            # option values
            }
            else {
                push @parts, ($item =~ /^-/ ? $item : "-$item"); # option name
            }
        }
        $st = join ' ', @parts;
    }

    my $param_string = join " ", ($tset, $file, $st);
    $self->debug("String : $param_string\n");

    my @params;
    foreach my $attr (@ERPIN_PARAMS) {
        next if $attr eq 'program' || $attr eq 'tset' || $attr eq 'strategy';
        my $value = $self->$attr();
        if ($attr eq 'file') {
            # NOTE(review): $file is already part of $param_string above, so
            # it ends up on the command line twice -- behavior kept as is;
            # confirm whether this is intended.
            push @params, $file;
            next;
        }
        next unless defined $value;
        # Switches are emitted whenever their value is defined (even if
        # false); all other parameters are emitted as "-name value".
        push @params, exists $ERPIN_SWITCHES{$attr} ? "-$attr" : "-$attr $value";
    }

    $param_string .= ' '.join ' ', @params;
    $param_string .= $outfile if $outfile;

    return $param_string;
}
=head2 _writeSeqFile

 Title   : _writeSeqFile
 Usage   : obj->_writeSeqFile($seq)
 Function: Internal(not to be used directly)
           Writes the passed Seq objects in FASTA format to a tempfile,
           to be used as input for the program
 Returns : name of the tempfile
 Args    : list of sequence objects

=cut

sub _writeSeqFile {
    my ($self, @seq) = @_;
    my ($tfh, $inputfile) = $self->io->tempfile(-dir => $self->tempdir);

    # This is an output stream: sequences are written to the temp file.
    my $out = Bio::SeqIO->new(-fh => $tfh, '-format' => 'fasta');
    foreach my $s (@seq) {
        $out->write_seq($s);
    }

    # Close the stream and the handle before returning, so the file is
    # complete when the external program reads it.
    $out->close();
    $out = undef;
    close($tfh);
    undef $tfh;
    return $inputfile;
}