rework tempfile handling to explicitly close a filehanle - assigning it to undef...
[bioperl-run.git] / Bio / Tools / Run / Pseudowise.pm
blobda4d8fc2257b87802ba573b1a4fd72ed07f8bb04
1 # BioPerl module for Bio::Tools::Run::Pseudowise
3 # Cared for by
5 # Copyright Kiran
7 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
10 =head1 NAME
12 Bio::Tools::Run::Pseudowise - Object for prediting pseudogenes in a
13 given sequence given a protein and a cdna sequence
15 =head1 SYNOPSIS
17 # Build a pseudowise alignment factory
18 my $factory = Bio::Tools::Run::Pseudowise->new();
20 # Pass the factory 3 Bio:SeqI objects (in the order of query peptide and cdna and
21 target_genomic)
22 #@genes is an array of GenericSeqFeature objects
23 my @genes = $factory->run($seq1, $seq2, $seq3);
25 =head1 DESCRIPTION
27 Pseudowise is a pseudogene predition program developed by Ewan Birney
28 http://www.sanger.ac.uk/software/wise2.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to one
36 of the Bioperl mailing lists. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bio.perl.org/MailList.html - About the mailing lists
41 =head2 Reporting Bugs
43 Report bugs to the Bioperl bug tracking system to help us keep track
44 the bugs and their resolution. Bug reports can be submitted via
45 email or the web:
47 bioperl-bugs@bio.perl.org
48 http://bio.perl.org/bioperl-bugs/
50 =head1 AUTHOR - Kiran
52 Email kiran@fugu-sg.org
54 =head1 APPENDIX
56 The rest of the documentation details each of the object
57 methods. Internal methods are usually preceded with a _
59 =cut
62 package Bio::Tools::Run::Pseudowise;
63 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
64 @PSEUDOWISE_SWITCHES @PSEUDOWISE_PARAMS
65 @OTHER_SWITCHES %OK_FIELD);
66 use strict;
67 use Bio::SeqIO;
68 use Bio::SeqFeature::Generic;
69 use Bio::Root::Root;
70 use Bio::Tools::Run::WrapperBase;
72 @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
74 # You will need to enable pseudowise to find the pseudowise program. This
75 # can be done in (at least) two ways:
77 # 1. define an environmental variable WISEDIR
78 # export WISEDIR =/usr/local/share/wise2.2.0
79 # where the wise2.2.20 package is installed
81 # 2. include a definition of an environmental variable WISEDIR in
82 # every script that will use DBA.pm
83 # $ENV{WISEDIR} = '/usr/local/share/wise2.2.20';
85 BEGIN {
86 @PSEUDOWISE_PARAMS = qw(SPLICE_MAX_COLLAR SPLICE_MIN_COLLAR
87 SPLICE_SCORE_OFFSET
88 GENESTATS NOMATCHN PARAMS KBYTE
89 DYMEM DYDEBUG PALDEBUG
90 ERRORLOG);
92 @PSEUDOWISE_SWITCHES = qw(HELP SILENT QUIET ERROROFFSTD);
94 # Authorize attribute fields
95 foreach my $attr ( @PSEUDOWISE_PARAMS, @PSEUDOWISE_SWITCHES,
96 @OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
100 =head2 program_name
102 Title : program_name
103 Usage : $factory>program_name()
104 Function: holds the program name
105 Returns: string
106 Args : None
108 =cut
110 sub program_name {
111 return 'pseudowise';
114 =head2 program_dir
116 Title : program_dir
117 Usage : $factory->program_dir(@params)
118 Function: returns the program directory, obtiained from ENV variable.
119 Returns: string
120 Args :
122 =cut
124 sub program_dir {
125 return Bio::Root::IO->catfile($ENV{WISEDIR},"/src/bin") if $ENV{WISEDIR};
128 sub new {
129 my ($class, @args) = @_;
130 my $self = $class->SUPER::new(@args);
132 my ($attr, $value);
133 while (@args) {
134 $attr = shift @args;
135 $value = shift @args;
136 next if( $attr =~ /^-/ ); # don't want named parameters
137 if ($attr =~/'PROGRAM'/i) {
138 $self->executable($value);
139 next;
141 $self->$attr($value);
143 return $self;
147 sub AUTOLOAD {
148 my $self = shift;
149 my $attr = $AUTOLOAD;
150 $attr =~ s/.*:://;
151 $attr = uc $attr;
152 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
153 $self->{$attr} = shift if @_;
154 return $self->{$attr};
157 =head2 version
159 Title : version
160 Usage : exit if $prog->version() < 1.8
161 Function: Determine the version number of the program
162 Example :
163 Returns : float or undef
164 Args : none
166 =cut
168 sub version {
169 my ($self) = @_;
171 return undef unless $self->executable;
172 my $string = `pseudowise -- ` ;
173 $string =~ /\(([\d.]+)\)/;
174 return $1 || undef;
178 =head2 predict_genes
180 Title : predict_genes
181 Usage : DEPRECATED. Use $factory->run instead
182 Function: Predict pseudogenes
183 Returns : An array of Bio::Seqfeature::Generic objects
184 Args : Name of a file containing a set of 3 fasta sequences in the order of
185 peptide, cdna and genomic sequences
186 or else 3 Bio::Seq objects.
188 Throws an exception if argument is not either a string (eg a
189 filename) or 3 Bio::Seq objects. If
190 arguments are strings, throws exception if file corresponding to string
191 name can not be found.
193 =cut
195 sub predict_genes {
196 return shift->run(@_);
199 =head2 run
201 Title : run
202 Usage : my @feats = $factory->run($seq1, $seq2, $seq3);
203 Function: Executes pseudogene binary
204 Returns : An array of Bio::Seqfeature::Generic objects
205 Args : Name of a file containing a set of 3 fasta sequences in the order of
206 peptide, cdna and genomic sequences
207 or else 3 Bio::Seq objects.
209 Throws an exception if argument is not either a string (eg a
210 filename) or 3 Bio::Seq objects. If
211 arguments are strings, throws exception if file corresponding to string
212 name can not be found.
214 =cut
216 sub run{
218 my ($self,$seq1, $seq2, $seq3)=@_;
219 my ($attr, $value, $switch);
221 # Create input file pointer
222 my ($infile1,$infile2,$infile3)= $self->_setinput($seq1, $seq2, $seq3);
223 if (!($infile1 && $infile2 && $infile3)) {$self->throw("Bad input data (sequences need an id ) ");}
225 my $prot_name = $seq1->display_id;
226 # run pseudowise
227 my @feats = $self->_run($prot_name, $infile1,$infile2,$infile3);
228 return @feats;
232 =head2 _run
234 Title : _run
235 Usage : Internal function, not to be called directly
236 Function: makes actual system call to a pseudowise program
237 Example :
238 Returns : nothing; pseudowise output is written to a
239 temporary file $TMPOUTFILE
240 Args : Name of a files containing 3 sequences in the order of peptide, cdna and genomic
242 =cut
244 sub _run {
245 my ($self,$prot_name, $infile1,$infile2,$infile3) = @_;
246 my $instring;
247 $self->debug( "Program ".$self->executable."\n");
248 my ($tfh1,$outfile) = $self->io->tempfile(-dir=>$self->tempdir);
249 my $paramstring = $self->_setparams;
250 my $commandstring = $self->executable." $paramstring $infile1 $infile2 $infile3 > $outfile";
251 if($self->silent || $self->quiet || !($self->vebose)){
252 $commandstring .= ' 2> /dev/null';
254 $self->debug( "pseudowise command = $commandstring");
255 my $status = system($commandstring);
256 $self->throw( "Pseudowise call ($commandstring) crashed: $? \n") unless $status==0;
258 #parse the outpur and return a Bio::Seqfeature array
259 my $genes = $self->_parse_results($prot_name,$outfile);
260 close($tfh1);
261 undef $tfh1;
262 return @{$genes};
265 =head2 _parse_results
267 Title : __parse_results
268 Usage : Internal function, not to be called directly
269 Function: Parses pseudowise output
270 Example :
271 Returns : an reference to an array of Seqfeatures
272 Args : the name of the output file
274 =cut
276 sub _parse_results {
277 my ($self,$prot_name,$outfile) = @_;
278 $outfile||$self->throw("No outfile specified");
279 my $filehandle;
280 if (ref ($outfile) !~ /GLOB/)
282 open (PSEUDOWISE, "<".$outfile)
283 or $self->throw ("Couldn't open file ".$outfile.": $!\n");
284 $filehandle = \*PSEUDOWISE;
286 else
288 $filehandle = $outfile;
291 my @genes;
292 #The big parsing loop - parses exons and predicted peptides
294 my $count = 0;
295 my $score;
296 while (<$filehandle>)
298 if(/Ka/i) {
299 my @line_elements = split;
300 my $no = scalar(@line_elements);
301 $score=$line_elements[$no-1];
304 my $gene;
305 if (/Gene/i)
307 $gene = new Bio::SeqFeature::Generic (
308 -primary => 'pseudogene',
309 -source => 'pseudowise');
310 push @genes, $gene;
311 $count = $count + 1;
313 while(<$filehandle>) {
314 my @gene_elements = split;
315 my $no = scalar(@gene_elements);
316 if ((/Gene/i) && $no == 3) {
317 my @element = split;
318 my $no = scalar(@element);
319 my $gene_start = $element[1];
320 my $gene_end = $element[2];
321 $gene->start($gene_start);
322 $gene->end($gene_end);
323 $gene->score($score);
325 elsif (/Exon/i) {
326 my @element = split;
327 my $no = scalar(@element);
328 my $exon_start = $element[1];
329 my $exon_end = $element[2];
330 my $exon_phase = $element[4];
331 #my $seqname = $prot_name.'_'.$count;
332 my $seqname = $prot_name;
333 my $exon = new Bio::SeqFeature::Generic (
334 -seq_id=> $seqname,
335 -start => $exon_start,
336 -end => $exon_end,
337 -primary => 'exon',
338 -source => 'pseudowise',
339 -frame => $exon_phase);
340 $exon->score($score);
341 $gene->add_sub_SeqFeature($exon);
343 elsif ((/Gene/i) && $no != 3) {
344 $gene = new Bio::SeqFeature::Generic (
345 -primary => 'pseudogene',
346 -source => 'pseudowise');
347 $gene->score($score);
348 push @genes, $gene;
349 $count = $count + 1;
355 return \@genes;
358 =head2 _setinput()
360 Title : _setinput
361 Usage : Internal function, not to be called directly
362 Function: Create input files for pseudowise program
363 Example :
364 Returns : name of file containing dba data input
365 Args : Seq objects in the order of query protein and cdna and target genomic sequence
367 =cut
369 sub _setinput {
370 my ($self, $seq1, $seq2, $seq3) = @_;
371 my ($tfh1,$tfh2,$tfh3,$outfile1,$outfile2,$outfile3);
373 if(!($seq1->isa("Bio::PrimarySeqI") && $seq2->isa("Bio::PrimarySeqI")&& $seq2->isa("Bio::PrimarySeqI")))
374 { $self->throw("One or more of the sequences are nor Bio::PrimarySeqI objects\n"); }
375 my $tempdir = $self->tempdir();
376 ($tfh1,$outfile1) = $self->io->tempfile(-dir=>$tempdir);
377 ($tfh2,$outfile2) = $self->io->tempfile(-dir=>$tempdir);
378 ($tfh3,$outfile3) = $self->io->tempfile(-dir=>$tempdir);
380 my $out1 = Bio::SeqIO->new(-fh => $tfh1 , '-format' => 'Fasta');
381 my $out2 = Bio::SeqIO->new(-fh => $tfh2, '-format' => 'Fasta');
382 my $out3 = Bio::SeqIO->new(-fh => $tfh3, '-format' => 'Fasta');
384 $out1->write_seq($seq1);
385 $out2->write_seq($seq2);
386 $out3->write_seq($seq3);
387 $self->_query_pep_seq($seq1);
388 $self->_query_cdna_seq($seq2);
389 $self->_subject_dna_seq($seq3);
391 close($tfh1);
392 close($tfh2);
393 close($tfh3);
394 undef ($tfh1);
395 undef ($tfh2);
396 undef ($tfh3);
398 return $outfile1,$outfile2,$outfile3;
401 sub _setparams {
402 my ($self) = @_;
403 my $param_string;
404 foreach my $attr(@PSEUDOWISE_PARAMS){
405 my $value = $self->$attr();
406 next unless (defined $value);
407 my $attr_key = ' -'.(lc $attr);
408 $param_string .=$attr_key.' '.$value;
411 foreach my $attr(@PSEUDOWISE_SWITCHES){
412 my $value = $self->$attr();
413 next unless (defined $value);
414 my $attr_key = ' -'.(lc $attr);
415 $param_string .=$attr_key;
418 return $param_string;
423 =head2 _query_pep_seq()
425 Title : _query_pep_seq
426 Usage : Internal function, not to be called directly
427 Function: get/set for the query sequence
428 Example :
429 Returns :
430 Args :
432 =cut
434 sub _query_pep_seq {
435 my ($self,$seq) = @_;
436 if(defined $seq){
437 $self->{'_query_pep_seq'} = $seq;
439 return $self->{'_query_pep_seq'};
442 =head2 _query_cdna_seq()
444 Title : _query_cdna_seq
445 Usage : Internal function, not to be called directly
446 Function: get/set for the query sequence
447 Example :
448 Returns :
449 Args :
451 =cut
453 sub _query_cdna_seq {
454 my ($self,$seq) = @_;
455 if(defined $seq){
456 $self->{'_query_cdna_seq'} = $seq;
458 return $self->{'_query_cdna_seq'};
461 =head2 _subject_dna_seq()
463 Title : _subject_dna_seq
464 Usage : Internal function, not to be called directly
465 Function: get/set for the subject sequence
466 Example :
467 Returns :
469 Args :
471 =cut
473 sub _subject_dna_seq {
474 my ($self,$seq) = @_;
475 if(defined $seq){
476 $self->{'_subject_dna_seq'} = $seq;
478 return $self->{'_subject_dna_seq'};
480 1; # Needed to keep compiler happy