1 # BioPerl module for Bio::Tools::Run::Pseudowise
7 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
12 Bio::Tools::Run::Pseudowise - Object for prediting pseudogenes in a
13 given sequence given a protein and a cdna sequence
17 # Build a pseudowise alignment factory
18 my $factory = Bio::Tools::Run::Pseudowise->new();
20 # Pass the factory 3 Bio:SeqI objects (in the order of query peptide and cdna and
22 #@genes is an array of GenericSeqFeature objects
23 my @genes = $factory->run($seq1, $seq2, $seq3);
27 Pseudowise is a pseudogene predition program developed by Ewan Birney
28 http://www.sanger.ac.uk/software/wise2.
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to one
36 of the Bioperl mailing lists. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bio.perl.org/MailList.html - About the mailing lists
43 Report bugs to the Bioperl bug tracking system to help us keep track
44 the bugs and their resolution. Bug reports can be submitted via
47 bioperl-bugs@bio.perl.org
48 http://bio.perl.org/bioperl-bugs/
52 Email kiran@fugu-sg.org
56 The rest of the documentation details each of the object
57 methods. Internal methods are usually preceded with a _
62 package Bio
::Tools
::Run
::Pseudowise
;
63 use vars
qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
64 @PSEUDOWISE_SWITCHES @PSEUDOWISE_PARAMS
65 @OTHER_SWITCHES %OK_FIELD);
68 use Bio::SeqFeature::Generic;
70 use Bio::Tools::Run::WrapperBase;
72 @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
74 # You will need to enable pseudowise to find the pseudowise program. This
75 # can be done in (at least) two ways:
77 # 1. define an environmental variable WISEDIR
78 # export WISEDIR =/usr/local/share/wise2.2.0
79 # where the wise2.2.20 package is installed
81 # 2. include a definition of an environmental variable WISEDIR in
82 # every script that will use DBA.pm
83 # $ENV{WISEDIR} = '/usr/local/share/wise2.2.20';
86 @PSEUDOWISE_PARAMS = qw(SPLICE_MAX_COLLAR SPLICE_MIN_COLLAR
88 GENESTATS NOMATCHN PARAMS KBYTE
89 DYMEM DYDEBUG PALDEBUG
92 @PSEUDOWISE_SWITCHES = qw(HELP SILENT QUIET ERROROFFSTD);
94 # Authorize attribute fields
95 foreach my $attr ( @PSEUDOWISE_PARAMS, @PSEUDOWISE_SWITCHES,
96 @OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
103 Usage : $factory>program_name()
104 Function: holds the program name
117 Usage : $factory->program_dir(@params)
118 Function: returns the program directory, obtiained from ENV variable.
125 return Bio
::Root
::IO
->catfile($ENV{WISEDIR
},"/src/bin") if $ENV{WISEDIR
};
129 my ($class, @args) = @_;
130 my $self = $class->SUPER::new
(@args);
135 $value = shift @args;
136 next if( $attr =~ /^-/ ); # don't want named parameters
137 if ($attr =~/'PROGRAM'/i) {
138 $self->executable($value);
141 $self->$attr($value);
149 my $attr = $AUTOLOAD;
152 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
153 $self->{$attr} = shift if @_;
154 return $self->{$attr};
160 Usage : exit if $prog->version() < 1.8
161 Function: Determine the version number of the program
163 Returns : float or undef
171 return undef unless $self->executable;
172 my $string = `pseudowise -- ` ;
173 $string =~ /\(([\d.]+)\)/;
180 Title : predict_genes
181 Usage : DEPRECATED. Use $factory->run instead
182 Function: Predict pseudogenes
183 Returns : An array of Bio::Seqfeature::Generic objects
184 Args : Name of a file containing a set of 3 fasta sequences in the order of
185 peptide, cdna and genomic sequences
186 or else 3 Bio::Seq objects.
188 Throws an exception if argument is not either a string (eg a
189 filename) or 3 Bio::Seq objects. If
190 arguments are strings, throws exception if file corresponding to string
191 name can not be found.
196 return shift->run(@_);
202 Usage : my @feats = $factory->run($seq1, $seq2, $seq3);
203 Function: Executes pseudogene binary
204 Returns : An array of Bio::Seqfeature::Generic objects
205 Args : Name of a file containing a set of 3 fasta sequences in the order of
206 peptide, cdna and genomic sequences
207 or else 3 Bio::Seq objects.
209 Throws an exception if argument is not either a string (eg a
210 filename) or 3 Bio::Seq objects. If
211 arguments are strings, throws exception if file corresponding to string
212 name can not be found.
218 my ($self,$seq1, $seq2, $seq3)=@_;
219 my ($attr, $value, $switch);
221 # Create input file pointer
222 my ($infile1,$infile2,$infile3)= $self->_setinput($seq1, $seq2, $seq3);
223 if (!($infile1 && $infile2 && $infile3)) {$self->throw("Bad input data (sequences need an id ) ");}
225 my $prot_name = $seq1->display_id;
227 my @feats = $self->_run($prot_name, $infile1,$infile2,$infile3);
235 Usage : Internal function, not to be called directly
236 Function: makes actual system call to a pseudowise program
238 Returns : nothing; pseudowise output is written to a
239 temporary file $TMPOUTFILE
240 Args : Name of a files containing 3 sequences in the order of peptide, cdna and genomic
245 my ($self,$prot_name, $infile1,$infile2,$infile3) = @_;
247 $self->debug( "Program ".$self->executable."\n");
248 my ($tfh1,$outfile) = $self->io->tempfile(-dir
=>$self->tempdir);
249 my $paramstring = $self->_setparams;
250 my $commandstring = $self->executable." $paramstring $infile1 $infile2 $infile3 > $outfile";
251 if($self->silent || $self->quiet || !($self->vebose)){
252 $commandstring .= ' 2> /dev/null';
254 $self->debug( "pseudowise command = $commandstring");
255 my $status = system($commandstring);
256 $self->throw( "Pseudowise call ($commandstring) crashed: $? \n") unless $status==0;
258 #parse the outpur and return a Bio::Seqfeature array
259 my $genes = $self->_parse_results($prot_name,$outfile);
265 =head2 _parse_results
267 Title : __parse_results
268 Usage : Internal function, not to be called directly
269 Function: Parses pseudowise output
271 Returns : an reference to an array of Seqfeatures
272 Args : the name of the output file
277 my ($self,$prot_name,$outfile) = @_;
278 $outfile||$self->throw("No outfile specified");
280 if (ref ($outfile) !~ /GLOB/)
282 open (PSEUDOWISE
, "<".$outfile)
283 or $self->throw ("Couldn't open file ".$outfile.": $!\n");
284 $filehandle = \
*PSEUDOWISE
;
288 $filehandle = $outfile;
292 #The big parsing loop - parses exons and predicted peptides
296 while (<$filehandle>)
299 my @line_elements = split;
300 my $no = scalar(@line_elements);
301 $score=$line_elements[$no-1];
307 $gene = new Bio
::SeqFeature
::Generic
(
308 -primary
=> 'pseudogene',
309 -source
=> 'pseudowise');
313 while(<$filehandle>) {
314 my @gene_elements = split;
315 my $no = scalar(@gene_elements);
316 if ((/Gene/i) && $no == 3) {
318 my $no = scalar(@element);
319 my $gene_start = $element[1];
320 my $gene_end = $element[2];
321 $gene->start($gene_start);
322 $gene->end($gene_end);
323 $gene->score($score);
327 my $no = scalar(@element);
328 my $exon_start = $element[1];
329 my $exon_end = $element[2];
330 my $exon_phase = $element[4];
331 #my $seqname = $prot_name.'_'.$count;
332 my $seqname = $prot_name;
333 my $exon = new Bio
::SeqFeature
::Generic
(
335 -start
=> $exon_start,
338 -source
=> 'pseudowise',
339 -frame
=> $exon_phase);
340 $exon->score($score);
341 $gene->add_sub_SeqFeature($exon);
343 elsif ((/Gene/i) && $no != 3) {
344 $gene = new Bio
::SeqFeature
::Generic
(
345 -primary
=> 'pseudogene',
346 -source
=> 'pseudowise');
347 $gene->score($score);
361 Usage : Internal function, not to be called directly
362 Function: Create input files for pseudowise program
364 Returns : name of file containing dba data input
365 Args : Seq objects in the order of query protein and cdna and target genomic sequence
370 my ($self, $seq1, $seq2, $seq3) = @_;
371 my ($tfh1,$tfh2,$tfh3,$outfile1,$outfile2,$outfile3);
373 if(!($seq1->isa("Bio::PrimarySeqI") && $seq2->isa("Bio::PrimarySeqI")&& $seq2->isa("Bio::PrimarySeqI")))
374 { $self->throw("One or more of the sequences are nor Bio::PrimarySeqI objects\n"); }
375 my $tempdir = $self->tempdir();
376 ($tfh1,$outfile1) = $self->io->tempfile(-dir
=>$tempdir);
377 ($tfh2,$outfile2) = $self->io->tempfile(-dir
=>$tempdir);
378 ($tfh3,$outfile3) = $self->io->tempfile(-dir
=>$tempdir);
380 my $out1 = Bio
::SeqIO
->new(-fh
=> $tfh1 , '-format' => 'Fasta');
381 my $out2 = Bio
::SeqIO
->new(-fh
=> $tfh2, '-format' => 'Fasta');
382 my $out3 = Bio
::SeqIO
->new(-fh
=> $tfh3, '-format' => 'Fasta');
384 $out1->write_seq($seq1);
385 $out2->write_seq($seq2);
386 $out3->write_seq($seq3);
387 $self->_query_pep_seq($seq1);
388 $self->_query_cdna_seq($seq2);
389 $self->_subject_dna_seq($seq3);
398 return $outfile1,$outfile2,$outfile3;
404 foreach my $attr(@PSEUDOWISE_PARAMS){
405 my $value = $self->$attr();
406 next unless (defined $value);
407 my $attr_key = ' -'.(lc $attr);
408 $param_string .=$attr_key.' '.$value;
411 foreach my $attr(@PSEUDOWISE_SWITCHES){
412 my $value = $self->$attr();
413 next unless (defined $value);
414 my $attr_key = ' -'.(lc $attr);
415 $param_string .=$attr_key;
418 return $param_string;
423 =head2 _query_pep_seq()
425 Title : _query_pep_seq
426 Usage : Internal function, not to be called directly
427 Function: get/set for the query sequence
435 my ($self,$seq) = @_;
437 $self->{'_query_pep_seq'} = $seq;
439 return $self->{'_query_pep_seq'};
442 =head2 _query_cdna_seq()
444 Title : _query_cdna_seq
445 Usage : Internal function, not to be called directly
446 Function: get/set for the query sequence
453 sub _query_cdna_seq
{
454 my ($self,$seq) = @_;
456 $self->{'_query_cdna_seq'} = $seq;
458 return $self->{'_query_cdna_seq'};
461 =head2 _subject_dna_seq()
463 Title : _subject_dna_seq
464 Usage : Internal function, not to be called directly
465 Function: get/set for the subject sequence
473 sub _subject_dna_seq
{
474 my ($self,$seq) = @_;
476 $self->{'_subject_dna_seq'} = $seq;
478 return $self->{'_subject_dna_seq'};
480 1; # Needed to keep compiler happy