sync w/ main trunk
[bioperl-live.git] / Bio / Tools / Analysis / Protein / Mitoprot.pm
blob4bd92de0ee63ec933bb2b90ff9f732232da7585f
1 # $Id: Mitoprot.pm,
3 # BioPerl module for Bio::Tools::Analysis::Protein::Mitoprot
4 # Copyright Richard Adams
6 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
9 =head1 NAME
11 Bio::Tools::Analysis::Protein::Mitoprot - a wrapper around Mitoprot
12 server
14 =head1 SYNOPSIS
16 use Bio::Tools::Analysis::Protein::Mitoprot;
18 use Bio::PrimarySeq;
19 my $seq = Bio::PrimarySeq->new
20 (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
21 -primary_id=>'test'); # a Bio::PrimarySeqI object
23 my $mitoprot = Bio::Tools::Analysis::Protein::Mitoprot->new
24 ( -seq => $seq
25 ); # sequence must be >!5aa long and start with an M.
27 # run Mitoprot prediction on a DNA sequence
28 my $mitoprot->run();
31 die "Could not get a result" unless $mitoprot->status =~ /^COMPLETED/;
33 print $mitoprot->result; # print raw prediction to STDOUT
35 foreach my $feat ( $mitoprot->result('Bio::SeqFeatureI') ) {
37 # do something to SeqFeature
38 # e.g. print as GFF
39 print $feat->gff_string, "\n";
40 # or store within the sequence - if it is a Bio::RichSeqI
41 $seq->add_SeqFeature($feat);
45 =head1 DESCRIPTION
47 This class is a wrapper around the Mitoprot web server which
48 calculates the probability of a sequence containing a mitochondrial
49 targetting peptide. See http://mips.gsf.de/cgi-bin/proj/medgen/mitofilter
50 for more details.
52 The results can be obtained in 3 formats:
54 =over 3
56 =item 1
58 The raw text of the program output
60 my $rawdata = $analysis_object->result;
62 =item 2
64 An reference to a hash of scores :
66 my $data_ref = $analysis_object->result('parsed'); print "predicted
67 export prob is $data_ref->{'export_prob'}\n"; #
69 key values of returned hash are input_length, basic_aas, acidic_aas,
70 export_prob, charge, cleavage_site.
72 =item 3
74 A Bio::SeqFeature::Generic object
76 my $ft = $analysis_object->result(Bio::SeqFeatureI);
77 print "export prob is ", ($ft->each_tag_value('export_prob'))[0] ,"\n";
80 This the second implentation of Bio::SimpleAnalysisI which hopefully
81 will make it easier to write wrappers on various services. This class
82 uses a web resource and therefore inherits from Bio::WebAgent.
84 =back
86 =head1 SEE ALSO
88 L<Bio::SimpleAnalysisI>,
89 L<Bio::Tools::Analysis::SimpleAnalysisBase>,
90 L<Bio::WebAgent>
92 =head1 FEEDBACK
94 =head2 Mailing Lists
96 User feedback is an integral part of the evolution of this and other
97 Bioperl modules. Send your comments and suggestions preferably to one
98 of the Bioperl mailing lists. Your participation is much appreciated.
100 bioperl-l@bioperl.org - General discussion
101 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
103 =head2 Support
105 Please direct usage questions or support issues to the mailing list:
107 L<bioperl-l@bioperl.org>
109 rather than to the module maintainer directly. Many experienced and
110 reponsive experts will be able look at the problem and quickly
111 address it. Please include a thorough description of the problem
112 with code and data examples if at all possible.
114 =head2 Reporting Bugs
116 Report bugs to the Bioperl bug tracking system to help us keep track
117 the bugs and their resolution. Bug reports can be submitted via the
118 web:
120 http://bugzilla.open-bio.org/
122 =head1 AUTHORS
124 Richard Adams, Richard.Adams@ed.ac.uk,
126 =head1 APPENDIX
128 The rest of the documentation details each of the object
129 methods. Internal methods are usually preceded with a _
131 =cut
134 # Let the code begin...
137 package Bio::Tools::Analysis::Protein::Mitoprot;
138 use vars qw($FLOAT);
139 use strict;
141 use IO::String;
142 use Bio::SeqIO;
143 use HTTP::Request::Common qw(GET);
144 use Bio::SeqFeature::Generic;
146 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
147 $FLOAT = '[+-]?\d*\.\d*';
149 my $URL = 'http://ihg.gsf.de/cgi-bin/paolo/mitofilter?';
152 my %STATUS = map { $_ => 1 } qw(CREATED COMPLETED TERMINATED_BY_ERROR);
154 my $MIN_LEN = 60; #min len for protein analysis
155 my $ANALYSIS_NAME = "Mitoprot";
157 my $ANALYSIS_SPEC =
159 'name' => 'Mitoprot',
160 'type' => 'Protein',
161 'version' => '1.0a4',
162 'supplier' => 'Munich Information Center for ProteinSequences',
163 'description' => 'mitochondrial sig seq prediction',
166 my $INPUT_SPEC =
169 'mandatory' => 'true',
170 'type' => 'Bio::PrimarySeqI',
171 'name' => 'seq', #value must be name of method used to set value
175 my $RESULT_SPEC =
177 '' => 'raw text results', # same as undef
178 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
179 'all' => 'hash of results',
184 ### unique to this module ##
186 =head2 result
188 Usage : $job->result (...)
189 Returns : a result created by running an analysis
190 Args : various
192 The method returns a result of an executed job. If the job was
193 terminated by an error the result may contain an error message instead
194 of the real data.
196 This implementation returns differently processed data depending on
197 argument:
199 =over 3
201 =item undef
203 Returns the raw ASCII data stream but without HTML tags
205 =item 'Bio::SeqFeatureI'
207 The argument string defines the type of bioperl objects returned in an
208 array. The objects are L<Bio::SeqFeature::Generic>. Feature primary
209 tag is "SigSeq". Feature tags are input_length , basic_aas,
210 acidic_aas, export_prob, charge, cleavage_site, method.
212 =item 'parsed'
214 hash references of parsed results { input_length =E<gt>, basic_aas=E<gt>,
215 acidic_aas=E<gt>, export_prob=E<gt>, charge=E<gt>, cleavage_site=E<gt>}.
217 =back
219 =cut
222 sub result {
223 my ($self,$value) = @_;
224 #make sec feat of above threshold scores #
226 my @sig_pdctns;
227 my @fts;
229 if ($value ) {
230 my $result = IO::String->new($self->{'_result'});
231 my %results;
232 while (my $line = <$result>) {
233 #make array of all scores or threshold depending on $value
234 next unless $line =~ /\d/ || $line =~ /^Cle/;
235 if ($line =~ /^Net[^+\-\d]+ # Net, then anything except +,- or digit
236 ((\+|-)?\d+)/x) #then get charge with optional + or -
238 $results{'charge'} = $1;
239 } elsif ($line =~ /^Input[^\d]+(\d+)/ ) {
240 $results{'input_length'} = $1;
241 } elsif ($line =~ /basic[^\d]+(\d+)$/ ) {
242 $results{'basic_aas'} = $1;
243 } elsif ($line =~ /acidic[^\d]+(\d+)$/) {
244 $results{'acidic_aas'} = $1;
245 } elsif ($line =~ /^Cleavage[^\d]+(\d+)$/) {
246 $results{'cleavage_site'} = $1;
247 } elsif ($line =~ /^Cleavage/) {
248 $results{'cleavage_site'} = 'not predictable';
249 } elsif ($line =~ /^of export[^\d]+((0|1)\.\d+)$/) {
250 $results{'export_prob'} = $1;
254 if ($value eq 'Bio::SeqFeatureI') {
255 push @fts, Bio::SeqFeature::Generic->new
257 -start => 1,
258 -end => ($results{'cleavage_site'} =~
259 /^\d+$/)?$results{'cleavage_site'}:$self->seq->length,
260 -source => 'Mitoprot',
261 -primary => 'Region',
262 -tag =>{
263 export_prob => $results{'export_prob'},
264 charge => $results{'charge'},
265 basic_aas => $results{'basic_aas'},
266 acid_aas => $results{'acidic_aas'},
267 region_name => 'Transit_peptide',
268 method => 'MitoProt',
269 cleavage_site => $results{'cleavage_site'},
272 return @fts; #return Bioseqfeature array
274 ## convert parsed data into a meta array format
275 else {
276 return \%results; # hash based results ref
279 return $self->{'_result'};
282 sub _init {
283 my $self = shift;
284 $self->url($URL);
285 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
286 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
287 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
288 $self->{'_ANALYSIS_NAME'} =$ANALYSIS_SPEC->{'name'};
289 return $self;
292 sub _process_arguments {
293 #extra checking for sequence length
294 #mitoprot specific argument testing
295 my ($self, $args) = @_;
296 #use base checking for existence of mandatory fields
297 $self->SUPER::_process_arguments($args) ;
299 #then check specifics
300 $self->throw ("1st_aa must be M") if $self->seq->subseq(1,1) !~ /M/i;
301 $self->throw ("sequence must be at least 15aa long") if $self->seq->length< 15;
302 return;
307 sub _run {
308 #request submitted by get not by post
309 my $self = shift;
310 $self->delay(1);
311 $self->sleep;
313 $self->status('TERMINATED_BY_ERROR');
314 my $url = $self->url . "seq=".lc($self->seq->seq). "&seqnam=";
315 my $request = GET $url;
316 my $content = $self->request($request);
317 my $text = $content->content; #1st reponse
319 #remove html stuff
320 $text =~ s/.*<PRE>(.*)<\/PRE>.*/$1/s;
321 $text =~ s/<[^>]+>//sg;
323 $self->status('COMPLETED') if $text ne '' && $self->seq->length > $MIN_LEN;
324 $self->{'_result'} = $text;