Add Root back, plus some test and doc fixes
[bioperl-live.git] / Bio / Tools / Analysis / Protein / Mitoprot.pm
blob717191d16fec5487cbbd4447131a1ade12d360b7
1 # $Id: Mitoprot.pm,
3 # BioPerl module for Bio::Tools::Analysis::Protein::Mitoprot
4 # Copyright Richard Adams
6 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
9 =head1 NAME
11 Bio::Tools::Analysis::Protein::Mitoprot - a wrapper around Mitoprot
12 server
14 =head1 SYNOPSIS
16 use Bio::Tools::Analysis::Protein::Mitoprot;
18 use Bio::PrimarySeq;
19 my $seq = Bio::PrimarySeq->new
20 (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
21 -primary_id=>'test'); # a Bio::PrimarySeqI object
23 my $mitoprot = Bio::Tools::Analysis::Protein::Mitoprot->new
24 ( -seq => $seq
25 ); # sequence must be >!5aa long and start with an M.
27 # run Mitoprot prediction on a DNA sequence
28 my $mitoprot->run();
31 die "Could not get a result" unless $mitoprot->status =~ /^COMPLETED/;
33 print $mitoprot->result; # print raw prediction to STDOUT
35 foreach my $feat ( $mitoprot->result('Bio::SeqFeatureI') ) {
37 # do something to SeqFeature
38 # e.g. print as GFF
39 print $feat->gff_string, "\n";
40 # or store within the sequence - if it is a Bio::RichSeqI
41 $seq->add_SeqFeature($feat);
45 =head1 DESCRIPTION
47 This class is a wrapper around the Mitoprot web server which
48 calculates the probability of a sequence containing a mitochondrial
49 targetting peptide. See http://mips.gsf.de/cgi-bin/proj/medgen/mitofilter
50 for more details.
52 The results can be obtained in 3 formats:
54 =over 3
56 =item 1
58 The raw text of the program output
60 my $rawdata = $analysis_object->result;
62 =item 2
64 An reference to a hash of scores :
66 my $data_ref = $analysis_object->result('parsed'); print "predicted
67 export prob is $data_ref->{'export_prob'}\n"; #
69 key values of returned hash are input_length, basic_aas, acidic_aas,
70 export_prob, charge, cleavage_site.
72 =item 3
74 A Bio::SeqFeature::Generic object
76 my $ft = $analysis_object->result(Bio::SeqFeatureI);
77 print "export prob is ", ($ft->each_tag_value('export_prob'))[0] ,"\n";
80 This the second implentation of Bio::SimpleAnalysisI which hopefully
81 will make it easier to write wrappers on various services. This class
82 uses a web resource and therefore inherits from Bio::WebAgent.
84 =back
86 =head1 SEE ALSO
88 L<Bio::SimpleAnalysisI>,
89 L<Bio::Tools::Analysis::SimpleAnalysisBase>,
90 L<Bio::WebAgent>
92 =head1 FEEDBACK
94 =head2 Mailing Lists
96 User feedback is an integral part of the evolution of this and other
97 Bioperl modules. Send your comments and suggestions preferably to one
98 of the Bioperl mailing lists. Your participation is much appreciated.
100 bioperl-l@bioperl.org - General discussion
101 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
103 =head2 Support
105 Please direct usage questions or support issues to the mailing list:
107 I<bioperl-l@bioperl.org>
109 rather than to the module maintainer directly. Many experienced and
110 reponsive experts will be able look at the problem and quickly
111 address it. Please include a thorough description of the problem
112 with code and data examples if at all possible.
114 =head2 Reporting Bugs
116 Report bugs to the Bioperl bug tracking system to help us keep track
117 the bugs and their resolution. Bug reports can be submitted via the
118 web:
120 https://github.com/bioperl/bioperl-live/issues
122 =head1 AUTHORS
124 Richard Adams, Richard.Adams@ed.ac.uk,
126 =head1 APPENDIX
128 The rest of the documentation details each of the object
129 methods. Internal methods are usually preceded with a _
131 =cut
134 # Let the code begin...
137 package Bio::Tools::Analysis::Protein::Mitoprot;
138 use vars qw($FLOAT);
139 use strict;
141 use IO::String;
142 use Bio::SeqIO;
143 use HTTP::Request::Common qw(GET);
144 use Bio::SeqFeature::Generic;
146 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
147 $FLOAT = '[+-]?\d*\.\d*';
149 my $URL = 'https://ihg.gsf.de/cgi-bin/paolo/mitofilter?';
151 my %STATUS = map { $_ => 1 } qw(CREATED COMPLETED TERMINATED_BY_ERROR);
153 my $MIN_LEN = 60; #min len for protein analysis
154 my $ANALYSIS_NAME = "Mitoprot";
156 my $ANALYSIS_SPEC =
158 'name' => 'Mitoprot',
159 'type' => 'Protein',
160 'version' => '1.0a4',
161 'supplier' => 'Munich Information Center for ProteinSequences',
162 'description' => 'mitochondrial sig seq prediction',
165 my $INPUT_SPEC =
168 'mandatory' => 'true',
169 'type' => 'Bio::PrimarySeqI',
170 'name' => 'seq', #value must be name of method used to set value
174 my $RESULT_SPEC =
176 '' => 'raw text results', # same as undef
177 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
178 'all' => 'hash of results',
183 ### unique to this module ##
185 =head2 result
187 Usage : $job->result (...)
188 Returns : a result created by running an analysis
189 Args : various
191 The method returns a result of an executed job. If the job was
192 terminated by an error the result may contain an error message instead
193 of the real data.
195 This implementation returns differently processed data depending on
196 argument:
198 =over 3
200 =item undef
202 Returns the raw ASCII data stream but without HTML tags
204 =item 'Bio::SeqFeatureI'
206 The argument string defines the type of bioperl objects returned in an
207 array. The objects are L<Bio::SeqFeature::Generic>. Feature primary
208 tag is "SigSeq". Feature tags are input_length , basic_aas,
209 acidic_aas, export_prob, charge, cleavage_site, method.
211 =item 'parsed'
213 hash references of parsed results { input_length =E<gt>, basic_aas=E<gt>,
214 acidic_aas=E<gt>, export_prob=E<gt>, charge=E<gt>, cleavage_site=E<gt>}.
216 =back
218 =cut
221 sub result {
222 my ($self,$value) = @_;
223 #make sec feat of above threshold scores #
225 my @sig_pdctns;
226 my @fts;
228 if ($value ) {
229 my $result = IO::String->new($self->{'_result'});
230 my %results;
231 while (my $line = <$result>) {
232 #make array of all scores or threshold depending on $value
233 next unless $line =~ /\d/ || $line =~ /^Cle/;
234 if ($line =~ /^Net[^+\-\d]+ # Net, then anything except +,- or digit
235 ((\+|-)?\d+)/x) #then get charge with optional + or -
237 $results{'charge'} = $1;
238 } elsif ($line =~ /^Input[^\d]+(\d+)/ ) {
239 $results{'input_length'} = $1;
240 } elsif ($line =~ /basic[^\d]+(\d+)$/ ) {
241 $results{'basic_aas'} = $1;
242 } elsif ($line =~ /acidic[^\d]+(\d+)$/) {
243 $results{'acidic_aas'} = $1;
244 } elsif ($line =~ /^Cleavage[^\d]+(\d+)$/) {
245 $results{'cleavage_site'} = $1;
246 } elsif ($line =~ /^Cleavage/) {
247 $results{'cleavage_site'} = 'not predictable';
248 } elsif ($line =~ /^of export[^\d]+((0|1)\.\d+)$/) {
249 $results{'export_prob'} = $1;
253 if ($value eq 'Bio::SeqFeatureI') {
254 push @fts, Bio::SeqFeature::Generic->new
256 -start => 1,
257 -end => ($results{'cleavage_site'} =~
258 /^\d+$/)?$results{'cleavage_site'}:$self->seq->length,
259 -source => 'Mitoprot',
260 -primary => 'Region',
261 -tag =>{
262 export_prob => $results{'export_prob'},
263 charge => $results{'charge'},
264 basic_aas => $results{'basic_aas'},
265 acid_aas => $results{'acidic_aas'},
266 region_name => 'Transit_peptide',
267 method => 'MitoProt',
268 cleavage_site => $results{'cleavage_site'},
271 return @fts; #return Bioseqfeature array
273 ## convert parsed data into a meta array format
274 else {
275 return \%results; # hash based results ref
278 return $self->{'_result'};
281 sub _init {
282 my $self = shift;
283 $self->url($URL);
284 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
285 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
286 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
287 $self->{'_ANALYSIS_NAME'} =$ANALYSIS_SPEC->{'name'};
288 return $self;
291 sub _process_arguments {
292 #extra checking for sequence length
293 #mitoprot specific argument testing
294 my ($self, $args) = @_;
295 #use base checking for existence of mandatory fields
296 $self->SUPER::_process_arguments($args) ;
298 #then check specifics
299 $self->throw ("1st_aa must be M") if $self->seq->subseq(1,1) !~ /M/i;
300 $self->throw ("sequence must be at least 15aa long") if $self->seq->length< 15;
301 return;
306 sub _run {
307 #request submitted by get not by post
308 my $self = shift;
309 $self->delay(1);
310 $self->sleep;
312 $self->status('TERMINATED_BY_ERROR');
313 my $url = $self->url . "seq=" . lc($self->seq->seq) . "&seqnam=";
314 my $request = GET $url;
315 my $content = $self->request($request);
316 my $text = $content->content; #1st reponse
318 #remove html stuff
319 $text =~ s/.*<PRE>(.*)<\/PRE>.*/$1/s;
320 $text =~ s/<[^>]+>//sg;
322 $self->status('COMPLETED') if $text ne '' && $self->seq->length > $MIN_LEN;
323 $self->{'_result'} = $text;