sync w/ main trunk
[bioperl-live.git] / Bio / Tools / Analysis / Protein / GOR4.pm
blobe167fedcb17c27c5c93f402fcda1518c7301dd93
1 # $Id: GOR4.pm,v 1.0 2003/07/ 11
3 # BioPerl module for Bio::Tools::Analysis::Protein::GOR4
5 # Copyright Richard Adams
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Tools::Analysis::Protein::GOR4 - a wrapper around GOR4 protein
14 secondary structure prediction server
16 =head1 SYNOPSIS
18 use Bio::Tools::Analysis::Protein::GOR4;
19 #get a Bio::Seq or Bio::PrimarySeq
20 use Bio::PrimarySeq;
21 $seq = Bio::PrimarySeq->new
22 (-seq=>'IKLCVHHJHJHJHJHJHJHNLAILAKAHLIELALAL',
23 -primary_id=>'test'); # a Bio::PrimarySeqI object
25 my $gor4 = Bio::Tools::Analysis::Protein::GOR4->new (-seq=>$seq);
26 $gor4->run;
27 print $gor4->result;# #raw text to standard error
29 =head1 DESCRIPTION
31 A module to remotely retrieve predictions of protein secondary
32 structure. Each residue in the protein receives a score representing
33 the likelihood of existing in each of three different states (helix,
34 coil or sheet), e.g.,
36 my $analysis_object = Bio::Tools::SimpleAnalysis::Protein::GOR4->
37 new(-seq => $seq);
39 creates a new object
41 $analysis_object->run;
43 submits the query to the server and obtains raw text output
45 Given an amino acid sequence the results can be obtained in 4 formats,
46 determined by the argument to the result method
48 =over 4
50 =item 1
52 The raw text of the program output
54 my $rawdata = $analysis_object->result;
56 =item 2
58 An reference to an array of hashes of scores for each state and the
59 assigned state.
61 my $data_ref = $analysis_object->result('parsed');
62 print "score for helix at residue 2 is $data_ref->[1]{'helix'}\n";
63 print "predicted struc at residue 2 is $data_ref->[1]{'struc}\n";
65 =item 3
67 An array of Bio::SeqFeature::Generic objects where each feature is a
68 predicted unit of secondary structure. Only stretches of helix/sheet
69 predictions for longer than 4 residues are defined as helices. See
70 Bio::Tools::Analysis::Domcut.pm for examples of how to add sequence
71 features.
73 my @fts = $analysis_object->result(Bio::SeqFeatureI);
74 for my $ft (@fts) {
75 print " From ", $ft->start, " to ",$ft->end, " struc: " ,
76 ($ft->each_tag_value('type'))[0] ,"\n";
79 =item 4
81 A Bio::Seq::Meta::Array implementing sequence.
83 This is a Bio::Seq object that can also hold data about each residue
84 in the sequence In this case, the sequence can be associated with a
85 single array of GOR4 prediction scores. e.g.,
87 my $meta_sequence = $analysis_object->result('all');
88 print "helix scores from residues 10-20 are ",
89 $meta_sequence->named_submeta_text("GOR4_helix",10,20), "\n";
91 Meta sequence names are : GOR4_helix, GOR4_sheet, GOR4_coil,
92 GOR4_struc, representing the scores for each residue.
94 Many methods common to all analyses are inherited from
95 Bio::Tools::Analysis::SimpleAnalysisBase.
97 =back
99 =head1 SEE ALSO
101 L<Bio::SimpleAnalysisI>,
102 L<Bio::Tools::Analysis::SimpleAnalysisBase>,
103 L<Bio::Seq::Meta::Array>,
104 L<Bio::WebAgent>
106 =head1 FEEDBACK
108 =head2 Mailing Lists
110 User feedback is an integral part of the evolution of this and other
111 Bioperl modules. Send your comments and suggestions preferably to one
112 of the Bioperl mailing lists. Your participation is much appreciated.
114 bioperl-l@bioperl.org - General discussion
115 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
117 =head2 Support
119 Please direct usage questions or support issues to the mailing list:
121 L<bioperl-l@bioperl.org>
123 rather than to the module maintainer directly. Many experienced and
124 reponsive experts will be able look at the problem and quickly
125 address it. Please include a thorough description of the problem
126 with code and data examples if at all possible.
128 =head2 Reporting Bugs
130 Report bugs to the Bioperl bug tracking system to help us keep track
131 the bugs and their resolution. Bug reports can be submitted via the
132 web:
134 http://bugzilla.open-bio.org/
136 =head1 AUTHORS
138 Richard Adams, Richard.Adams@ed.ac.uk,
140 =head1 APPENDIX
142 The rest of the documentation details each of the object
143 methods. Internal methods are usually preceded with a _
145 =cut
147 use strict;
149 package Bio::Tools::Analysis::Protein::GOR4;
151 use IO::String;
152 use Bio::SeqIO;
153 use HTTP::Request::Common qw(POST);
154 use Bio::SeqFeature::Generic;
155 use Bio::Seq::Meta::Array;
158 use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);
160 use constant MIN_STRUC_LEN => 3;
161 my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_gor4.pl';
162 my $ANALYSIS_NAME = 'GOR4';
163 my $ANALYSIS_SPEC = {name => 'Gor4', type => 'Protein'};
164 my $INPUT_SPEC = [
165 {mandatory =>'true',
166 type => 'Bio::PrimarySeqI',
167 'name' => 'seq',
170 my $RESULT_SPEC =
172 '' => 'bulk', # same as undef
173 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
174 raw => '[ {struc =>, helix=> ,sheet=>, coil=>}]',
175 meta => 'Bio::Seq::Meta::Array object',
178 =head2 result
180 Name : result
181 Usage : $job->result (...)
182 Returns : a result created by running an analysis
183 Args : see keys of $RESULT_SPEC
185 The method returns a result of an executed job. If the job was
186 terminated by an error the result may contain an error message instead
187 of the real data.
189 This implementation returns differently processed data depending on
190 argument:
192 =over 3
194 =item undef
196 Returns the raw ASCII data stream but without HTML tags
198 =item 'Bio::SeqFeatureI'
200 The argument string defines the type of bioperl objects returned in an
201 array. The objects are L<Bio::SeqFeature::Generic>. Feature primary
202 tag is "2ary". Feature tags are "type" (which can be helix, sheet or
203 coil) "method" (GOR4).
205 =item 'parsed'
207 Array of hash references of { helix =E<gt>, sheet =E<gt> , coil =E<gt> , struc=E<gt>}.
209 =item 'meta'
211 A Bio::Seq::Meta::Array object. Scores can be accessed using methods
212 from this class. Meta sequence names are GOR4_helix, GOR4_sheet,
213 GOR4_coil, GOR4_struc.
216 =back
219 =cut
222 sub result {
223 my ($self,$value) = @_;
225 my @scores;
226 my @fts;
228 if ($value ) {
229 #parse into basic raw form, store this as well as '_result'
230 if (!exists($self->{'_parsed'}) ) {
231 my $result = IO::String->new($self->{'_result'});
232 while (my $line = <$result>) {
233 next unless $line =~ /^\w\s/; # or for sopma/hnn /^[A-Z]\s/
234 $line =~/(\w)\s+(\d+)\s+(\d+)\s+(\d+)/; # or for so
235 push @scores, { struc => $1,
236 helix => $2,
237 sheet => $3,
238 coil => $4,
241 $self->{'_parsed'} = \@scores;
243 if ($value eq 'Bio::SeqFeatureI') {
244 $self->_get_2ary_coords();
245 for my $type (keys %{$self->{'_parsed_coords'}} ) {
246 next if $type =~ /\w{2,}/; #if not H,C,E or T
247 for my $loc (@{$self->{'_parsed_coords'}{$type}} ) {
248 push @fts, Bio::SeqFeature::Generic->new
249 (-start => $loc->{'start'},
250 -end => $loc->{'end'},
251 -source => 'GOR4',
252 -primary => 'Region',
253 -tag => {
254 type => $type,
255 method => $self->analysis_name,
257 } #end of array of strucs of type
258 } # end of all 2nd struc elements
259 delete $self->{'_parsed_coords'}; #remove temp data
260 return @fts;
261 } #endif BioSeqFeature
263 elsif ($value eq 'meta') {
264 #1st of all make 3 or 4 arrays of scores for each type from column data
265 my %type_scores;
266 for my $aa (@{$self->{'_parsed'}}) {
267 push @{$type_scores{'struc'}}, $aa->{'struc'};
268 push @{$type_scores{'helix'}}, $aa->{'helix'};
269 push @{$type_scores{'sheet'}}, $aa->{'sheet'};
270 push @{$type_scores{'coil'}}, $aa->{'coil'};
273 ## bless if necessary ##
274 if (!$self->seq->isa("Bio::Seq::Meta::Array")){
275 bless ($self->seq, "Bio::Seq::Meta::Array");
277 $self->seq->isa("Bio::Seq::MetaI")
278 || $self->throw("$self is not a Bio::Seq::MetaI");
279 $Bio::Seq::Meta::Array::DEFAULT_NAME = 'GOR4_struc';
281 ## now make meta_Sequence
282 for my $struc_type (keys %type_scores) {
283 my $meta_name = "GOR4". "_" . "$struc_type";
284 my @meta = map{$_->{$struc_type}} @{$self->{'_parsed'}};
285 if (grep{$_ eq $meta_name}$self->seq->meta_names ) {
286 $self->warn ("$meta_name already exists , not overwriting!");
287 next;
289 $self->seq->named_meta($meta_name,\@meta );
291 # return seq array object implementing meta sequence #
292 return $self->seq;
295 else {
296 return $self->{'_parsed'};
298 } #endif ($value)
300 #return raw result if no return fomrt stated
301 return $self->{'_result'};
304 sub _get_2ary_coords {
305 #helper sub for result;
306 ##extracts runs of structure > MIN_STRUC_LENresidues or less if Turn:
307 #i.e., helical prediction for 1 residue isn't very meaningful...
308 ## and poulates array of hashes with start/end values.
309 ##keys of $Result are 'H' 'T' 'C' 'E'.
310 #could be put into a secondary base class if need be
311 my ($self) = @_;
312 my @prot = @{$self->{'_parsed'}};
313 my %Result;
314 for (my $index = 0; $index <= $#prot; $index++) {
316 my $type = $prot[$index]{'struc'};
317 next unless $type =~ /[HTCE]/;
318 my $length = 1;
319 for (my $j = $index + 1; $j <= $#prot; $j++) {
320 my $test = $prot[$j];
321 if ($test->{'struc'} eq $type) {
322 $length++;
323 } elsif ( $length > MIN_STRUC_LEN ||
324 ($length <= MIN_STRUC_LEN && $type eq 'T') ) {
325 push @{$Result{$type}}, {start => $index + 1 , end => $j};
326 $index += $length -1;
327 last;
328 } else {
329 $index += $length - 1;
330 last;
334 $self->{'_parsed_coords'} = \%Result; #temp assignment
337 sub _init {
338 my $self = shift;
339 $self->url($URL);
340 $self->{'_ANALYSIS_SPEC'} =$ANALYSIS_SPEC;
341 $self->{'_INPUT_SPEC'} =$INPUT_SPEC;
342 $self->{'_RESULT_SPEC'} =$RESULT_SPEC;
343 $self->{'_ANALYSIS_NAME'} =$ANALYSIS_NAME;
344 return $self;
348 sub _run {
349 my $self = shift;
350 $self->delay(1);
351 # delay repeated calls by default by 3 sec, set delay() to change
352 $self->sleep;
353 $self->status('TERMINATED_BY_ERROR');
354 my $request = POST $self->url,
355 Content_Type => 'form-data',
356 Content => [title => "",
357 notice => $self->seq->seq,
358 ali_width => 70,
361 my $content = $self->request($request);
362 my $text = $content->content;
363 return unless $text;
364 my ($next) = $text =~ /Prediction.*?=(.*?)>/;
365 return unless $next;
366 my $out = 'http://npsa-pbil.ibcp.fr/'.$next;
367 my $req2 = HTTP::Request->new(GET=>$out);
368 my $resp2 = $self->request($req2);
369 $self->status('COMPLETED') if $resp2 ne '';
370 $self->{'_result'} = $resp2->content;