tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tools / Protparam.pm
blobc7f17298369e085a17d6369ef8e383e0f4b23d65
1 # $Id$
3 # BioPerl module for Bio::Tools::Protparam
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Richard Dobson, r.j.dobson at qmul dot ac dot uk
9 # Copyright Richard Dobson
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
=head1 NAME

Bio::Tools::Protparam - submit to and parse output from protparam

=head1 SYNOPSIS

  my $gb = Bio::DB::GenBank->new(-retrievaltype => 'tempfile',
                                 -format        => 'Fasta');
  my @ids   = qw(O14521 O43709 O43826);
  my $seqio = $gb->get_Stream_by_acc(\@ids);

  while ( my $seq = $seqio->next_seq ) {

      my $pp = Bio::Tools::Protparam->new(seq => $seq->seq);

      print
      "ID : ", $seq->display_id,"\n",
      "Amino acid number : ",$pp->amino_acid_number(),"\n",
      "Number of negative amino acids : ",$pp->num_neg(),"\n",
      "Number of positive amino acids : ",$pp->num_pos(),"\n",
      "Molecular weight : ",$pp->molecular_weight(),"\n",
      "Theoretical pI : ",$pp->theoretical_pI(),"\n",
      "Total number of atoms : ", $pp->total_atoms(),"\n",
      "Number of carbon atoms : ",$pp->num_carbon(),"\n",
      "Number of hydrogen atoms : ",$pp->num_hydrogen(),"\n",
      "Number of nitrogen atoms : ",$pp->num_nitro(),"\n",
      "Number of oxygen atoms : ",$pp->num_oxygen(),"\n",
      "Number of sulphur atoms : ",$pp->num_sulphur(),"\n",
      "Half life : ", $pp->half_life(),"\n",
      "Instability Index : ", $pp->instability_index(),"\n",
      "Stability class : ", $pp->stability(),"\n",
      "Aliphatic_index : ",$pp->aliphatic_index(),"\n",
      "Gravy : ", $pp->gravy(),"\n",
      "Composition of A : ", $pp->AA_comp('A'),"\n",
      "Composition of R : ", $pp->AA_comp('R'),"\n",
      "Composition of N : ", $pp->AA_comp('N'),"\n",
      "Composition of D : ", $pp->AA_comp('D'),"\n",
      "Composition of C : ", $pp->AA_comp('C'),"\n",
      "Composition of Q : ", $pp->AA_comp('Q'),"\n",
      "Composition of E : ", $pp->AA_comp('E'),"\n",
      "Composition of G : ", $pp->AA_comp('G'),"\n",
      "Composition of H : ", $pp->AA_comp('H'),"\n",
      "Composition of I : ", $pp->AA_comp('I'),"\n",
      "Composition of L : ", $pp->AA_comp('L'),"\n",
      "Composition of K : ", $pp->AA_comp('K'),"\n",
      "Composition of M : ", $pp->AA_comp('M'),"\n",
      "Composition of F : ", $pp->AA_comp('F'),"\n",
      "Composition of P : ", $pp->AA_comp('P'),"\n",
      "Composition of S : ", $pp->AA_comp('S'),"\n",
      "Composition of T : ", $pp->AA_comp('T'),"\n",
      "Composition of W : ", $pp->AA_comp('W'),"\n",
      "Composition of Y : ", $pp->AA_comp('Y'),"\n",
      "Composition of V : ", $pp->AA_comp('V'),"\n",
      "Composition of B : ", $pp->AA_comp('B'),"\n",
      "Composition of Z : ", $pp->AA_comp('Z'),"\n",
      "Composition of X : ", $pp->AA_comp('X'),"\n";
  }

=head1 DESCRIPTION

This module takes an amino acid sequence and submits it to the
Protparam program at www.expasy.org/cgi-bin/protparam. Many
properties of the submitted sequence are returned.

=head1 AUTHOR

Richard Dobson, r.j.dobson at qmul dot ac dot uk

=cut

85 # Let the code begin...
87 package Bio::Tools::Protparam;
89 use strict;
90 use base qw(Bio::Root::Root);
91 use LWP 5.64;
=head2 new

 Title    : new
 Usage    : $pp = Bio::Tools::Protparam->new(seq => $seq->seq);
 Function : Creates a new Protparam object by POSTing the sequence to the
            ProtParam web form and storing the returned page for the
            accessor methods to parse
 Returns  : A Protparam object
 Args     : seq  - the amino acid sequence string to submit
            url  - optional; defaults to
                   http://www.expasy.org/cgi-bin/protparam
            form - optional; name of the form field that carries the
                   sequence, defaults to 'sequence'
 Throws   : If the HTTP request is not successful, or if the response
            content type is not text/html

=cut

sub new {
    my ($class, @args) = @_;

    # Defaults are placed ahead of the caller's arguments; presumably
    # _rearrange lets the later, caller-supplied values win -- confirm
    # against Bio::Root::RootI.
    @args = (
        '-url'  => 'http://www.expasy.org/cgi-bin/protparam',
        '-form' => 'sequence',
        @args,
    );

    my $self = $class->SUPER::new(@args);

    my ($url, $seq, $form) = $self->_rearrange([qw(URL SEQ FORM)], @args);

    my $browser = LWP::UserAgent->new;

    # Send request to PROTPARAM @ Expasy: form data as an array ref,
    # followed by extra request headers.
    my $response = $browser->post(
        $url,
        [ $form => $seq ],
        'User-Agent' => 'Mozilla/4.76 [en] (Win2000; U)',
    );

    # Check if successful.  The messages use the $url actually requested
    # (nothing ever stores a 'url' key on $self) and are built as a single
    # string so the detail cannot be taken for a separate throw() argument.
    $self->throw("$url error: " . $response->status_line)
        unless $response->is_success;
    $self->throw("Bad content type at $url " . $response->content_type)
        unless $response->content_type eq 'text/html';

    # Keep the page body; every accessor parses it on demand.
    $self->{'output'} = $response->decoded_content;

    return bless $self, $class;
}
=head2 num_neg

 Title    : num_neg
 Usage    : $pp->num_neg()
 Function : Retrieves the number of negative amino acids in a sequence
 Returns  : The digits found after the "Total number of negatively charged
            residues" heading in the stored output, or undef when the
            heading is not present
 Args     : none

=cut

sub num_neg {
    my $self = shift;

    # List assignment keeps the captured digits rather than the match status.
    ($self->{'negAA'}) =
        $self->{'output'} =~ /<B>Total number of negatively charged residues.*?<\/B>\s*(\d*)/;

    return $self->{'negAA'};
}
=head2 num_pos

 Title    : num_pos
 Usage    : $pp->num_pos()
 Function : Retrieves the number of positive amino acids in a sequence
 Returns  : The digits found after the "Total number of positively charged
            residues" heading in the stored output, or undef when the
            heading is not present
 Args     : none

=cut

sub num_pos {
    my $self = shift;

    # List assignment keeps the captured digits rather than the match status.
    ($self->{'posAA'}) =
        $self->{'output'} =~ /<B>Total number of positively charged residues.*?<\/B>\s*(\d*)/;

    return $self->{'posAA'};
}
=head2 amino_acid_number

 Title    : amino_acid_number
 Usage    : $pp->amino_acid_number()
 Function : Retrieves the number of amino acids within a sequence
 Returns  : The number following the "Number of amino acids:" heading in
            the stored output, or undef when it is not found
 Args     : none

=cut

sub amino_acid_number {
    my $self = shift;

    # The heading must be followed by exactly one space and at least one digit.
    ($self->{'numAA'}) =
        $self->{'output'} =~ /<B>Number of amino acids:<\/B> (\d+)/;

    return $self->{'numAA'};
}
=head2 total_atoms

 Title    : total_atoms
 Usage    : $pp->total_atoms()
 Function : Retrieves the total number of atoms within a sequence
 Returns  : The digits following the "Total number of atoms:" heading in
            the stored output, or undef when the heading is not present
 Args     : none

=cut

sub total_atoms {
    my $self = shift;

    # The parentheses force list context so the captured count is stored;
    # a plain scalar assignment would only record whether the match succeeded.
    ($self->{'total_atoms'}) =
        $self->{'output'} =~ /<B>Total number of atoms:<\/B>\s*(\d*)/;

    return $self->{'total_atoms'};
}
=head2 molecular_weight

 Title    : molecular_weight
 Usage    : $pp->molecular_weight()
 Function : Retrieves the molecular weight of a sequence
 Returns  : The decimal number following the "Molecular weight:" heading
            in the stored output, or undef when the heading is not present
 Args     : none

=cut

sub molecular_weight {
    my $self = shift;

    # Captures an optional integer part, optional dot and optional fraction.
    ($self->{'MolWt'}) =
        $self->{'output'} =~ /<B>Molecular weight:<\/B> (\d*\.{0,1}\d*)/;

    return $self->{'MolWt'};
}
=head2 theoretical_pI

 Title    : theoretical_pI
 Usage    : $pp->theoretical_pI()
 Function : Retrieve the theoretical pI for a sequence
 Returns  : The (optionally signed) decimal number following the
            "Theoretical pI:" heading in the stored output, or undef when
            the heading is not present
 Args     : none

=cut

sub theoretical_pI {
    my $self = shift;

    # Captures an optional leading minus, then digits with an optional fraction.
    ($self->{'TpI'}) =
        $self->{'output'} =~ /<B>Theoretical pI:<\/B> (-{0,1}\d*\.{0,1}\d*)/;

    return $self->{'TpI'};
}
=head2 num_carbon

 Title    : num_carbon
 Usage    : $pp->num_carbon()
 Function : Retrieves the number of carbon atoms in a sequence
 Returns  : The number on the "Carbon C" row of the stored output, or
            undef when that row is not found
 Args     : none

=cut

sub num_carbon {
    my $self = shift;

    # Row layout: element name, element symbol, count, whitespace separated.
    ($self->{'car'}) = $self->{'output'} =~ /Carbon\s+C\s+(\d+)/;

    return $self->{'car'};
}
=head2 num_hydrogen

 Title    : num_hydrogen
 Usage    : $pp->num_hydrogen()
 Function : Retrieves the number of hydrogen atoms in a sequence
 Returns  : The number on the "Hydrogen H" row of the stored output, or
            undef when that row is not found
 Args     : none

=cut

sub num_hydrogen {
    my $self = shift;

    # Row layout: element name, element symbol, count, whitespace separated.
    ($self->{'hyd'}) = $self->{'output'} =~ /Hydrogen\s+H\s+(\d+)/;

    return $self->{'hyd'};
}
=head2 num_nitro

 Title    : num_nitro
 Usage    : $pp->num_nitro()
 Function : Retrieves the number of nitrogen atoms in a sequence
 Returns  : The number on the "Nitrogen N" row of the stored output, or
            undef when that row is not found
 Args     : none

=cut

sub num_nitro {
    my $self = shift;

    # Row layout: element name, element symbol, count, whitespace separated.
    ($self->{'nitro'}) = $self->{'output'} =~ /Nitrogen\s+N\s+(\d+)/;

    return $self->{'nitro'};
}
=head2 num_oxygen

 Title    : num_oxygen
 Usage    : $pp->num_oxygen()
 Function : Retrieves the number of oxygen atoms in a sequence
 Returns  : The number on the "Oxygen O" row of the stored output, or
            undef when that row is not found
 Args     : none

=cut

sub num_oxygen {
    my $self = shift;

    # Row layout: element name, element symbol, count, whitespace separated.
    ($self->{'oxy'}) = $self->{'output'} =~ /Oxygen\s+O\s+(\d+)/;

    return $self->{'oxy'};
}
=head2 num_sulphur

 Title    : num_sulphur
 Usage    : $pp->num_sulphur()
 Function : Retrieves the number of sulphur atoms in a sequence
 Returns  : The number on the "Sulfur S" row of the stored output, or
            undef when that row is not found
 Args     : none

=cut

sub num_sulphur {
    my $self = shift;

    # The output spells the element "Sulfur" even though the method says "sulphur".
    ($self->{'sul'}) = $self->{'output'} =~ /Sulfur\s+S\s+(\d+)/;

    return $self->{'sul'};
}
=head2 half_life

 Title    : half_life
 Usage    : $pp->half_life()
 Function : Retrieves the half life of a sequence
 Returns  : The number of hours reported for "mammalian reticulocytes,
            in vitro" in the stored output, or undef when that sentence
            is not found
 Args     : none

=cut

sub half_life {
    my $self = shift;

    # Only the mammalian-reticulocyte figure is extracted; the lazy .*?
    # skips whatever separates the sentence start from the number.
    ($self->{'half_life'}) =
        $self->{'output'} =~ /The estimated half-life is.*?(-{0,1}\d*\.{0,1}\d*)\s*hours \(mammalian reticulocytes, in vitro\)/;

    return $self->{'half_life'};
}
=head2 instability_index

 Title    : instability_index
 Usage    : $pp->instability_index()
 Function : Retrieves the instability index of a sequence
 Returns  : The (optionally signed) decimal number following "The
            instability index (II) is computed to be" in the stored
            output, or undef when that sentence is not found
 Args     : none

=cut

sub instability_index {
    my $self = shift;

    # Captures an optional leading minus, then digits with an optional fraction.
    ($self->{'InstabilityIndex'}) =
        $self->{'output'} =~ /The instability index \(II\) is computed to be (-{0,1}\d*\.{0,1}\d*)/;

    return $self->{'InstabilityIndex'};
}
=head2 stability

 Title    : stability
 Usage    : $pp->stability()
 Function : Reports whether the sequence was classified as stable or
            unstable
 Returns  : The word following "This classifies the protein as" in the
            stored output ('stable' or 'unstable'), or undef when that
            sentence is not found
 Args     : none

=cut

sub stability {
    my $self = shift;

    # The classification is read from the output, not computed locally.
    ($self->{'Stability'}) =
        $self->{'output'} =~ /This classifies the protein as\s(\w+)\./;

    return $self->{'Stability'};
}
=head2 aliphatic_index

 Title    : aliphatic_index
 Usage    : $pp->aliphatic_index()
 Function : Retrieves the aliphatic index of the sequence
 Returns  : The (optionally signed) decimal number following the
            "Aliphatic index:" heading in the stored output, or undef
            when the heading is not present
 Args     : none

=cut

sub aliphatic_index {
    my $self = shift;

    # Captures an optional leading minus, then digits with an optional fraction.
    ($self->{'AliphaticIndex'}) =
        $self->{'output'} =~ /<B>Aliphatic index:<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/;

    return $self->{'AliphaticIndex'};
}
=head2 gravy

 Title    : gravy
 Usage    : $pp->gravy()
 Function : Retrieves the grand average of hydropathicity (GRAVY) of a
            sequence
 Returns  : The (optionally signed) decimal number following the "Grand
            average of hydropathicity (GRAVY):" heading in the stored
            output, or undef when the heading is not present
 Args     : none

=cut

sub gravy {
    my $self = shift;

    # The optional leading minus matters here: the pattern allows a negative value.
    ($self->{'GRAVY'}) =
        $self->{'output'} =~ /<B>Grand average of hydropathicity \(GRAVY\):<\/B>\s*(-{0,1}\d*\.{0,1}\d*)/;

    return $self->{'GRAVY'};
}
=head2 AA_comp

 Title    : AA_comp
 Usage    : $pp->AA_comp('P')
 Function : Retrieves the percentage composition of a given amino acid
            for a sequence
 Returns  : The percentage (without the % sign) from the residue's row in
            the stored output, or undef when the code is not one of the
            supported one-letter codes or the row is not found
 Args     : A single letter amino acid code eg A, R, G, P etc
            (case-insensitive)

=cut

sub AA_comp {
    my $self = shift;
    my $aa   = shift;

    # One-letter code => three-letter name as it appears in the output rows.
    my $AA = {qw(A Ala R Arg N Asn D Asp C Cys Q Gln E Glu G Gly H His I Ile L Leu K Lys M Met F Phe P Pro S Ser T Thr W Trp Y Tyr V Val B Asx Z Glx X Xaa)};

    my $percent;
    if (defined $aa) {
        $aa = uc($aa);

        # Only codes present in the table reach the regex: an arbitrary
        # argument could otherwise break the pattern (e.g. '(') or match a
        # different residue's row (e.g. '.').
        my $name = $AA->{$aa};
        if (defined $name) {
            ($percent) =
                $self->{'output'} =~ /\Q$name\E \(\Q$aa\E\)\s+\d+\s+(\d+\.\d+)%/;

            # Stored under the one-letter code, as before.
            $self->{$aa} = $percent;
        }
    }

    return $percent;
}