* sync with trunk
[bioperl-live.git] / Bio / Tools / pICalculator.pm
blob107cbe84e69cea00b46d792e3c95ffe1917aae18
1 # $Id$
3 # BioPerl module for Bio::Tools::pICalculator
5 # Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::Tools::pICalculator - calculate the isoelectric point of a protein
16 =head1 DESCRIPTION
18 Calculates the isoelectric point of a protein, the pH at which there
19 is no overall charge on the protein. Calculates the charge on a protein
20 at a given pH. Can use built-in sets of pK values or custom pK sets.
22 =head1 SYNOPSIS
24 use Bio::Tools::pICalculator;
25 use Bio::SeqIO;
27 my $in = Bio::SeqIO->new( -fh => \*STDIN ,
28 -format => 'Fasta' );
30 my $calc = Bio::Tools::pICalculator->new(-places => 2,
31 -pKset => 'EMBOSS');
33 while ( my $seq = $in->next_seq ) {
34 $calc->seq($seq);
35 my $iep = $calc->iep;
36 print sprintf( "%s\t%s\t%.2f\n",
37 $seq->id,
38 $iep,
39 $calc->charge_at_pH($iep) );
41 for( my $i = 0; $i <= 14; $i += 0.5 ){
42 print sprintf( "pH = %.2f\tCharge = %.2f\n",
43 $i,
44 $calc->charge_at_pH($i) );
48 =head1 SEE ALSO
50 http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
51 http://emboss.sourceforge.net/apps/cvs/iep.html
52 http://us.expasy.org/tools/pi_tool.html
54 =head1 LIMITATIONS
56 There are various sources for the pK values of the amino acids.
57 The set of pK values chosen will affect the pI reported.
59 The charge state of each residue is assumed to be independent of
60 the others. Protein modifications (such as a phosphate group) that
61 have a charge are ignored.
63 =head1 FEEDBACK
65 =head2 Mailing Lists
67 User feedback is an integral part of the evolution of this
68 and other Bioperl modules. Send your comments and suggestions
69 preferably to one of the Bioperl mailing lists.
70 Your participation is much appreciated.
72 bioperl-l@bioperl.org - General discussion
73 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
75 =head2 Bugs
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 the bugs and their resolution. Bug reports can be submitted via the
79 web:
81 http://bugzilla.open-bio.org/
83 =head1 AUTHOR
85 Mark Southern (mark_southern@merck.com). From an algorithm by David
86 Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
87 Modification for Bioperl, additional documentation by Brian Osborne.
89 =head1 COPYRIGHT
91 Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
92 free software. It may be used, redistributed and/or modified under the terms
93 of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
95 =head1 APPENDIX
97 The rest of the documentation details each of the object methods.
98 Private methods are usually preceded by a _.
100 =cut
102 # Let the code begin...
104 package Bio::Tools::pICalculator;
105 use strict;
108 use base qw(Bio::Root::Root);
110 # pK values from the DTASelect program from Scripps
111 # http://fields.scripps.edu/DTASelect
112 my $DTASelect_pK = { N_term => 8.0,
113 K => 10.0, # Lys
114 R => 12.0, # Arg
115 H => 6.5, # His
116 D => 4.4, # Asp
117 E => 4.4, # Glu
118 C => 8.5, # Cys
119 Y => 10.0, # Tyr
120 C_term => 3.1
123 # pK values from the iep program from EMBOSS
124 # http://emboss.sourceforge.net/apps/cvs/iep.html
125 my $Emboss_pK = { N_term => 8.6,
126 K => 10.8, # Lys
127 R => 12.5, # Arg
128 H => 6.5, # His
129 D => 3.9, # Asp
130 E => 4.1, # Glu
131 C => 8.5, # Cys
132 Y => 10.1, # Tyr
133 C_term => 3.6
136 =head2 desc
138 Title : new
139 Usage : Bio::Tools::pICalculator->new
140 Function: Instantiates the Bio::Tools::pICalculator object
141 Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
142 # a Bio::Seq object
143 -seq => $seq,
144 -places => 2 );
147 $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
148 # a Bio::Seq object
149 -seq => $seq,
150 -places => 1 );
152 Constructs a new pICalculator. Arguments are a flattened hash.
153 Valid, optional keys are:
155 pKset - A reference to a hash with key value pairs for the
156 pK values of the charged amino acids. Required keys
157 are:
159 N_term C_term K R H D E C Y
161 pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will
162 specify an internal set of pK values to be used. The
163 default is 'EMBOSS'
165 seq - A Bio::Seq sequence object to analyze
167 places - The number of decimal places to use in the
168 isoelectric point calculation. The default is 2.
170 Returns : The description
171 Args : The description or none
173 =cut
175 sub new {
176 my( $class, %opts ) = @_;
177 my $self = $class->SUPER::new(%opts);
178 $self = bless {}, ref $self || $self;
179 $self->seq( $opts{-seq} ) if exists $opts{-seq};
180 $self->pKset( $opts{-pKset} || 'EMBOSS' );
181 exists $opts{-places} ? $self->places( $opts{-places} ) :
182 $self->places(2);
183 return $self;
186 =head2 seq
188 Title : seq
189 Usage : $calc->seq($seqobj)
190 Function: Sets or returns the Bio::Seq used in the calculation
191 Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
192 $calc = Bio::Tools::pICalculator->new;
193 $calc->seq($seqobj);
194 Returns : Bio::Seq object
195 Args : Bio::Seq object or none
197 =cut
199 sub seq {
200 my( $this, $seq ) = @_;
201 unless( defined $seq && UNIVERSAL::isa($seq,'Bio::Seq') ){
202 $this->throw("$seq is not a valid Bio::Seq object");
204 $this->{-seq} = $seq;
205 $this->{-count} = count_charged_residues( $seq );
206 return $this->{-seq};
209 =head2 pKset
211 Title : pKset
212 Usage : $pkSet = $calc->pKSet(\%pKSet)
213 Function: Sets or returns the hash of pK values used in the calculation
214 Example : $calc->pKset('emboss')
215 Returns : reference to pKset hash
216 Args : The reference to a pKset hash, a string, or none. Examples:
218 pKset - A reference to a hash with key value pairs for the
219 pK values of the charged amino acids. Required keys
220 are:
222 N_term C_term K R H D E C Y
224 pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will
225 specify an internal set of pK values to be used. The
226 default is 'EMBOSS'
228 =cut
230 sub pKset {
231 my ( $this, $pKset ) = @_;
232 if( ref $pKset eq 'HASH' ){ # user defined pK values
233 $this->{-pKset} = $pKset;
234 }elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
235 $this->{-pKset} = $Emboss_pK;
236 }elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
237 $this->{-pKset} = $DTASelect_pK;
238 }else{ # default to EMBOSS
239 $this->{-pKset} = $Emboss_pK;
241 return $this->{-pKset};
244 sub places {
245 my $this = shift;
246 $this->{-places} = shift if @_;
247 return $this->{-places};
250 =head2 iep
252 Title : iep
253 Usage : $calc->iep
254 Function: Returns the isoelectric point
255 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
256 $calc->seq($seqobj);
257 $iep = $calc->iep;
258 Returns : The isoelectric point of the sequence in the Bio::Seq object
259 Args : None
261 =cut
263 sub iep {
264 my $this = shift;
265 return _calculate_iep($this->{-pKset},
266 $this->{-places},
267 $this->{-seq},
268 $this->{-count}
272 =head2 charge_at_pH
274 Title : charge_at_pH
275 Usage : $charge = $calc->charge_at_pH($pH)
276 Function: Sets or gets the description of the sequence
277 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
278 $calc->seq($seqobj);
279 $charge = $calc->charge_at_ph("7");
280 Returns : The predicted charge at the given pH
281 Args : pH
283 =cut
285 sub charge_at_pH {
286 my $this = shift;
287 return _calculate_charge_at_pH( shift, $this->{-pKset},
288 $this->{-count} );
291 sub count_charged_residues {
292 my $seq = shift;
293 my $sequence = $seq->seq;
294 my $count;
295 for ( qw( K R H D E C Y ) ){ # charged AA's
296 $count->{$_}++ while $sequence =~ /$_/ig;
298 return $count;
301 sub _calculate_iep {
302 my( $pK, $places, $seq, $count ) = @_;
303 my $pH = 7.0;
304 my $step = 3.5;
305 my $last_charge = 0.0;
306 my $format = "%.${places}f";
308 unless( defined $count ){
309 $count = count_charged_residues($seq);
311 while(1){
312 my $charge = _calculate_charge_at_pH( $pH, $pK, $count );
313 last if sprintf($format,$charge) ==
314 sprintf($format,$last_charge);
315 $charge > 0 ? ( $pH += $step ) : ( $pH -= $step );
316 $step /= 2.0;
317 $last_charge = $charge;
319 return sprintf( $format, $pH );
322 # it's the sum of all the partial charges for the
323 # termini and all of the charged aa's!
324 sub _calculate_charge_at_pH {
325 no warnings; # don't complain if a given key doesn't exist
326 my( $pH, $pK, $count ) = @_;
327 my $charge = _partial_charge( $pK->{N_term}, $pH )
328 + $count->{K} * _partial_charge( $pK->{K}, $pH )
329 + $count->{R} * _partial_charge( $pK->{R}, $pH )
330 + $count->{H} * _partial_charge( $pK->{H}, $pH )
331 - $count->{D} * _partial_charge( $pH, $pK->{D} )
332 - $count->{E} * _partial_charge( $pH, $pK->{E} )
333 - $count->{C} * _partial_charge( $pH, $pK->{C} )
334 - $count->{Y} * _partial_charge( $pH, $pK->{Y} )
335 - _partial_charge( $pH, $pK->{C_term} );
336 return $charge;
339 # Concentration Ratio is 10**(pK - pH) for positive groups
340 # and 10**(pH - pK) for negative groups
341 sub _partial_charge {
342 my $cr = 10 ** ( $_[0] - $_[1] );
343 return $cr / ( $cr + 1 );
348 __END__