changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / Bio / Tools / pICalculator.pm
blob9875265afe98665351e641b4f3367e4a3292bd3e
2 # BioPerl module for Bio::Tools::pICalculator
4 # Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Tools::pICalculator - calculate the isoelectric point of a protein
15 =head1 DESCRIPTION
17 Calculates the isoelectric point of a protein, the pH at which there
18 is no overall charge on the protein. Calculates the charge on a protein
19 at a given pH. Can use built-in sets of pK values or custom pK sets.
21 =head1 SYNOPSIS
23 use Bio::Tools::pICalculator;
24 use Bio::SeqIO;
26 my $in = Bio::SeqIO->new( -fh => \*STDIN ,
27 -format => 'Fasta' );
29 my $calc = Bio::Tools::pICalculator->new(-places => 2,
30 -pKset => 'EMBOSS');
32 while ( my $seq = $in->next_seq ) {
33 $calc->seq($seq);
34 my $iep = $calc->iep;
35 print sprintf( "%s\t%s\t%.2f\n",
36 $seq->id,
37 $iep,
38 $calc->charge_at_pH($iep) );
40 for( my $i = 0; $i <= 14; $i += 0.5 ){
41 print sprintf( "pH = %.2f\tCharge = %.2f\n",
42 $i,
43 $calc->charge_at_pH($i) );
47 =head1 SEE ALSO
49 http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
50 http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
51 http://us.expasy.org/tools/pi_tool.html
53 =head1 LIMITATIONS
55 There are various sources for the pK values of the amino acids.
56 The set of pK values chosen will affect the pI reported.
58 The charge state of each residue is assumed to be independent of
59 the others. Protein modifications (such as a phosphate group) that
60 have a charge are ignored.
62 =head1 FEEDBACK
64 =head2 Mailing Lists
66 User feedback is an integral part of the evolution of this
67 and other Bioperl modules. Send your comments and suggestions
68 preferably to one of the Bioperl mailing lists.
69 Your participation is much appreciated.
71 bioperl-l@bioperl.org - General discussion
72 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
74 =head2 Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
78 web:
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR
84 Mark Southern (mark_southern@merck.com). From an algorithm by David
85 Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
86 Modification for Bioperl, additional documentation by Brian Osborne.
88 =head1 COPYRIGHT
90 Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
91 free software. It may be used, redistributed and/or modified under the terms
92 of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
94 =head1 APPENDIX
96 The rest of the documentation details each of the object methods.
97 Private methods are usually preceded by a _.
99 =cut
101 # Let the code begin...
103 package Bio::Tools::pICalculator;
104 use strict;
107 use base qw(Bio::Root::Root);
109 # pK values from the DTASelect program from Scripps
110 # http://fields.scripps.edu/DTASelect
111 my $DTASelect_pK = { N_term => 8.0,
112 K => 10.0, # Lys
113 R => 12.0, # Arg
114 H => 6.5, # His
115 D => 4.4, # Asp
116 E => 4.4, # Glu
117 C => 8.5, # Cys
118 Y => 10.0, # Tyr
119 C_term => 3.1
122 # pK values from the iep program from EMBOSS
123 # http://emboss.sourceforge.net/apps/cvs/emboss/apps/iep.html
124 my $Emboss_pK = { N_term => 8.6,
125 K => 10.8, # Lys
126 R => 12.5, # Arg
127 H => 6.5, # His
128 D => 3.9, # Asp
129 E => 4.1, # Glu
130 C => 8.5, # Cys
131 Y => 10.1, # Tyr
132 C_term => 3.6
135 =head2 desc
137 Title : new
138 Usage : Bio::Tools::pICalculator->new
139 Function: Instantiates the Bio::Tools::pICalculator object
140 Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
141 # a Bio::Seq object
142 -seq => $seq,
143 -places => 2 );
146 $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
147 # a Bio::Seq object
148 -seq => $seq,
149 -places => 1 );
151 Constructs a new pICalculator. Arguments are a flattened hash.
152 Valid, optional keys are:
154 pKset - A reference to a hash with key value pairs for the
155 pK values of the charged amino acids. Required keys
156 are:
158 N_term C_term K R H D E C Y
160 pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will
161 specify an internal set of pK values to be used. The
162 default is 'EMBOSS'
164 seq - A Bio::Seq sequence object to analyze
166 places - The number of decimal places to use in the
167 isoelectric point calculation. The default is 2.
169 Returns : The description
170 Args : The description or none
172 =cut
174 sub new {
175 my( $class, %opts ) = @_;
176 my $self = $class->SUPER::new(%opts);
177 $self = bless {}, ref $self || $self;
178 $self->seq( $opts{-seq} ) if exists $opts{-seq};
179 $self->pKset( $opts{-pKset} || 'EMBOSS' );
180 exists $opts{-places} ? $self->places( $opts{-places} ) :
181 $self->places(2);
182 return $self;
185 =head2 seq
187 Title : seq
188 Usage : $calc->seq($seqobj)
189 Function: Sets or returns the Bio::Seq used in the calculation
190 Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
191 $calc = Bio::Tools::pICalculator->new;
192 $calc->seq($seqobj);
193 Returns : Bio::Seq object
194 Args : Bio::Seq object or none
196 =cut
198 sub seq {
199 my( $this, $seq ) = @_;
200 unless( defined $seq && UNIVERSAL::isa($seq,'Bio::Seq') ){
201 $this->throw("$seq is not a valid Bio::Seq object");
203 $this->{-seq} = $seq;
204 $this->{-count} = count_charged_residues( $seq );
205 return $this->{-seq};
208 =head2 pKset
210 Title : pKset
211 Usage : $pkSet = $calc->pKSet(\%pKSet)
212 Function: Sets or returns the hash of pK values used in the calculation
213 Example : $calc->pKset('emboss')
214 Returns : reference to pKset hash
215 Args : The reference to a pKset hash, a string, or none. Examples:
217 pKset - A reference to a hash with key value pairs for the
218 pK values of the charged amino acids. Required keys
219 are:
221 N_term C_term K R H D E C Y
223 pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will
224 specify an internal set of pK values to be used. The
225 default is 'EMBOSS'
227 =cut
229 sub pKset {
230 my ( $this, $pKset ) = @_;
231 if( ref $pKset eq 'HASH' ){ # user defined pK values
232 $this->{-pKset} = $pKset;
233 }elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
234 $this->{-pKset} = $Emboss_pK;
235 }elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
236 $this->{-pKset} = $DTASelect_pK;
237 }else{ # default to EMBOSS
238 $this->{-pKset} = $Emboss_pK;
240 return $this->{-pKset};
243 sub places {
244 my $this = shift;
245 $this->{-places} = shift if @_;
246 return $this->{-places};
249 =head2 iep
251 Title : iep
252 Usage : $calc->iep
253 Function: Returns the isoelectric point
254 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
255 $calc->seq($seqobj);
256 $iep = $calc->iep;
257 Returns : The isoelectric point of the sequence in the Bio::Seq object
258 Args : None
260 =cut
262 sub iep {
263 my $this = shift;
264 return _calculate_iep($this->{-pKset},
265 $this->{-places},
266 $this->{-seq},
267 $this->{-count}
271 =head2 charge_at_pH
273 Title : charge_at_pH
274 Usage : $charge = $calc->charge_at_pH($pH)
275 Function: Sets or gets the description of the sequence
276 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
277 $calc->seq($seqobj);
278 $charge = $calc->charge_at_ph("7");
279 Returns : The predicted charge at the given pH
280 Args : pH
282 =cut
284 sub charge_at_pH {
285 my $this = shift;
286 return _calculate_charge_at_pH( shift, $this->{-pKset},
287 $this->{-count} );
290 sub count_charged_residues {
291 my $seq = shift;
292 my $sequence = $seq->seq;
293 my $count;
294 for ( qw( K R H D E C Y ) ){ # charged AA's
295 $count->{$_}++ while $sequence =~ /$_/ig;
297 return $count;
300 sub _calculate_iep {
301 my( $pK, $places, $seq, $count ) = @_;
302 my $pH = 7.0;
303 my $step = 3.5;
304 my $last_charge = 0.0;
305 my $format = "%.${places}f";
307 unless( defined $count ){
308 $count = count_charged_residues($seq);
310 while(1){
311 my $charge = _calculate_charge_at_pH( $pH, $pK, $count );
312 last if sprintf($format,$charge) ==
313 sprintf($format,$last_charge);
314 $charge > 0 ? ( $pH += $step ) : ( $pH -= $step );
315 $step /= 2.0;
316 $last_charge = $charge;
318 return sprintf( $format, $pH );
321 # it's the sum of all the partial charges for the
322 # termini and all of the charged aa's!
323 sub _calculate_charge_at_pH {
324 no warnings; # don't complain if a given key doesn't exist
325 my( $pH, $pK, $count ) = @_;
326 my $charge = _partial_charge( $pK->{N_term}, $pH )
327 + $count->{K} * _partial_charge( $pK->{K}, $pH )
328 + $count->{R} * _partial_charge( $pK->{R}, $pH )
329 + $count->{H} * _partial_charge( $pK->{H}, $pH )
330 - $count->{D} * _partial_charge( $pH, $pK->{D} )
331 - $count->{E} * _partial_charge( $pH, $pK->{E} )
332 - $count->{C} * _partial_charge( $pH, $pK->{C} )
333 - $count->{Y} * _partial_charge( $pH, $pK->{Y} )
334 - _partial_charge( $pH, $pK->{C_term} );
335 return $charge;
338 # Concentration Ratio is 10**(pK - pH) for positive groups
339 # and 10**(pH - pK) for negative groups
340 sub _partial_charge {
341 my $cr = 10 ** ( $_[0] - $_[1] );
342 return $cr / ( $cr + 1 );
347 __END__