3 # BioPerl module for Bio::Tools::pICalculator
5 # Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved.
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Tools::pICalculator - calculate the isoelectric point of a protein
18 Calculates the isoelectric point of a protein, the pH at which there
19 is no overall charge on the protein. Calculates the charge on a protein
20 at a given pH. Can use built-in sets of pK values or custom pK sets.
24 use Bio::Tools::pICalculator;
27 my $in = Bio::SeqIO->new( -fh => \*STDIN ,
30 my $calc = Bio::Tools::pICalculator->new(-places => 2,
33 while ( my $seq = $in->next_seq ) {
36 print sprintf( "%s\t%s\t%.2f\n",
39 $calc->charge_at_pH($iep) );
41 for( my $i = 0; $i <= 14; $i += 0.5 ){
42 print sprintf( "pH = %.2f\tCharge = %.2f\n",
44 $calc->charge_at_pH($i) );
50 http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
51 http://emboss.sourceforge.net/apps/cvs/iep.html
52 http://us.expasy.org/tools/pi_tool.html
56 There are various sources for the pK values of the amino acids.
57 The set of pK values chosen will affect the pI reported.
59 The charge state of each residue is assumed to be independent of
60 the others. Protein modifications (such as a phosphate group) that
61 have a charge are ignored.
67 User feedback is an integral part of the evolution of this
68 and other Bioperl modules. Send your comments and suggestions
69 preferably to one of the Bioperl mailing lists.
70 Your participation is much appreciated.
72 bioperl-l@bioperl.org - General discussion
73 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 the bugs and their resolution. Bug reports can be submitted via the
81 http://bugzilla.open-bio.org/
85 Mark Southern (mark_southern@merck.com). From an algorithm by David
86 Tabb found at http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf.
87 Modification for Bioperl, additional documentation by Brian Osborne.
91 Copyright (c) 2002, Merck & Co. Inc. All Rights Reserved. This module is
92 free software. It may be used, redistributed and/or modified under the terms
93 of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
97 The rest of the documentation details each of the object methods.
98 Private methods are usually preceded by a _.
102 # Let the code begin...
104 package Bio
::Tools
::pICalculator
;
108 use base
qw(Bio::Root::Root);
110 # pK values from the DTASelect program from Scripps
111 # http://fields.scripps.edu/DTASelect
112 my $DTASelect_pK = { N_term
=> 8.0,
123 # pK values from the iep program from EMBOSS
124 # http://emboss.sourceforge.net/apps/cvs/iep.html
125 my $Emboss_pK = { N_term
=> 8.6,
139 Usage : Bio::Tools::pICalculator->new
140 Function: Instantiates the Bio::Tools::pICalculator object
141 Example : $calc = Bio::Tools::pICalculator->new( -pKset => \%pKvalues,
147 $calc = Bio::Tools::pICalculator->new( -pKset => 'string',
152 Constructs a new pICalculator. Arguments are a flattened hash.
153 Valid, optional keys are:
155 pKset - A reference to a hash with key value pairs for the
156 pK values of the charged amino acids. Required keys
159 N_term C_term K R H D E C Y
161 pKset - A string ( 'DTASelect' or 'EMBOSS' ) that will
162 specify an internal set of pK values to be used. The
165 seq - A Bio::Seq sequence object to analyze
167 places - The number of decimal places to use in the
168 isoelectric point calculation. The default is 2.
170 Returns : The description
171 Args : The description or none
176 my( $class, %opts ) = @_;
177 my $self = $class->SUPER::new
(%opts);
178 $self = bless {}, ref $self || $self;
179 $self->seq( $opts{-seq
} ) if exists $opts{-seq
};
180 $self->pKset( $opts{-pKset
} || 'EMBOSS' );
181 exists $opts{-places
} ?
$self->places( $opts{-places
} ) :
189 Usage : $calc->seq($seqobj)
190 Function: Sets or returns the Bio::Seq used in the calculation
191 Example : $seqobj = Bio::Seq->new(-seq=>"gghhhmmm",-id=>"GHM");
192 $calc = Bio::Tools::pICalculator->new;
194 Returns : Bio::Seq object
195 Args : Bio::Seq object or none
200 my( $this, $seq ) = @_;
201 unless( defined $seq && UNIVERSAL
::isa
($seq,'Bio::Seq') ){
202 $this->throw("$seq is not a valid Bio::Seq object");
204 $this->{-seq
} = $seq;
205 $this->{-count
} = count_charged_residues
( $seq );
206 return $this->{-seq
};
212 Usage : $pkSet = $calc->pKSet(\%pKSet)
213 Function: Sets or returns the hash of pK values used in the calculation
214 Example : $calc->pKset('emboss')
215 Returns : reference to pKset hash
216 Args : The reference to a pKset hash, a string, or none. Examples:
218 pKset - A reference to a hash with key value pairs for the
219 pK values of the charged amino acids. Required keys
222 N_term C_term K R H D E C Y
224 pKset - A valid string ( 'DTASelect' or 'EMBOSS' ) that will
225 specify an internal set of pK values to be used. The
231 my ( $this, $pKset ) = @_;
232 if( ref $pKset eq 'HASH' ){ # user defined pK values
233 $this->{-pKset
} = $pKset;
234 }elsif( $pKset =~ /^emboss$/i ){ # from EMBOSS's iep program
235 $this->{-pKset
} = $Emboss_pK;
236 }elsif( $pKset =~ /^dtaselect$/i ){ # from DTASelect (scripps)
237 $this->{-pKset
} = $DTASelect_pK;
238 }else{ # default to EMBOSS
239 $this->{-pKset
} = $Emboss_pK;
241 return $this->{-pKset
};
246 $this->{-places
} = shift if @_;
247 return $this->{-places
};
254 Function: Returns the isoelectric point
255 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
258 Returns : The isoelectric point of the sequence in the Bio::Seq object
265 return _calculate_iep
($this->{-pKset
},
275 Usage : $charge = $calc->charge_at_pH($pH)
276 Function: Sets or gets the description of the sequence
277 Example : $calc = Bio::Tools::pICalculator->new(-places => 2);
279 $charge = $calc->charge_at_ph("7");
280 Returns : The predicted charge at the given pH
287 return _calculate_charge_at_pH
( shift, $this->{-pKset
},
291 sub count_charged_residues
{
293 my $sequence = $seq->seq;
295 for ( qw( K R H D E C Y ) ){ # charged AA's
296 $count->{$_}++ while $sequence =~ /$_/ig;
302 my( $pK, $places, $seq, $count ) = @_;
305 my $last_charge = 0.0;
306 my $format = "%.${places}f";
308 unless( defined $count ){
309 $count = count_charged_residues
($seq);
312 my $charge = _calculate_charge_at_pH
( $pH, $pK, $count );
313 last if sprintf($format,$charge) ==
314 sprintf($format,$last_charge);
315 $charge > 0 ?
( $pH += $step ) : ( $pH -= $step );
317 $last_charge = $charge;
319 return sprintf( $format, $pH );
322 # it's the sum of all the partial charges for the
323 # termini and all of the charged aa's!
324 sub _calculate_charge_at_pH
{
325 no warnings
; # don't complain if a given key doesn't exist
326 my( $pH, $pK, $count ) = @_;
327 my $charge = _partial_charge
( $pK->{N_term
}, $pH )
328 + $count->{K
} * _partial_charge
( $pK->{K
}, $pH )
329 + $count->{R
} * _partial_charge
( $pK->{R
}, $pH )
330 + $count->{H
} * _partial_charge
( $pK->{H
}, $pH )
331 - $count->{D
} * _partial_charge
( $pH, $pK->{D
} )
332 - $count->{E
} * _partial_charge
( $pH, $pK->{E
} )
333 - $count->{C
} * _partial_charge
( $pH, $pK->{C
} )
334 - $count->{Y
} * _partial_charge
( $pH, $pK->{Y
} )
335 - _partial_charge
( $pH, $pK->{C_term
} );
339 # Concentration Ratio is 10**(pK - pH) for positive groups
340 # and 10**(pH - pK) for negative groups
341 sub _partial_charge
{
342 my $cr = 10 ** ( $_[0] - $_[1] );
343 return $cr / ( $cr + 1 );