tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Variation / AAChange.pm
blobfbb0f1a35173b856133fb62d04821722b23f9cde
1 # $Id$
3 # BioPerl module for Bio::Variation::AAChange
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Heikki Lehvaslaiho
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Variation::AAChange - Sequence change class for polypeptides
19 =head1 SYNOPSIS
21 $aamut = Bio::Variation::AAChange->new
22 ('-start' => $start,
23 '-end' => $end,
24 '-length' => $len,
25 '-proof' => $proof,
26 '-isMutation' => 1,
27 '-mut_number' => $mut_number
30 my $a1 = Bio::Variation::Allele->new;
31 $a1->seq($ori) if $ori;
32 $aamut->allele_ori($a1);
33 my $a2 = Bio::Variation::Allele->new;
34 $a2->seq($mut) if $mut;
35 $aachange->add_Allele($a2);
36 $aachange->allele_mut($a2);
38 print "\n";
40 # add it to a SeqDiff container object
41 $seqdiff->add_Variant($rnachange);
43 # and create links to and from RNA level variant objects
44 $aamut->RNAChange($rnachange);
45 $rnachange->AAChange($rnachange);
47 =head1 DESCRIPTION
49 The instantiable class Bio::Variation::RNAChange describes basic
50 sequence changes at polypeptide level. It uses methods defined in
51 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
52 for details.
54 If the variation described by a AAChange object has a known
55 Bio::Variation::RNAAChange object, create the link with method
56 AAChange(). See L<Bio::Variation::AAChange> for more information.
58 =head1 FEEDBACK
60 =head2 Mailing Lists
62 User feedback is an integral part of the evolution of this and other
63 Bioperl modules. Send your comments and suggestions preferably to the
64 Bioperl mailing lists Your participation is much appreciated.
66 bioperl-l@bioperl.org - General discussion
67 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 =head2 Support
71 Please direct usage questions or support issues to the mailing list:
73 I<bioperl-l@bioperl.org>
75 rather than to the module maintainer directly. Many experienced and
76 reponsive experts will be able look at the problem and quickly
77 address it. Please include a thorough description of the problem
78 with code and data examples if at all possible.
80 =head2 Reporting Bugs
82 Report bugs to the Bioperl bug tracking system to help us keep track
83 the bugs and their resolution. Bug reports can be submitted via the
84 web:
86 http://bugzilla.open-bio.org/
88 =head1 AUTHOR - Heikki Lehvaslaiho
90 Email: heikki-at-bioperl-dot-org
92 =head1 APPENDIX
94 The rest of the documentation details each of the object
95 methods. Internal methods are usually preceded with a _
97 =cut
100 # Let the code begin...
103 package Bio::Variation::AAChange;
105 use vars qw($MATRIX);
106 use strict;
108 # Object preamble - inheritance
110 use base qw(Bio::Variation::VariantI);
112 BEGIN {
114 my $matrix = << "__MATRIX__";
115 # Matrix made by matblas from blosum62.iij
116 # * column uses minimum score
117 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
118 # Blocks Database = /data/blocks_5.0/blocks.dat
119 # Cluster Percentage: >= 62
120 # Entropy = 0.6979, Expected = -0.5209
121 A R N D C Q E G H I L K M F P S T W Y V B Z X *
122 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
123 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
124 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
125 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
126 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
127 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
128 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
129 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
130 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
131 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
132 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
133 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
134 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
135 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
136 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
137 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
138 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
139 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
140 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
141 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
142 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
143 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
144 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
145 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
146 __MATRIX__
148 my %blosum = ();
149 $matrix =~ /^ +(.+)$/m;
150 my @aas = split / +/, $1;
151 foreach my $aa (@aas) {
152 my $tmp = $aa;
153 $tmp = "\\$aa" if $aa eq '*';
154 $matrix =~ /^($tmp) +([-+]?\d.*)$/m;
155 my @scores = split / +/, $2 if defined $2;
156 my $count = 0;
157 foreach my $ak (@aas) {
158 $blosum{$aa}->{$aas[$count]} = $scores[$count];
159 $count++;
162 sub _matrix;
163 $MATRIX = \%blosum;
166 sub new {
167 my($class,@args) = @_;
168 my $self = $class->SUPER::new(@args);
170 my ($start, $end, $length, $strand, $primary, $source,
171 $frame, $score, $gff_string,
172 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
173 $label, $status, $proof, $re_changes, $region, $region_value,
174 $region_dist,
175 $numbering, $mut_number, $ismutation) =
176 $self->_rearrange([qw(START
178 LENGTH
179 STRAND
180 PRIMARY
181 SOURCE
182 FRAME
183 SCORE
184 GFF_STRING
185 ALLELE_ORI
186 ALLELE_MUT
187 UPSTREAMSEQ
188 DNSTREAMSEQ
189 LABEL
190 STATUS
191 PROOF
192 RE_CHANGES
193 REGION
194 REGION_VALUE
195 REGION_DIST
196 NUMBERING
197 MUT_NUMBER
198 ISMUTATION
199 )],@args);
201 $self->primary_tag("Variation");
203 $self->{ 'alleles' } = [];
205 $start && $self->start($start);
206 $end && $self->end($end);
207 $length && $self->length($length);
208 $strand && $self->strand($strand);
209 $primary && $self->primary_tag($primary);
210 $source && $self->source_tag($source);
211 $frame && $self->frame($frame);
212 $score && $self->score($score);
213 $gff_string && $self->_from_gff_string($gff_string);
215 $allele_ori && $self->allele_ori($allele_ori);
216 $allele_mut && $self->allele_mut($allele_mut);
217 $upstreamseq && $self->upstreamseq($upstreamseq);
218 $dnstreamseq && $self->dnstreamseq($dnstreamseq);
220 $label && $self->label($label);
221 $status && $self->status($status);
222 $proof && $self->proof($proof);
223 $region && $self->region($region);
224 $region_value && $self->region_value($region_value);
225 $region_dist && $self->region_dist($region_dist);
226 $numbering && $self->numbering($numbering);
227 $mut_number && $self->mut_number($mut_number);
228 $ismutation && $self->isMutation($ismutation);
230 return $self; # success - we hope!
233 =head2 RNAChange
235 Title : RNAChange
236 Usage : $mutobj = $self->RNAChange;
237 : $mutobj = $self->RNAChange($objref);
238 Function: Returns or sets the link-reference to a mutation/change object.
239 If there is no link, it will return undef
240 Returns : an obj_ref or undef
242 =cut
244 sub RNAChange {
245 my ($self,$value) = @_;
246 if (defined $value) {
247 if( ! $value->isa('Bio::Variation::RNAChange') ) {
248 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
249 return;
251 else {
252 $self->{'RNAChange'} = $value;
255 unless (exists $self->{'RNAChange'}) {
256 return;
257 } else {
258 return $self->{'RNAChange'};
264 =head2 label
266 Title : label
267 Usage : $obj->label();
268 Function:
270 Sets and returns mutation event label(s). If value is not
271 set, or no argument is given returns false. Each
272 instantiable subclass of L<Bio::Variation::VariantI> needs
273 to implement this method. Valid values are listed in
274 'Mutation event controlled vocabulary' in
275 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
277 Example :
278 Returns : string
279 Args : string
281 =cut
284 sub label {
285 my ($self) = @_;
286 my ($o, $m, $type);
287 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
288 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
290 if ($self->start == 1 ) {
291 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
292 $type = 'no translation';
294 elsif ($o and $m and $o eq $m ) {
295 $type = 'silent';
297 # more ...
299 elsif ($o and substr($o, 0, 1) eq '*' ) {
300 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
301 $type = 'post-elongation';
303 elsif ($m and $o eq $m ) {
304 $type = 'silent, conservative';
307 elsif ($o and $m and $o eq $m) {
308 $type = 'silent, conservative';
310 elsif ($m and $m eq '*') {
311 $type = 'truncation';
313 elsif ($o and $m and $o eq $m) {
314 $type = 'silent, conservative';
316 elsif (not $m or
317 ($o and $m and length($o) > length($m) and
318 substr($m, -1, 1) ne '*')) {
319 $type = 'deletion';
320 if ($o and $m and $o !~ $m and $o !~ $m) {
321 $type .= ', complex';
324 elsif (not $o or
325 ($o and $m and length($o) < length($m) and
326 substr($m, -1, 1) ne '*' ) ) {
327 $type = 'insertion';
328 if ($o and $m and $o !~ $m and $o !~ $m) {
329 $type .= ', complex';
332 elsif ($o and $m and $o ne $m and
333 length $o == 1 and length $m == 1 ) {
334 $type = 'substitution';
335 my $value = $self->similarity_score;
336 if (defined $value) {
337 my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
338 $type .= ", ". $cons;
340 } else {
341 $type = 'out-of-frame translation, truncation';
343 $self->{'label'} = $type;
344 return $self->{'label'};
348 =head2 similarity_score
350 Title : similarity_score
351 Usage : $self->similarity_score
352 Function: Measure for evolutionary conservativeness
353 of single amino substitutions. Uses BLOSUM62.
354 Negative numbers are noncoservative changes.
355 Returns : integer, undef if not single amino acid change
357 =cut
359 sub similarity_score {
360 my ($self) = @_;
361 my ($o, $m, $type);
362 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
363 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
364 return unless $o and $m and length $o == 1 and length $m == 1;
365 return unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
366 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
367 return $MATRIX->{"\U$o"}->{"\U$m"};
370 =head2 trivname
372 Title : trivname
373 Usage : $self->trivname
374 Function:
376 Given a Bio::Variation::AAChange object with linked
377 Bio::Variation::RNAChange and Bio::Variation::DNAMutation
378 objects, this subroutine creates a string corresponding to
379 the 'trivial name' of the mutation. Trivial name is
380 specified in Antonorakis & MDI Nomenclature Working Group:
381 Human Mutation 11:1-3, 1998.
383 Returns : string
385 =cut
388 sub trivname {
389 my ($self,$value) = @_;
390 if( defined $value) {
391 $self->{'trivname'} = $value;
392 } else {
393 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
394 ('', '', '', '', '');
395 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
396 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
398 $aaori = substr ($o, 0, 1) if $o;
399 $aaori =~ tr/\*/X/;
401 my $sep;
402 if ($self->isMutation) {
403 $sep = '>';
404 } else {
405 $sep = '|';
407 my $trivname = $aaori. $self->start;
408 $trivname .= $sep if $sep eq '|';
410 my @alleles = $self->each_Allele;
411 foreach my $allele (@alleles) {
412 my $m = $allele->seq if $allele->seq;
414 $self->allele_mut($allele);
415 #$trivname .= $sep. uc $m if $m;
417 $aamutterm = substr ($m, -1, 1) if $m;
418 if ($self->RNAChange->label =~ /initiation codon/ and
419 ( $o and $m and $o ne $m)) {
420 $aamut = 'X';
422 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
423 $aamutsymbol = '';
424 $aamut = $aamutterm;
426 elsif ($self->RNAChange->label =~ /deletion/) {
427 $aamutsymbol = 'del';
428 if ($aamutterm eq '*') {
429 $aatermnumber = $self->start + length($m) -1;
430 $aamut = 'X'. $aatermnumber;
432 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){
433 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
436 elsif ($self->RNAChange->label =~ /insertion/) {
437 $aamutsymbol = 'ins';
438 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
439 $aatermnumber = $self->start + length($m)-1;
440 $aamut = $aatermnumber. 'X';
442 if ($self->RNAChange->label =~ /inframe/){
443 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
446 elsif ($self->RNAChange->label =~ /complex/ ) {
447 my $diff = length($m) - length($o);
448 if ($diff >= 0 ) {
449 $aamutsymbol = 'ins';
450 } else {
451 $aamutsymbol = 'del' ;
453 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
454 $aatermnumber = $self->start + length($m)-1;
455 $aamut = $aatermnumber. 'X';
457 if ($self->RNAChange->label =~ /inframe/){
459 if ($diff >= 0 ) {
460 $aamut = '+'. $diff ;
461 } else {
462 $aamut = $diff ;
466 elsif ($self->label =~ /truncation/) {
467 $aamut = $m;
468 } else {
469 $aamutsymbol = '';
470 $aamut = $aamutterm;
472 $aamut =~ tr/\*/X/;
473 $trivname .= $aamutsymbol. $aamut. $sep;
475 chop $trivname;
476 $self->{'trivname'} = $trivname;
478 return $self->{'trivname'};