* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / Variation / AAChange.pm
blobd1468a800efa98c093e13bacb61aec79ab0bf3c7
1 # $Id$
3 # BioPerl module for Bio::Variation::AAChange
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Variation::AAChange - Sequence change class for polypeptides
17 =head1 SYNOPSIS
19 $aamut = Bio::Variation::AAChange->new
20 ('-start' => $start,
21 '-end' => $end,
22 '-length' => $len,
23 '-proof' => $proof,
24 '-isMutation' => 1,
25 '-mut_number' => $mut_number
28 my $a1 = Bio::Variation::Allele->new;
29 $a1->seq($ori) if $ori;
30 $aamut->allele_ori($a1);
31 my $a2 = Bio::Variation::Allele->new;
32 $a2->seq($mut) if $mut;
33 $aachange->add_Allele($a2);
34 $aachange->allele_mut($a2);
36 print "\n";
38 # add it to a SeqDiff container object
39 $seqdiff->add_Variant($rnachange);
41 # and create links to and from RNA level variant objects
42 $aamut->RNAChange($rnachange);
43 $rnachange->AAChange($rnachange);
45 =head1 DESCRIPTION
47 The instantiable class Bio::Variation::RNAChange describes basic
48 sequence changes at polypeptide level. It uses methods defined in
49 superclass Bio::Variation::VariantI, see L<Bio::Variation::VariantI>
50 for details.
52 If the variation described by a AAChange object has a known
53 Bio::Variation::RNAAChange object, create the link with method
54 AAChange(). See L<Bio::Variation::AAChange> for more information.
56 =head1 FEEDBACK
58 =head2 Mailing Lists
60 User feedback is an integral part of the evolution of this and other
61 Bioperl modules. Send your comments and suggestions preferably to the
62 Bioperl mailing lists Your participation is much appreciated.
64 bioperl-l@bioperl.org - General discussion
65 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 =head2 Reporting Bugs
69 Report bugs to the Bioperl bug tracking system to help us keep track
70 the bugs and their resolution. Bug reports can be submitted via the
71 web:
73 http://bugzilla.open-bio.org/
75 =head1 AUTHOR - Heikki Lehvaslaiho
77 Email: heikki-at-bioperl-dot-org
79 =head1 APPENDIX
81 The rest of the documentation details each of the object
82 methods. Internal methods are usually preceded with a _
84 =cut
87 # Let the code begin...
90 package Bio::Variation::AAChange;
92 use vars qw($MATRIX);
93 use strict;
95 # Object preamble - inheritance
97 use base qw(Bio::Variation::VariantI);
99 BEGIN {
101 my $matrix = << "__MATRIX__";
102 # Matrix made by matblas from blosum62.iij
103 # * column uses minimum score
104 # BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
105 # Blocks Database = /data/blocks_5.0/blocks.dat
106 # Cluster Percentage: >= 62
107 # Entropy = 0.6979, Expected = -0.5209
108 A R N D C Q E G H I L K M F P S T W Y V B Z X *
109 A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
110 R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
111 N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
112 D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
113 C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
114 Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
115 E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
116 G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
117 H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
118 I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
119 L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
120 K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
121 M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
122 F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
123 P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
124 S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
125 T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
126 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
127 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
128 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
129 B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
130 Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
131 X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
132 * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
133 __MATRIX__
135 my %blosum = ();
136 $matrix =~ /^ +(.+)$/m;
137 my @aas = split / +/, $1;
138 foreach my $aa (@aas) {
139 my $tmp = $aa;
140 $tmp = "\\$aa" if $aa eq '*';
141 $matrix =~ /^($tmp) +([-+]?\d.*)$/m;
142 my @scores = split / +/, $2 if defined $2;
143 my $count = 0;
144 foreach my $ak (@aas) {
145 $blosum{$aa}->{$aas[$count]} = $scores[$count];
146 $count++;
149 sub _matrix;
150 $MATRIX = \%blosum;
153 sub new {
154 my($class,@args) = @_;
155 my $self = $class->SUPER::new(@args);
157 my ($start, $end, $length, $strand, $primary, $source,
158 $frame, $score, $gff_string,
159 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
160 $label, $status, $proof, $re_changes, $region, $region_value,
161 $region_dist,
162 $numbering, $mut_number, $ismutation) =
163 $self->_rearrange([qw(START
165 LENGTH
166 STRAND
167 PRIMARY
168 SOURCE
169 FRAME
170 SCORE
171 GFF_STRING
172 ALLELE_ORI
173 ALLELE_MUT
174 UPSTREAMSEQ
175 DNSTREAMSEQ
176 LABEL
177 STATUS
178 PROOF
179 RE_CHANGES
180 REGION
181 REGION_VALUE
182 REGION_DIST
183 NUMBERING
184 MUT_NUMBER
185 ISMUTATION
186 )],@args);
188 $self->primary_tag("Variation");
190 $self->{ 'alleles' } = [];
192 $start && $self->start($start);
193 $end && $self->end($end);
194 $length && $self->length($length);
195 $strand && $self->strand($strand);
196 $primary && $self->primary_tag($primary);
197 $source && $self->source_tag($source);
198 $frame && $self->frame($frame);
199 $score && $self->score($score);
200 $gff_string && $self->_from_gff_string($gff_string);
202 $allele_ori && $self->allele_ori($allele_ori);
203 $allele_mut && $self->allele_mut($allele_mut);
204 $upstreamseq && $self->upstreamseq($upstreamseq);
205 $dnstreamseq && $self->dnstreamseq($dnstreamseq);
207 $label && $self->label($label);
208 $status && $self->status($status);
209 $proof && $self->proof($proof);
210 $region && $self->region($region);
211 $region_value && $self->region_value($region_value);
212 $region_dist && $self->region_dist($region_dist);
213 $numbering && $self->numbering($numbering);
214 $mut_number && $self->mut_number($mut_number);
215 $ismutation && $self->isMutation($ismutation);
217 return $self; # success - we hope!
220 =head2 RNAChange
222 Title : RNAChange
223 Usage : $mutobj = $self->RNAChange;
224 : $mutobj = $self->RNAChange($objref);
225 Function: Returns or sets the link-reference to a mutation/change object.
226 If there is no link, it will return undef
227 Returns : an obj_ref or undef
229 =cut
231 sub RNAChange {
232 my ($self,$value) = @_;
233 if (defined $value) {
234 if( ! $value->isa('Bio::Variation::RNAChange') ) {
235 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]");
236 return;
238 else {
239 $self->{'RNAChange'} = $value;
242 unless (exists $self->{'RNAChange'}) {
243 return;
244 } else {
245 return $self->{'RNAChange'};
251 =head2 label
253 Title : label
254 Usage : $obj->label();
255 Function:
257 Sets and returns mutation event label(s). If value is not
258 set, or no argument is given returns false. Each
259 instantiable subclass of L<Bio::Variation::VariantI> needs
260 to implement this method. Valid values are listed in
261 'Mutation event controlled vocabulary' in
262 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
264 Example :
265 Returns : string
266 Args : string
268 =cut
271 sub label {
272 my ($self) = @_;
273 my ($o, $m, $type);
274 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
275 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
277 if ($self->start == 1 ) {
278 if ($o and substr($o, 0, 1) ne substr($m, 0, 1)) {
279 $type = 'no translation';
281 elsif ($o and $m and $o eq $m ) {
282 $type = 'silent';
284 # more ...
286 elsif ($o and substr($o, 0, 1) eq '*' ) {
287 if ($m and substr($o, 0, 1) ne substr($m, 0, 1)) {
288 $type = 'post-elongation';
290 elsif ($m and $o eq $m ) {
291 $type = 'silent, conservative';
294 elsif ($o and $m and $o eq $m) {
295 $type = 'silent, conservative';
297 elsif ($m and $m eq '*') {
298 $type = 'truncation';
300 elsif ($o and $m and $o eq $m) {
301 $type = 'silent, conservative';
303 elsif (not $m or
304 ($o and $m and length($o) > length($m) and
305 substr($m, -1, 1) ne '*')) {
306 $type = 'deletion';
307 if ($o and $m and $o !~ $m and $o !~ $m) {
308 $type .= ', complex';
311 elsif (not $o or
312 ($o and $m and length($o) < length($m) and
313 substr($m, -1, 1) ne '*' ) ) {
314 $type = 'insertion';
315 if ($o and $m and $o !~ $m and $o !~ $m) {
316 $type .= ', complex';
319 elsif ($o and $m and $o ne $m and
320 length $o == 1 and length $m == 1 ) {
321 $type = 'substitution';
322 my $value = $self->similarity_score;
323 if (defined $value) {
324 my $cons = ($value < 0) ? 'nonconservative' : 'conservative';
325 $type .= ", ". $cons;
327 } else {
328 $type = 'out-of-frame translation, truncation';
330 $self->{'label'} = $type;
331 return $self->{'label'};
335 =head2 similarity_score
337 Title : similarity_score
338 Usage : $self->similarity_score
339 Function: Measure for evolutionary conservativeness
340 of single amino substitutions. Uses BLOSUM62.
341 Negative numbers are noncoservative changes.
342 Returns : integer, undef if not single amino acid change
344 =cut
346 sub similarity_score {
347 my ($self) = @_;
348 my ($o, $m, $type);
349 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
350 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
351 return unless $o and $m and length $o == 1 and length $m == 1;
352 return unless $o =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i and
353 $m =~ /[ARNDCQEGHILKMFPSTWYVBZX*]/i;
354 return $MATRIX->{"\U$o"}->{"\U$m"};
357 =head2 trivname
359 Title : trivname
360 Usage : $self->trivname
361 Function:
363 Given a Bio::Variation::AAChange object with linked
364 Bio::Variation::RNAChange and Bio::Variation::DNAMutation
365 objects, this subroutine creates a string corresponding to
366 the 'trivial name' of the mutation. Trivial name is
367 specified in Antonorakis & MDI Nomenclature Working Group:
368 Human Mutation 11:1-3, 1998.
369 http://www3.interscience.wiley.com/cgi-bin/abstract/5001291/ABSTRACT
371 Returns : string
373 =cut
376 sub trivname {
377 my ($self,$value) = @_;
378 if( defined $value) {
379 $self->{'trivname'} = $value;
380 } else {
381 my ( $aaori, $aamut,$aamutsymbol, $aatermnumber, $aamutterm) =
382 ('', '', '', '', '');
383 my $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
384 #my $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
386 $aaori = substr ($o, 0, 1) if $o;
387 $aaori =~ tr/\*/X/;
389 my $sep;
390 if ($self->isMutation) {
391 $sep = '>';
392 } else {
393 $sep = '|';
395 my $trivname = $aaori. $self->start;
396 $trivname .= $sep if $sep eq '|';
398 my @alleles = $self->each_Allele;
399 foreach my $allele (@alleles) {
400 my $m = $allele->seq if $allele->seq;
402 $self->allele_mut($allele);
403 #$trivname .= $sep. uc $m if $m;
405 $aamutterm = substr ($m, -1, 1) if $m;
406 if ($self->RNAChange->label =~ /initiation codon/ and
407 ( $o and $m and $o ne $m)) {
408 $aamut = 'X';
410 elsif (CORE::length($o) == 1 and CORE::length($m) == 1 ) {
411 $aamutsymbol = '';
412 $aamut = $aamutterm;
414 elsif ($self->RNAChange->label =~ /deletion/) {
415 $aamutsymbol = 'del';
416 if ($aamutterm eq '*') {
417 $aatermnumber = $self->start + length($m) -1;
418 $aamut = 'X'. $aatermnumber;
420 if ($self->RNAChange && $self->RNAChange->label =~ /inframe/){
421 $aamut = '-'. length($self->RNAChange->allele_ori->seq)/3 ;
424 elsif ($self->RNAChange->label =~ /insertion/) {
425 $aamutsymbol = 'ins';
426 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
427 $aatermnumber = $self->start + length($m)-1;
428 $aamut = $aatermnumber. 'X';
430 if ($self->RNAChange->label =~ /inframe/){
431 $aamut = '+'. int length($self->RNAChange->allele_mut->seq)/3 ;
434 elsif ($self->RNAChange->label =~ /complex/ ) {
435 my $diff = length($m) - length($o);
436 if ($diff >= 0 ) {
437 $aamutsymbol = 'ins';
438 } else {
439 $aamutsymbol = 'del' ;
441 if (($aamutterm eq '*') && (length($m)-1 != 0)) {
442 $aatermnumber = $self->start + length($m)-1;
443 $aamut = $aatermnumber. 'X';
445 if ($self->RNAChange->label =~ /inframe/){
447 if ($diff >= 0 ) {
448 $aamut = '+'. $diff ;
449 } else {
450 $aamut = $diff ;
454 elsif ($self->label =~ /truncation/) {
455 $aamut = $m;
456 } else {
457 $aamutsymbol = '';
458 $aamut = $aamutterm;
460 $aamut =~ tr/\*/X/;
461 $trivname .= $aamutsymbol. $aamut. $sep;
463 chop $trivname;
464 $self->{'trivname'} = $trivname;
466 return $self->{'trivname'};