sync w/ main trunk
[bioperl-live.git] / Bio / Variation / RNAChange.pm
bloba90ac3e63b921bf40cb2fce3bd0eb02dce624446
1 # $Id$
3 # BioPerl module for Bio::Variation::RNAChange
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Heikki Lehvaslaiho
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Variation::RNAChange - Sequence change class for RNA level
19 =head1 SYNOPSIS
21 $rnachange = Bio::Variation::RNAChange->new
22 ('-start' => $start,
23 '-end' => $end,
24 '-length' => $len,
25 '-codon_pos' => $cp,
26 '-upStreamSeq' => $upflank,
27 '-dnStreamSeq' => $dnflank,
28 '-proof' => $proof,
29 '-isMutation' => 1,
30 '-mut_number' => $mut_number
32 $a1 = Bio::Variation::Allele->new;
33 $a1->seq('a');
34 $rnachange->allele_ori($a1);
35 my $a2 = Bio::Variation::Allele->new;
36 $a2->seq('t');
37 $rnachange->add_Allele($a2);
38 $rnachange->allele_mut($a2);
40 print "The codon change is ", $rnachange->codon_ori,
41 ">", $rnachange->codon_mut, "\n";
43 # add it to a SeqDiff container object
44 $seqdiff->add_Variant($rnachange);
46 # and create links to and from DNA level mutation objects
47 $rnachange->DNAMutation($dnamut);
48 $dnamut->RNAChange($rnachange);
50 =head1 DESCRIPTION
52 The instantiable class Bio::Variation::DNAMutation describes basic
53 sequence changes at RNA molecule level. It uses methods defined in
54 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI>
55 for details.
57 You are normally expected to create a corresponding
58 Bio::Variation::DNAMutation object even if mutation is defined at
59 RNA level. The numbering follows then cDNA numbering. Link the
60 DNAMutation object to the RNAChange object using the method
61 DNAMutation(). If the variation described by a RNAChange object is
62 translated, link the corresponding Bio::Variation::AAChange object
63 to it using method AAChange(). See L<Bio::Variation::DNAMutation> and
64 L<Bio::Variation::AAChange> for more information.
67 =head1 FEEDBACK
69 =head2 Mailing Lists
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to the
73 Bioperl mailing lists Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79 =head2 Support
81 Please direct usage questions or support issues to the mailing list:
83 L<bioperl-l@bioperl.org>
85 rather than to the module maintainer directly. Many experienced and
86 reponsive experts will be able look at the problem and quickly
87 address it. Please include a thorough description of the problem
88 with code and data examples if at all possible.
90 =head2 Reporting Bugs
92 Report bugs to the Bioperl bug tracking system to help us keep track
93 the bugs and their resolution. Bug reports can be submitted via the
94 web:
96 http://bugzilla.open-bio.org/
98 =head1 AUTHOR - Heikki Lehvaslaiho
100 Email: heikki-at-bioperl-dot-org
102 =head1 APPENDIX
104 The rest of the documentation details each of the object
105 methods. Internal methods are usually preceded with a _
107 =cut
110 # Let the code begin...
113 package Bio::Variation::RNAChange;
114 use strict;
116 # Object preamble - inheritance
118 use Bio::Tools::CodonTable;
120 use base qw(Bio::Variation::VariantI);
122 sub new {
123 my($class,@args) = @_;
124 my $self = $class->SUPER::new(@args);
126 my ($start, $end, $length, $strand, $primary, $source,
127 $frame, $score, $gff_string,
128 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq,
129 $label, $status, $proof, $region, $region_value, $region_dist, $numbering,
130 $mut_number, $isMutation,
131 $codon_ori, $codon_mut, $codon_pos, $codon_table, $cds_end) =
132 $self->_rearrange([qw(START
134 LENGTH
135 STRAND
136 PRIMARY
137 SOURCE
138 FRAME
139 SCORE
140 GFF_STRING
141 ALLELE_ORI
142 ALLELE_MUT
143 UPSTREAMSEQ
144 DNSTREAMSEQ
145 LABEL
146 STATUS
147 PROOF
148 REGION
149 REGION_VALUE
150 REGION_DIST
151 NUMBERING
152 MUT_NUMBER
153 ISMUTATION
154 CODON_ORI
155 CODON_MUT
156 CODON_POS
157 TRANSLATION_TABLE
158 CDS_END
159 )],@args);
161 $self->primary_tag("Variation");
163 $self->{ 'alleles' } = [];
165 $start && $self->start($start);
166 $end && $self->end($end);
167 $length && $self->length($length);
168 $strand && $self->strand($strand);
169 $primary && $self->primary_tag($primary);
170 $source && $self->source_tag($source);
171 $frame && $self->frame($frame);
172 $score && $self->score($score);
173 $gff_string && $self->_from_gff_string($gff_string);
175 $allele_ori && $self->allele_ori($allele_ori);
176 $allele_mut && $self->allele_mut($allele_mut);
177 $upstreamseq && $self->upStreamSeq($upstreamseq);
178 $dnstreamseq && $self->dnStreamSeq($dnstreamseq);
180 $label && $self->label($label);
181 $status && $self->status($status);
182 $proof && $self->proof($proof);
183 $region && $self->region($region);
184 $region_value && $self->region_value($region_value);
185 $region_dist && $self->region_dist($region_dist);
186 $numbering && $self->numbering($numbering);
187 $mut_number && $self->mut_number($mut_number);
188 $isMutation && $self->isMutation($isMutation);
190 $codon_ori && $self->codon_ori($codon_ori);
191 $codon_mut && $self->codon_mut($codon_mut);
192 $codon_pos && $self->codon_pos($codon_pos);
193 $codon_table && $self->codon_table($codon_table);
194 $cds_end && $self->cds_end($cds_end);
195 return $self; # success - we hope!
199 =head2 codon_ori
201 Title : codon_ori
202 Usage : $obj->codon_ori();
203 Function:
205 Sets and returns codon_ori triplet. If value is not set,
206 creates the codon triplet from the codon position and
207 flanking sequences. The string has to be three characters
208 long. The character content is not checked.
210 Example :
211 Returns : string
212 Args : string
214 =cut
216 sub codon_ori {
217 my ($self,$value) = @_;
218 if (defined $value) {
219 if (length $value != 3) {
220 $self->warn("Codon string \"$value\" is not three characters long");
222 $self->{'codon_ori'} = $value;
224 elsif (! $self->{'codon_ori'}) {
225 my $codon_ori = '';
227 if ($self->region eq 'coding' && $self->start && $self->start >= 1) {
229 $self->warn('Codon position is not defined')
230 if not defined $self->codon_pos;
231 $self->warn('Upstream flanking sequence is not defined')
232 if not defined $self->upStreamSeq;
233 $self->warn('Downstream flanking sequence is not defined')
234 if not defined $self->dnStreamSeq;
236 my $cpos = $self->codon_pos;
237 $codon_ori = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
238 $codon_ori .= substr($self->allele_ori->seq, 0, 4-$cpos)
239 if $self->allele_ori and $self->allele_ori->seq;
240 $codon_ori .= substr($self->dnStreamSeq, 0, 3-length($codon_ori));
242 $self->{'codon_ori'} = lc $codon_ori;
244 return $self->{'codon_ori'};
248 =head2 codon_mut
250 Title : codon_mut
251 Usage : $obj->codon_mut();
252 Function:
254 Sets and returns codon_mut triplet. If value is not
255 set, creates the codon triplet from the codon position and
256 flanking sequences. Return undef for other than point mutations.
258 Example :
259 Returns : string
260 Args : string
262 =cut
265 sub codon_mut {
266 my ($self,$value) = @_;
267 if (defined $value) {
268 if (length $value != 3 ) {
269 $self->warn("Codon string \"$value\" is not three characters long");
271 $self->{'codon_mut'} = $value;
273 else {
274 my $codon_mut = '';
275 if ($self->allele_ori->seq and $self->allele_mut->seq and
276 CORE::length($self->allele_ori->seq) == 1 and
277 CORE::length($self->allele_mut->seq) == 1 and
278 $self->region eq 'coding' and $self->start >= 1) {
280 $self->warn('Codon position is not defined')
281 if not defined $self->codon_pos;
282 $self->warn('Upstream flanking sequnce is not defined')
283 if not defined $self->upStreamSeq;
284 $self->warn('Downstream flanking sequnce is not defined')
285 if not defined $self->dnStreamSeq;
286 $self->throw('Mutated allele is not defined')
287 if not defined $self->allele_mut;
289 my $cpos = $self->codon_pos;
290 $codon_mut = substr($self->upStreamSeq, -$cpos +1 , $cpos-1);
291 $codon_mut .= substr($self->allele_mut->seq, 0, 4-$cpos)
292 if $self->allele_mut and $self->allele_mut->seq;
293 $codon_mut .= substr($self->dnStreamSeq, 0, 3-length($codon_mut));
295 $self->{'codon_mut'} = lc $codon_mut;
298 return $self->{'codon_mut'};
302 =head2 codon_pos
304 Title : codon_pos
305 Usage : $obj->codon_pos();
306 Function:
308 Sets and returns the position of the mutation start in the
309 codon. If value is not set, returns false.
311 Example :
312 Returns : 1,2,3
313 Args : none if get, the new value if set
315 =cut
318 sub codon_pos {
319 my ($self,$value) = @_;
320 if( defined $value) {
321 if ( $value !~ /[123]/ ) {
322 $self->throw("'$value' is not a valid codon position");
324 $self->{'codon_pos'} = $value;
326 return $self->{'codon_pos'};
330 =head2 codon_table
332 Title : codon_table
333 Usage : $obj->codon_table();
334 Function:
336 Sets and returns the codon table id of the RNA
337 If value is not set, returns 1, 'universal' code, as the default.
339 Example :
340 Returns : integer
341 Args : none if get, the new value if set
343 =cut
346 sub codon_table {
347 my ($self,$value) = @_;
348 if( defined $value) {
349 if ( not $value =~ /^\d$/ ) {
350 $self->throw("'$value' is not a valid codon table ID\n".
351 "Has to be a positive integer. Defaulting to 1\n");
352 } else {
353 $self->{'codon_table'} = $value;
356 if( ! exists $self->{'codon_table'} ) {
357 return 1;
358 } else {
359 return $self->{'codon_table'};
364 =head2 DNAMutation
366 Title : DNAMutation
367 Usage : $mutobj = $obj->DNAMutation;
368 : $mutobj = $obj->DNAMutation($objref);
369 Function: Returns or sets the link-reference to a mutation/change object.
370 If there is no link, it will return undef
371 Returns : an obj_ref or undef
373 =cut
376 sub DNAMutation {
377 my ($self,$value) = @_;
378 if (defined $value) {
379 if( ! $value->isa('Bio::Variation::DNAMutation') ) {
380 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$self]");
381 return;
383 else {
384 $self->{'DNAMutation'} = $value;
387 unless (exists $self->{'DNAMutation'}) {
388 return;
389 } else {
390 return $self->{'DNAMutation'};
395 =head2 AAChange
397 Title : AAChange
398 Usage : $mutobj = $obj->AAChange;
399 : $mutobj = $obj->AAChange($objref);
400 Function: Returns or sets the link-reference to a mutation/change object.
401 If there is no link, it will return undef
402 Returns : an obj_ref or undef
404 =cut
406 sub AAChange {
407 my ($self,$value) = @_;
408 if (defined $value) {
409 if( ! $value->isa('Bio::Variation::AAChange') ) {
410 $self->throw("Is not a Bio::Variation::AAChange object but a [$self]");
411 return;
413 else {
414 $self->{'AAChange'} = $value;
417 unless (exists $self->{'AAChange'}) {
418 return;
419 } else {
420 return $self->{'AAChange'};
425 =head2 exons_modified
427 Title : exons_modified
428 Usage : $modified = $obj->exons_modified;
429 : $modified = $obj->exons_modified(1);
430 Function: Returns or sets information (example: a simple boolean flag) about
431 the modification of exons as a result of a mutation.
433 =cut
435 sub exons_modified {
436 my ($self,$value)=@_;
437 if (defined($value)) {
438 $self->{'exons_modified'}=$value;
440 return ($self->{'exons_modified'});
443 =head2 region
445 Title : region
446 Usage : $obj->region();
447 Function:
449 Sets and returns the name of the sequence region type or
450 protein domain at this location. If value is not set,
451 returns false.
453 Example :
454 Returns : string
455 Args : string
457 =cut
461 sub region {
462 my ($self,$value) = @_;
463 if( defined $value) {
464 $self->{'region'} = $value;
466 elsif (not defined $self->{'region'}) {
468 $self->warn('Mutation start position is not defined')
469 if not defined $self->start and $self->verbose;
470 $self->warn('Mutation end position is not defined')
471 if not defined $self->end and $self->verbose;
472 $self->warn('Length of the CDS is not defined, the mutation can be beyond coding region!')
473 if not defined $self->cds_end and $self->verbose;
475 $self->region('coding');
476 if ($self->end && $self->end < 0 ){
477 $self->region('5\'UTR');
479 elsif ($self->start && $self->cds_end && $self->start > $self->cds_end ) {
480 $self->region('3\'UTR');
483 return $self->{'region'};
486 =head2 cds_end
488 Title : cds_end
489 Usage : $cds_end = $obj->get_cds_end();
490 Function:
492 Sets or returns the cds_end from the beginning of the DNA sequence
493 to the coordinate start used to describe variants.
494 Should be the location of the last nucleotide of the
495 terminator codon of the gene.
497 Example :
498 Returns : value of cds_end, a scalar
499 Args :
501 =cut
505 sub cds_end {
506 my ($self, $value) = @_;
507 if (defined $value) {
508 $self->warn("[$value] is not a good value for sequence position")
509 if not $value =~ /^\d+$/ ;
510 $self->{'cds_end'} = $value;
511 } else {
512 $self->{'cds_end'} = $self->SeqDiff->cds_end if $self->SeqDiff;
514 return $self->{'cds_end'};
518 =head2 label
520 Title : label
521 Usage : $obj->label();
522 Function:
524 Sets and returns mutation event label(s). If value is not
525 set, or no argument is given returns false. Each
526 instantiable subclass of L<Bio::Variation::VariantI> needs
527 to implement this method. Valid values are listed in
528 'Mutation event controlled vocabulary' in
529 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
531 Example :
532 Returns : string
533 Args : string
535 =cut
537 sub label {
538 my ($self) = @_;
539 my ($o, $m, $type);
540 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq;
541 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq;
543 my $ct = Bio::Tools::CodonTable -> new ( -id => $self->codon_table );
544 if ($o and $m and CORE::length($o) == 1 and CORE::length($m) == 1) {
545 if (defined $self->AAChange) {
546 if ($self->start > 0 and $self->start < 4 ) {
547 $type = 'initiation codon';
549 elsif ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
550 #AAChange->allele_ori and $self->AAChange->allele_ori->seq eq '*' ) {
551 $type = 'termination codon';
553 elsif ($self->codon_mut && $ct->is_ter_codon($self->codon_mut) ) {
554 #elsif ($self->AAChange->allele_mut and $self->AAChange->allele_mut->seq eq "*") {
555 $type = 'nonsense';
557 elsif ($o and $m and ($o eq $m or
558 $self->AAChange->allele_ori->seq eq
559 $self->AAChange->allele_mut->seq)) {
560 $type = 'silent';
561 } else {
562 $type = 'missense';
564 } else {
565 $type = 'unknown';
567 } else {
568 my $len = 0;
569 $len = CORE::length($o) if $o;
570 $len -= CORE::length($m) if $m;
571 if ($len%3 == 0 ) {
572 $type = 'inframe';
573 } else {
574 $type = 'frameshift';
576 if (not $m ) {
577 $type .= ', '. 'deletion';
579 elsif (not $o ) {
580 $type .= ', '. 'insertion';
582 else {
583 $type .= ', '. 'complex';
585 if ($self->codon_ori && $ct->is_ter_codon($self->codon_ori) ) {
586 $type .= ', '. 'termination codon';
590 $self->{'label'} = $type;
591 return $self->{'label'};
595 =head2 _change_codon_pos
597 Title : _change_codon_pos
598 Usage : $newCodonPos = _change_codon_pos($myCodonPos, 5)
599 Function:
601 Keeps track of the codon position in a changeing sequence
603 Returns : codon_pos = integer 1, 2 or 3
604 Args : valid codon position
605 signed integer offset to a new location in sequence
607 =cut
610 sub _change_codon_pos ($$) {
611 my ($cpos, $i) = @_;
613 $cpos = ($cpos + $i%3)%3;
614 if ($cpos > 3 ) {
615 $cpos = $cpos - 3;
617 elsif ($cpos < 1 ) {
618 $cpos = $cpos + 3;
620 return $cpos;