2 # bioperl module for Bio::LiveSeq::Mutator
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Joseph Insana
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::LiveSeq::Mutator - Package mutating LiveSequences
20 # $gene is a Bio::LiveSeq::Gene object
21 my $mutate = Bio::LiveSeq::Mutator->new('-gene' => $gene,
22 '-numbering' => "coding"
24 # $mut is a Bio::LiveSeq::Mutation object
25 $mutate->add_Mutation($mut);
26 # $results is a Bio::Variation::SeqDiff object
27 my $results=$mutate->change_gene();
29 my $out = Bio::Variation::IO->new( '-format' => 'flat');
30 $out->write($results);
35 This class mutates Bio::LiveSeq::Gene objects and returns a
36 Bio::Variation::SeqDiff object. Mutations are described as
37 Bio::LiveSeq::Mutation objects. See L<Bio::LiveSeq::Gene>,
38 L<Bio::Variation::SeqDiff>, and L<Bio::LiveSeq::Mutation> for details.
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to the
45 Bioperl mailing lists Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
63 report bugs to the Bioperl bug tracking system to help us keep track
64 the bugs and their resolution. Bug reports can be submitted via the
67 https://github.com/bioperl/bioperl-live/issues
69 =head1 AUTHOR - Heikki Lehvaslaiho & Joseph A.L. Insana
71 Email: heikki-at-bioperl-dot-org
72 insana@ebi.ac.uk, jinsana@gmx.net
76 The rest of the documentation details each of the object
77 methods. Internal methods are usually preceded with a _
81 # Let the code begin...
83 package Bio
::LiveSeq
::Mutator
;
86 use Bio
::Variation
::SeqDiff
;
87 use Bio
::Variation
::DNAMutation
;
88 use Bio
::Variation
::RNAChange
;
89 use Bio
::Variation
::AAChange
;
90 use Bio
::Variation
::Allele
;
91 use Bio
::LiveSeq
::Mutation
;
94 # Object preamble - inheritance
97 use base
qw(Bio::Root::Root);
100 my($class,@args) = @_;
105 my ($gene, $numbering) =
106 $self->_rearrange([qw(GENE
111 $self->{ 'mutations' } = [];
113 $gene && $self->gene($gene);
114 $numbering && $self->numbering($numbering);
117 $self->{'flanklen'} = 25;
118 return $self; # success - we hope!
124 Usage : $mutobj = $obj->gene;
125 : $mutobj = $obj->gene($objref);
128 Returns or sets the link-reference to a
129 Bio::LiveSeq::Gene object. If no value has ben set, it
132 Returns : an object reference or undef
133 Args : a Bio::LiveSeq::Gene
135 See L<Bio::LiveSeq::Gene> for more information.
140 my ($self,$value) = @_;
141 if (defined $value) {
142 if( ! $value->isa('Bio::LiveSeq::Gene') ) {
143 $self->throw("Is not a Bio::LiveSeq::Gene object but a [$value]");
147 $self->{'gene'} = $value;
150 unless (exists $self->{'gene'}) {
153 return $self->{'gene'};
161 Usage : $obj->numbering();
164 Sets and returns coordinate system used in positioning the
165 mutations. See L<change_gene> for details.
169 Args : string (coding [transcript number] | gene | entry)
175 my ($self,$value) = @_;
176 if( defined $value) {
177 if ($value =~ /(coding)( )?(\d+)?/ or $value eq 'entry' or $value eq 'gene') {
178 $self->{'numbering'} = $value;
179 } else { # defaulting to 'coding'
180 $self->{'numbering'} = 'coding';
183 unless (exists $self->{'numbering'}) {
186 return $self->{'numbering'};
193 Usage : $self->add_Mutation($ref)
194 Function: adds a Bio::LiveSeq::Mutation object
197 Args : a Bio::LiveSeq::Mutation
199 See L<Bio::LiveSeq::Mutation> for more information.
204 my ($self,$value) = @_;
205 if( $value->isa('Bio::Liveseq::Mutation') ) {
206 my $com = ref $value;
207 $self->throw("Is not a Mutation object but a [$com]" );
211 $self->warn("No value for mutation position in the sequence!");
214 if (! $value->seq && ! $value->len) {
215 $self->warn("Either mutated sequence or length of the deletion must be given!");
218 push(@
{$self->{'mutations'}},$value);
223 Title : each_Mutation
224 Usage : foreach $ref ( $a->each_Mutation )
225 Function: gets an array of Bio::LiveSeq::Mutation objects
227 Returns : array of Mutations
230 See L<Bio::LiveSeq::Mutation> for more information.
236 return @
{$self->{'mutations'}};
243 Usage : $mutobj = $obj->mutation;
244 : $mutobj = $obj->mutation($objref);
247 Returns or sets the link-reference to the current mutation
248 object. If the value is not set, it will return undef.
251 Returns : an object reference or undef
257 my ($self,$value) = @_;
258 if (defined $value) {
259 if( ! $value->isa('Bio::LiveSeq::Mutation') ) {
260 $self->throw("Is not a Bio::LiveSeq::Mutation object but a [$value]");
264 $self->{'mutation'} = $value;
267 unless (exists $self->{'mutation'}) {
270 return $self->{'mutation'};
277 Usage : $mutobj = $obj->DNA;
278 : $mutobj = $obj->DNA($objref);
281 Returns or sets the reference to the LiveSeq object holding
282 the reference sequence. If there is no link, it will return
286 Returns : an object reference or undef
291 my ($self,$value) = @_;
292 if (defined $value) {
293 if( ! $value->isa('Bio::LiveSeq::DNA') and ! $value->isa('Bio::LiveSeq::Transcript') ) {
294 $self->throw("Is not a Bio::LiveSeq::DNA/Transcript object but a [$value]");
298 $self->{'DNA'} = $value;
301 unless (exists $self->{'DNA'}) {
304 return $self->{'DNA'};
312 Usage : $mutobj = $obj->RNA;
313 : $mutobj = $obj->RNA($objref);
316 Returns or sets the reference to the LiveSeq object holding
317 the reference sequence. If the value is not set, it will return
321 Returns : an object reference or undef
327 my ($self,$value) = @_;
328 if (defined $value) {
329 if( ! $value->isa('Bio::LiveSeq::Transcript') ) {
330 $self->throw("Is not a Bio::LiveSeq::RNA/Transcript object but a [$value]");
334 $self->{'RNA'} = $value;
337 unless (exists $self->{'RNA'}) {
340 return $self->{'RNA'};
348 Usage : $mutobj = $obj->dnamut;
349 : $mutobj = $obj->dnamut($objref);
352 Returns or sets the reference to the current DNAMutation object.
353 If the value is not set, it will return undef.
356 Returns : a Bio::Variation::DNAMutation object or undef
358 See L<Bio::Variation::DNAMutation> for more information.
364 my ($self,$value) = @_;
365 if (defined $value) {
366 if( ! $value->isa('Bio::Variation::DNAMutation') ) {
367 $self->throw("Is not a Bio::Variation::DNAMutation object but a [$value]");
371 $self->{'dnamut'} = $value;
374 unless (exists $self->{'dnamut'}) {
377 return $self->{'dnamut'};
385 Usage : $mutobj = $obj->rnachange;
386 : $mutobj = $obj->rnachange($objref);
389 Returns or sets the reference to the current RNAChange object.
390 If the value is not set, it will return undef.
393 Returns : a Bio::Variation::RNAChange object or undef
395 See L<Bio::Variation::RNAChange> for more information.
401 my ($self,$value) = @_;
402 if (defined $value) {
403 if( ! $value->isa('Bio::Variation::RNAChange') ) {
404 $self->throw("Is not a Bio::Variation::RNAChange object but a [$value]");
408 $self->{'rnachange'} = $value;
411 unless (exists $self->{'rnachange'}) {
414 return $self->{'rnachange'};
422 Usage : $mutobj = $obj->aachange;
423 : $mutobj = $obj->aachange($objref);
426 Returns or sets the reference to the current AAChange object.
427 If the value is not set, it will return undef.
430 Returns : a Bio::Variation::AAChange object or undef
432 See L<Bio::Variation::AAChange> for more information.
438 my ($self,$value) = @_;
439 if (defined $value) {
440 if( ! $value->isa('Bio::Variation::AAChange') ) {
441 $self->throw("Is not a Bio::Variation::AAChange object but a [$value]");
445 $self->{'aachange'} = $value;
448 unless (exists $self->{'aachange'}) {
451 return $self->{'aachange'};
459 Usage : $mutobj = $obj->exons;
460 : $mutobj = $obj->exons($objref);
463 Returns or sets the reference to a current array of Exons.
464 If the value is not set, it will return undef.
467 Returns : an array of Bio::LiveSeq::Exon objects or undef
469 See L<Bio::LiveSeq::Exon> for more information.
475 my ($self,$value) = @_;
476 if (defined $value) {
477 $self->{'exons'} = $value;
479 unless (exists $self->{'exons'}) {
482 return $self->{'exons'};
486 =head2 change_gene_with_alignment
488 Title : change_gene_with_alignment
489 Usage : $results=$mutate->change_gene_with_alignment($aln);
493 Returns a Bio::Variation::SeqDiff object containing the
494 results of the changes in the alignment. The alignment has
495 to be pairwise and have one sequence named 'QUERY', the
496 other one is assumed to be a part of the sequence from
499 This method offers a shortcut to change_gene and
500 automates the creation of Bio::LiveSeq::Mutation objects.
501 Use it with almost identical sequnces, e.g. to locate a SNP.
503 Args : Bio::SimpleAlign object representing a short local alignment
504 Returns : Bio::Variation::SeqDiff object or 0 on error
506 See L<Bio::LiveSeq::Mutation>, L<Bio::SimpleAlign>, and
507 L<Bio::Variation::SeqDiff> for more information.
511 sub change_gene_with_alignment
{
512 my ($self, $aln) = @_;
518 $self->throw("Argument is not a Bio::SimpleAlign object but a [$aln]")
519 unless $aln->isa('Bio::SimpleAlign');
520 $self->throw("'Pairwise alignments only, please")
521 if $aln->no_sequences != 2;
523 # find out the order the two sequences are given
524 my $queryseq_pos = 1; #default
526 unless ($aln->get_seq_by_pos(1)->id eq 'QUERY') {
527 carp
('Query sequence has to be named QUERY')
528 if $aln->get_seq_by_pos(2)->id ne 'QUERY';
529 $queryseq_pos = 2; # alternative
534 my $start = $aln->column_from_residue_number('QUERY', 1);
535 my $end = $aln->column_from_residue_number('QUERY',
536 $aln->get_seq_by_pos($queryseq_pos)->end );
538 my $aln2 = $aln->slice($start, $end);
541 # extracting mutations
544 my $cs = $aln2->consensus_string(51);
545 my $queryseq = $aln2->get_seq_by_pos($queryseq_pos);
546 my $refseq = $aln2->get_seq_by_pos($refseq_pos);
548 while ( $cs =~ /(\?+)/g) {
549 # pos in local coordinates
550 my $pos = pos($cs) - length($1) + 1;
551 my $mutation = create_mutation
($self,
557 # reset pos to refseq coordinates
558 $pos += $refseq->start - 1;
559 $mutation->pos($pos);
561 $self->add_Mutation($mutation);
563 return $self->change_gene();
566 =head2 create_mutation
568 Title : create_mutation
572 Formats sequence differences from two sequences into
573 Bio::LiveSeq::Mutation objects which can be applied to a
576 To keep it generic, sequence arguments need not to be
577 Bio::LocatableSeq. Coordinate change to parent sequence
578 numbering needs to be done by the calling code.
580 Called from change_gene_with_alignment
582 Args : Bio::PrimarySeqI inheriting object for the reference sequence
583 Bio::PrimarySeqI inheriting object for the query sequence
584 integer for the start position of the local sequence difference
585 integer for the length of the sequence difference
586 Returns : Bio::LiveSeq::Mutation object
590 sub create_mutation
{
591 my ($self, $refseq, $queryseq, $pos, $len) = @_;
593 $self->throw("Is not a Bio::PrimarySeqI object but a [$refseq]")
594 unless $refseq->isa('Bio::PrimarySeqI');
595 $self->throw("Is not a Bio::PrimarySeqI object but a [$queryseq]")
596 unless $queryseq->isa('Bio::PrimarySeqI');
597 $self->throw("Position is not a positive integer but [$pos]")
598 unless $pos =~ /^\+?\d+$/;
599 $self->throw("Length is not a positive integer but [$len]")
600 unless $len =~ /^\+?\d+$/;
603 my $refstring = $refseq->subseq($pos, $pos + $len - 1);
604 my $varstring = $queryseq->subseq($pos, $pos + $len - 1);
606 if ($len == 1 and $refstring =~ /[^\.\-\*\?]/ and
607 $varstring =~ /[^\.\-\*\?]/ ) { #point
608 $mutation = Bio
::LiveSeq
::Mutation
->new(-seq
=> $varstring,
612 elsif ( $refstring =~ /^[^\.\-\*\?]+$/ and
613 $varstring !~ /^[^\.\-\*\?]+$/ ) { # deletion
614 $mutation = Bio
::LiveSeq
::Mutation
->new(-pos => $pos,
618 elsif ( $refstring !~ /^[^\.\-\*\?]+$/ and
619 $varstring =~ /^[^\.\-\*\?]+$/ ) { # insertion
620 $mutation = Bio
::LiveSeq
::Mutation
->new(-seq
=> $varstring,
625 $mutation = Bio
::LiveSeq
::Mutation
->new(-seq
=> $varstring,
637 Usage : my $mutate = Bio::LiveSeq::Mutator->new(-gene => $gene,
638 numbering => "coding"
640 # $mut is Bio::LiveSeq::Mutation object
641 $mutate->add_Mutation($mut);
642 my $results=$mutate->change_gene();
646 Returns a Bio::Variation::SeqDiff object containing the
647 results of the changes performed according to the
648 instructions present in Mutation(s). The -numbering
649 argument decides what molecule is being changed and what
650 numbering scheme being used:
652 -numbering => "entry"
654 determines the DNA level, using the numbering from the
655 beginning of the sequence
657 -numbering => "coding"
659 determines the RNA level, using the numbering from the
660 beginning of the 1st transcript
662 Alternative transcripts can be used by specifying
663 "coding 2" or "coding 3" ...
667 determines the DNA level, using the numbering from the
668 beginning of the 1st transcript and inluding introns.
669 The meaning equals 'coding' if the reference molecule
672 Args : Bio::LiveSeq::Gene object
673 Bio::LiveSeq::Mutation object(s)
674 string specifying a numbering scheme (defaults to 'coding')
675 Returns : Bio::Variation::SeqDiff object or 0 on error
685 unless ($self->gene) {
686 $self->warn("Input object Bio::LiveSeq::Gene is not given");
690 # Setting the reference sequence based on -numbering
692 my @transcripts=@
{$self->gene->get_Transcripts};
693 my $refseq; # will hold Bio::LiveSeq:Transcript object or Bio::LiveSeq::DNA
695 # 'gene' eq 'coding' if reference sequence is cDNA
696 $self->numbering ('coding') if $self->gene->get_DNA->alphabet eq 'rna' and $self->numbering eq 'gene';
698 if ($self->numbering =~ /(coding)( )?(\d+)?/ ) {
699 $self->numbering($1);
700 my $transnumber = $3;
701 $transnumber-- if $3; # 1 -> 0, 2 -> 1
702 if ($transnumber && $transnumber >= 0 && $transnumber <= $#transcripts) {
703 $refseq=$transcripts[$transnumber];
705 $transnumber && $self->warn("The alternative transcript number ". $transnumber+1 .
706 "- does not exist. Reverting to the 1st transcript\n");
707 $refseq=$transcripts[0];
710 $refseq=$transcripts[0]->{'seq'};
713 # Recording the state: SeqDiff object creation ?? transcript no.??
715 my $seqDiff = Bio
::Variation
::SeqDiff
->new(-verbose
=> $self->verbose);
716 $seqDiff->alphabet($self->gene->get_DNA->alphabet);
717 $seqDiff->numbering($self->numbering);
718 my ($DNAobj, $RNAobj);
719 if ($refseq->isa("Bio::LiveSeq::Transcript")) {
721 $self->DNA($refseq->{'seq'});
722 $seqDiff->rna_ori($refseq->seq );
723 $seqDiff->aa_ori($refseq->get_Translation->seq);
726 $self->RNA($transcripts[0]);
727 $seqDiff->rna_ori($self->RNA->seq);
728 $seqDiff->aa_ori($self->RNA->get_Translation->seq);
730 $seqDiff->dna_ori($self->DNA->seq);
731 # put the accession number into the SeqDiff object ID
732 $seqDiff->id($self->DNA->accession_number);
734 # the atg_offset takes in account that DNA object could be a subset of the
735 # whole entry (via the light_weight loader)
736 my $atg_label=$self->RNA->start;
737 my $atg_offset=$self->DNA->position($atg_label)+($self->DNA->start)-1;
738 $seqDiff->offset($atg_offset - 1);
739 $self->DNA->coordinate_start($atg_label);
741 my @exons = $self->RNA->all_Exons;
742 $seqDiff->cds_end($exons[$#exons]->end);
745 # Converting mutation positions to labels
747 $self->warn("no mutations"), return 0
748 unless $self->_mutationpos2label($refseq, $seqDiff);
750 # need to add more than one rna & aa
751 #foreach $transcript (@transcripts) {
752 # $seqDiff{"ori_transcript_${i}_seq"}=$transcript->seq;
753 # $seqDiff{"ori_translation_${i}_seq"}=$transcript->get_Translation->seq;
758 foreach my $mutation ($self->each_Mutation) {
759 next unless $mutation->label > 0;
760 $self->mutation($mutation);
762 $mutation->issue(++$k);
764 # current position on the transcript
766 if ($self->numbering =~ /coding/) {
767 $mutation->transpos($mutation->pos); # transpos given by user
769 #transpos of label / It will be 0 if mutating an intron, negative if upstream of ATG
770 $mutation->transpos($self->RNA->position($mutation->label,$atg_label));
773 # Calculate adjacent labels based on the position on the current sequence
775 $mutation->prelabel($self->DNA->label(-1, $mutation->label)); # 1 before label
776 if ($mutation->len == 0) {
777 $mutation->postlabel($mutation->label);
778 $mutation->lastlabel($mutation->label);
779 } elsif ($mutation->len == 1) {
780 $mutation->lastlabel($mutation->label); # last nucleotide affected
781 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel)); # $len after label
783 $mutation->lastlabel($self->DNA->label($mutation->len,$mutation->label));
784 $mutation->postlabel($self->DNA->label(2,$mutation->lastlabel));
786 my $dnamut = $self->_set_DNAMutation($seqDiff);
790 if ($self->_rnaAffected) {
791 $self->_set_effects($seqDiff, $dnamut);
793 elsif ($seqDiff->offset != 0 and $dnamut->region ne 'intron') {
794 $self->_untranslated ($seqDiff, $dnamut);
796 #$self->warn("Mutation starts outside coding region, RNAChange object not created");
799 #########################################################################
800 # Mutations are done here! #
801 $refseq->labelchange($mutation->seq, $mutation->label, $mutation->len); #
802 #########################################################################
804 $self->_post_mutation ($seqDiff);
806 $self->dnamut(undef);
807 $self->rnachange(undef);
808 $self->aachange(undef);
811 # record the final state of all three sequences
812 $seqDiff->dna_mut($self->DNA->seq);
813 $seqDiff->rna_mut($self->RNA->seq);
814 if ($refseq->isa("Bio::LiveSeq::Transcript")) {
815 $seqDiff->aa_mut($refseq->get_Translation->seq);
817 $seqDiff->aa_mut($self->RNA->get_Translation->seq);
820 #$seqDiff{mut_dna_seq}=$gene->get_DNA->seq;
822 #foreach $transcript (@transcripts) {
823 # $seqDiff{"mut_transcript_${i}_seq"}=$transcript->seq;
824 # $seqDiff{"mut_translation_${i}_seq"}=$transcript->get_Translation->seq;
829 =head2 _mutationpos2label
831 Title : _mutationpos2label
833 Function: converts mutation positions into labels
835 Returns : number of valid mutations
836 Args : LiveSeq sequence object
840 sub _mutationpos2label
{
841 my ($self, $refseq, $SeqDiff) = @_;
843 my @bb = @
{$self->{'mutations'}};
845 foreach my $mut (@
{$self->{'mutations'}}) {
846 # if ($self->numbering eq 'gene' and $mut->pos < 1) {
847 # my $tmp = $mut->pos;
848 # print STDERR "pos: ", "$tmp\n";
849 # $tmp++ if $tmp < 1;
850 # $tmp += $SeqDiff->offset;
851 # print STDERR "pos2: ", "$tmp\n";
854 # elsif ($self->numbering eq 'entry') {
855 if ($self->numbering eq 'entry') {
857 $tmp -= $SeqDiff->offset;
862 my $label = $refseq->label($mut->pos); # get the label for the position
863 $mut->label($label), $count++ if $label > 0 ;
864 #print STDERR "x", $mut->pos,'|' ,$mut->label, "\n";
870 # Calculate labels around mutated nucleotide
873 =head2 _set_DNAMutation
875 Title : _set_DNAMutation
879 Stores DNA level mutation attributes before mutation into
880 Bio::Variation::DNAMutation object. Links it to SeqDiff
884 Returns : Bio::Variation::DNAMutation object
885 Args : Bio::Variation::SeqDiff object
887 See L<Bio::Variation::DNAMutation> and L<Bio::Variation::SeqDiff>.
891 sub _set_DNAMutation
{
892 my ($self, $seqDiff) = @_;
894 my $dnamut_start = $self->mutation->label - $seqDiff->offset;
895 # if negative DNA positions (before ATG)
896 $dnamut_start-- if $dnamut_start <= 0;
898 ($self->mutation->len == 0 or $self->mutation->len == 1) ?
899 ($dnamut_end = $dnamut_start) :
900 ($dnamut_end = $dnamut_start+$self->mutation->len);
901 #print "start:$dnamut_start, end:$dnamut_end\n";
902 my $dnamut = Bio
::Variation
::DNAMutation
->new(-start
=> $dnamut_start,
905 $dnamut->mut_number($self->mutation->issue);
906 $dnamut->isMutation(1);
907 my $da_m = Bio
::Variation
::Allele
->new;
908 $da_m->seq($self->mutation->seq) if $self->mutation->seq;
909 $dnamut->allele_mut($da_m);
910 $dnamut->add_Allele($da_m);
912 my $allele_ori = $self->DNA->labelsubseq($self->mutation->prelabel,
914 $self->mutation->postlabel); # get seq
915 chop $allele_ori; # chop the postlabel nucleotide
916 $allele_ori=substr($allele_ori,1); # away the prelabel nucleotide
917 my $da_o = Bio
::Variation
::Allele
->new;
918 $da_o->seq($allele_ori) if $allele_ori;
919 $dnamut->allele_ori($da_o);
920 ($self->mutation->len == 0) ?
921 ($dnamut->length($self->mutation->len)) : ($dnamut->length(CORE
::length $allele_ori));
922 #print " --------------- $dnamut_start -$len- $dnamut_end -\n";
923 $seqDiff->add_Variant($dnamut);
924 $self->dnamut($dnamut);
925 $dnamut->mut_number($self->mutation->issue);
927 if ($seqDiff->numbering eq "entry" or $seqDiff->numbering eq "gene") {
928 $dnamut->proof('experimental');
930 $dnamut->proof('computed');
932 # how many nucleotides to store upstream and downstream of the change
933 my $flanklen = $self->{'flanklen'};
934 #print `date`, " flanking sequences start\n";
935 my $uplabel = $self->DNA->label(1-$flanklen,$self->mutation->prelabel); # this could be unavailable!
940 $self->DNA->labelsubseq($uplabel, undef, $self->mutation->prelabel);
941 } else { # from start (less than $flanklen nucleotides)
943 $self->DNA->labelsubseq($self->DNA->start, undef, $self->mutation->prelabel);
945 $dnamut->upStreamSeq($upstreamseq);
946 my $dnstreamseq = $self->DNA->labelsubseq($self->mutation->postlabel, $flanklen);
947 $dnamut->dnStreamSeq($dnstreamseq); # $flanklen or less nucleotides
953 ### Check if mutation propagates to RNA (and AA) level
955 # side effect: sets intron/exon information
956 # returns a boolean value
961 my @exons=$self->RNA->all_Exons;
962 my $RNAstart=$self->RNA->start;
963 my $RNAend=$self->RNA->end;
964 my ($firstexon,$before,$after,$i);
965 my ($rnaAffected) = 0;
967 # check for inserted labels (that require follows instead of <,>)
968 my $DNAend=$self->RNA->{'seq'}->end;
969 if ($self->mutation->prelabel > $DNAend or $self->mutation->postlabel > $DNAend) {
970 #this means one of the two labels is an inserted one
971 #(coming from a previous mutation. This would falsify all <,>
972 #checks, so the follow() has to be used
973 $self->warn("Attention, workaround not fully tested yet! Expect unpredictable results.\n");
974 if (($self->mutation->postlabel==$RNAstart) or (follows
($self->mutation->postlabel,$RNAstart))) {
975 $self->warn("RNA not affected because change occurs before RNAstart");
977 elsif (($RNAend==$self->mutation->prelabel) or (follows
($RNAend,$self->mutation->prelabel))) {
978 $self->warn("RNA not affected because change occurs after RNAend");
980 elsif (scalar @exons == 1) {
981 #no introns, just one exon
982 $rnaAffected = 1; # then RNA is affected!
984 # otherwise check for change occurring inside an intron
985 $firstexon=shift(@exons);
986 $before=$firstexon->end;
988 foreach $i (0..$#exons) {
989 $after=$exons[$i]->start;
990 if (follows
($self->mutation->prelabel,$before) or
991 ($after==$self->mutation->prelabel) or
992 follows
($after,$self->mutation->prelabel) or
993 follows
($after,$self->mutation->postlabel)) {
996 # $i is number of exon and can be used for proximity check
998 $before=$exons[$i]->end;
1003 my $strand = $exons[0]->strand;
1004 if (($strand == 1 and $self->mutation->postlabel <= $RNAstart) or
1005 ($strand != 1 and $self->mutation->postlabel >= $RNAstart)) {
1006 #$self->warn("RNA not affected because change occurs before RNAstart");
1009 elsif (($strand == 1 and $self->mutation->prelabel >= $RNAend) or
1010 ($strand != 1 and $self->mutation->prelabel <= $RNAend)) {
1011 #$self->warn("RNA not affected because change occurs after RNAend");
1015 $dist = $self->mutation->prelabel - $RNAend;
1017 $dist = $RNAend - $self->mutation->prelabel;
1019 $self->dnamut->region_dist($dist);
1021 elsif (scalar @exons == 1) {
1022 #if just one exon -> no introns,
1023 $rnaAffected = 1; # then RNA is affected!
1025 # otherwise check for mutation occurring inside an intron
1026 $firstexon=shift(@exons);
1027 $before=$firstexon->end;
1028 if ( ($strand == 1 and $self->mutation->prelabel < $before) or
1029 ($strand == -1 and $self->mutation->prelabel > $before)
1033 #print "Exon 1 : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
1034 my $afterdist = $self->mutation->prelabel - $firstexon->start;
1035 my $beforedist = $firstexon->end - $self->mutation->postlabel;
1036 my $exonvalue = $i + 1;
1037 $self->dnamut->region('exon');
1038 $self->dnamut->region_value($exonvalue);
1039 if ($afterdist < $beforedist) {
1042 $self->dnamut->region_dist($afterdist);
1043 #print "splice site $afterdist nt upstream!<br>";
1045 $self->dnamut->region_dist($beforedist);
1046 #print "splice site $beforedist nt downstream!<br>";
1049 #print "first exon : ", $firstexon->start, " - ", $firstexon->end, "<br>\n";
1050 foreach $i (0..$#exons) {
1051 $after=$exons[$i]->start;
1052 #proximity test for intronic mutations
1053 if ( ($strand == 1 and
1054 $self->mutation->prelabel >= $before and
1055 $self->mutation->postlabel <= $after)
1058 $self->mutation->prelabel <= $before and
1059 $self->mutation->postlabel >= $after) ) {
1060 $self->dnamut->region('intron');
1061 #$self->dnamut->region_value($i);
1062 my $afterdist = $self->mutation->prelabel - $before;
1063 my $beforedist = $after - $self->mutation->postlabel;
1064 my $intronvalue = $i + 1;
1065 if ($afterdist < $beforedist) {
1067 $self->dnamut->region_value($intronvalue);
1068 $self->dnamut->region_dist($afterdist);
1069 #print "splice site $afterdist nt upstream!<br>";
1071 $self->dnamut->region_value($intronvalue);
1072 $self->dnamut->region_dist($beforedist * -1);
1073 #print "splice site $beforedist nt downstream!<br>";
1075 $self->rnachange(undef);
1078 #proximity test for exon mutations
1079 #proximity test for exon mutations
1080 elsif ( ( $strand == 1 and
1081 $exons[$i]->start < $self->mutation->prelabel and
1082 $exons[$i]->end > $self->mutation->prelabel) or
1084 $exons[$i]->start < $self->mutation->postlabel and
1085 $exons[$i]->end > $self->mutation->postlabel) or
1087 $exons[$i]->start > $self->mutation->prelabel and
1088 $exons[$i]->end < $self->mutation->prelabel) or
1090 $exons[$i]->start > $self->mutation->postlabel and
1091 $exons[$i]->end < $self->mutation->postlabel) ) {
1094 my $afterdist = $self->mutation->prelabel - $exons[$i]->start;
1095 my $beforedist = $exons[$i]->end - $self->mutation->postlabel;
1096 my $exonvalue = $i + 1;
1097 $self->dnamut->region('exon');
1098 if ($afterdist < $beforedist) {
1100 $self->dnamut->region_value($exonvalue+1);
1101 $self->dnamut->region_dist($afterdist);
1102 #print "splice site $afterdist nt upstream!<br>";
1105 $self->dnamut->region_value($exonvalue+1);
1106 $self->dnamut->region_dist($beforedist * -1);
1107 #print "splice site $beforedist nt downstream!<br>";
1111 $before=$exons[$i]->end;
1116 #$self->warn("RNA not affected because change occurs inside an intron");
1117 #return(0); # if still not returned, then not affected, return 0
1118 return $rnaAffected;
1122 # ### Creation of RNA and AA variation objects
1127 Title : _set_effects
1131 Stores RNA and AA level mutation attributes before mutation
1132 into Bio::Variation::RNAChange and
1133 Bio::Variation::AAChange objects. Links them to
1138 Args : Bio::Variation::SeqDiff object
1139 Bio::Variation::DNAMutation object
1141 See L<Bio::Variation::RNAChange>, L<Bio::Variation::RNAChange>,
1142 L<Bio::Variation::SeqDiff>, and L<Bio::Variation::DNAMutation>.
1147 my ($self, $seqDiff, $dnamut) = @_;
1148 my ($rnapos_end, $upstreamseq, $dnstreamseq);
1149 my $flanklen = $self->{'flanklen'};
1151 ($self->mutation->len == 0) ?
1152 ($rnapos_end = $self->mutation->transpos) :
1153 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1);
1154 my $rnachange = Bio
::Variation
::RNAChange
->new(-start
=> $self->mutation->transpos,
1157 $rnachange->isMutation(1);
1160 if ($seqDiff->numbering eq "coding") {
1161 $rnachange->proof('experimental');
1163 $rnachange->proof('computed');
1166 $seqDiff->add_Variant($rnachange);
1167 $self->rnachange($rnachange);
1168 $rnachange->DNAMutation($dnamut);
1169 $dnamut->RNAChange($rnachange);
1170 $rnachange->mut_number($self->mutation->issue);
1172 # setting the codon_position of the "start" nucleotide of the change
1173 $rnachange->codon_pos(($self->RNA->frame($self->mutation->label))+1); # codon_pos=frame+1
1175 my @exons=$self->RNA->all_Exons;
1176 $self->exons(\
@exons);
1177 #print `date`, " before flank, after exons. RNAObj query\n";
1178 # if cannot retrieve from Transcript, Transcript::upstream_seq will be used
1179 # before "fac7 g 65" bug discovered
1180 # $uplabel=$self->RNA->label(1-$flanklen,$prelabel);
1181 my $RNAprelabel=$self->RNA->label(-1,$self->mutation->label); # to fix fac7g65 bug
1182 # for the fix, all prelabel used in the next block have been changed to RNAprelabel
1183 my $uplabel=$self->RNA->label(1-$flanklen,$RNAprelabel);
1184 if ($self->RNA->valid($uplabel)) {
1185 $upstreamseq = $self->RNA->labelsubseq($uplabel, undef, $RNAprelabel);
1187 $upstreamseq = $self->RNA->labelsubseq($self->RNA->start, undef, $RNAprelabel)
1188 if $self->RNA->valid($RNAprelabel);
1189 my $lacking=$flanklen-length($upstreamseq); # how many missing
1190 my $upstream_atg=$exons[0]->subseq(-$lacking,-1);
1191 $upstreamseq=$upstream_atg . $upstreamseq;
1194 $rnachange->upStreamSeq($upstreamseq);
1196 # won't work OK if postlabel NOT in Transcript
1197 # now added RNApostlabel but this has to be /fully tested/
1198 # for the fix, all postlabel used in the next block have been changed to RNApostlabel
1199 my $RNApostlabel; # to fix fac7g64 bug
1200 if ($self->mutation->len == 0) {
1201 $RNApostlabel=$self->mutation->label;
1203 my $mutlen = 1 + $self->mutation->len;
1204 $RNApostlabel=$self->RNA->label($mutlen,$self->mutation->label);
1206 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel, $flanklen);
1207 if ($dnstreamseq eq '-1') { # if out of transcript was requested
1208 my $lastexon=$exons[-1];
1209 my $lastexonlength=$lastexon->length;
1210 $dnstreamseq=$self->RNA->labelsubseq($RNApostlabel); # retrieves till RNAend
1211 my $lacking=$flanklen-length($dnstreamseq); # how many missing
1212 my $downstream_stop=$lastexon->subseq($lastexonlength+1,undef,$lacking);
1213 $dnstreamseq .= $downstream_stop;
1215 $rnachange->dnStreamSeq($dnstreamseq);
1218 my $AAobj=$self->RNA->get_Translation;
1219 # storage of prelabel here, to be used in create_mut_objs_after
1220 my $aachange = Bio
::Variation
::AAChange
->new(-start
=> $RNAprelabel
1222 $aachange->isMutation(1);
1223 $aachange->proof('computed');
1225 $seqDiff->add_Variant($aachange);
1226 $self->aachange($aachange);
1227 $rnachange->AAChange($aachange);
1228 $aachange->RNAChange($rnachange);
1230 $aachange->mut_number($self->mutation->issue);
1231 # $before_mutation{aachange}=$aachange;
1233 my $ra_o = Bio
::Variation
::Allele
->new;
1234 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
1235 $rnachange->allele_ori($ra_o);
1237 $rnachange->length(CORE
::length $rnachange->allele_ori->seq);
1239 my $ra_m = Bio
::Variation
::Allele
->new;
1240 $ra_m->seq($self->mutation->seq) if $self->mutation->seq;
1241 $rnachange->allele_mut($ra_m);
1242 $rnachange->add_Allele($ra_m);
1244 #$rnachange->allele_mut($seq);
1245 $rnachange->end($rnachange->start) if $rnachange->length == 0;
1247 # this holds the aminoacid sequence that will be affected by the mutation
1248 my $aa_allele_ori=$AAobj->labelsubseq($self->mutation->label,undef,
1249 $self->mutation->lastlabel);
1251 my $aa_o = Bio
::Variation
::Allele
->new;
1252 $aa_o->seq($aa_allele_ori) if $aa_allele_ori;
1253 $aachange->allele_ori($aa_o);
1254 #$aachange->allele_ori($aa_allele_ori);
1256 my $aa_length_ori = length($aa_allele_ori);
1257 $aachange->length($aa_length_ori); #print "==========$aa_length_ori\n";
1258 $aachange->end($aachange->start + $aa_length_ori - 1 );
1261 =head2 _untranslated
1263 Title : _untranslated
1267 Stores RNA change attributes before mutation
1268 into Bio::Variation::RNAChange object. Links it to
1273 Args : Bio::Variation::SeqDiff object
1274 Bio::Variation::DNAMutation object
1276 See L<Bio::Variation::RNAChange>, L<Bio::Variation::SeqDiff> and
1277 L<Bio::Variation::DNAMutation> for details.
1282 my ($self, $seqDiff, $dnamut) = @_;
1284 ($self->mutation->len == 0) ?
1285 ($rnapos_end = $self->mutation->transpos) :
1286 ($rnapos_end = $self->mutation->transpos + $self->mutation->len -1);
1287 my $rnachange = Bio
::Variation
::RNAChange
->new(-start
=> $self->mutation->transpos,
1290 #my $rnachange = Bio::Variation::RNAChange->new;
1292 $rnachange->isMutation(1);
1293 my $ra_o = Bio
::Variation
::Allele
->new;
1294 $ra_o->seq($dnamut->allele_ori->seq) if $dnamut->allele_ori->seq;
1295 $rnachange->allele_ori($ra_o);
1296 my $ra_m = Bio
::Variation
::Allele
->new;
1297 $ra_m->seq($dnamut->allele_mut->seq) if $dnamut->allele_mut->seq;
1298 $rnachange->allele_mut($ra_m);
1299 $rnachange->add_Allele($ra_m);
1300 $rnachange->upStreamSeq($dnamut->upStreamSeq);
1301 $rnachange->dnStreamSeq($dnamut->dnStreamSeq);
1302 $rnachange->length($dnamut->length);
1303 $rnachange->mut_number($dnamut->mut_number);
1305 if ($seqDiff->numbering eq "coding") {
1306 $rnachange->proof('experimental');
1308 $rnachange->proof('computed');
1312 if ($rnachange->end < 0) {
1313 $rnachange->region('5\'UTR');
1314 $dnamut->region('5\'UTR');
1315 my $dist = $dnamut->end ;
1316 $dnamut->region_dist($dist);
1317 $dist = $seqDiff->offset - $self->gene->maxtranscript->start + 1 + $dist;
1318 $rnachange->region_dist($dist);
1319 return if $dist < 1; # if mutation is not in mRNA
1321 $rnachange->region('3\'UTR');
1322 $dnamut->region('3\'UTR');
1323 my $dist = $dnamut->start - $seqDiff->cds_end + $seqDiff->offset;
1324 $dnamut->region_dist($dist);
1325 $dist = $seqDiff->cds_end - $self->gene->maxtranscript->end -1 + $dist;
1326 $rnachange->region_dist($dist);
1327 return if $dist > 0; # if mutation is not in mRNA
1329 $seqDiff->add_Variant($rnachange);
1330 $self->rnachange($rnachange);
1331 $rnachange->DNAMutation($dnamut);
1332 $dnamut->RNAChange($rnachange);
1335 # args: reference to label changearray, reference to position changearray
1336 # Function: take care of the creation of mutation objects, with
1337 # information AFTER the change takes place
1338 sub _post_mutation
{
1339 my ($self, $seqDiff) = @_;
1341 if ($self->rnachange and $self->rnachange->region eq 'coding') {
1343 #$seqDiff->add_Variant($self->rnachange);
1345 my $aachange=$self->aachange;
1346 my ($AAobj,$aa_start_prelabel,$aa_start,$mut_translation);
1347 $AAobj=$self->RNA->get_Translation;
1348 $aa_start_prelabel=$aachange->start;
1349 $aa_start=$AAobj->position($self->RNA->label(2,$aa_start_prelabel));
1350 $aachange->start($aa_start);
1351 $mut_translation=$AAobj->seq;
1353 # this now takes in account possible preinsertions
1354 my $aa_m = Bio
::Variation
::Allele
->new;
1355 $aa_m->seq(substr($mut_translation,$aa_start-1)) if substr($mut_translation,$aa_start-1);
1356 $aachange->allele_mut($aa_m);
1357 $aachange->add_Allele($aa_m);
1358 #$aachange->allele_mut(substr($mut_translation,$aa_start-1));
1359 #$aachange->allele_mut($mut_translation);
1360 my ($rlenori, $rlenmut);
1361 $rlenori = CORE
::length($aachange->RNAChange->allele_ori->seq);
1362 $rlenmut = CORE
::length($aachange->RNAChange->allele_mut->seq);
1365 if ($rlenori == 1 and $rlenmut == 1 and $aachange->allele_ori->seq ne '*') {
1367 if ($aachange->allele_mut->seq) {
1368 $alleleseq = substr($aachange->allele_mut->seq, 0, 1);
1369 $aachange->allele_mut->seq($alleleseq);
1371 $aachange->end($aachange->start);
1372 $aachange->length(1);
1374 elsif ( $rlenori == $rlenmut and
1375 $aachange->allele_ori->seq ne '*' ) { #complex inframe mutation
1376 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq,
1378 length($aachange->allele_ori->seq));
1381 elsif ((int($rlenori-$rlenmut))%3 == 0) {
1382 if ($aachange->RNAChange->allele_mut->seq and
1383 $aachange->RNAChange->allele_ori->seq ) {
1385 my $rna_len = length ($aachange->RNAChange->allele_mut->seq);
1386 my $len = $rna_len/3;
1387 $len++ unless $rna_len%3 == 0;
1388 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, $len );
1390 elsif ($aachange->RNAChange->codon_pos == 1){
1392 if ($aachange->RNAChange->allele_mut->seq eq '') {
1393 $aachange->allele_mut->seq('');
1394 $aachange->end($aachange->start + $aachange->length - 1 );
1397 elsif ($aachange->RNAChange->allele_ori->seq eq '' ) {
1398 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
1399 length ($aachange->RNAChange->allele_mut->seq) / 3);
1400 $aachange->allele_ori->seq('');
1401 $aachange->end($aachange->start + $aachange->length - 1 );
1402 $aachange->length(0);
1405 #elsif ($aachange->RNAChange->codon_pos == 2){
1407 if (not $aachange->RNAChange->allele_mut->seq ) {
1408 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0, 1);
1411 elsif (not $aachange->RNAChange->allele_ori->seq) {
1412 $aachange->allele_mut->seq(substr $aachange->allele_mut->seq, 0,
1413 length ($aachange->RNAChange->allele_mut->seq) / 3 +1);
1418 #my $pos = index $aachange->allele_mut
1419 #$aachange->allele_mut(substr($aachange->allele_mut, 0, 1));
1420 $aachange->length(CORE
::length($aachange->allele_ori->seq));
1421 my $aaend = $aachange->start + $aachange->length -1;
1422 $aachange->end($aachange->start);
1425 # splicing site deletion check
1426 my @beforeexons=@
{$self->exons};
1427 my @afterexons=$self->RNA->all_Exons;
1429 if (scalar(@beforeexons) ne scalar(@afterexons)) {
1430 my $mut_number = $self->mutation->issue;
1431 $self->warn("Exons have been modified at mutation n.$mut_number!");
1432 $self->rnachange->exons_modified(1);
1435 foreach $i (0..$#beforeexons) {
1436 if ($beforeexons[$i] ne $afterexons[$i]) {
1437 my $mut_number = $self->mutation->issue;
1438 $self->warn("Exons have been modified at mutation n.$mut_number!");
1439 $self->rnachange->exons_modified(1);
1445 #$seqDiff->rnachange(undef);
1446 #print "getting here?";