2 # BioPerl module for Bio::SeqEvolution::DNAPoint
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki at bioperl dot org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
20 # $seq is a Bio::PrimarySeqI to mutate
21 $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
25 $newseq = $evolve->next_seq;
30 Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
31 nucleotides change by point mutations, only. Transition/transversion
32 rate of the change, rate(), can be set.
34 The new sequences are named with the id of the reference sequence
35 added with a running number. Placing a new sequence into a factory to
36 be evolved resets that counter. It can also be called directly with
37 L<reset_sequence_counter>.
39 The default sequence type returned is Bio::PrimarySeq. This can be
40 changed to any Bio::PrimarySeqI compliant sequence class.
42 Internally the probability of the change of one nucleotide is mapped
43 to scale from 0 to 100. The probability of the transition occupies
44 range from 0 to some value. The remaining range is divided equally
45 among the two transversion nucleotides. A random number is then
46 generated to pick up one change.
48 Not that the default transition/transversion rate, 1:1, leads to
49 observed transition/transversion ratio of 1:2 simply because there is
50 only one transition nucleotide versus two transversion nucleotides.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via the
80 https://github.com/bioperl/bioperl-live/issues
84 Heikki Lehvaslaiho E<lt>heikki at bioperl dot orgE<gt>
88 Additional contributor's names and emails here
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
98 # Let the code begin...
101 package Bio
::SeqEvolution
::DNAPoint
;
104 use Bio
::SeqEvolution
::Factory
;
106 use Bio
::Variation
::DNAMutation
;
107 use Bio
::Variation
::Allele
;
108 use Bio
::SimpleAlign
;
110 use base
qw(Bio::SeqEvolution::Factory);
114 my($self, @args) = @_;
116 $self->SUPER::_initialize
(@args);
118 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
120 exists $param{'-rate'} && $self->rate($param{'-rate'});
122 $self->_init_mutation_engine;
126 sub _init_mutation_engine
{
129 # arrays of possible changes have transitions as first items
131 $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
132 $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
133 $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
134 $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
137 # given the desired rate, find out where cut off points need to be
138 # when random numbers are generated from 0 to 100
139 # we are ignoring identical mutations (e.g. A->A) to speed things up
140 my $bin_size = 100/($self->rate + 2);
141 $self->{'_transition'} = 100 - (2*$bin_size);
142 $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
144 $self->_init_alignment;
147 sub _init_alignment
{
150 # put the initial sequence into the alignment object
151 $self->{'_align'} = Bio
::SimpleAlign
->new(-verbose
=> -1);
152 return unless $self->seq;
153 $self->{'_ori_locatableseq'} = Bio
::LocatableSeq
->new(-id
=> 'ori',
154 -seq
=> $self->seq->seq);
155 $self->{'_mut_locatableseq'} = Bio
::LocatableSeq
->new(-id
=> 'mut',
156 -seq
=> $self->seq->seq);
157 $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
158 $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
164 Usage : $obj->seq($newval)
165 Function: Set the sequence object for the original sequence
166 Returns : The sequence object
167 Args : newvalue (optional)
169 Setting this will reset mutation and generated mutation counters.
178 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
179 unless $seq->isa('Bio::PrimarySeqI');
181 $self->throw('Only nucleotide sequences are supported')
182 if $seq->alphabet eq 'protein';
183 $self->throw('No ambiquos nucleotides allowed in the input sequence')
184 if $seq->seq =~ m/[^acgt]/;
186 $self->{'_seq'} = $seq;
188 # unify the look of sequence strings and cache the information
189 $self->{'_ori_string'} = lc $seq->seq; # lower case
190 $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
191 $self->{'_seq_length'} = $seq->length;
193 $self->reset_sequence_counter;
195 return $self->{'_seq'};
198 =head2 set_mutated_seq
200 Title : seq_mutated_seq
201 Usage : $obj->set_mutated_seq($newval)
202 Function: In case of mutating a sequence with multiple evolvers, this
203 Returns : set_mutated_seq
204 Args : newvalue (optional)
208 sub set_mutated_seq
{
213 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
214 unless $seq->isa('Bio::PrimarySeqI');
215 $self->throw('Only nucleotide sequences are supported')
216 if $seq->alphabet eq 'protein';
217 $self->throw('No ambiquos nucleotides allowed in the input sequence')
218 if $seq->seq =~ m/[^acgt]/;
220 $self->{'_seq_mutated'} = $seq;
222 # unify the look of sequence strings and cache the information
223 $self->{'_mut_string'} = lc $seq->seq; # lower case
224 $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
227 $self->reset_sequence_counter;
229 #set returned sequence to be the last mutated string
230 $self->{'_seq'}->seq($self->{'_mut_string'});
231 return $self->{'_seq'};
238 Usage : $obj->rate($newval)
239 Function: Set the transition/transversion rate.
240 Returns : value of rate
241 Args : newvalue (optional)
243 Transition/transversion ratio is an observed attribute of an sequence
244 comparison. We are dealing here with the transition/transversion rate
245 that we set for our model of sequence evolution.
247 Note that we are using standard nucleotide alphabet and that there can
248 there is only one transition versus two possible transversions. Rate 2
249 is needed to have an observed transition/transversion ratio of 1.
256 $self->{'_rate'} = shift @_;
257 $self->_init_mutation_engine;
259 return $self->{'_rate'} || 1;
265 Usage : $obj->next_seq
266 Function: Evolve the reference sequence to desired level
267 Returns : A new sequence object mutated from the reference sequence
274 $self->{'_mut_string'} = $self->{'_ori_string'};
275 $self->reset_mutation_counter;
277 $self->{'_mutations'} = [];
280 # find the location in the string to change
281 my $loc = int (rand length($self->{'_mut_string'})) + 1;
283 $self->mutate($loc); # for modularity
285 # stop evolving if any of the limit has been reached
286 last if $self->identity && $self->get_alignment_identity <= $self->identity;
287 last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
288 last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
290 $self->_increase_sequence_counter;
292 my $type = $self->seq_type;
293 return $type->new(-id
=> $self->seq->id. "-". $self->get_sequence_counter,
294 -description
=> $self->seq->description,
295 -seq
=> $self->{'_mut_string'}
303 Function: mutate the sequence at the given location according to the model
305 Args : integer, start location of the mutation, required argument
307 Called from next_seq().
314 $self->throw('the first argument is the location of the mutation') unless $loc;
316 # nucleotide to change
317 my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
321 # find the nucleotide it is changed to
322 my $choose = rand(100); # scale is 0-100
323 if ($choose < $self->{'_transition'} ) {
324 $newnuc = $self->{'_changes'}->{$oldnuc}[0];
325 } elsif ($choose < $self->{'_first_transversion'} ) {
326 $newnuc = $self->{'_changes'}->{$oldnuc}[1];
328 $newnuc = $self->{'_changes'}->{$oldnuc}[2];
332 substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
333 $self->_increase_mutation_counter;
334 $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
336 print STDERR
"$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
338 push @
{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";