3 # BioPerl module for Bio::SeqEvolution::DNAPoint
5 # Cared for by Heikki Lehvaslaiho <heikki at bioperl dot org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
19 # $seq is a Bio::PrimarySeqI to mutate
20 $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
24 $newseq = $evolve->next_seq;
29 Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
30 nucleotides change by point mutations, only. Transition/transversion
31 rate of the change, rate(), can be set.
33 The new sequences are named with the id of the reference sequence
34 added with a running number. Placing a new sequence into a factory to
35 be evolved resets that counter. It can also be called directly with
36 L<reset_sequence_counter>.
38 The default sequence type returned is Bio::PrimarySeq. This can be
39 changed to any Bio::PrimarySeqI compliant sequence class.
41 Internally the probability of the change of one nucleotide is mapped
42 to scale from 0 to 100. The probability of the transition occupies
43 range from 0 to some value. The remaining range is divided equally
44 among the two transversion nucleotides. A random number is then
45 generated to pick up one change.
47 Not that the default transition/transversion rate, 1:1, leads to
48 observed transition/transversion ratio of 1:2 simply because there is
49 only one transition nucleotide versus two transversion nucleotides.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/MailList.shtml - About the mailing lists
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via the
69 http://bugzilla.bioperl.org/
73 Heikki Lehvaslaiho E<lt>heikki at bioperl dot orgE<gt>
77 Additional contributor's names and emails here
81 The rest of the documentation details each of the object methods.
82 Internal methods are usually preceded with a _
87 # Let the code begin...
90 package Bio
::SeqEvolution
::DNAPoint
;
93 use Bio
::SeqEvolution
::Factory
;
95 use Bio
::Variation
::DNAMutation
;
96 use Bio
::Variation
::Allele
;
99 use base
qw(Bio::SeqEvolution::Factory);
103 my($self, @args) = @_;
105 $self->SUPER::_initialize
(@args);
107 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
109 exists $param{'-rate'} && $self->rate($param{'-rate'});
111 $self->_init_mutation_engine;
115 sub _init_mutation_engine
{
118 # arrays of possible changes have transitions as first items
120 $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
121 $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
122 $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
123 $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
126 # given the desired rate, find out where cut off points need to be
127 # when random numbers are generated from 0 to 100
128 # we are ignoring identical mutations (e.g. A->A) to speed things up
129 my $bin_size = 100/($self->rate + 2);
130 $self->{'_transition'} = 100 - (2*$bin_size);
131 $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
133 $self->_init_alignment;
136 sub _init_alignment
{
139 # put the initial sequence into the alignment object
140 $self->{'_align'} = Bio
::SimpleAlign
->new(-verbose
=> -1);
141 return unless $self->seq;
142 $self->{'_ori_locatableseq'} = Bio
::LocatableSeq
->new(-id
=> 'ori',
143 -seq
=> $self->seq->seq);
144 $self->{'_mut_locatableseq'} = Bio
::LocatableSeq
->new(-id
=> 'mut',
145 -seq
=> $self->seq->seq);
146 $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
147 $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
153 Usage : $obj->seq($newval)
154 Function: Set the sequence object for the original sequence
155 Returns : The sequence object
156 Args : newvalue (optional)
158 Setting this will reset mutation and generated mutation counters.
167 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
168 unless $seq->isa('Bio::PrimarySeqI');
170 $self->throw('Only nucleotide sequences are supported')
171 if $seq->alphabet eq 'protein';
172 $self->throw('No ambiquos nucleotides allowed in the input sequence')
173 if $seq->seq =~ m/[^acgt]/;
175 $self->{'_seq'} = $seq;
177 # unify the look of sequence strings and cache the information
178 $self->{'_ori_string'} = lc $seq->seq; # lower case
179 $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
180 $self->{'_seq_length'} = $seq->length;
182 $self->reset_sequence_counter;
184 return $self->{'_seq'};
187 =head2 set_mutated_seq
189 Title : seq_mutated_seq
190 Usage : $obj->set_mutated_seq($newval)
191 Function: In case of mutating a sequence with multiple evolvers, this
192 Returns : set_mutated_seq
193 Args : newvalue (optional)
197 sub set_mutated_seq
{
202 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
203 unless $seq->isa('Bio::PrimarySeqI');
204 $self->throw('Only nucleotide sequences are supported')
205 if $seq->alphabet eq 'protein';
206 $self->throw('No ambiquos nucleotides allowed in the input sequence')
207 if $seq->seq =~ m/[^acgt]/;
209 $self->{'_seq_mutated'} = $seq;
211 # unify the look of sequence strings and cache the information
212 $self->{'_mut_string'} = lc $seq->seq; # lower case
213 $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
216 $self->reset_sequence_counter;
218 #set returned sequence to be the last mutated string
219 $self->{'_seq'}->seq($self->{'_mut_string'});
220 return $self->{'_seq'};
227 Usage : $obj->rate($newval)
228 Function: Set the transition/transversion rate.
229 Returns : value of rate
230 Args : newvalue (optional)
232 Transition/transversion ratio is an observed attribute of an sequence
233 comparison. We are dealing here with the transition/transversion rate
234 that we set for our model of sequence evolution.
236 Note that we are using standard nucleotide alphabet and that there can
237 there is only one transition versus two possible transversions. Rate 2
238 is needed to have an observed transition/transversion ratio of 1.
245 $self->{'_rate'} = shift @_;
246 $self->_init_mutation_engine;
248 return $self->{'_rate'} || 1;
254 Usage : $obj->next_seq
255 Function: Evolve the reference sequence to desired level
256 Returns : A new sequence object mutated from the reference sequence
263 $self->{'_mut_string'} = $self->{'_ori_string'};
264 $self->reset_mutation_counter;
266 $self->{'_mutations'} = [];
269 # find the location in the string to change
270 my $loc = int (rand length($self->{'_mut_string'})) + 1;
272 $self->mutate($loc); # for modularity
274 # stop evolving if any of the limit has been reached
275 last if $self->identity && $self->get_alignment_identity <= $self->identity;
276 last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
277 last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
279 $self->_increase_sequence_counter;
281 my $type = $self->seq_type;
282 return $type->new(-id
=> $self->seq->id. "-". $self->get_sequence_counter,
283 -description
=> $self->seq->description,
284 -seq
=> $self->{'_mut_string'}
292 Function: mutate the sequence at the given location according to the model
294 Args : integer, start location of the mutation, required argument
296 Called from next_seq().
303 $self->throw('the first argument is the location of the mutation') unless $loc;
305 # nucleotide to change
306 my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
310 # find the nucleotide it is changed to
311 my $choose = rand(100); # scale is 0-100
312 if ($choose < $self->{'_transition'} ) {
313 $newnuc = $self->{'_changes'}->{$oldnuc}[0];
314 } elsif ($choose < $self->{'_first_transversion'} ) {
315 $newnuc = $self->{'_changes'}->{$oldnuc}[1];
317 $newnuc = $self->{'_changes'}->{$oldnuc}[2];
321 substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
322 $self->_increase_mutation_counter;
323 $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
325 print STDERR
"$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
327 push @
{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";