squash waffling test
[bioperl-live.git] / Bio / SeqEvolution / DNAPoint.pm
blob59d063c9418ba800a1711f50e2a3b8fe8d8211c5
2 # BioPerl module for Bio::SeqEvolution::DNAPoint
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki at bioperl dot org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
18 =head1 SYNOPSIS
20 # $seq is a Bio::PrimarySeqI to mutate
21 $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
22 -seq => $seq,
23 -identity => 50
25 $newseq = $evolve->next_seq;
28 =head1 DESCRIPTION
30 Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
31 nucleotides change by point mutations, only. Transition/transversion
32 rate of the change, rate(), can be set.
34 The new sequences are named with the id of the reference sequence
35 added with a running number. Placing a new sequence into a factory to
36 be evolved resets that counter. It can also be called directly with
37 L<reset_sequence_counter>.
39 The default sequence type returned is Bio::PrimarySeq. This can be
40 changed to any Bio::PrimarySeqI compliant sequence class.
42 Internally the probability of the change of one nucleotide is mapped
43 to scale from 0 to 100. The probability of the transition occupies
44 range from 0 to some value. The remaining range is divided equally
45 among the two transversion nucleotides. A random number is then
46 generated to pick up one change.
48 Not that the default transition/transversion rate, 1:1, leads to
49 observed transition/transversion ratio of 1:2 simply because there is
50 only one transition nucleotide versus two transversion nucleotides.
52 =head1 FEEDBACK
54 =head2 Mailing Lists
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 =head2 Support
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
74 =head2 Reporting Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via the
78 web:
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR
84 Heikki Lehvaslaiho E<lt>heikki at bioperl dot orgE<gt>
86 =head1 CONTRIBUTORS
88 Additional contributor's names and emails here
90 =head1 APPENDIX
92 The rest of the documentation details each of the object methods.
93 Internal methods are usually preceded with a _
95 =cut
98 # Let the code begin...
101 package Bio::SeqEvolution::DNAPoint;
102 use strict;
103 use Bio::Root::Root;
104 use Bio::SeqEvolution::Factory;
106 use Bio::Variation::DNAMutation;
107 use Bio::Variation::Allele;
108 use Bio::SimpleAlign;
110 use base qw(Bio::SeqEvolution::Factory);
113 sub _initialize {
114 my($self, @args) = @_;
116 $self->SUPER::_initialize(@args);
117 my %param = @args;
118 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
120 exists $param{'-rate'} && $self->rate($param{'-rate'});
122 $self->_init_mutation_engine;
126 sub _init_mutation_engine {
127 my $self = shift;
129 # arrays of possible changes have transitions as first items
130 my %changes;
131 $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
132 $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
133 $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
134 $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
137 # given the desired rate, find out where cut off points need to be
138 # when random numbers are generated from 0 to 100
139 # we are ignoring identical mutations (e.g. A->A) to speed things up
140 my $bin_size = 100/($self->rate + 2);
141 $self->{'_transition'} = 100 - (2*$bin_size);
142 $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
144 $self->_init_alignment;
147 sub _init_alignment {
148 my $self = shift;
150 # put the initial sequence into the alignment object
151 $self->{'_align'} = Bio::SimpleAlign->new(-verbose => -1);
152 return unless $self->seq;
153 $self->{'_ori_locatableseq'} = Bio::LocatableSeq->new(-id => 'ori',
154 -seq=> $self->seq->seq);
155 $self->{'_mut_locatableseq'} = Bio::LocatableSeq->new(-id => 'mut',
156 -seq=> $self->seq->seq);
157 $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
158 $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
161 =head2 seq
163 Title : seq
164 Usage : $obj->seq($newval)
165 Function: Set the sequence object for the original sequence
166 Returns : The sequence object
167 Args : newvalue (optional)
169 Setting this will reset mutation and generated mutation counters.
171 =cut
173 sub seq{
174 my $self = shift;
176 if (@_) {
177 my $seq = shift;
178 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
179 unless $seq->isa('Bio::PrimarySeqI');
181 $self->throw('Only nucleotide sequences are supported')
182 if $seq->alphabet eq 'protein';
183 $self->throw('No ambiquos nucleotides allowed in the input sequence')
184 if $seq->seq =~ m/[^acgt]/;
186 $self->{'_seq'} = $seq;
188 # unify the look of sequence strings and cache the information
189 $self->{'_ori_string'} = lc $seq->seq; # lower case
190 $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
191 $self->{'_seq_length'} = $seq->length;
193 $self->reset_sequence_counter;
195 return $self->{'_seq'};
198 =head2 set_mutated_seq
200 Title : seq_mutated_seq
201 Usage : $obj->set_mutated_seq($newval)
202 Function: In case of mutating a sequence with multiple evolvers, this
203 Returns : set_mutated_seq
204 Args : newvalue (optional)
206 =cut
208 sub set_mutated_seq {
209 my $self = shift;
211 if (@_) {
212 my $seq = shift;
213 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
214 unless $seq->isa('Bio::PrimarySeqI');
215 $self->throw('Only nucleotide sequences are supported')
216 if $seq->alphabet eq 'protein';
217 $self->throw('No ambiquos nucleotides allowed in the input sequence')
218 if $seq->seq =~ m/[^acgt]/;
220 $self->{'_seq_mutated'} = $seq;
222 # unify the look of sequence strings and cache the information
223 $self->{'_mut_string'} = lc $seq->seq; # lower case
224 $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
227 $self->reset_sequence_counter;
229 #set returned sequence to be the last mutated string
230 $self->{'_seq'}->seq($self->{'_mut_string'});
231 return $self->{'_seq'};
235 =head2 rate
237 Title : rate
238 Usage : $obj->rate($newval)
239 Function: Set the transition/transversion rate.
240 Returns : value of rate
241 Args : newvalue (optional)
243 Transition/transversion ratio is an observed attribute of an sequence
244 comparison. We are dealing here with the transition/transversion rate
245 that we set for our model of sequence evolution.
247 Note that we are using standard nucleotide alphabet and that there can
248 there is only one transition versus two possible transversions. Rate 2
249 is needed to have an observed transition/transversion ratio of 1.
251 =cut
253 sub rate{
254 my $self = shift;
255 if (@_) {
256 $self->{'_rate'} = shift @_;
257 $self->_init_mutation_engine;
259 return $self->{'_rate'} || 1;
262 =head2 next_seq
264 Title : next_seq
265 Usage : $obj->next_seq
266 Function: Evolve the reference sequence to desired level
267 Returns : A new sequence object mutated from the reference sequence
268 Args : -
270 =cut
272 sub next_seq {
273 my $self = shift;
274 $self->{'_mut_string'} = $self->{'_ori_string'};
275 $self->reset_mutation_counter;
277 $self->{'_mutations'} = [];
279 while (1) {
280 # find the location in the string to change
281 my $loc = int (rand length($self->{'_mut_string'})) + 1;
283 $self->mutate($loc); # for modularity
285 # stop evolving if any of the limit has been reached
286 last if $self->identity && $self->get_alignment_identity <= $self->identity;
287 last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
288 last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
290 $self->_increase_sequence_counter;
292 my $type = $self->seq_type;
293 return $type->new(-id => $self->seq->id. "-". $self->get_sequence_counter,
294 -description => $self->seq->description,
295 -seq => $self->{'_mut_string'}
299 =head2 mutate
301 Title : mutate
302 Usage : $obj->mutate
303 Function: mutate the sequence at the given location according to the model
304 Returns : true
305 Args : integer, start location of the mutation, required argument
307 Called from next_seq().
309 =cut
311 sub mutate {
312 my $self = shift;
313 my $loc = shift;
314 $self->throw('the first argument is the location of the mutation') unless $loc;
316 # nucleotide to change
317 my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
318 my $newnuc;
321 # find the nucleotide it is changed to
322 my $choose = rand(100); # scale is 0-100
323 if ($choose < $self->{'_transition'} ) {
324 $newnuc = $self->{'_changes'}->{$oldnuc}[0];
325 } elsif ($choose < $self->{'_first_transversion'} ) {
326 $newnuc = $self->{'_changes'}->{$oldnuc}[1];
327 } else {
328 $newnuc = $self->{'_changes'}->{$oldnuc}[2];
331 # do the change
332 substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
333 $self->_increase_mutation_counter;
334 $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
336 print STDERR "$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
338 push @{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";