Bio/SeqEvolution/DNAPoint.pm

   1 #
   2 # BioPerl module for Bio::SeqEvolution::DNAPoint
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6 # Cared for by Heikki Lehvaslaiho <heikki at bioperl dot org>
   7 #
   8 # Copyright Heikki Lehvaslaiho
   9 #
  10 # You may distribute this module under the same terms as perl itself
  11
  12 # POD documentation - main docs before the code
  13
  14 =head1 NAME
  15
  16 Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
  17
  18 =head1 SYNOPSIS
  19
  20   # $seq is a Bio::PrimarySeqI to mutate
  21   $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
  22                                              -seq => $seq,
  23                                              -identity => 50
  24                                              );
  25   $newseq = $evolve->next_seq;
  26
  27
  28 =head1 DESCRIPTION
  29
  30 Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
  31 nucleotides change by point mutations, only. Transition/transversion
  32 rate of the change, rate(), can be set.
  33
  34 The new sequences are named with the id of the reference sequence
  35 added with a running number. Placing a new sequence into a factory to
  36 be evolved resets that counter. It can also be called directly with
  37 L<reset_sequence_counter>.
  38
  39 The default sequence type returned is Bio::PrimarySeq. This can be
  40 changed to any Bio::PrimarySeqI compliant sequence class.
  41
  42 Internally the probability of the change of one nucleotide is mapped
  43 to scale from 0 to 100. The probability of the transition occupies
  44 range from 0 to some value. The remaining range is divided equally
  45 among the two transversion nucleotides. A random number is then
  46 generated to pick up one change.
  47
  48 Not that the default transition/transversion rate, 1:1, leads to
  49 observed transition/transversion ratio of 1:2 simply because there is
  50 only one transition nucleotide versus two transversion nucleotides.
  51
  52 =head1 FEEDBACK
  53
  54 =head2 Mailing Lists
  55
  56 User feedback is an integral part of the evolution of this and other
  57 Bioperl modules. Send your comments and suggestions preferably to
  58 the Bioperl mailing list.  Your participation is much appreciated.
  59
  60   bioperl-l@bioperl.org                  - General discussion
  61   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  62
  63 =head2 Support
  64
  65 Please direct usage questions or support issues to the mailing list:
  66
  67 I<bioperl-l@bioperl.org>
  68
  69 rather than to the module maintainer directly. Many experienced and
  70 reponsive experts will be able look at the problem and quickly
  71 address it. Please include a thorough description of the problem
  72 with code and data examples if at all possible.
  73
  74 =head2 Reporting Bugs
  75
  76 Report bugs to the Bioperl bug tracking system to help us keep track
  77 of the bugs and their resolution. Bug reports can be submitted via the
  78 web:
  79
  80   https://github.com/bioperl/bioperl-live/issues
  81
  82 =head1 AUTHOR
  83
  84   Heikki Lehvaslaiho E<lt>heikki at bioperl dot orgE<gt>
  85
  86 =head1 CONTRIBUTORS
  87
  88 Additional contributor's names and emails here
  89
  90 =head1 APPENDIX
  91
  92 The rest of the documentation details each of the object methods.
  93 Internal methods are usually preceded with a _
  94
  95 =cut
  96
  97
  98 # Let the code begin...
  99
 100
 101 package Bio::SeqEvolution::DNAPoint;
 102 use strict;
 103 use Bio::Root::Root;
 104 use Bio::SeqEvolution::Factory;
 105
 106 use Bio::Variation::DNAMutation;
 107 use Bio::Variation::Allele;
 108 use Bio::SimpleAlign;
 109
 110 use base qw(Bio::SeqEvolution::Factory);
 111
 112
 113 sub _initialize {
 114     my($self, @args) = @_;
 115
 116     $self->SUPER::_initialize(@args);
 117     my %param = @args;
 118     @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
 119
 120     exists $param{'-rate'} && $self->rate($param{'-rate'});
 121
 122     $self->_init_mutation_engine;
 123 }
 124
 125
 126 sub _init_mutation_engine {
 127     my $self = shift;
 128
 129     # arrays of possible changes have transitions as first items
 130     my %changes;
 131     $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
 132     $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
 133     $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
 134     $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
 135
 136
 137     # given the desired rate, find out where cut off points need to be
 138     # when random numbers are generated from 0 to 100
 139     # we are ignoring identical mutations (e.g. A->A) to speed things up
 140     my $bin_size = 100/($self->rate + 2);
 141     $self->{'_transition'} = 100 - (2*$bin_size);
 142     $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
 143
 144     $self->_init_alignment;
 145 }
 146
 147 sub _init_alignment {
 148     my $self = shift;
 149
 150     # put the initial sequence into the alignment object
 151     $self->{'_align'} = Bio::SimpleAlign->new(-verbose => -1);
 152     return unless $self->seq;
 153     $self->{'_ori_locatableseq'} = Bio::LocatableSeq->new(-id => 'ori',
 154                                                          -seq=> $self->seq->seq);
 155     $self->{'_mut_locatableseq'} = Bio::LocatableSeq->new(-id => 'mut',
 156                                                          -seq=> $self->seq->seq);
 157     $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
 158     $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
 159 }
 160
 161 =head2 seq
 162
 163  Title   : seq
 164  Usage   : $obj->seq($newval)
 165  Function: Set the sequence object for the original sequence
 166  Returns : The sequence object
 167  Args    : newvalue (optional)
 168
 169 Setting this will reset mutation and generated mutation counters.
 170
 171 =cut
 172
 173 sub seq{
 174    my $self = shift;
 175
 176    if (@_) {
 177        my $seq = shift;
 178        $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
 179            unless $seq->isa('Bio::PrimarySeqI');
 180
 181        $self->throw('Only nucleotide sequences are supported')
 182            if $seq->alphabet eq 'protein';
 183        $self->throw('No ambiquos nucleotides allowed in the input sequence')
 184            if $seq->seq =~ m/[^acgt]/;
 185
 186        $self->{'_seq'} = $seq;
 187
 188        # unify the look of sequence strings and cache the information
 189        $self->{'_ori_string'} = lc $seq->seq; # lower case
 190        $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
 191        $self->{'_seq_length'} = $seq->length;
 192
 193        $self->reset_sequence_counter;
 194    }
 195    return $self->{'_seq'};
 196 }
 197
 198 =head2 set_mutated_seq
 199
 200   Title   : seq_mutated_seq
 201   Usage   : $obj->set_mutated_seq($newval)
 202   Function: In case of mutating a sequence with multiple evolvers, this
 203   Returns : set_mutated_seq
 204   Args    : newvalue (optional)
 205
 206 =cut
 207
 208 sub set_mutated_seq {
 209     my $self = shift;
 210
 211     if (@_) {
 212         my $seq = shift;
 213         $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
 214             unless $seq->isa('Bio::PrimarySeqI');
 215         $self->throw('Only nucleotide sequences are supported')
 216             if $seq->alphabet eq 'protein';
 217         $self->throw('No ambiquos nucleotides allowed in the input sequence')
 218             if $seq->seq =~ m/[^acgt]/;
 219
 220         $self->{'_seq_mutated'} = $seq;
 221
 222         # unify the look of sequence strings and cache the information
 223         $self->{'_mut_string'} = lc $seq->seq; # lower case
 224         $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
 225
 226
 227         $self->reset_sequence_counter;
 228     }
 229     #set returned sequence to be the last mutated string
 230     $self->{'_seq'}->seq($self->{'_mut_string'});
 231     return $self->{'_seq'};
 232 }
 233
 234
 235 =head2 rate
 236
 237   Title   : rate
 238   Usage   : $obj->rate($newval)
 239   Function: Set the transition/transversion rate.
 240   Returns : value of rate
 241   Args    : newvalue (optional)
 242
 243 Transition/transversion ratio is an observed attribute of an sequence
 244 comparison. We are dealing here with the transition/transversion rate
 245 that we set for our model of sequence evolution.
 246
 247 Note that we are using standard nucleotide alphabet and that there can
 248 there is only one transition versus two possible transversions. Rate 2
 249 is needed to have an observed transition/transversion ratio of 1.
 250
 251 =cut
 252
 253 sub rate{
 254    my $self = shift;
 255    if (@_) {
 256        $self->{'_rate'} = shift @_;
 257        $self->_init_mutation_engine;
 258    }
 259    return $self->{'_rate'} || 1;
 260 }
 261
 262 =head2 next_seq
 263
 264   Title   : next_seq
 265   Usage   : $obj->next_seq
 266   Function: Evolve the reference sequence to desired level
 267   Returns : A new sequence object mutated from the reference sequence
 268   Args    : -
 269
 270 =cut
 271
 272 sub next_seq {
 273     my $self = shift;
 274     $self->{'_mut_string'} = $self->{'_ori_string'};
 275     $self->reset_mutation_counter;
 276
 277     $self->{'_mutations'} = [];
 278
 279     while (1) {
 280         # find the location in the string to change
 281         my $loc = int (rand length($self->{'_mut_string'})) + 1;
 282
 283         $self->mutate($loc); # for modularity
 284
 285         # stop evolving if any of the limit has been reached
 286         last if $self->identity && $self->get_alignment_identity <= $self->identity;
 287         last if $self->pam &&  100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
 288         last if $self->mutation_count &&  $self->get_mutation_counter >= $self->mutation_count;
 289     }
 290     $self->_increase_sequence_counter;
 291
 292     my $type = $self->seq_type;
 293     return $type->new(-id => $self->seq->id.  "-". $self->get_sequence_counter,
 294                       -description => $self->seq->description,
 295                       -seq => $self->{'_mut_string'}
 296                      )
 297 }
 298
 299 =head2 mutate
 300
 301   Title   : mutate
 302   Usage   : $obj->mutate
 303   Function: mutate the sequence at the given location according to the model
 304   Returns : true
 305   Args    : integer, start location of the mutation, required argument
 306
 307 Called from next_seq().
 308
 309 =cut
 310
 311 sub mutate {
 312     my $self = shift;
 313     my $loc = shift;
 314     $self->throw('the first argument is the location of the mutation') unless $loc;
 315
 316     # nucleotide to change
 317     my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
 318     my $newnuc;
 319
 320
 321     # find the nucleotide it is changed to
 322     my $choose = rand(100);     # scale is 0-100
 323     if ($choose < $self->{'_transition'} ) {
 324         $newnuc =  $self->{'_changes'}->{$oldnuc}[0];
 325     } elsif ($choose < $self->{'_first_transversion'} ) {
 326         $newnuc =  $self->{'_changes'}->{$oldnuc}[1];
 327     } else {
 328         $newnuc =  $self->{'_changes'}->{$oldnuc}[2];
 329     }
 330
 331     # do the change
 332     substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
 333     $self->_increase_mutation_counter;
 334     $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
 335
 336     print STDERR "$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
 337
 338     push @{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";
 339 }
 340
 341
 342 1;