1 # BioPerl module for Bio::SeqUtils
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqUtils - Additional methods for PrimarySeq objects
20 # get a Bio::PrimarySeqI compliant object, $seq, somehow
21 $util = Bio::SeqUtils->new();
22 $polypeptide_3char = $util->seq3($seq);
24 $polypeptide_3char = Bio::SeqUtils->seq3($seq);
26 # set the sequence string (stored in one char code in the object)
27 Bio::SeqUtils->seq3($seq, $polypeptide_3char);
29 # translate a sequence in all six frames
30 @seqs = Bio::SeqUtils->translate_6frames($seq);
32 # inplace editing of the sequence
33 Bio::SeqUtils->mutate($seq,
34 Bio::LiveSeq::Mutation->new(-seq => 'c',
37 # mutate a sequence to desired similarity%
38 $newseq = Bio::SeqUtils-> evolve
39 ($seq, $similarity, $transition_transversion_rate);
41 # concatenate two or more sequences with annotations and features,
42 # the first sequence will be modified
43 Bio::SeqUtils->cat(@seqs);
46 # truncate a sequence, retaining features and adjusting their
47 # coordinates if necessary
48 my $truncseq = Bio::SeqUtils->trunc_with_features($seq, 100, 200);
50 # reverse complement a sequence and its features
51 my $revcomseq = Bio::SeqUtils->revcom_with_features($seq);
53 # simulate cloning of a fragment into a vector. Cut the vector at
54 # positions 1000 and 1100 (deleting postions 1001 to 1099) and
55 # "ligate" a fragment into the sites. The fragment is
56 # reverse-complemented in this example (option "flip").
57 # All features of the vector and fragment are preserved and
58 # features that are affected by the deletion/insertion are
59 # modified accordingly.
60 # $vector and $fragment must be Bio::SeqI compliant objects
61 my $new_molecule = Bio::Sequtils->ligate(
63 -fragment => $fragment,
69 # delete a segment of a sequence (from pos 1000 to 1100, inclusive),
70 # again preserving features and annotations
71 my $new_molecule = Bio::SeqUtils->cut( $seq, 1000, 1100 );
73 # insert a fragment into a recipient between positions 1000 and
74 # 1001. $recipient is a Bio::SeqI compliant object
75 my $new_molecule = Bio::SeqUtils::PbrTools->insert(
83 This class is a holder of methods that work on Bio::PrimarySeqI-
84 compliant sequence objects, e.g. Bio::PrimarySeq and
85 Bio::Seq. These methods are not part of the Bio::PrimarySeqI
86 interface and should in general not be essential to the primary function
87 of sequence objects. If you are thinking of adding essential
88 functions, it might be better to create your own sequence class.
89 See L<Bio::PrimarySeqI>, L<Bio::PrimarySeq>, and L<Bio::Seq> for more.
91 The methods take as their first argument a sequence object. It is
92 possible to use methods without first creating a SeqUtils object,
93 i.e. use it as an anonymous hash.
95 The first two methods, seq3() and seq3in(), give out or read in protein
96 sequences coded in three letter IUPAC amino acid codes.
98 The next two methods, translate_3frames() and translate_6frames(), wrap
99 around the standard translate method to give back an array of three
100 forward or all six frame translations.
102 The mutate() method mutates the sequence string with a mutation
105 The cat() method concatenates two or more sequences. The first sequence
106 is modified by addition of the remaining sequences. All annotations and
107 sequence features will be transferred.
109 The revcom_with_features() and trunc_with_features() methods are similar
110 to the revcom() and trunc() methods from Bio::Seq, but also adjust any
111 features associated with the sequence as appropriate.
113 There are also methods that simulate molecular cloning with rich
115 The delete() method cuts a segment out of a sequence and re-joins the
116 left and right fragments (like splicing or digesting and re-ligating a
117 molecule). Positions (and types) of sequence features are adjusted
119 Features that span the deleted segment are converted to split featuress
120 to indicate the disruption. (Sub)Features that extend into the deleted
121 segment are truncated.
122 A new molecule is created and returned.
124 The insert() method inserts a fragment (which can be a rich Bio::Seq
125 object) into another sequence object adding all annotations and
126 features to the final product.
127 Features that span the insertion site are converted to split features
128 to indicate the disruption.
129 A new feature is added to indicate the inserted fragment itself.
130 A new molecule is created and returned.
132 The ligate() method simulates digesting a recipient (vector) and
133 ligating a fragment into it, which can also be flipped if needed. It
134 is simply a combination of a deletion and an insertion step and
135 returns a new molecule. The rules for modifying feature locations
136 outlined above are also used here, e.g. features that span the cut
137 sites are converted to split features with truncated sub-locations.
144 User feedback is an integral part of the evolution of this and other
145 Bioperl modules. Send your comments and suggestions preferably to one
146 of the Bioperl mailing lists. Your participation is much appreciated.
148 bioperl-l@bioperl.org - General discussion
149 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
153 Please direct usage questions or support issues to the mailing list:
155 I<bioperl-l@bioperl.org>
157 rather than to the module maintainer directly. Many experienced and
158 reponsive experts will be able look at the problem and quickly
159 address it. Please include a thorough description of the problem
160 with code and data examples if at all possible.
162 =head2 Reporting Bugs
164 Report bugs to the Bioperl bug tracking system to help us keep track
165 the bugs and their resolution. Bug reports can be submitted via the
168 https://github.com/bioperl/bioperl-live/issues
170 =head1 AUTHOR - Heikki Lehvaslaiho
172 Email: heikki-at-bioperl-dot-org
176 Roy R. Chaudhuri - roy.chaudhuri at gmail.com
177 Frank Schwach - frank.schwach@sanger.ac.uk
181 The rest of the documentation details each of the object
182 methods. Internal methods are usually preceded with a _
186 # Let the code begin...
188 package Bio
::SeqUtils
;
191 use Scalar
::Util
qw(blessed);
192 use parent
qw(Bio::Root::Root);
194 # new inherited from RootI
259 Usage : $string = Bio::SeqUtils->seq3($seq)
260 Function: Read only method that returns the amino acid sequence as a
261 string of three letter codes. alphabet has to be
262 'protein'. Output follows the IUPAC standard plus 'Ter' for
263 terminator. Any unknown character, including the default
264 unknown character 'X', is changed into 'Xaa'. A noncoded
265 aminoacid selenocystein is recognized (Sec, U).
268 Args : character used for stop in the protein sequence optional,
269 defaults to '*' string used to separate the output amino
270 acid codes, optional, defaults to ''
275 my ( $self, $seq, $stop, $sep ) = @_;
277 $seq->isa('Bio::PrimarySeqI')
278 || $self->throw('Not a Bio::PrimarySeqI object but [$self]');
279 $seq->alphabet eq 'protein'
280 || $self->throw('Not a protein sequence');
282 if ( defined $stop ) {
284 and $self->throw('One character stop needed, not [$stop]');
285 $THREECODE{$stop} = "Ter";
290 foreach my $aa ( split //, uc $seq->seq ) {
291 $THREECODE{$aa} and $aa3s .= $THREECODE{$aa} . $sep, next;
292 $aa3s .= 'Xaa' . $sep;
294 $sep and substr( $aa3s, -( length $sep ), length $sep ) = '';
301 Usage : $seq = Bio::SeqUtils->seq3in($seq, 'MetGlyTer')
302 Function: Method for changing of the sequence of a
303 Bio::PrimarySeqI sequence object. The three letter amino
304 acid input string is converted into one letter code. Any
305 unknown character triplet, including the default 'Xaa', is
308 Returns : Bio::PrimarySeq object
309 Args : sequence string
310 optional character to be used for stop in the protein sequence,
312 optional character to be used for unknown in the protein sequence,
318 my ( $self, $seq, $string, $stop, $unknown ) = @_;
320 $seq->isa('Bio::PrimarySeqI')
321 || $self->throw("Not a Bio::PrimarySeqI object but [$self]");
322 $seq->alphabet eq 'protein'
323 || $self->throw('Not a protein sequence');
325 if ( defined $stop ) {
327 and $self->throw("One character stop needed, not [$stop]");
328 $ONECODE{'Ter'} = $stop;
330 if ( defined $unknown ) {
332 and $self->throw("One character stop needed, not [$unknown]");
333 $ONECODE{'Xaa'} = $unknown;
337 my $length = ( length $string ) - 2;
338 for ( my $i = 0 ; $i < $length ; $i += 3 ) {
339 $aa3 = substr( $string, $i, 3 );
340 $aa3 = ucfirst( lc($aa3) );
341 $ONECODE{$aa3} and $aas .= $ONECODE{$aa3}, next;
342 $aas .= $ONECODE{'Xaa'};
348 =head2 translate_3frames
350 Title : translate_3frames
351 Usage : @prots = Bio::SeqUtils->translate_3frames($seq)
352 Function: Translate a nucleotide sequence in three forward frames.
353 The IDs of the sequences are appended with '-0F', '-1F', '-2F'.
354 Returns : An array of seq objects
355 Args : sequence object
356 same arguments as to Bio::PrimarySeqI::translate
360 sub translate_3frames
{
361 my ( $self, $seq, @args ) = @_;
363 $self->throw( 'Object [$seq] '
366 . '] can not be translated.' )
367 unless $seq->can('translate');
369 my ( $stop, $unknown, $frame, $tableid, $fullCDS, $throw ) = @args;
374 $seq->translate( $stop, $unknown, $f, $tableid, $fullCDS, $throw );
375 $translation->id( $seq->id . "-" . $f . "F" );
376 push @seqs, $translation;
383 =head2 translate_6frames
385 Title : translate_6frames
386 Usage : @prots = Bio::SeqUtils->translate_6frames($seq)
387 Function: translate a nucleotide sequence in all six frames
388 The IDs of the sequences are appended with '-0F', '-1F', '-2F',
390 Returns : An array of seq objects
391 Args : sequence object
392 same arguments as to Bio::PrimarySeqI::translate
396 sub translate_6frames
{
397 my ( $self, $seq, @args ) = @_;
399 my @seqs = $self->translate_3frames( $seq, @args );
400 my @seqs2 = $self->translate_3frames( $seq->revcom, @args );
401 foreach my $seq2 (@seqs2) {
402 my ($tmp) = $seq2->id;
406 return @seqs, @seqs2;
412 Usage : my @aa = $table->valid_aa
413 Function: Retrieves a list of the valid amino acid codes.
414 The list is ordered so that first 21 codes are for unique
415 amino acids. The rest are ['B', 'Z', 'X', '*'].
416 Returns : array of all the valid amino acid codes
417 Args : [optional] $code => [0 -> return list of 1 letter aa codes,
418 1 -> return list of 3 letter aa codes,
419 2 -> return associative array of both ]
424 my ( $self, $code ) = @_;
428 foreach my $c ( sort values %ONECODE ) {
429 push @codes, $c unless ( $c =~ /[BZX\*]/ );
431 push @codes, qw(B Z X *); # so they are in correct order ?
434 elsif ( $code == 1 ) {
436 foreach my $c ( sort keys %ONECODE ) {
437 push @codes, $c unless ( $c =~ /(Asx|Glx|Xaa|Ter)/ );
439 push @codes, ( 'Asx', 'Glx', 'Xaa', 'Ter' );
442 elsif ( $code == 2 ) {
443 my %codes = %ONECODE;
444 foreach my $c ( keys %ONECODE ) {
445 my $aa = $ONECODE{$c};
452 "unrecognized code in " . ref($self) . " method valid_aa()" );
460 Usage : Bio::SeqUtils->mutate($seq,$mutation1, $mutation2);
461 Function: Inplace editing of the sequence.
463 The second argument can be a Bio::LiveSeq::Mutation object
464 or an array of them. The mutations are applied sequentially
465 checking only that their position is within the current
466 sequence. Insertions are inserted before the given
470 Args : sequence object
471 mutation, a Bio::LiveSeq::Mutation object, or an array of them
473 See L<Bio::LiveSeq::Mutation>.
478 my ( $self, $seq, @mutations ) = @_;
480 $self->throw( 'Object [$seq] '
483 . '] should be a Bio::PrimarySeqI ' )
484 unless $seq->isa('Bio::PrimarySeqI');
485 $self->throw( 'Object [$mutations[0]] '
487 . ref( $mutations[0] )
488 . '] should be a Bio::LiveSeq::Mutation' )
489 unless $mutations[0]->isa('Bio::LiveSeq::Mutation');
491 foreach my $mutation (@mutations) {
492 $self->throw('Attempting to mutate sequence beyond its length')
493 unless $mutation->pos - 1 <= $seq->length;
495 my $string = $seq->seq;
496 substr $string, $mutation->pos - 1, $mutation->len, $mutation->seq;
505 Usage : Bio::SeqUtils->cat(@seqs);
507 Function: Concatenates a list of Bio::Seq objects, adding them all on to the
508 end of the first sequence. Annotations and sequence features are
509 copied over from any additional objects, and the coordinates of any
510 copied features are adjusted appropriately.
512 Args : array of sequence objects
514 Note that annotations have no sequence locations. If you concatenate
515 sequences with the same annotations they will all be added.
520 my ( $self, $seq, @seqs ) = @_;
521 $self->throw( 'Object [$seq] '
524 . '] should be a Bio::PrimarySeqI ' )
525 unless $seq->isa('Bio::PrimarySeqI');
527 for my $catseq (@seqs) {
528 $self->throw( 'Object [$catseq] '
531 . '] should be a Bio::PrimarySeqI ' )
532 unless $catseq->isa('Bio::PrimarySeqI');
535 'Trying to concatenate sequences with different alphabets: '
536 . $seq->display_id . '('
539 . $catseq->display_id . '('
542 unless $catseq->alphabet eq $seq->alphabet;
544 my $length = $seq->length;
545 $seq->seq( $seq->seq . $catseq->seq );
548 if ( $seq->isa("Bio::AnnotatableI")
549 and $catseq->isa("Bio::AnnotatableI") )
551 foreach my $key ( $catseq->annotation->get_all_annotation_keys() ) {
553 foreach my $value ( $catseq->annotation->get_Annotations($key) )
555 $seq->annotation->add_Annotation( $key, $value );
561 if ( $seq->isa('Bio::SeqI') and $catseq->isa('Bio::SeqI') ) {
562 for my $feat ( $catseq->get_SeqFeatures ) {
563 $seq->add_SeqFeature( $self->_coord_adjust( $feat, $length ) );
571 =head2 trunc_with_features
573 Title : trunc_with_features
574 Usage : $trunc=Bio::SeqUtils->trunc_with_features($seq, $start, $end);
575 Function: Like Bio::Seq::trunc, but keeps features (adjusting coordinates
576 where necessary. Features that partially overlap the region have
577 their location changed to a Bio::Location::Fuzzy.
578 Returns : A new sequence object
579 Args : A sequence object, start coordinate, end coordinate (inclusive)
584 sub trunc_with_features
{
586 my ( $self, $seq, $start, $end ) = @_;
587 $self->throw( 'Object [$seq] '
590 . '] should be a Bio::SeqI ' )
591 unless $seq->isa('Bio::SeqI');
592 my $trunc = $seq->trunc( $start, $end );
594 Bio
::Range
->new( -start
=> $start, -end
=> $end, -strand
=> 0 );
596 # make sure that there is no annotation or features in $trunc
597 # (->trunc() now clone objects except for Bio::Seq::LargePrimarySeq)
598 $trunc->annotation->remove_Annotations;
599 $trunc->remove_SeqFeatures;
602 foreach my $key ( $seq->annotation->get_all_annotation_keys() ) {
603 foreach my $value ( $seq->annotation->get_Annotations($key) ) {
604 $trunc->annotation->add_Annotation( $key, $value );
611 $_ = $self->_coord_adjust( $_, 1 - $start, $end + 1 - $start )
612 if $_->overlaps($truncrange)
613 } $seq->get_SeqFeatures
616 $trunc->add_SeqFeature($_);
624 Function: cuts a segment out of a sequence and re-joins the left and right fragments
625 (like splicing or digesting and re-ligating a molecule).
626 Positions (and types) of sequence features are adjusted accordingly:
627 Features that span the cut site are converted to split featuress to
628 indicate the disruption.
629 Features that extend into the cut-out fragment are truncated.
630 A new molecule is created and returned.
631 Usage : my $cutseq = Bio::SeqUtils::PbrTools->cut( $seq, 1000, 1100 );
632 Args : a Bio::PrimarySeqI compliant object to cut,
633 first nt of the segment to be deleted
634 last nt of the segment to be deleted
637 clone_obj: if true, clone the input sequence object rather
638 than calling "new" on the object's class
640 Returns : a new Bio::Seq object
646 my ( $seq, $left, $right, $opts_ref ) = @_;
647 $self->throw( 'was expecting 3-4 paramters but got ' . @_ )
648 unless @_ == 3 || @_ == 4;
651 'Object of class [' . ref($seq) . '] should be a Bio::PrimarySeqI ' )
652 unless blessed
($seq) && $seq->isa('Bio::PrimarySeqI');
654 $self->throw("Left coordinate ($left) must be >= 1") if $left < 1;
655 if ( $right > $seq->length ) {
656 $self->throw( "Right coordinate ($right) must be less than "
657 . 'sequence length ('
662 # piece together the sequence string of the remaining fragments
663 my $left_seq = $seq->subseq( 1, $left - 1 );
664 my $right_seq = $seq->subseq( $right + 1, $seq->length );
665 if ( !$left_seq || !$right_seq ) {
667 'could not assemble sequences. At least one of the fragments is empty'
670 my $seq_str = $left_seq . $right_seq;
672 # create the new seq object with the same class as the recipient
673 # or (if requested), make a clone of the existing object. In the
674 # latter case we need to remove sequence features from the cloned
675 # object instead of copying them
677 if ( $opts_ref->{clone_obj
} ) {
678 $product = $self->_new_seq_via_clone( $seq, $seq_str );
681 $product = $self->_new_seq_from_old( $seq, { seq
=> $seq_str } );
684 # move sequence features
685 if ( $product->isa('Bio::SeqI') && $seq->isa('Bio::SeqI') ) {
686 for my $feat ( $seq->get_SeqFeatures ) {
687 my $adjfeat = $self->_coord_adjust_deletion( $feat, $left, $right );
688 $product->add_SeqFeature($adjfeat) if $adjfeat;
692 # add a feature to annotatde the deletion
693 my $deletion_feature = Bio
::SeqFeature
::Generic
->new(
694 -primary_tag
=> 'misc_feature',
695 -tag
=> { note
=> 'deletion of ' . ( $right - $left + 1 ) . 'bp' },
696 -location
=> Bio
::Location
::Simple
->new(
699 -location_type
=> 'IN-BETWEEN'
702 $product->add_SeqFeature($deletion_feature);
709 Function: inserts a fragment (a Bio::Seq object) into a nother sequence object
710 adding all annotations and features to the final product.
711 Features that span the insertion site are converted to split
712 features to indicate the disruption.
713 A new feature is added to indicate the inserted fragment itself.
714 A new molecule is created and returned.
715 Usage : # insert a fragment after pos 1000
716 my $insert_seq = Bio::SeqUtils::PbrTools->insert(
721 Args : recipient sequence (a Bio::PrimarySeqI compliant object),
722 a fragmetn to insert (Bio::PrimarySeqI compliant object),
723 insertion position (fragment is inserted to the right of this pos)
724 pos=0 will prepend the fragment to the recipient
727 clone_obj: if true, clone the input sequence object rather
728 than calling "new" on the object's class
729 Returns : a new Bio::Seq object
735 my ( $recipient, $fragment, $insert_pos, $opts_ref ) = @_;
736 $self->throw( 'was expecting 3-4 paramters but got ' . @_ )
737 unless @_ == 3 || @_ == 4;
739 $self->throw( 'Recipient object of class ['
741 . '] should be a Bio::PrimarySeqI ' )
742 unless blessed
($recipient) && $recipient->isa('Bio::PrimarySeqI');
744 $self->throw( 'Fragment object of class ['
746 . '] should be a Bio::PrimarySeqI ' )
747 unless blessed
($fragment) && $fragment->isa('Bio::PrimarySeqI');
749 $self->throw( 'Can\'t concatenate sequences with different alphabets: '
751 . $recipient->alphabet
752 . ' and fragment is '
753 . $fragment->alphabet )
754 unless $recipient->alphabet eq $fragment->alphabet;
756 if ( $insert_pos < 0 or $insert_pos > $recipient->length ) {
757 $self->throw( "insertion position ($insert_pos) must be between 0 and "
758 . 'recipient sequence length ('
763 if ( $fragment->can('is_circular') && $fragment->is_circular ) {
764 $self->throw('Can\'t insert circular fragments');
767 if ( !$recipient->seq ) {
769 'Recipient has no sequence, can not insert into this object');
772 # construct raw sequence of the new molecule
775 ?
$recipient->subseq( 1, $insert_pos )
777 my $mid_seq = $fragment->seq;
779 $insert_pos < $recipient->length
780 ?
$recipient->subseq( $insert_pos + 1, $recipient->length )
782 my $seq_str = $left_seq . $mid_seq . $right_seq;
784 # create the new seq object with the same class as the recipient
785 # or (if requested), make a clone of the existing object. In the
786 # latter case we need to remove sequence features from the cloned
787 # object instead of copying them
789 if ( $opts_ref->{clone_obj
} ) {
790 $product = $self->_new_seq_via_clone( $recipient, $seq_str );
794 push @desc, 'Inserted fragment: ' . $fragment->desc
795 if defined $fragment->desc;
796 push @desc, 'Recipient: ' . $recipient->desc
797 if defined $recipient->desc;
798 $product = $self->_new_seq_from_old(
802 display_id
=> $recipient->display_id,
803 accession_number
=> $recipient->accession_number || '',
804 alphabet
=> $recipient->alphabet,
805 desc
=> join( '; ', @desc ),
806 verbose
=> $recipient->verbose || $fragment->verbose,
807 is_circular
=> $recipient->is_circular || 0,
813 # move annotations from fragment to product
814 if ( $product->isa("Bio::AnnotatableI")
815 && $fragment->isa("Bio::AnnotatableI") )
817 foreach my $key ( $fragment->annotation->get_all_annotation_keys ) {
818 foreach my $value ( $fragment->annotation->get_Annotations($key) ) {
819 $product->annotation->add_Annotation( $key, $value );
824 # move sequence features to product with adjusted coordinates
825 if ( $product->isa('Bio::SeqI') ) {
827 # for the fragment, just shift the features to new position
828 if ( $fragment->isa('Bio::SeqI') ) {
829 for my $feat ( $fragment->get_SeqFeatures ) {
830 my $adjfeat = $self->_coord_adjust( $feat, $insert_pos );
831 $product->add_SeqFeature($adjfeat) if $adjfeat;
835 # for recipient, shift and modify features according to insertion.
836 if ( $recipient->isa('Bio::SeqI') ) {
837 for my $feat ( $recipient->get_SeqFeatures ) {
839 $self->_coord_adjust_insertion( $feat, $insert_pos,
841 $product->add_SeqFeature($adjfeat) if $adjfeat;
846 # add a feature to annotate the insertion
847 my $insertion_feature = Bio
::SeqFeature
::Generic
->new(
848 -start
=> $insert_pos + 1,
849 -end
=> $insert_pos + $fragment->length,
850 -primary_tag
=> 'misc_feature',
851 -tag
=> { note
=> 'inserted fragment' },
853 $product->add_SeqFeature($insertion_feature);
861 function: pastes a fragment (which can also have features) into a recipient
862 sequence between two "cut" sites, preserving features and adjusting
864 This is a shortcut for deleting a segment from a sequence object followed
865 by an insertion of a fragmnet and is supposed to be used to simulate
866 in-vitro cloning where a recipient (a vector) is digested and a fragment
867 is then ligated into the recipient molecule. The fragment can be flipped
868 (reverse-complemented with all its features).
869 A new sequence object is returned to represent the product of the reaction.
870 Features and annotations are transferred from the insert to the product
871 and features on the recipient are adjusted according to the methods
872 L</"delete"> amd L</"insert">:
873 Features spanning the insertion site will be split up into two sub-locations.
874 (Sub-)features in the deleted region are themselves deleted.
875 (Sub-)features that extend into the deleted region are truncated.
876 The class of the product object depends on the class of the recipient (vector)
877 sequence object. if it is not possible to instantiate a new
878 object of that class, a Bio::Primaryseq object is created instead.
879 usage : # insert the flipped fragment between positions 1000 and 1100 of the
880 # vector, i.e. everything between these two positions is deleted and
881 # replaced by the fragment
882 my $new_molecule = Bio::Sequtils::Pbrtools->ligate(
883 -recipient => $vector,
884 -fragment => $fragment,
890 args : recipient: the recipient/vector molecule
891 fragment: molecule that is to be ligated into the vector
892 left: left cut site (fragment will be inserted to the right of
895 right: right cut site (fragment will be inseterted to the
896 left of this position). defaults to left+1
897 flip: boolean, if true, the fragment is reverse-complemented
898 (including features) before inserting
899 clone_obj: if true, clone the recipient object to create the product
900 instead of calling "new" on its class
901 returns : a new Bio::Seq object of the ligated fragments
907 my ( $recipient, $fragment, $left, $right, $flip, $clone_obj ) =
908 $self->_rearrange( [qw(RECIPIENT FRAGMENT LEFT RIGHT FLIP CLONE_OBJ )],
910 $self->throw("missing required parameter 'recipient'") unless $recipient;
911 $self->throw("missing required parameter 'fragment'") unless $fragment;
912 $self->throw("missing required parameter 'left'") unless defined $left;
914 $right ||= $left + 1;
917 "Fragment must be a Bio::PrimarySeqI compliant object but it is a "
919 unless blessed
($fragment) && $fragment->isa('Bio::PrimarySeqI');
921 $fragment = $self->revcom_with_features($fragment) if $flip;
924 $opts_ref->{clone_obj
} = 1 if $clone_obj;
926 # clone in two steps: first delete between the insertion sites,
927 # then insert the fragment. Step 1 is skipped if insert positions
928 # are adjacent (no deletion)
929 my ( $product1, $product2 );
931 if ( $right == $left + 1 ) {
932 $product1 = $recipient;
936 $self->delete( $recipient, $left + 1, $right - 1, $opts_ref );
939 $self->throw( "Failed in step 1 (cut recipient): " . $@
) if $@
;
940 eval { $product2 = $self->insert( $product1, $fragment, $left, $opts_ref ) };
941 $self->throw( "Failed in step 2 (insert fragment): " . $@
) if $@
;
947 =head2 _coord_adjust_deletion
949 title : _coord_adjust_deletion
950 function: recursively adjusts coordinates of seqfeatures on a molecule
951 where a segment has been deleted.
952 (sub)features that span the deletion site become split features.
953 (sub)features that extend into the deletion site are truncated.
954 A note is added to the feature to inform about the size and
955 position of the deletion.
956 usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_deletion(
961 args : a Bio::SeqFeatureI compliant object,
962 start (inclusive) position of the deletion site,
963 end (inclusive) position of the deletion site
964 returns : a Bio::SeqFeatureI compliant object
968 sub _coord_adjust_deletion
{
969 my ( $self, $feat, $left, $right ) = @_;
971 $self->throw( 'object [$feat] '
974 . '] should be a Bio::SeqFeatureI ' )
975 unless $feat->isa('Bio::SeqFeatureI');
976 $self->throw('missing coordinates: need a left and a right position')
977 unless defined $left && defined $right;
979 if ( $left > $right ) {
980 if ( $feat->can('is_circular') && $feat->is_circular ) {
982 # todo handle circular molecules
984 'can not yet handle deletions in circular molecules if deletion spans origin'
989 "left coordinate ($left) must be less than right ($right)"
990 . " but it was greater" );
993 my $deletion = Bio
::Location
::Simple
->new(
997 my $del_length = $right - $left + 1;
1000 for my $subfeat ( $feat->get_SeqFeatures ) {
1002 $self->_coord_adjust_deletion( $subfeat, $left, $right );
1003 push @adjsubfeat, $adjsubfeat if $adjsubfeat;
1008 for ( $feat->location->each_Location ) {
1009 next if $deletion->contains($_); # this location will be deleted;
1010 my $strand = $_->strand;
1011 my $type = $_->location_type;
1012 my $start = $_->start;
1013 my $start_type = $_->can('start_pos_type') ?
$_->start_pos_type : undef;
1015 my $end_type = $_->can('end_pos_type') ?
$_->end_pos_type : undef;
1017 if ( $start < $deletion->start && $end > $deletion->end )
1018 { # split the feature
1020 [ $start, ( $deletion->start - 1 ), $start_type, $end_type ],
1022 ( $deletion->start ), $end - $del_length,
1023 $start_type, $end_type
1028 . 'bp internal deletion between pos '
1029 . ( $deletion->start - 1 ) . ' and '
1032 elsif ( $_->start < $deletion->start && $_->end >= $deletion->start )
1033 { # truncate feature end
1035 ( [ $start, ( $deletion->start - 1 ), $start_type, $end_type ] );
1037 ( $end - $deletion->start + 1 ) . 'bp deleted from feature ';
1038 if ( $feat->strand ) {
1039 $note .= $feat->strand == 1 ?
"3' " : "5' ";
1043 elsif ( $_->start <= $deletion->end && $_->end > $deletion->end )
1044 { # truncate feature start and shift end
1047 ( $deletion->start ), $end - $del_length,
1048 $start_type, $end_type
1052 ( $deletion->end - $start + 1 ) . 'bp deleted from feature ';
1053 if ( $feat->strand ) {
1054 $note .= $feat->strand == 1 ?
"5' end" : "3' end";
1060 elsif ( $start >= $deletion->end ) { # just shift entire location
1063 $start - $del_length, $end - $del_length,
1064 $start_type, $end_type
1068 else { # not affected by deletion
1069 @newcoords = ( [ $start, $end, $start_type, $end_type ] );
1072 # if we have no coordinates, we return nothing
1073 # the feature is deleted
1074 return unless @newcoords;
1077 $self->_location_objects_from_coordinate_list( \
@newcoords, $strand,
1079 push @loc, $self->_single_loc_object_from_collection(@subloc);
1082 # create new feature based on original one and move annotation across
1084 Bio
::SeqFeature
::Generic
->new( -primary
=> $feat->primary_tag );
1085 foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
1086 foreach my $value ( $feat->annotation->get_Annotations($key) ) {
1087 $newfeat->annotation->add_Annotation( $key, $value );
1090 foreach my $key ( $feat->get_all_tags() ) {
1091 $newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
1094 # If we have a note about the deleted bases, add it
1096 $newfeat->add_tag_value( 'note', $note );
1099 # set modified location(s) for the new feature and
1100 # add its subfeatures if any
1101 my $loc = $self->_single_loc_object_from_collection(@loc);
1102 $loc ?
$newfeat->location($loc) : return;
1103 $newfeat->add_SeqFeature($_) for @adjsubfeat;
1109 =head2 _coord_adjust_insertion
1111 title : _coord_adjust_insertion
1112 function: recursively adjusts coordinates of seqfeatures on a molecule
1113 where another sequence has been inserted.
1114 (sub)features that span the insertion site become split features
1115 and a note is added about the size and positin of the insertion.
1116 Features with an IN-BETWEEN location at the insertion site
1117 are lost (such features can only exist between adjacent bases)
1118 usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_insertion(
1123 args : a Bio::SeqFeatureI compliant object,
1124 insertion position (insert to the right of this position)
1125 length of inserted fragment
1126 returns : a Bio::SeqFeatureI compliant object
1130 sub _coord_adjust_insertion
{
1131 my ( $self, $feat, $insert_pos, $insert_len ) = @_;
1133 $self->throw( 'object [$feat] '
1136 . '] should be a Bio::SeqFeatureI ' )
1137 unless $feat->isa('Bio::SeqFeatureI');
1138 $self->throw('missing insert position') unless defined $insert_pos;
1139 $self->throw('missing insert length') unless defined $insert_len;
1142 for my $subfeat ( $feat->get_SeqFeatures ) {
1144 $self->_coord_adjust_insertion( $subfeat, $insert_pos, $insert_len );
1149 for ( $feat->location->each_Location ) {
1151 # loose IN-BETWEEN features at the insertion site
1153 if ( $_->location_type eq 'IN-BETWEEN' && $_->start == $insert_pos );
1154 my $strand = $_->strand;
1155 my $type = $_->location_type;
1156 my $start = $_->start;
1157 my $start_type = $_->can('start_pos_type') ?
$_->start_pos_type : undef;
1159 my $end_type = $_->can('end_pos_type') ?
$_->end_pos_type : undef;
1161 if ( $start <= $insert_pos && $end > $insert_pos ) { # split the feature
1163 [ $start, $insert_pos, $start_type, $end_type ],
1165 ( $insert_pos + 1 + $insert_len ), $end + $insert_len,
1166 $start_type, $end_type
1171 . 'bp internal insertion between pos '
1172 . $insert_pos . ' and '
1173 . ( $insert_pos + $insert_len + 1 );
1176 elsif ( $start > $insert_pos ) { # just shift entire location
1179 $start + $insert_len, $end + $insert_len,
1180 $start_type, $end_type
1184 else { # not affected
1185 @newcoords = ( [ $start, $end, $start_type, $end_type ] );
1188 # if we have deleted all coordinates, return nothing
1189 # (possible if all locations are IN-BETWEEN)
1190 return unless @newcoords;
1193 $self->_location_objects_from_coordinate_list( \
@newcoords, $strand,
1196 # put together final location which could be a split now
1197 push @loc, $self->_single_loc_object_from_collection(@subloc);
1200 # create new feature based on original one and move annotation across
1202 Bio
::SeqFeature
::Generic
->new( -primary
=> $feat->primary_tag );
1203 foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
1204 foreach my $value ( $feat->annotation->get_Annotations($key) ) {
1205 $newfeat->annotation->add_Annotation( $key, $value );
1208 foreach my $key ( $feat->get_all_tags() ) {
1209 $newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
1212 # If we have a note about the inserted bases, add it
1214 $newfeat->add_tag_value( 'note', $note );
1217 # set modified location(s) for the new feature and
1218 # add its subfeatures if any
1219 my $loc = $self->_single_loc_object_from_collection(@loc);
1220 $loc ?
$newfeat->location($loc) : return;
1221 $newfeat->add_SeqFeature($_) for @adjsubfeat;
1227 =head2 _single_loc_object_from_collection
1229 Title : _single_loc_object_from_collection
1230 Function: takes an array of location objects. Returns either a split
1231 location object if there are more than one locations in the
1232 array or returns the single location if there is only one
1233 Usage : my $loc = _single_loc_object_from_collection( @sublocs );
1234 Args : array of Bio::Location objects
1235 Returns : a single Bio:;Location object containing all locations
1239 sub _single_loc_object_from_collection
{
1240 my ( $self, @locs ) = @_;
1243 $loc = Bio
::Location
::Split
->new;
1244 $loc->add_sub_Location(@locs);
1246 elsif ( @locs == 1 ) {
1250 } # _single_loc_object_from_collection
1252 =head2 _location_objects_from_coordinate_list
1254 Title : _location_objects_from_coordinate_list
1255 Function: takes an array-ref of start/end coordinates, a strand and a
1256 type and returns a list of Bio::Location objects (Fuzzy by
1257 default, Simple in case of in-between coordinates).
1258 If location type is not "IN-BETWEEN", individual types may be
1259 passed in for start and end location as per Bio::Location::Fuzzy
1261 Usage : my @loc_objs = $self->_location_objects_from_coordinate_list(
1266 Args : array-ref of array-refs each containing:
1267 start, end [, start-type, end-type]
1268 where types are optional. If given, must be
1269 a one of ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
1270 strand (all locations must be on same strand)
1271 location-type (EXACT, IN-BETWEEN etc)
1272 Returns : list of Bio::Location objects
1276 sub _location_objects_from_coordinate_list
{
1278 my ( $coords_ref, $strand, $type ) = @_;
1279 $self->throw( 'expected 3 parameters but got ' . @_ ) unless @_ == 3;
1280 $self->throw('first argument must be an ARRAY reference#')
1281 unless ref($coords_ref) eq 'ARRAY';
1284 foreach my $coords_set (@
$coords_ref) {
1285 my ( $start, $end, $start_type, $end_type ) = @
$coords_set;
1287 # taken from Bio::SeqUtils::_coord_adjust
1288 if ( $type ne 'IN-BETWEEN' ) {
1289 my $loc = Bio
::Location
::Fuzzy
->new(
1293 -location_type
=> $type
1295 $loc->start_pos_type($start_type) if $start_type;
1296 $loc->end_pos_type($end_type) if $end_type;
1301 Bio
::Location
::Simple
->new(
1305 -location_type
=> $type
1310 } # _location_objects_from_coordinate_list
1312 =head2 _new_seq_via_clone
1314 Title : _new_seq_via_clone
1315 Function: clone a sequence object using Bio::Root::Root::clone and set the new sequence string
1316 sequence features are removed.
1317 Usage : my $new_seq = $self->_new_seq_via_clone( $seq_obj, $seq_str );
1318 Args : original seq object [, new sequence string]
1319 Returns : a clone of the original sequence object, optionally with new sequence string
1323 sub _new_seq_via_clone
{
1324 my ( $self, $in_seq_obj, $seq_str ) = @_;
1325 my $out_seq_obj = $in_seq_obj->clone;
1326 $out_seq_obj->remove_SeqFeatures if $out_seq_obj->can('remove_SeqFeatures');
1327 if ( blessed
$out_seq_obj->seq
1328 && $out_seq_obj->seq->isa('Bio::PrimarySeq') )
1330 $out_seq_obj->seq->seq($seq_str);
1333 $out_seq_obj->seq($seq_str);
1335 return $out_seq_obj;
1337 } # _new_seq_via_clone
1339 =head2 _new_seq_from_old
1341 Title : _new_seq_from_old
1342 Function: creates a new sequence obejct, if possible of the same class as the old and adds
1343 attributes to it. Also copies annotation across to the new object.
1344 Usage : my $new_seq = $self->_new_seq_from_old( $seq_obj, { seq => $seq_str, display_id => 'some_ID'});
1345 Args : old sequence object
1346 hashref of attributes for the new sequence (sequence string etc.)
1347 Returns : a new Bio::Seq object
1351 sub _new_seq_from_old
{
1352 my ( $self, $in_seq_obj, $attr ) = @_;
1353 $self->throw('attributes must be a hashref')
1354 if $attr && ref($attr) ne 'HASH';
1357 if ( $in_seq_obj->can_call_new ) {
1358 $seqclass = ref($in_seq_obj);
1361 $seqclass = 'Bio::Primaryseq';
1362 $self->_attempt_to_load_seq;
1365 my $out_seq_obj = $seqclass->new(
1366 -seq
=> $attr->{seq
} || $in_seq_obj->seq,
1367 -display_id
=> $attr->{display_id
} || $in_seq_obj->display_id,
1368 -accession_number
=> $attr->{accession_number
}
1369 || $in_seq_obj->accession_number
1371 -alphabet
=> $in_seq_obj->alphabet,
1372 -desc
=> $attr->{desc
} || $in_seq_obj->desc,
1373 -verbose
=> $attr->{verbose
} || $in_seq_obj->verbose,
1374 -is_circular
=> $attr->{is_circular
} || $in_seq_obj->is_circular || 0,
1377 # move the annotation across to the product
1378 if ( $out_seq_obj->isa("Bio::AnnotatableI")
1379 && $in_seq_obj->isa("Bio::AnnotatableI") )
1381 foreach my $key ( $in_seq_obj->annotation->get_all_annotation_keys ) {
1382 foreach my $value ( $in_seq_obj->annotation->get_Annotations($key) )
1384 $out_seq_obj->annotation->add_Annotation( $key, $value );
1388 return $out_seq_obj;
1389 } # _new_seq_from_old
1391 =head2 _coord_adjust
1393 Title : _coord_adjust
1394 Usage : my $newfeat=Bio::SeqUtils->_coord_adjust($feature, 100, $seq->length);
1395 Function: Recursive subroutine to adjust the coordinates of a feature
1396 and all its subfeatures. If a sequence length is specified, then
1397 any adjusted features that have locations beyond the boundaries
1398 of the sequence are converted to Bio::Location::Fuzzy objects.
1400 Returns : A Bio::SeqFeatureI compliant object.
1401 Args : A Bio::SeqFeatureI compliant object,
1402 the number of bases to add to the coordinates
1403 (optional) the length of the parent sequence
1409 my ( $self, $feat, $add, $length ) = @_;
1410 $self->throw( 'Object [$feat] '
1413 . '] should be a Bio::SeqFeatureI ' )
1414 unless $feat->isa('Bio::SeqFeatureI');
1416 for my $subfeat ( $feat->get_SeqFeatures ) {
1417 push @adjsubfeat, $self->_coord_adjust( $subfeat, $add, $length );
1420 for ( $feat->location->each_Location ) {
1421 my @coords = ( $_->start, $_->end );
1422 my $strand = $_->strand;
1423 my $type = $_->location_type;
1425 $self->throw("can not handle negative feature positions (got: $_)")
1427 if ( $add + $_ < 1 ) {
1430 elsif ( defined $length and $add + $_ > $length ) {
1438 $self->_location_objects_from_coordinate_list( [ \
@coords ],
1442 Bio
::SeqFeature
::Generic
->new( -primary
=> $feat->primary_tag );
1443 foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
1444 foreach my $value ( $feat->annotation->get_Annotations($key) ) {
1445 $newfeat->annotation->add_Annotation( $key, $value );
1448 foreach my $key ( $feat->get_all_tags() ) {
1449 $newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
1451 my $loc = $self->_single_loc_object_from_collection(@loc);
1452 $loc ?
$newfeat->location($loc) : return;
1453 $newfeat->add_SeqFeature($_) for @adjsubfeat;
1457 =head2 revcom_with_features
1459 Title : revcom_with_features
1460 Usage : $revcom=Bio::SeqUtils->revcom_with_features($seq);
1461 Function: Like Bio::Seq::revcom, but keeps features (adjusting coordinates
1463 Returns : A new sequence object
1464 Args : A sequence object
1469 sub revcom_with_features
{
1470 my ( $self, $seq ) = @_;
1471 $self->throw( 'Object [$seq] '
1474 . '] should be a Bio::SeqI ' )
1475 unless $seq->isa('Bio::SeqI');
1476 my $revcom = $seq->revcom;
1478 # make sure that there is no annotation or features in $trunc
1479 # (->revcom() now clone objects except for Bio::Seq::LargePrimarySeq)
1480 $revcom->annotation->remove_Annotations;
1481 $revcom->remove_SeqFeatures;
1484 foreach my $key ( $seq->annotation->get_all_annotation_keys() ) {
1485 foreach my $value ( $seq->annotation->get_Annotations($key) ) {
1486 $revcom->annotation->add_Annotation( $key, $value );
1491 for ( map { $self->_feature_revcom( $_, $seq->length ) }
1492 reverse $seq->get_SeqFeatures )
1494 $revcom->add_SeqFeature($_);
1499 =head2 _feature_revcom
1501 Title : _feature_revcom
1502 Usage : my $newfeat=Bio::SeqUtils->_feature_revcom($feature, $seq->length);
1503 Function: Recursive subroutine to reverse complement a feature and
1504 all its subfeatures. The length of the parent sequence must be
1507 Returns : A Bio::SeqFeatureI compliant object.
1508 Args : A Bio::SeqFeatureI compliant object,
1509 the length of the parent sequence
1514 sub _feature_revcom
{
1515 my ( $self, $feat, $length ) = @_;
1516 $self->throw( 'Object [$feat] '
1519 . '] should be a Bio::SeqFeatureI ' )
1520 unless $feat->isa('Bio::SeqFeatureI');
1522 for my $subfeat ( $feat->get_SeqFeatures ) {
1523 push @adjsubfeat, $self->_feature_revcom( $subfeat, $length );
1526 for ( $feat->location->each_Location ) {
1527 my $type = $_->location_type;
1529 if ( $_->strand == -1 ) { $strand = 1 }
1530 elsif ( $_->strand == 1 ) { $strand = -1 }
1531 else { $strand = $_->strand }
1533 $self->_coord_revcom( $_->start, $_->start_pos_type, $length );
1535 $self->_coord_revcom( $_->end, $_->end_pos_type, $length );
1536 my $newstart_type = $_->end_pos_type;
1537 $newstart_type = 'BEFORE' if $_->end_pos_type eq 'AFTER';
1538 $newstart_type = 'AFTER' if $_->end_pos_type eq 'BEFORE';
1539 my $newend_type = $_->start_pos_type;
1540 $newend_type = 'BEFORE' if $_->start_pos_type eq 'AFTER';
1541 $newend_type = 'AFTER' if $_->start_pos_type eq 'BEFORE';
1543 $self->_location_objects_from_coordinate_list(
1544 [ [ $newstart, $newend, $newstart_type, $newend_type ] ],
1548 Bio
::SeqFeature
::Generic
->new( -primary
=> $feat->primary_tag );
1549 foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
1550 foreach my $value ( $feat->annotation->get_Annotations($key) ) {
1551 $newfeat->annotation->add_Annotation( $key, $value );
1554 foreach my $key ( $feat->get_all_tags() ) {
1555 $newfeat->add_tag_value( $key, $feat->get_tag_values($key) );
1558 my $loc = $self->_single_loc_object_from_collection(@loc);
1559 $loc ?
$newfeat->location($loc) : return;
1561 $newfeat->add_SeqFeature($_) for @adjsubfeat;
1566 my ( $self, $coord, $type, $length ) = @_;
1567 if ( $type eq 'BETWEEN' or $type eq 'WITHIN' ) {
1568 $coord =~ s/(\d+)(\D*)(\d+)/$length+1-$3.$2.$length+1-$1/ge;
1571 $coord =~ s/(\d+)/$length+1-$1/ge;
1572 $coord =~ tr/<>/></;
1573 $coord = '>' . $coord
1574 if $type eq 'BEFORE' and substr( $coord, 0, 1 ) ne '>';
1575 $coord = '<' . $coord
1576 if $type eq 'AFTER' and substr( $coord, 0, 1 ) ne '<';
1584 Usage : my $newseq = Bio::SeqUtils->
1585 evolve($seq, $similarity, $transition_transversion_rate);
1586 Function: Mutates the sequence by point mutations until the similarity of
1587 the new sequence has decreased to the required level.
1588 Transition/transversion rate is adjustable.
1589 Returns : A new Bio::PrimarySeq object
1590 Args : sequence object
1591 percentage similarity (e.g. 80)
1592 tr/tv rate, optional, defaults to 1 (= 1:1)
1594 Set the verbosity of the Bio::SeqUtils object to positive integer to
1595 see the mutations as they happen.
1597 This method works only on nucleotide sequences. It prints a warning if
1598 you set the target similarity to be less than 25%.
1600 Transition/transversion ratio is an observed attribute of an sequence
1601 comparison. We are dealing here with the transition/transversion rate
1602 that we set for our model of sequence evolution.
1607 my ( $self, $seq, $sim, $rate ) = @_;
1610 $self->throw( 'Object [$seq] '
1613 . '] should be a Bio::PrimarySeqI ' )
1614 unless $seq->isa('Bio::PrimarySeqI');
1617 "[$sim] " . ' should be a positive integer or float under 100' )
1618 unless $sim =~ /^[+\d.]+$/ and $sim <= 100;
1621 "Nucleotide sequences are 25% similar by chance.
1622 Do you really want to set similarity to [$sim]%?\n"
1625 $self->throw('Only nucleotide sequences are supported')
1626 if $seq->alphabet eq 'protein';
1628 # arrays of possible changes have transitions as first items
1630 $changes{'a'} = [ 't', 'c', 'g' ];
1631 $changes{'t'} = [ 'a', 'c', 'g' ];
1632 $changes{'c'} = [ 'g', 'a', 't' ];
1633 $changes{'g'} = [ 'c', 'a', 't' ];
1635 # given the desired rate, find out where cut off points need to be
1636 # when random numbers are generated from 0 to 100
1637 # we are ignoring identical mutations (e.g. A->A) to speed things up
1638 my $bin_size = 100 / ( $rate + 2 );
1639 my $transition = 100 - ( 2 * $bin_size );
1640 my $first_transversion = $transition + $bin_size;
1642 # unify the look of sequence strings
1643 my $string = lc $seq->seq; # lower case
1645 s/u/t/; # simplyfy our life; modules should deal with the change anyway
1646 # store the original sequence string
1647 my $oristring = $string;
1648 my $length = $seq->length;
1650 # stop evolving if the limit has been reached
1651 until ( $self->_get_similarity( $oristring, $string ) <= $sim ) {
1653 # find the location in the string to change
1654 my $loc = int( rand $length ) + 1;
1656 # nucleotide to change
1657 my $oldnuc = substr $string, $loc - 1, 1;
1660 # nucleotide it is changed to
1661 my $choose = rand(100);
1662 if ( $choose < $transition ) {
1663 $newnuc = $changes{$oldnuc}[0];
1665 elsif ( $choose < $first_transversion ) {
1666 $newnuc = $changes{$oldnuc}[1];
1669 $newnuc = $changes{$oldnuc}[2];
1673 substr $string, $loc - 1, 1, $newnuc;
1675 $self->debug("$loc$oldnuc>$newnuc\n");
1678 return new Bio
::PrimarySeq
(
1679 -id
=> $seq->id . "-$sim",
1680 -description
=> $seq->description,
1685 sub _get_similarity
{
1686 my ( $self, $oriseq, $seq ) = @_;
1688 my $len = length($oriseq);
1691 for ( my $i = 0 ; $i < $len ; $i++ ) {
1692 $c++ if substr( $oriseq, $i, 1 ) eq substr( $seq, $i, 1 );
1694 return 100 * $c / $len;