Add Root back, plus some test and doc fixes
[bioperl-live.git] / Bio / Restriction / Enzyme.pm
blob7a18baf3b031384dad81294add64bc7601d93049
1 #------------------------------------------------------------------
3 # BioPerl module Bio::Restriction::Enzyme
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Rob Edwards <redwards@utmem.edu>
9 # You may distribute this module under the same terms as perl itself
10 #------------------------------------------------------------------
12 ## POD Documentation:
14 =head1 NAME
16 Bio::Restriction::Enzyme - A single restriction endonuclease
17 (cuts DNA at specific locations)
19 =head1 SYNOPSIS
21 # set up a single restriction enzyme. This contains lots of
22 # information about the enzyme that is generally parsed from a
23 # rebase file and can then be read back
25 use Bio::Restriction::Enzyme;
27 # define a new enzyme with the cut sequence
28 my $re=Bio::Restriction::Enzyme->new
29 (-enzyme=>'EcoRI', -seq=>'G^AATTC');
31 # once the sequence has been defined a bunch of stuff is calculated
32 # for you:
34 #### PRECALCULATED
36 # find where the enzyme cuts after ...
37 my $ca=$re->cut;
39 # ... and where it cuts on the opposite strand
40 my $oca = $re->complementary_cut;
42 # get the cut sequence string back.
43 # Note that site will return the sequence with a caret
44 my $with_caret=$re->site; #returns 'G^AATTC';
46 # but it is also a Bio::PrimarySeq object ....
47 my $without_caret=$re->seq; # returns 'GAATTC';
48 # ... and so does string
49 $without_caret=$re->string; #returns 'GAATTC';
51 # what is the reverse complement of the cut site
52 my $rc=$re->revcom; # returns 'GAATTC';
54 # now the recognition length. There are two types:
55 # recognition_length() is the length of the sequence
56 # cutter() estimate of cut frequency
58 my $recog_length = $re->recognition_length; # returns 6
59 # also returns 6 in this case but would return
60 # 4 for GANNTC and 5 for RGATCY (BstX2I)!
61 $recog_length=$re->cutter;
63 # is the sequence a palindrome - the same forwards and backwards
64 my $pal= $re->palindromic; # this is a boolean
66 # is the sequence blunt (i.e. no overhang - the forward and reverse
67 # cuts are the same)
68 print "blunt\n" if $re->overhang eq 'blunt';
70 # Overhang can have three values: "5'", "3'", "blunt", and undef
71 # Direction is very important if you use Klenow!
72 my $oh=$re->overhang;
74 # what is the overhang sequence
75 my $ohseq=$re->overhang_seq; # will return 'AATT';
77 # is the sequence ambiguous - does it contain non-GATC bases?
78 my $ambig=$re->is_ambiguous; # this is boolean
80 print "Stuff about the enzyme\nCuts after: $ca\n",
81 "Complementary cut: $oca\nSite:\n\t$with_caret or\n",
82 "\t$without_caret\n";
83 print "Reverse of the sequence: $rc\nRecognition length: $recog_length\n",
84 "Is it palindromic? $pal\n";
85 print "The overhang is $oh with sequence $ohseq\n",
86 "And is it ambiguous? $ambig\n\n";
89 ### THINGS YOU CAN SET, and get from rich REBASE file
91 # get or set the isoschizomers (enzymes that recognize the same
92 # site)
93 $re->isoschizomers('PvuII', 'SmaI'); # not really true :)
94 print "Isoschizomers are ", join " ", $re->isoschizomers, "\n";
96 # get or set the methylation sites
97 $re->methylation_sites(2); # not really true :)
98 print "Methylated at ", join " ", keys %{$re->methylation_sites},"\n";
100 #Get or set the source microbe
101 $re->microbe('E. coli');
102 print "It came from ", $re->microbe, "\n";
104 # get or set the person who isolated it
105 $re->source("Rob"); # not really true :)
106 print $re->source, " sent it to us\n";
108 # get or set whether it is commercially available and the company
109 # that it can be bought at
110 $re->vendors('NEB'); # my favorite
111 print "Is it commercially available :";
112 print $re->vendors ? "Yes" : "No";
113 print " and it can be got from ", join " ",
114 $re->vendors, "\n";
116 # get or set a reference for this
117 $re->reference('Edwards et al. J. Bacteriology');
118 print "It was not published in ", $re->reference, "\n";
120 # get or set the enzyme name
121 $re->name('BamHI');
122 print "The name of EcoRI is not really ", $re->name, "\n";
125 =head1 DESCRIPTION
127 This module defines a single restriction endonuclease. You can use it
128 to make custom restriction enzymes, and it is used by
129 Bio::Restriction::IO to define enzymes in the New England Biolabs
130 REBASE collection.
132 Use Bio::Restriction::Analysis to figure out which enzymes are available
133 and where they cut your sequence.
136 =head1 RESTRICTION MODIFICATION SYSTEMS
138 At least three geneticaly and biochamically distinct restriction
139 modification systems exist. The cutting components of them are known
140 as restriction endonuleases. The three systems are known by roman
141 numerals: Type I, II, and III restriction enzymes.
143 REBASE format 'cutzymes'(#15) lists enzyme type in its last field. The
144 categories there do not always match the the following short
145 descriptions of the enzymes types. See
146 http://it.stlawu.edu/~tbudd/rmsyst.html for a better overview.
149 =head2 TypeI
151 Type I systems recognize a bipartite asymetrical sequence of 5-7 bp:
153 ---TGA*NnTGCT--- * = methylation sites
154 ---ACTNnA*CGA--- n = 6 for EcoK, n = 8 for EcoB
156 The cleavage site is roughly 1000 (400-7000) base pairs from the
157 recognition site.
159 =head2 TypeII
161 The simplest and most common (at least commercially).
163 Site recognition is via short palindromic base sequences that are 4-6
164 base pairs long. Cleavage is at the recognition site (but may
165 occasionally be just adjacent to the palindromic sequence, usually
166 within) and may produce blunt end termini or staggered, "sticky
167 end" termini.
169 =head2 TypeIII
171 The recognition site is a 5-7 bp asymmetrical sequence. Cleavage is
172 ATP dependent 24-26 base pairs downstream from the recognition site
173 and usually yields staggered cuts 2-4 bases apart.
176 =head1 COMMENTS
178 I am trying to make this backwards compatible with
179 Bio::Tools::RestrictionEnzyme. Undoubtedly some things will break,
180 but we can fix things as we progress.....!
182 I have added another comments section at the end of this POD that
183 discusses a couple of areas I know are broken (at the moment)
186 =head1 TO DO
188 =over 2
190 =item *
192 Convert vendors touse full names of companies instead of code
194 =item *
196 Add regular expression based matching to vendors
198 =item *
200 Move away from the archaic ^ notation for cut sites. Ideally
201 I'd totally like to remove this altogether, or add a method
202 that adds it in if someone really wants it. We should be
203 fixed on a sequence, number notation.
205 =back
207 =head1 FEEDBACK
209 =head2 Mailing Lists
211 User feedback is an integral part of the evolution of this and other
212 Bioperl modules. Send your comments and suggestions preferably to one
213 of the Bioperl mailing lists. Your participation is much appreciated.
215 bioperl-l@bioperl.org - General discussion
216 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
218 =head2 Support
220 Please direct usage questions or support issues to the mailing list:
222 I<bioperl-l@bioperl.org>
224 rather than to the module maintainer directly. Many experienced and
225 reponsive experts will be able look at the problem and quickly
226 address it. Please include a thorough description of the problem
227 with code and data examples if at all possible.
229 =head2 Reporting Bugs
231 Report bugs to the Bioperl bug tracking system to help us keep track
232 the bugs and their resolution. Bug reports can be submitted via the
233 web:
235 https://github.com/bioperl/bioperl-live/issues
237 =head1 AUTHOR
239 Rob Edwards, redwards@utmem.edu
241 =head1 CONTRIBUTORS
243 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
244 Peter Blaiklock, pblaiklo@restrictionmapper.org
245 Mark A. Jensen, maj-at-fortinbras-dot-us
247 =head1 COPYRIGHT
249 Copyright (c) 2003 Rob Edwards.
251 Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
252 Rights Reserved. This module is free software; you can redistribute
253 it and/or modify it under the same terms as Perl itself.
255 =head1 SEE ALSO
257 L<Bio::Restriction::Analysis>,
258 L<Bio::Restriction::EnzymeCollection>, L<Bio::Restriction::IO>
260 =head1 APPENDIX
262 Methods beginning with a leading underscore are considered private and
263 are intended for internal use by this module. They are not considered
264 part of the public interface and are described here for documentation
265 purposes only.
267 =cut
269 package Bio::Restriction::Enzyme;
270 use strict;
272 use Bio::PrimarySeq;
274 use Data::Dumper;
275 use Tie::RefHash;
276 use vars qw (%TYPE);
277 use base qw(Bio::Root::Root Bio::Restriction::EnzymeI);
279 BEGIN {
280 my %TYPE = (I => 1, II => 1, III => 1);
283 =head2 new
285 Title : new
286 Function
287 Function : Initializes the Enzyme object
288 Returns : The Restriction::Enzyme object
289 Argument : A standard definition can have several formats. For example:
290 $re->new(-enzyme='EcoRI', -seq->'GAATTC' -cut->'1')
291 Or, you can define the cut site in the sequence, for example
292 $re->new(-enzyme='EcoRI', -seq->'G^AATTC'), but you must use a caret
293 Or, a sequence can cut outside the recognition site, for example
294 $re->new(-enzyme='AbeI', -seq->'CCTCAGC' -cut->'-5/-2')
296 Other arguments:
297 -isoschizomers=>\@list a reference to an array of
298 known isoschizomers
299 -references=>$ref a reference to the enzyme
300 -source=>$source the source (person) of the enzyme
301 -commercial_availability=>@companies a list of companies
302 that supply the enzyme
303 -methylation_site=>\%sites a reference to hash that has
304 the position as the key and the type of methylation
305 as the value
306 -xln_sub => sub { ($self,$cut) = @_; ...; return $xln_cut },
307 a coderef to a routine that translates the input cut value
308 into Bio::Restriction::Enzyme coordinates
309 ( e.g., for withrefm format, this might be
310 -xln_sub => sub { length( shift()->string ) + shift } )
312 A Restriction::Enzyme object manages its recognition sequence as a
313 Bio::PrimarySeq object.
315 The minimum requirement is for a name and a sequence.
317 This will create the restriction enzyme object, and define several
318 things about the sequence, such as palindromic, size, etc.
320 =cut
322 # do all cut/comp cut setting within the constructor
323 # new args
325 sub new {
326 my($class, @args) = @_;
327 my $self = $class->SUPER::new(@args);
329 my ($name,$enzyme,$site,$seq,$precut, $postcut,$cut,$complementary_cut, $is_prototype, $prototype,
330 $isoschizomers, $meth, $microbe, $source, $vendors, $references, $neo, $recog, $xln_sub) =
331 $self->_rearrange([qw(
332 NAME
333 ENZYME
334 SITE
336 PRECUT
337 POSTCUT
339 COMPLEMENTARY_CUT
340 IS_PROTOTYPE
341 PROTOTYPE
342 ISOSCHIZOMERS
343 METHYLATION_SITES
344 MICROBE
345 SOURCE
346 VENDORS
347 REFERENCES
348 IS_NEOSCHIZOMER
349 RECOG
350 XLN_SUB
351 )], @args);
353 $self->throw('At the minimum, you must define a name and '.
354 'recognition site for the restriction enzyme')
355 unless (($name || $enzyme) && ($site || $recog || $seq));
357 $self->{_isoschizomers} = [];
358 $self->{_methylation_sites} = {};
359 $self->{_vendors} = [];
360 $self->{_references} = [];
362 # squelch warnings
363 $postcut ||='';
365 # enzyme name
366 $enzyme && $self->name($enzyme);
367 $name && $self->name($name);
369 # site
371 # note that the site() setter will automatically set
372 # cut(), complementary_cut(), if the cut site is indicated
373 # in $site with '^' /maj
375 # create the cut site if appropriate/this is a kludge due to
376 # the base.pm format in the new B:R order...
377 if ( $cut and $cut <= length $site) {
378 $site = substr($site, 0, $cut).'^'.substr($site, $cut);
381 if ($site) {
382 $self->site($site);
384 else {
385 $seq && $self->site($seq);
388 if ($recog) {
389 $self->recog($recog);
391 else {
392 $seq && $self->recog($seq);
393 $site && $self->recog($site);
395 # call revcom_site to initialize it and revcom_recog:
396 $self->revcom_site();
398 $recog = $self->string; # for length calculations below
400 if ($xln_sub) {
401 $self->warn("Translation subroutine is not a coderef; ignoring") unless
402 ref($xln_sub) eq 'CODE';
405 # cut coordinates
406 my ($pc_cut, $pc_comp_cut) = ( $postcut =~ /(-?\d+)\/(-?\d+)/ );
408 # cut definitions in constructor override any autoset in
409 # site()
410 # definitions in site conform to withrefm coords, translation
411 # happens here
413 if (defined $cut) {
414 $self->cut( $xln_sub ? $xln_sub->($self, $cut) : $cut );
416 elsif ( defined $pc_cut ) {
417 $self->cut( $xln_sub ? $xln_sub->($self, $pc_cut) : $pc_cut );
420 if (defined $complementary_cut) {
421 $self->complementary_cut($xln_sub ? $xln_sub->($self,$complementary_cut) : $complementary_cut);
423 elsif (defined $pc_comp_cut) {
424 $self->complementary_cut($xln_sub ? $xln_sub->($self,$pc_comp_cut) : $pc_comp_cut);
427 $is_prototype && $self->is_prototype($is_prototype);
428 $prototype && $self->prototype($prototype);
429 $isoschizomers && $self->isoschizomers($isoschizomers);
430 $meth && $self->methylation_sites($meth);
431 $microbe && $self->microbe($microbe);
432 $source && $self->source($source);
433 $vendors && $self->vendors($vendors);
434 $references && $self->references($references);
435 $neo && $self->is_neoschizomer($neo);
437 # create multicut enzymes here if $precut defined
438 if (defined $precut) {
439 bless $self, 'Bio::Restriction::Enzyme::MultiCut';
440 my ($pc_cut, $pc_comp_cut) = $precut =~ /(-?\d+)\/(-?\d+)/;
441 my $re2 = $self->clone;
442 $re2->cut($xln_sub ? $xln_sub->($self, -$pc_cut) : -$pc_cut);
443 $re2->complementary_cut($xln_sub ? $xln_sub->($self, -$pc_comp_cut) : -$pc_comp_cut);
444 $self->others($re2);
447 return $self;
450 =head1 Essential methods
452 =cut
454 =head2 name
456 Title : name
457 Usage : $re->name($newval)
458 Function : Gets/Sets the restriction enzyme name
459 Example : $re->name('EcoRI')
460 Returns : value of name
461 Args : newvalue (optional)
463 This will also clean up the name. I have added this because some
464 people get confused about restriction enzyme names. The name should
465 be One upper case letter, and two lower case letters (because it is
466 derived from the organism name, eg. EcoRI is from E. coli). After
467 that it is all confused, but the numbers should be roman numbers not
468 numbers, therefore we'll correct those. At least this will provide
469 some standard, I hope.
471 =cut
473 sub name{
474 my ($self, $name)=@_;
476 if ($name) { # correct and set the name
477 my $old_name = $name;
479 # remove spaces. Some people write HindIII as Hind III
480 $name =~ s/\s+//g;
481 # change TAILING ones to I's
482 if ($name =~ m/(1+)$/) {
483 my $i = 'I' x length($1);
484 $name =~ s/1+$/$i/;
487 # make the first letter upper case
488 $name =~ s/^(\w)/uc($1)/e;
490 unless ($name eq $old_name) {
491 # we have changed the name, so send a warning
492 $self->warn("The enzyme name $old_name was changed to $name");
494 $self->{'_name'} = $name;
496 return $self->{'_name'};
500 =head2 site
502 Title : site
503 Usage : $re->site();
504 Function : Gets/sets the recognition sequence for the enzyme.
505 Example : $seq_string = $re->site();
506 Returns : String containing recognition sequence indicating
507 : cleavage site as in 'G^AATTC'.
508 Argument : n/a
509 Throws : n/a
512 Side effect: the sequence is always converted to upper case.
514 The cut site can also be set by using methods L<cut|cut> and
515 L<complementary_cut|complementary_cut>.
517 This will pad out missing sequence with N's. For example the enzyme
518 Acc36I cuts at ACCTGC(4/8). This will be returned as ACCTGCNNNN^
520 Note that the common notation ACCTGC(4/8) means that the forward
521 strand cut is four nucleotides after the END of the recognition
522 site. The forward cut() in the coordinates used here in Acc36I
523 ACCTGC(4/8) is at 6+4 i.e. 10.
525 ** This is the main setable method for the recognition site.
527 =cut
529 sub site {
530 my ( $self, $site ) = @_;
532 if ($site) {
534 $self->throw("Unrecognized characters in site: [$site]")
535 if $site =~ /[^ATGCMRWSYKVHDBN\^]/i;
537 # we may have to redefine this if there is a ^ in the sequence
539 # first, check and see if we have a cut site in the sequence
540 # if so, find the position, and set the target sequence and cut site
542 $self->{'_site'} = $site;
544 my ( $first, $second ) = $site =~ /(.*)\^(.*)/;
545 $site = "$1$2" if defined $first;
546 $self->{'_site'} = $site;
548 # now set the recognition site as a new Bio::PrimarySeq object
549 # we need it before calling cut() and complementary_cut()
550 $self->{_seq} = Bio::PrimarySeq->new(
551 -id => $self->name,
552 -seq => $site,
553 -verbose => $self->verbose,
554 -alphabet => 'dna'
557 if ( defined $first ) {
558 $self->cut( length $first );
559 $self->complementary_cut( length $second );
560 $self->revcom_site();
563 return $self->{'_site'};
566 =head2 revcom_site
568 Title : revcom_site
569 Usage : $re->revcom_site();
570 Function : Gets/sets the complementary recognition sequence for the enzyme.
571 Example : $seq_string = $re->revcom_site();
572 Returns : String containing recognition sequence indicating
573 : cleavage site as in 'G^AATTC'.
574 Argument : none (sets on first call)
575 Throws : n/a
577 This is the same as site, except it returns the revcom site. For
578 palindromic enzymes these two are identical. For non-palindromic
579 enzymes they are not!
581 On set, this also handles setting the revcom_recog attribute.
583 See also L<site|site> above.
585 =cut
587 sub revcom_site {
588 my $self = shift;
589 # getter
590 return $self->{'_revcom_site'} unless !$self->{'_revcom_site'};
592 # setter
593 my $site = $self->{'_site'};
594 if ($self->is_palindromic) {
595 $self->{'_revcom_site'}=$self->{'_site'};
596 $self->revcom_recog( $self->string );
597 return $self->{'_revcom_site'};
600 $self->throw("Unrecognized characters in revcom site: [$site]")
601 if $site =~ /[^ATGCMRWSYKVHDBN\^]/i;
603 if ($site =~ /\^/) {
604 # first, check and see if we have a cut site indicated in the sequence
605 # if so, find the position, and set the target sequence and cut site
606 $site = $self->revcom;
607 $self->revcom_recog( $site );
608 my $c = length($site)-$self->cut;
609 $site = substr($site, 0, $c).'^'.substr($site,$c);
610 $self->{'_revcom_site'} = $site;
612 else {
613 my $revcom=$self->revcom;
614 $self->revcom_recog( $revcom );
615 # my $cc=$self->complementary_cut;
616 # my $hat=length($revcom)-$cc+1; # we need it on the other strand!
617 # if ($cc > length($revcom)) {
618 # my $pad= "N" x ($cc-length($revcom));
619 # $revcom = $pad. $revcom;
620 # $hat=length($revcom)-$cc+1;
622 # elsif ($cc < 0) {
623 # my $pad = "N" x -$cc;
624 # $revcom .= $pad;
625 # $hat=length($revcom);
627 # $revcom =~ s/(.{$hat})/$1\^/;
628 $self->{'_revcom_site'}=$revcom;
630 return $self->{'_revcom_site'};
633 =head2 cut
635 Title : cut
636 Usage : $num = $re->cut(1);
637 Function : Sets/gets an integer indicating the position of cleavage
638 relative to the 5' end of the recognition sequence in the
639 forward strand.
641 For type II enzymes, sets the symmetrically positioned
642 reverse strand cut site by calling complementary_cut().
644 Returns : Integer, 0 if not set
645 Argument : an integer for the forward strand cut site (optional)
647 Note that the common notation ACCTGC(4/8) means that the forward
648 strand cut is four nucleotides after the END of the recognition
649 site. The forwad cut in the coordinates used here in Acc36I
650 ACCTGC(4/8) is at 6+4 i.e. 10.
652 Note that REBASE uses notation where cuts within symmetic sites are
653 marked by '^' within the forward sequence but if the site is
654 asymmetric the parenthesis syntax is used where numbering ALWAYS
655 starts from last nucleotide in the forward strand. That's why AciI has
656 a site usually written as CCGC(-3/-1) actualy cuts in
658 C^C G C
659 G G C^G
661 In our notation, these locations are 1 and 3.
664 The cuts locations in the notation used are relative to the first
665 (non-N) nucleotide of the reported forward strand of the recognition
666 sequence. The following diagram numbers the phosphodiester bonds
667 (marked by + ) which can be cut by the restriction enzymes:
669 1 2 3 4 5 6 7 8 ...
670 N + N + N + N + N + G + A + C + T + G + G + N + N + N
671 ... -5 -4 -3 -2 -1
674 =cut
676 sub cut {
677 my ($self, $value) = @_;
678 if (defined $value) {
679 $self->throw("The cut position needs to be an integer [$value]")
680 unless $value =~ /[-+]?\d+/;
681 $self->{'_cut'} = $value;
683 # add the caret to the site attribute only if internal /maj
684 if ( ($self->{_site} !~ /\^/) && ($value <= length ($self->{_site}))) {
685 $self->{_site} =
686 substr($self->{_site}, 0, $value). '^'. substr($self->{_site}, $value);
689 # auto-set comp cut only if cut site is inside the recog site./maj
690 $self->complementary_cut(length ($self->seq->seq) - $value )
691 if (($self->{_site} =~ /\^/) && ($self->type eq 'II'));
694 # return undef if not defined yet, not 0 /maj
695 return $self->{'_cut'};
698 =head2 cuts_after
700 Title : cuts_after
701 Usage : Alias for cut()
703 =cut
705 sub cuts_after {
706 shift->cut(@_);
710 =head2 complementary_cut
712 Title : complementary_cut
713 Usage : $num = $re->complementary_cut('1');
714 Function : Sets/Gets an integer indicating the position of cleavage
715 : on the reverse strand of the restriction site.
716 Returns : Integer
717 Argument : An integer (optional)
718 Throws : Exception if argument is non-numeric.
720 This method determines the cut on the reverse strand of the sequence.
721 For most enzymes this will be within the sequence, and will be set
722 automatically based on the forward strand cut, but it need not be.
724 B<Note> that the returned location indicates the location AFTER the
725 first non-N site nucleotide in the FORWARD strand.
727 =cut
729 sub complementary_cut {
730 my ($self, $num)=@_;
732 if (defined $num) {
733 $self->throw("The cut position needs to be an integer [$num]")
734 unless $num =~ /[-+]?\d+/;
735 $self->{'_rc_cut'} = $num;
737 # return undef, not 0, if not yet defined /maj
738 return $self->{'_rc_cut'};
742 =head1 Read only (usually) recognition site descriptive methods
744 =cut
746 =head2 type
748 Title : type
749 Usage : $re->type();
750 Function : Get/set the restriction system type
751 Returns :
752 Argument : optional type: ('I'|II|III)
754 Restriction enzymes have been catezorized into three types. Some
755 REBASE formats give the type, but the following rules can be used to
756 classify the known enzymes:
758 =over 4
760 =item 1
762 Bipartite site (with 6-8 Ns in the middle and the cut site
763 is E<gt> 50 nt away) =E<gt> type I
765 =item 2
767 Site length E<lt> 3 =E<gt> type I
769 =item 3
771 5-6 asymmetric site and cuts E<gt>20 nt away =E<gt> type III
773 =item 4
775 All other =E<gt> type II
777 =back
779 There are some enzymes in REBASE which have bipartite recognition site
780 and cat far from the site but are still classified as type I. I've no
781 idea if this is really so.
783 =cut
785 sub type {
786 my ($self, $value) = @_;
788 if ($value) {
789 $self->throw("Not a valid value [$value], needs to one of : ".
790 join (', ', sort keys %TYPE) )
791 unless $TYPE{$value};
792 return $self->{'_type'} = $value;
795 # pre set
796 #return $self->{'_type'} if $self->{'_type'};
797 # bipartite
798 return $self->{'_type'} = 'I'
799 if $self->{'_seq'}->seq =~ /N*[^N]+N{6,8}[^N]/ and abs($self->cut) > 50 ;
800 # 3 nt site
801 return $self->{'_type'} = 'I'
802 if $self->{'_seq'}->length == 3;
803 # asymmetric and cuts > 20 nt
804 return $self->{'_type'} = 'III'
805 if (length $self->string == 5 or length $self->string == 6 ) and
806 not $self->palindromic and abs($self->cut) > 20;
807 return $self->{'_type'} = 'II';
810 =head2 seq
812 Title : seq
813 Usage : $re->seq();
814 Function : Get the Bio::PrimarySeq.pm object representing
815 : the recognition sequence
816 Returns : A Bio::PrimarySeq object representing the
817 enzyme recognition site
818 Argument : n/a
819 Throws : n/a
822 =cut
824 sub seq {
825 shift->{'_seq'};
828 =head2 string
830 Title : string
831 Usage : $re->string();
832 Function : Get a string representing the recognition sequence.
833 Returns : String. Does NOT contain a '^' representing the cut location
834 as returned by the site() method.
835 Argument : n/a
836 Throws : n/a
838 =cut
840 sub string {
841 shift->{'_seq'}->seq;
844 =head2 recog
846 Title : recog
847 Usage : $enz->recog($recognition_sequence)
848 Function: Gets/sets the pure recognition site. Sets as
849 regexp if appropriate.
850 As for string(), the cut indicating carets (^)
851 are expunged.
852 Example :
853 Returns : value of recog (a scalar)
854 Args : on set, new value (a scalar or undef, optional)
856 =cut
858 sub recog{
859 my $self = shift;
860 my $recog = shift;
861 return $self->{'recog'} unless $recog;
862 $recog =~ s/\^//g;
863 $recog = _expand($recog) if $recog =~ /[^ATGC]/;
864 return $self->{'recog'} = $recog;
867 =head2 revcom_recog
869 Title : revcom_recog
870 Usage : $enz->revcom_recog($recognition_sequence)
871 Function: Gets/sets the pure reverse-complemented recognition site.
872 Sets as regexp if appropriate.
873 As for string(), the cut indicating carets (^) are expunged.
874 Example :
875 Returns : value of recog (a scalar)
876 Args : on set, new value (a scalar or undef, optional)
878 =cut
880 sub revcom_recog{
881 my $self = shift;
882 my $recog = shift;
883 unless ($recog) {
884 $self->throw( "revcom recognition site not set; call \$enz->revcom_site to initialize" ) unless $self->{'revcom_recog'};
885 return $self->{'revcom_recog'};
887 $recog =~ s/\^//g;
888 $recog = _expand($recog) if $recog =~ /[^ATGC]/;
889 return $self->{'revcom_recog'} = $recog;
892 =head2 revcom
894 Title : revcom
895 Usage : $re->revcom();
896 Function : Get a string representing the reverse complement of
897 : the recognition sequence.
898 Returns : String
899 Argument : n/a
900 Throws : n/a
902 =cut
904 sub revcom {
905 shift->{'_seq'}->revcom->seq();
908 =head2 recognition_length
910 Title : recognition_length
911 Usage : $re->recognition_length();
912 Function : Get the length of the RECOGNITION sequence.
913 This is the total recognition sequence,
914 inluding the ambiguous codes.
915 Returns : An integer
916 Argument : Nothing
918 See also: L<non_ambiguous_length>
920 =cut
922 sub recognition_length {
923 my $self = shift;
924 return length($self->string);
927 =head2 cutter
929 Title : cutter
930 Usage : $re->cutter
931 Function : Returns the "cutter" value of the recognition site.
933 This is a value relative to site length and lack of
934 ambiguity codes. Hence: 'RCATGY' is a five (5) cutter site
935 and 'CCTNAGG' a six cutter
937 This measure correlates to the frequency of the enzyme
938 cuts much better than plain recognition site length.
940 Example : $re->cutter
941 Returns : integer or float number
942 Args : none
944 Why is this better than just stripping the ambiguos codes? Think about
945 it like this: You have a random sequence; all nucleotides are equally
946 probable. You have a four nucleotide re site. The probability of that
947 site finding a match is one out of 4^4 or 256, meaning that on average
948 a four cutter finds a match every 256 nucleotides. For a six cutter,
949 the average fragment length is 4^6 or 4096. In the case of ambiguity
950 codes the chances are finding the match are better: an R (A|T) has 1/2
951 chance of finding a match in a random sequence. Therefore, for RGCGCY
952 the probability is one out of (2*4*4*4*4*2) which exactly the same as
953 for a five cutter! Cutter, although it can have non-integer values
954 turns out to be a useful and simple measure.
956 From bug 2178: VHDB are ambiguity symbols that match three different
957 nucleotides, so they contribute less to the effective recognition sequence
958 length than e.g. Y which matches only two nucleotides. A symbol which matches n
959 of the 4 nucleotides has an effective length of 1 - log(n) / log(4).
961 =cut
963 sub cutter {
964 my ($self)=@_;
965 $_ = uc $self->string;
967 my $cutter = tr/[ATGC]//d;
968 my $count = tr/[MRWSYK]//d;
969 $cutter += $count/2;
970 $count = tr/[VHDB]//d;
971 $cutter += $count * (1 - log(3) / log(4));
972 return $cutter;
976 =head2 is_palindromic
978 Title : is_palindromic
979 Alias : palindromic
980 Usage : $re->is_palindromic();
981 Function : Determines if the recognition sequence is palindromic
982 : for the current restriction enzyme.
983 Returns : Boolean
984 Argument : n/a
985 Throws : n/a
987 A palindromic site (EcoRI):
989 5-GAATTC-3
990 3-CTTAAG-5
992 =cut
994 sub is_palindromic {
995 my $self = shift;
996 return $self->{_palindromic} if defined $self->{_palindromic};
997 if ($self->string eq $self->revcom) {
998 return $self->{_palindromic}=1;
1000 return $self->{_palindromic} = 0;
1003 sub palindromic { shift->is_palindromic(@_) }
1005 =head2 is_symmetric
1007 Title : is_symmetric
1008 Alias : symmetric
1009 Usage : $re->is_symmetric();
1010 Function : Determines if the enzyme is a symmetric cutter
1011 Returns : Boolean
1012 Argument : none
1014 A symmetric but non-palindromic site (HindI):
1016 5-C A C-3
1017 3-G T G-5
1020 =cut
1022 sub is_symmetric {
1023 no warnings qw( uninitialized );
1024 my $self = shift;
1026 return $self->{_symmetric} if defined $self->{_symmetric};
1027 if ($self->is_palindromic) {
1028 return $self->{_symmetric} = 1;
1030 if ($self->cut == length($self->string) - $self->complementary_cut) {
1031 return $self->{_symmetric}=1;
1033 return $self->{_symmetric} = 0;
1037 sub symmetric { shift->is_symmetric(@_) }
1039 =head2 overhang
1041 Title : overhang
1042 Usage : $re->overhang();
1043 Function : Determines the overhang of the restriction enzyme
1044 Returns : "5'", "3'", "blunt" of undef
1045 Argument : n/a
1046 Throws : n/a
1048 A blunt site in SmaI returns C<blunt>
1050 5' C C C^G G G 3'
1051 3' G G G^C C C 5'
1053 A 5' overhang in EcoRI returns C<5'>
1055 5' G^A A T T C 3'
1056 3' C T T A A^G 5'
1058 A 3' overhang in KpnI returns C<3'>
1060 5' G G T A C^C 3'
1061 3' C^C A T G G 5'
1063 =cut
1065 sub overhang {
1066 my $self = shift;
1067 unless ($self->{'_cut'} && $self->{'_rc_cut'}) {
1068 return "unknown";
1070 if ($self->{_cut} < $self->{_rc_cut}) {
1071 $self->{_overhang}="5'";
1072 } elsif ($self->{_cut} == $self->{_rc_cut}) {
1073 $self->{_overhang}="blunt";
1074 } elsif ($self->{_cut} > $self->{_rc_cut}) {
1075 $self->{_overhang}="3'";
1076 } else {
1077 $self->{_overhang}="unknown";
1079 return $self->{_overhang}
1082 =head2 overhang_seq
1084 Title : overhang_seq
1085 Usage : $re->overhang_seq();
1086 Function : Determines the overhang sequence of the restriction enzyme
1087 Returns : a Bio::LocatableSeq
1088 Argument : n/a
1089 Throws : n/a
1091 I do not think it is necessary to create a seq object of these. (Heikki)
1093 Note: returns empty string for blunt sequences and undef for ones that
1094 we don't know. Compare these:
1096 A blunt site in SmaI returns empty string
1098 5' C C C^G G G 3'
1099 3' G G G^C C C 5'
1101 A 5' overhang in EcoRI returns C<AATT>
1103 5' G^A A T T C 3'
1104 3' C T T A A^G 5'
1106 A 3' overhang in KpnI returns C<GTAC>
1108 5' G G T A C^C 3'
1109 3' C^C A T G G 5'
1111 Note that you need to use method L<overhang|overhang> to decide
1112 whether it is a 5' or 3' overhang!!!
1114 Note: The overhang stuff does not work if the site is asymmetric! Rethink!
1116 =cut
1118 sub overhang_seq {
1119 my $self = shift;
1121 # my $overhang->Bio::PrimarySeq(-id=>$self->name . '-overhang',
1122 # -verbose=>$self->verbose,
1123 # -alphabet=>'dna');
1125 return '' if $self->overhang eq 'blunt' ;
1127 unless ($self->{_cut} && $self->{_rc_cut}) {
1128 # lets just check that we really can't figure it out
1129 $self->cut;
1130 $self->complementary_cut;
1131 unless ($self->{_cut} && $self->{_rc_cut}) {
1132 return;
1136 # this is throwing an error for sequences outside the restriction
1137 # site (eg ^NNNNGATCNNNN^)
1138 # So if this is the case we need to fake these guys
1139 if (($self->{_cut}<0) ||
1140 ($self->{_rc_cut}<0) ||
1141 ($self->{_cut}>$self->seq->length) ||
1142 ($self->{_rc_cut}>$self->seq->length)) {
1143 my $tempseq=$self->site;
1144 my ($five, $three)=split /\^/, $tempseq;
1145 if ($self->{_cut} > $self->{_rc_cut}) {
1146 return substr($five, $self->{_rc_cut})
1147 } elsif ($self->{_cut} < $self->{_rc_cut}) {
1148 return substr($three, 0, $self->{_rc_cut})
1149 } else {
1150 return '';
1154 if ($self->{_cut} > $self->{_rc_cut}) {
1155 return $self->seq->subseq($self->{_rc_cut}+1,$self->{_cut});
1156 } elsif ($self->{_cut} < $self->{_rc_cut}) {
1157 return $self->seq->subseq($self->{_cut}+1, $self->{_rc_cut});
1158 } else {
1159 return '';
1165 =head2 compatible_ends
1167 Title : compatible_ends
1168 Usage : $re->compatible_ends($re2);
1169 Function : Determines if the two restriction enzyme cut sites
1170 have compatible ends.
1171 Returns : 0 if not, 1 if only one pair ends match, 2 if both ends.
1172 Argument : a Bio::Restriction::Enzyme
1173 Throws : unless the argument is a Bio::Resriction::Enzyme and
1174 if there are Ns in the ovarhangs
1176 In case of type II enzymes which which cut symmetrically, this
1177 function can be considered to return a boolean value.
1180 =cut
1182 sub compatible_ends {
1183 my ($self, $re) = @_;
1185 $self->throw("Need a Bio::Restriction::Enzyme as an argument, [$re]")
1186 unless $re->isa('Bio::Restriction::Enzyme');
1188 # $self->throw("Only type II enzymes work now")
1189 # unless $self->type eq 'II';
1191 $self->debug("N(s) in overhangs. Can not compare")
1192 if $self->overhang_seq =~ /N/ or $re->overhang_seq =~ /N/;
1194 return 2 if $self->overhang_seq eq $re->overhang_seq and
1195 $self->overhang eq $re->overhang;
1197 return 0;
1200 =head2 is_ambiguous
1202 Title : is_ambiguous
1203 Usage : $re->is_ambiguous();
1204 Function : Determines if the restriction enzyme contains ambiguous sequences
1205 Returns : Boolean
1206 Argument : n/a
1207 Throws : n/a
1209 =cut
1211 sub is_ambiguous {
1212 my $self = shift;
1213 return $self->string =~ m/[^AGCT]/ ? 1 : 0 ;
1216 =head2 Additional methods from Rebase
1218 =cut
1220 =head2 is_prototype
1222 Title : is_prototype
1223 Usage : $re->is_prototype
1224 Function : Get/Set method for finding out if this enzyme is a prototype
1225 Example : $re->is_prototype(1)
1226 Returns : Boolean
1227 Args : none
1229 Prototype enzymes are the most commonly available and usually first
1230 enzymes discoverd that have the same recognition site. Using only
1231 prototype enzymes in restriction analysis avoids redundancy and
1232 speeds things up.
1234 =cut
1236 sub is_prototype {
1237 my ($self, $value) = @_;
1238 if (defined $value) {
1239 return $self->{'_is_prototype'} = $value ;
1241 if (defined $self->{'_is_prototype'}) {
1242 return $self->{'_is_prototype'}
1243 } else {
1244 $self->warn("Can't unequivocally assign prototype based on input format alone");
1245 return
1249 =head2 is_neoschizomer
1251 Title : is_neoschizomer
1252 Usage : $re->is_neoschizomer
1253 Function : Get/Set method for finding out if this enzyme is a neoschizomer
1254 Example : $re->is_neoschizomer(1)
1255 Returns : Boolean
1256 Args : none
1258 Neoschizomers are distinguishable from the prototype enzyme by having a
1259 different cleavage pattern. Note that not all formats report this
1261 =cut
1263 sub is_neoschizomer {
1264 my ($self, $value) = @_;
1265 if (defined $value) {
1266 return $self->{'_is_neoschizomer'} = $value ;
1268 if (defined $self->{'_is_neoschizomer'}) {
1269 return $self->{'_is_neoschizomer'}
1270 } else {
1271 $self->warn("Can't unequivocally assign neoschizomer based on input format alone");
1272 return
1276 =head2 prototype_name
1278 Title : prototype_name
1279 Alias : prototype
1280 Usage : $re->prototype_name
1281 Function : Get/Set method for the name of prototype for
1282 this enzyme's recognition site
1283 Example : $re->prototype_name(1)
1284 Returns : prototype enzyme name string or an empty string
1285 Args : optional prototype enzyme name string
1287 If the enzyme itself is the prototype, its own name is returned. Not to
1288 confuse the negative result with an unset value, use method
1289 L<is_prototype|is_prototype>.
1291 This method is called I<prototype_name> rather than I<prototype>,
1292 because it returns a string rather than on object.
1294 =cut
1296 sub prototype_name {
1297 my $self = shift;
1299 $self->{'_prototype'} = shift if @_;
1300 return $self->name if $self->{'_is_prototype'};
1301 return $self->{'_prototype'} || '';
1304 sub prototype { shift->prototype_name(@_) }
1306 =head2 isoschizomers
1308 Title : isoschizomers
1309 Alias : isos
1310 Usage : $re->isoschizomers(@list);
1311 Function : Gets/Sets a list of known isoschizomers (enzymes that
1312 recognize the same site, but don't necessarily cut at
1313 the same position).
1314 Arguments : A reference to an array that contains the isoschizomers
1315 Returns : A reference to an array of the known isoschizomers or 0
1316 if not defined.
1318 This has to be the hardest name to spell, so now you can use the alias
1319 'isos'. Added for compatibility to REBASE
1321 =cut
1323 sub isoschizomers {
1324 my ($self) = shift;
1325 push @{$self->{_isoschizomers}}, @_ if @_;
1326 # make sure that you don't dereference if null
1327 # chad believes quite strongly that you should return
1328 # a reference to an array anyway. don't bother dereferencing.
1329 # i'll post that to the list.
1330 if ($self->{'_isoschizomers'}) {
1331 return @{$self->{_isoschizomers}};
1336 sub isos { shift->isoschizomers(@_) }
1338 =head2 purge_isoschizomers
1340 Title : purge_isoschizomers
1341 Alias : purge_isos
1342 Usage : $re->purge_isoschizomers();
1343 Function : Purges the set of isoschizomers for this enzyme
1344 Arguments :
1345 Returns : 1
1347 =cut
1349 sub purge_isoschizomers {
1350 my ($self) = shift;
1351 $self->{_isoschizomers} = [];
1355 sub purge_isos { shift->purge_isoschizomers(@_) }
1357 =head2 methylation_sites
1359 Title : methylation_sites
1360 Usage : $re->methylation_sites(\%sites);
1361 Function : Gets/Sets known methylation sites (positions on the sequence
1362 that get modified to promote or prevent cleavage).
1363 Arguments : A reference to a hash that contains the methylation sites
1364 Returns : A reference to a hash of the methylation sites or
1365 an empty string if not defined.
1367 There are three types of methylation sites:
1369 =over 3
1371 =item * (6) = N6-methyladenosine
1373 =item * (5) = 5-methylcytosine
1375 =item * (4) = N4-methylcytosine
1377 =back
1379 These are stored as 6, 5, and 4 respectively. The hash has the
1380 sequence position as the key and the type of methylation as the value.
1381 A negative number in the sequence position indicates that the DNA is
1382 methylated on the complementary strand.
1384 Note that in REBASE, the methylation positions are given
1385 Added for compatibility to REBASE.
1387 =cut
1389 sub methylation_sites {
1390 my $self = shift;
1392 while (@_) {
1393 my $key = shift;
1394 $self->{'_methylation_sites'}->{$key} = shift;
1396 return %{$self->{_methylation_sites}};
1400 =head2 purge_methylation_sites
1402 Title : purge_methylation_sites
1403 Usage : $re->purge_methylation_sites();
1404 Function : Purges the set of methylation_sites for this enzyme
1405 Arguments :
1406 Returns :
1408 =cut
1410 sub purge_methylation_sites {
1411 my ($self) = shift;
1412 $self->{_methylation_sites} = {};
1415 =head2 microbe
1417 Title : microbe
1418 Usage : $re->microbe($microbe);
1419 Function : Gets/Sets microorganism where the restriction enzyme was found
1420 Arguments : A scalar containing the microbes name
1421 Returns : A scalar containing the microbes name or 0 if not defined
1423 Added for compatibility to REBASE
1425 =cut
1427 sub microbe {
1428 my ($self, $microbe) = @_;
1429 if ($microbe) {
1430 $self->{_microbe}=$microbe;
1432 return $self->{_microbe} || '';
1437 =head2 source
1439 Title : source
1440 Usage : $re->source('Rob Edwards');
1441 Function : Gets/Sets the person who provided the enzyme
1442 Arguments : A scalar containing the persons name
1443 Returns : A scalar containing the persons name or 0 if not defined
1445 Added for compatibility to REBASE
1447 =cut
1449 sub source {
1450 my ($self, $source) = @_;
1451 if ($source) {
1452 $self->{_source}=$source;
1454 return $self->{_source} || '';
1458 =head2 vendors
1460 Title : vendors
1461 Usage : $re->vendor(@list_of_companies);
1462 Function : Gets/Sets the a list of companies that you can get the enzyme from.
1463 Also sets the commercially_available boolean
1464 Arguments : A reference to an array containing the names of companies
1465 that you can get the enzyme from
1466 Returns : A reference to an array containing the names of companies
1467 that you can get the enzyme from
1469 Added for compatibility to REBASE
1471 =cut
1473 sub vendors {
1474 my $self = shift;
1475 push @{$self->{_vendors}}, @_ if @_;
1476 if ($self->{'_vendors'}) {
1477 return @{$self->{'_vendors'}};
1482 =head2 purge_vendors
1484 Title : purge_vendors
1485 Usage : $re->purge_references();
1486 Function : Purges the set of references for this enzyme
1487 Arguments :
1488 Returns :
1490 =cut
1492 sub purge_vendors {
1493 my ($self) = shift;
1494 $self->{_vendors} = [];
1498 =head2 vendor
1500 Title : vendor
1501 Usage : $re->vendor(@list_of_companies);
1502 Function : Gets/Sets the a list of companies that you can get the enzyme from.
1503 Also sets the commercially_available boolean
1504 Arguments : A reference to an array containing the names of companies
1505 that you can get the enzyme from
1506 Returns : A reference to an array containing the names of companies
1507 that you can get the enzyme from
1509 Added for compatibility to REBASE
1511 =cut
1514 sub vendor {
1515 my $self = shift;
1516 return push @{$self->{_vendors}}, @_;
1517 return $self->{_vendors};
1521 =head2 references
1523 Title : references
1524 Usage : $re->references(string);
1525 Function : Gets/Sets the references for this enzyme
1526 Arguments : an array of string reference(s) (optional)
1527 Returns : an array of references
1529 Use L<purge_references|purge_references> to reset the list of references
1531 This should be a L<Bio::Biblio> object, but its not (yet)
1533 =cut
1535 sub references {
1536 my ($self) = shift;
1537 push @{$self->{_references}}, @_ if @_;
1538 return @{$self->{_references}};
1542 =head2 purge_references
1544 Title : purge_references
1545 Usage : $re->purge_references();
1546 Function : Purges the set of references for this enzyme
1547 Arguments :
1548 Returns : 1
1550 =cut
1552 sub purge_references {
1553 my ($self) = shift;
1554 $self->{_references} = [];
1558 =head2 clone
1560 Title : clone
1561 Usage : $re->clone
1562 Function : Deep copy of the object
1563 Arguments : -
1564 Returns : new Bio::Restriction::EnzymeI object
1566 This works as long as the object is a clean in-memory object using
1567 scalars, arrays and hashes. You have been warned.
1569 If you have module Storable, it is used, otherwise local code is used.
1570 Todo: local code cuts circular references.
1572 =cut
1574 # there's some issue here; deprecating and rolling another below/maj
1576 sub clone_depr {
1577 my ($self, $this) = @_;
1579 eval { require Storable; };
1580 return Storable::dclone($self) unless $@;
1581 # modified from deep_copy() @ http://www.stonehenge.com/merlyn/UnixReview/col30.html
1582 unless ($this) {
1583 my $new;
1584 foreach my $k (keys %$self) {
1585 if (not ref $self->{$k}) {
1586 $new->{$k} = $self->{$k};
1587 } else {
1588 $new->{$k} = $self->clone($self->{$k});
1590 #print Dumper $new;
1592 bless $new, ref($self);
1593 return $new;
1595 if (not ref $this) {
1596 $this;
1598 elsif (ref $this eq "ARRAY") {
1599 [map $self->clone($_), @$this];
1601 elsif (ref $this eq "HASH") {
1602 +{map { $_ => $self->clone($this->{$_}) } keys %$this};
1603 } else { # objects
1604 return if $this->isa('Bio::Restriction::EnzymeI');
1605 return $this->clone if $this->can('clone');
1606 my $obj;
1607 foreach my $k (keys %$this) {
1608 if (not ref $this->{$k}) {
1609 $obj->{$k} = $this->{$k};
1610 } else {
1611 $obj->{$k} = $this->clone($this->{$k});
1614 bless $obj, ref($this);
1615 return $obj;
1619 sub clone {
1620 my $self = shift;
1621 my ($this, $visited) = @_;
1622 unless (defined $this) {
1623 my %h;
1624 tie %h, 'Tie::RefHash';
1625 my $visited = \%h;
1626 return $self->clone($self, $visited);
1628 my $thing;
1629 for ($this) {
1630 if (ref) {
1631 return $visited->{$this} if $visited->{$this};
1633 # scalar
1634 (!ref) && do {
1635 $thing = $this;
1636 last;
1638 # object
1639 (ref =~ /^Bio::/) && do {
1640 $thing = {};
1641 bless($thing, ref);
1642 $visited->{$this} = $thing;
1643 foreach my $attr (keys %{$_}) {
1644 $thing->{$attr} = (defined $_->{$attr} ? $self->clone($_->{$attr},$visited) : undef );
1646 last;
1648 (ref eq 'ARRAY') && do {
1649 $thing = [];
1650 $visited->{$this} = $thing;
1651 foreach my $elt (@{$_}) {
1652 push @$thing, (defined $elt ? $self->clone($elt,$visited) : undef);
1654 last;
1656 (ref eq 'HASH') && do {
1657 $thing = {};
1658 $visited->{$this} = $thing;
1659 no warnings qw( uninitialized ); # avoid 'uninitialized value' warning against $key
1660 foreach my $key (%{$_}) {
1661 $thing->{$key} = (defined $_->{key} ? $self->clone( $_->{$key},$visited) : undef );
1663 use warnings;
1664 last;
1666 (ref eq 'SCALAR') && do {
1667 $thing = ${$_};
1668 $visited->{$this} = $thing;
1669 $thing = \$thing;
1670 last;
1674 return $thing;
1679 =head2 _expand
1681 Title : _expand
1682 Function : Expand nucleotide ambiguity codes to their representative letters
1683 Returns : The full length string
1684 Arguments : The string to be expanded.
1686 Stolen from the original RestrictionEnzyme.pm
1688 =cut
1691 sub _expand {
1692 my $str = shift;
1694 $str =~ s/N|X/\./g;
1695 $str =~ s/R/\[AG\]/g;
1696 $str =~ s/Y/\[CT\]/g;
1697 $str =~ s/S/\[GC\]/g;
1698 $str =~ s/W/\[AT\]/g;
1699 $str =~ s/M/\[AC\]/g;
1700 $str =~ s/K/\[TG\]/g;
1701 $str =~ s/B/\[CGT\]/g;
1702 $str =~ s/D/\[AGT\]/g;
1703 $str =~ s/H/\[ACT\]/g;
1704 $str =~ s/V/\[ACG\]/g;
1706 return $str;