* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / Variation / VariantI.pm
blob4ad7944e71c770c3ba868f45ecd4ea609933daba
1 # $Id$
3 # BioPerl module for Bio::Variation::VariantI
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Variation::VariantI - Sequence Change SeqFeature abstract class
17 =head1 SYNOPSIS
19 #get Bio::Variant::VariantI somehow
20 print $var->restriction_changes, "\n";
21 foreach $allele ($var->each_Allele) {
22 #work on Bio::Variation::Allele objects
25 =head1 DESCRIPTION
27 This superclass defines common methods to basic sequence changes. The
28 instantiable classes Bio::Variation::DNAMutation,
29 Bio::Variation::RNAChange and Bio::Variation::AAChange use them.
30 See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>,
31 and L<Bio::Variation::AAChange> for more information.
33 These classes store information, heavy computation to detemine allele
34 sequences is done elsewhere.
36 The database cross-references are implemented as
37 Bio::Annotation::DBLink objects. The methods to access them are
38 defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink>
39 and L<Bio::DBLinkContainerI> for details.
41 Bio::Variation::VariantI redifines and extends
42 Bio::SeqFeature::Generic for sequence variations. This class
43 describes specific sequence change events. These events are always
44 from a specific reference sequence to something different. See
45 L<Bio::SeqFeature::Generic> for more information.
47 IMPORTANT: The notion of reference sequence permeates all
48 Bio::Variation classes. This is especially important to remember when
49 dealing with Alleles. In a polymorphic site, there can be a large
50 number of alleles. One of then has to be selected to be the reference
51 allele (allele_ori). ALL the rest has to be passed to the Variant
52 using the method add_Allele, including the mutated allele in a
53 canonical mutation. The IO modules and generated attributes depend on
54 it. They ignore the allele linked to using allele_mut and circulate
55 each Allele returned by each_Allele into allele_mut and calculate
56 the changes between that and allele_ori.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to the
65 Bioperl mailing lists Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 =head2 Reporting Bugs
72 Report bugs to the Bioperl bug tracking system to help us keep track
73 the bugs and their resolution. Bug reports can be submitted via the
74 web:
76 http://bugzilla.open-bio.org/
78 =head1 AUTHOR - Heikki Lehvaslaiho
80 Email: heikki-at-bioperl-dot-org
82 =head1 APPENDIX
84 The rest of the documentation details each of the object
85 methods. Internal methods are usually preceded with a _
87 =cut
90 # Let the code begin...
93 package Bio::Variation::VariantI;
94 use strict;
95 # Object preamble - inheritance
97 use base qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI);
99 =head2 id
101 Title : id
102 Usage : $obj->id
103 Function:
105 Read only method. Returns the id of the variation object.
106 The id is the id of the first DBLink object attached to this object.
108 Example :
109 Returns : scalar
110 Args : none
112 =cut
114 sub id {
115 my ($self) = @_;
116 my @ids = $self->each_DBLink;
117 my $id = $ids[0] if scalar @ids > 0;
118 return $id->database. "::". $id->primary_id if $id;
122 =head2 add_Allele
124 Title : add_Allele
125 Usage : $self->add_Allele($allele)
126 Function:
128 Adds one Bio::Variation::Allele into the list of alleles.
129 Note that the method forces the convention that nucleotide
130 sequence is in lower case and amino acds are in upper
131 case.
133 Example :
134 Returns : 1 when succeeds, 0 for failure.
135 Args : Allele object
137 =cut
140 sub add_Allele {
141 my ($self,$value) = @_;
142 if (defined $value) {
143 if( ! $value->isa('Bio::Variation::Allele') ) {
144 my $com = ref $value;
145 $self->throw("Is not a Allele object but a [$com]");
146 return 0;
147 } else {
148 if ( $self->isa('Bio::Variation::AAChange') ) {
149 $value->seq( uc $value->seq) if $value->seq;
150 } else {
151 $value->seq( lc $value->seq) if $value->seq;
153 push(@{$self->{'alleles'}},$value);
154 $self->allele_mut($value); #????
155 return 1;
157 } else {
158 return 0;
163 =head2 each_Allele
165 Title : alleles
166 Usage : $obj->each_Allele();
167 Function:
169 Returns a list of Bio::Variation::Allele objects
171 Example :
172 Returns : list of Alleles
173 Args : none
175 =cut
177 sub each_Allele{
178 my ($self,@args) = @_;
179 return @{$self->{'alleles'}};
183 =head2 isMutation
185 Title : isMutation
186 Usage : print join('/', $obj->each_Allele) if not $obj->isMutation;
187 Function:
189 Returns or sets the boolean value indicating that the
190 variant descibed is a canonical mutation with two alleles
191 assinged to be the original (wild type) allele and mutated
192 allele, respectively. If this value is not set, it is
193 assumed that the Variant descibes polymorphisms.
195 Returns : a boolean
197 =cut
199 sub isMutation {
200 my ($self,$value) = @_;
201 if (defined $value) {
202 if ($value ) {
203 $self->{'isMutation'} = 1;
204 } else {
205 $self->{'isMutation'} = 0;
208 return $self->{'isMutation'};
212 =head2 allele_ori
214 Title : allele_ori
215 Usage : $obj->allele_ori();
216 Function:
218 Links to and returns the Bio::Variation::Allele object.
219 If value is not set, returns false. All other Alleles are
220 compared to this.
222 Amino acid sequences are stored in upper case characters,
223 others in lower case.
225 Example :
226 Returns : string
227 Args : string
229 See L<Bio::Variation::Allele> for more.
231 =cut
233 sub allele_ori {
234 my ($self,$value) = @_;
235 if( defined $value) {
236 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
237 $self->throw("Value is not Bio::Variation::Allele but [$value]");
238 } else {
239 if ( $self->isa('Bio::Variation::AAChange') ) {
240 $value->seq( uc $value->seq) if $value->seq;
241 } else {
242 $value->seq( lc $value->seq) if $value->seq;
244 $self->{'allele_ori'} = $value;
247 return $self->{'allele_ori'};
251 =head2 allele_mut
253 Title : allele_mut
254 Usage : $obj->allele_mut();
255 Function:
257 Links to and returns the Bio::Variation::Allele
258 object. Sets and returns the mutated allele sequence.
259 If value is not set, returns false.
261 Amino acid sequences are stored in upper case characters,
262 others in lower case.
264 Example :
265 Returns : string
266 Args : string
268 See L<Bio::Variation::Allele> for more.
270 =cut
273 sub allele_mut {
274 my ($self,$value) = @_;
275 if( defined $value) {
276 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
277 $self->throw("Value is not Bio::Variation::Allele but [$value]");
278 } else {
279 if ( $self->isa('Bio::Variation::AAChange') ) {
280 $value->seq( uc $value->seq) if $value->seq;
281 } else {
282 $value->seq( lc $value->seq) if $value->seq;
284 $self->{'allele_mut'} = $value;
287 return $self->{'allele_mut'};
290 =head2 length
292 Title : length
293 Usage : $obj->length();
294 Function:
296 Sets and returns the length of the affected original
297 allele sequence. If value is not set, returns false == 0.
299 Value 0 means that the variant position is before the
300 start=end sequence position. (Value 1 would denote a point
301 mutation). This follows the convension to report an
302 insertion (2insT) in equivalent way to a corresponding
303 deletion (2delT) (Think about indel polymorpism ATC <=> AC
304 where the origianal state is not known ).
306 Example :
307 Returns : string
308 Args : string
310 =cut
313 sub length {
314 my ($self,$value) = @_;
315 if ( defined $value) {
316 $self->{'length'} = $value;
318 if ( ! exists $self->{'length'} ) {
319 return 0;
321 return $self->{'length'};
324 =head2 upStreamSeq
326 Title : upStreamSeq
327 Usage : $obj->upStreamSeq();
328 Function:
330 Sets and returns upstream flanking sequence string. If
331 value is not set, returns false. The sequence should be
332 >=25 characters long, if possible.
334 Example :
335 Returns : string or false
336 Args : string
338 =cut
341 sub upStreamSeq {
342 my ($self,$value) = @_;
343 if( defined $value) {
344 $self->{'upstreamseq'} = $value;
346 return $self->{'upstreamseq'};
350 =head2 dnStreamSeq
352 Title : dnStreamSeq
353 Usage : $obj->dnStreamSeq();
354 Function:
356 Sets and returns dnstream flanking sequence string. If
357 value is not set, returns false. The sequence should be
358 >=25 characters long, if possible.
360 Example :
361 Returns : string or false
362 Args : string
364 =cut
367 sub dnStreamSeq {
368 my ($self,$value) = @_;
369 if( defined $value) {
370 $self->{'dnstreamseq'} = $value;
372 return $self->{'dnstreamseq'};
377 =head2 label
379 Title : label
380 Usage : $obj->label();
381 Function:
383 Sets and returns mutation event label(s). If value is not
384 set, or no argument is given returns false. Each
385 instantiable class needs to implement this method. Valid
386 values are listed in 'Mutation event controlled vocabulary' in
387 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
389 Example :
390 Returns : string
391 Args : string
393 =cut
396 sub label {
397 my ($self,$value) = @_;
398 $self->throw_not_implemented();
403 =head2 status
405 Title : status
406 Usage : $obj->status()
407 Function:
409 Returns the status of the sequence change object.
410 Valid values are: 'suspected' and 'proven'
412 Example : $obj->status('proven');
413 Returns : scalar
414 Args : valid string (optional, for setting)
417 =cut
420 sub status {
421 my ($self,$value) = @_;
422 my %status = (suspected => 1,
423 proven => 1
426 if( defined $value) {
427 $value = lc $value;
428 if ($status{$value}) {
429 $self->{'status'} = $value;
431 else {
432 $self->throw("$value is not valid status value!");
435 if( ! exists $self->{'status'} ) {
436 return "$self";
438 return $self->{'status'};
442 =head2 proof
444 Title : proof
445 Usage : $obj->proof()
446 Function:
448 Returns the proof of the sequence change object.
449 Valid values are: 'computed' and 'experimental'.
451 Example : $obj->proof('computed');
452 Returns : scalar
453 Args : valid string (optional, for setting)
456 =cut
459 sub proof {
460 my ($self,$value) = @_;
461 my %proof = (computed => 1,
462 experimental => 1
465 if( defined $value) {
466 $value = lc $value;
467 if ($proof{$value}) {
468 $self->{'proof'} = $value;
469 } else {
470 $self->throw("$value is not valid proof value!");
473 return $self->{'proof'};
477 =head2 region
479 Title : region
480 Usage : $obj->region();
481 Function:
483 Sets and returns the name of the sequence region type or
484 protein domain at this location. If value is not set,
485 returns false.
487 Example :
488 Returns : string
489 Args : string
491 =cut
494 sub region {
495 my ($self,$value) = @_;
496 if( defined $value) {
497 $self->{'region'} = $value;
499 return $self->{'region'};
503 =head2 region_value
505 Title : region_value
506 Usage : $obj->region_value();
507 Function:
509 Sets and returns the name of the sequence region_value or
510 protein domain at this location. If value is not set,
511 returns false.
513 Example :
514 Returns : string
515 Args : string
517 =cut
520 sub region_value {
521 my ($self,$value) = @_;
522 if( defined $value) {
523 $self->{'region_value'} = $value;
525 return $self->{'region_value'};
528 =head2 region_dist
530 Title : region_dist
531 Usage : $obj->region_dist();
532 Function:
534 Sets and returns the distance tot the closest region
535 (i.e. intro/exon or domain) boundary. If distance is not
536 set, returns false.
538 Example :
539 Returns : integer
540 Args : integer
542 =cut
545 sub region_dist {
546 my ($self,$value) = @_;
547 if( defined $value) {
548 if ( not $value =~ /^[+-]?\d+$/ ) {
549 $self->throw("[$value] for region_dist has to be an integer\n");
550 } else {
551 $self->{'region_dist'} = $value;
554 return $self->{'region_dist'};
558 =head2 numbering
560 Title : numbering
561 Usage : $obj->numbering()
562 Function:
564 Returns the numbering chema used locating sequnce features.
565 Valid values are: 'entry' and 'coding'
567 Example : $obj->numbering('coding');
568 Returns : scalar
569 Args : valid string (optional, for setting)
572 =cut
575 sub numbering {
576 my ($self,$value) = @_;
577 my %numbering = (entry => 1,
578 coding => 1
581 if( defined $value) {
582 $value = lc $value;
583 if ($numbering{$value}) {
584 $self->{'numbering'} = $value;
586 else {
587 $self->throw("'$value' is not a valid for numbering!");
590 if( ! exists $self->{'numbering'} ) {
591 return "$self";
593 return $self->{'numbering'};
596 =head2 mut_number
598 Title : mut_number
599 Usage : $num = $obj->mut_number;
600 : $num = $obj->mut_number($number);
601 Function:
603 Returns or sets the number identifying the order in which the
604 mutation has been issued. Numbers shouldstart from 1.
605 If the number has never been set, the method will return ''
607 If you want the output from IO modules look nice and, for
608 multivariant/allele variations, make sense you better set
609 this attribute.
611 Returns : an integer
613 =cut
616 sub mut_number {
617 my ($self,$value) = @_;
618 if (defined $value) {
619 $self->{'mut_number'} = $value;
621 unless (exists $self->{'mut_number'}) {
622 return ('');
623 } else {
624 return $self->{'mut_number'};
629 =head2 SeqDiff
631 Title : SeqDiff
632 Usage : $mutobj = $obj->SeqDiff;
633 : $mutobj = $obj->SeqDiff($objref);
634 Function:
636 Returns or sets the link-reference to the umbrella
637 Bio::Variation::SeqDiff object. If there is no link,
638 it will return undef
640 Note: Adding a variant into a SeqDiff object will
641 automatically set this value.
643 Returns : an obj_ref or undef
645 See L<Bio::Variation::SeqDiff> for more information.
647 =cut
649 sub SeqDiff {
650 my ($self,$value) = @_;
651 if (defined $value) {
652 if( ! $value->isa('Bio::Variation::SeqDiff') ) {
653 $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]");
654 return;
656 else {
657 $self->{'seqDiff'} = $value;
660 unless (exists $self->{'seqDiff'}) {
661 return;
662 } else {
663 return $self->{'seqDiff'};
667 =head2 add_DBLink
669 Title : add_DBLink
670 Usage : $self->add_DBLink($ref)
671 Function: adds a link object
672 Example :
673 Returns :
674 Args :
677 =cut
680 sub add_DBLink{
681 my ($self,$com) = @_;
682 if( $com && ! $com->isa('Bio::Annotation::DBLink') ) {
683 $self->throw("Is not a link object but a [$com]");
685 $com && push(@{$self->{'link'}},$com);
688 =head2 each_DBLink
690 Title : each_DBLink
691 Usage : foreach $ref ( $self->each_DBlink() )
692 Function: gets an array of DBlink of objects
693 Example :
694 Returns :
695 Args :
698 =cut
700 sub each_DBLink{
701 my ($self) = @_;
703 return @{$self->{'link'}};
706 =head2 restriction_changes
708 Title : restriction_changes
709 Usage : $obj->restriction_changes();
710 Function:
712 Returns a string containing a list of restriction
713 enzyme changes of form +EcoRI, separated by
714 commas. Strings need to be valid restriction enzyme names
715 as stored in REBASE. allele_ori and allele_mut need to be assigned.
717 Example :
718 Returns : string
719 Args : string
721 =cut
723 sub restriction_changes {
724 my ($self) = @_;
726 if (not $self->{'re_changes'}) {
727 my %re = &_enzymes;
729 # complain if used on AA data
730 if ($self->isa('Bio::Variation::AAChange')) {
731 $self->throw('Restriction enzymes do not bite polypeptides!');
734 #sanity checks
735 $self->warn('Upstream sequence is empty!')
736 if $self->upStreamSeq eq '';
737 $self->warn('Downstream sequence is empty!')
738 if $self->dnStreamSeq eq '';
739 # $self->warn('Original allele sequence is empty!')
740 # if $self->allele_ori eq '';
741 # $self->warn('Mutated allele sequence is empty!')
742 # if $self->allele_mut eq '';
744 #reuse the non empty DNA level list at RNA level if the flanks are identical
745 #Hint: Check DNAMutation object first
746 if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and
747 $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and
748 $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and
749 $self->DNAMutation->restriction_changes ne '' ) {
750 $self->{'re_changes'} = $self->DNAMutation->restriction_changes;
751 } else {
753 #maximum length of a type II restriction site in the current REBASE
754 my ($le_dn) = 15;
755 my ($le_up) = $le_dn;
757 #reduce the flank lengths if the desired length is not available
758 $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq);
759 $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq);
761 #Build sequence strings to compare
762 my ($oriseq, $mutseq);
763 $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up);
764 $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq;
765 $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq;
766 $oriseq .= substr($self->dnStreamSeq, 0, $le_dn);
767 $mutseq .= substr($self->dnStreamSeq, 0, $le_dn);
769 # ... and their reverse complements
770 my $oriseq_rev = _revcompl ($oriseq);
771 my $mutseq_rev = _revcompl ($mutseq);
773 # collect results into a string
774 my $rec = '';
775 foreach my $enz (sort keys (%re)) {
776 my $site = $re{$enz};
777 my @ori = ($oriseq=~ /$site/g);
778 my @mut = ($mutseq=~ /$site/g);
779 my @ori_r = ($oriseq_rev =~ /$site/g);
780 my @mut_r = ($mutseq_rev =~ /$site/g);
782 $rec .= '+'. $enz. ", "
783 if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r);
784 $rec .= '-'. $enz. ", "
785 if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r);
788 $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne '';
789 $self->{'re_changes'} = $rec;
792 return $self->{'re_changes'}
796 sub _revcompl {
797 # side effect: lower case letters
798 my ($seq) = shift;
800 $seq = lc $seq;
801 $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/;
802 return CORE::reverse $seq;
806 sub _enzymes {
807 #REBASE version 005 type2.005
808 my %enzymes = (
809 'AarI' => 'cacctgc',
810 'AatII' => 'gacgtc',
811 'AccI' => 'gt[ac][gt]ac',
812 'AceIII' => 'cagctc',
813 'AciI' => 'ccgc',
814 'AclI' => 'aacgtt',
815 'AcyI' => 'g[ag]cg[ct]c',
816 'AflII' => 'cttaag',
817 'AflIII' => 'ac[ag][ct]gt',
818 'AgeI' => 'accggt',
819 'AhaIII' => 'tttaaa',
820 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc',
821 'AluI' => 'agct',
822 'AlwNI' => 'cag[acgt][acgt][acgt]ctg',
823 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc',
824 'ApaI' => 'gggccc',
825 'ApaLI' => 'gtgcac',
826 'ApoI' => '[ag]aatt[ct]',
827 'AscI' => 'ggcgcgcc',
828 'AsuI' => 'gg[acgt]cc',
829 'AsuII' => 'ttcgaa',
830 'AvaI' => 'c[ct]cg[ag]g',
831 'AvaII' => 'gg[at]cc',
832 'AvaIII' => 'atgcat',
833 'AvrII' => 'cctagg',
834 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c',
835 'BalI' => 'tggcca',
836 'BamHI' => 'ggatcc',
837 'BbvCI' => 'cctcagc',
838 'BbvI' => 'gcagc',
839 'BbvII' => 'gaagac',
840 'BccI' => 'ccatc',
841 'Bce83I' => 'cttgag',
842 'BcefI' => 'acggc',
843 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc',
844 'BciVI' => 'gtatcc',
845 'BclI' => 'tgatca',
846 'BetI' => '[at]ccgg[at]',
847 'BfiI' => 'actggg',
848 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc',
849 'BglII' => 'agatct',
850 'BinI' => 'ggatc',
851 'BmgI' => 'g[gt]gccc',
852 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc',
853 'Bpu10I' => 'cct[acgt]agc',
854 'BsaAI' => '[ct]acgt[ag]',
855 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc',
856 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc',
857 'BsbI' => 'caacac',
858 'BscGI' => 'cccgt',
859 'BseMII' => 'ctcag',
860 'BsePI' => 'gcgcgc',
861 'BseRI' => 'gaggag',
862 'BseSI' => 'g[gt]gc[ac]c',
863 'BsgI' => 'gtgcag',
864 'BsiI' => 'cacgag',
865 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg',
866 'BsmAI' => 'gtctc',
867 'BsmI' => 'gaatgc',
868 'Bsp1407I' => 'tgtaca',
869 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
870 'BspGI' => 'ctggac',
871 'BspHI' => 'tcatga',
872 'BspLU11I' => 'acatgt',
873 'BspMI' => 'acctgc',
874 'BspMII' => 'tccgga',
875 'BsrBI' => 'ccgctc',
876 'BsrDI' => 'gcaatg',
877 'BsrI' => 'actgg',
878 'BstEII' => 'ggt[acgt]acc',
879 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
880 'BtrI' => 'cacgtc',
881 'BtsI' => 'gcagtg',
882 'Cac8I' => 'gc[acgt][acgt]gc',
883 'CauII' => 'cc[cg]gg',
884 'Cfr10I' => '[ag]ccgg[ct]',
885 'CfrI' => '[ct]ggcc[ag]',
886 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt',
887 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc',
888 'ClaI' => 'atcgat',
889 'CviJI' => '[ag]gc[ct]',
890 'CviRI' => 'tgca',
891 'DdeI' => 'ct[acgt]ag',
892 'DpnI' => 'gatc',
893 'DraII' => '[ag]gg[acgt]cc[ct]',
894 'DraIII' => 'cac[acgt][acgt][acgt]gtg',
895 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc',
896 'DrdII' => 'gaacca',
897 'DsaI' => 'cc[ag][ct]gg',
898 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc',
899 'EciI' => 'ggcgga',
900 'Eco31I' => 'ggtctc',
901 'Eco47III' => 'agcgct',
902 'Eco57I' => 'ctgaag',
903 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg',
904 'EcoRI' => 'gaattc',
905 'EcoRII' => 'cc[at]gg',
906 'EcoRV' => 'gatatc',
907 'Esp3I' => 'cgtctc',
908 'EspI' => 'gct[acgt]agc',
909 'FauI' => 'cccgc',
910 'FinI' => 'gggac',
911 'Fnu4HI' => 'gc[acgt]gc',
912 'FnuDII' => 'cgcg',
913 'FokI' => 'ggatg',
914 'FseI' => 'ggccggcc',
915 'GdiII' => 'cggcc[ag]',
916 'GsuI' => 'ctggag',
917 'HaeI' => '[at]ggcc[at]',
918 'HaeII' => '[ag]gcgc[ct]',
919 'HaeIII' => 'ggcc',
920 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc',
921 'HgaI' => 'gacgc',
922 'HgiAI' => 'g[at]gc[at]c',
923 'HgiCI' => 'gg[ct][ag]cc',
924 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt',
925 'HgiJII' => 'g[ag]gc[ct]c',
926 'HhaI' => 'gcgc',
927 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc',
928 'HindII' => 'gt[ct][ag]ac',
929 'HindIII' => 'aagctt',
930 'HinfI' => 'ga[acgt]tc',
931 'HpaI' => 'gttaac',
932 'HpaII' => 'ccgg',
933 'HphI' => 'ggtga',
934 'Hpy178III' => 'tc[acgt][acgt]ga',
935 'Hpy188I' => 'tc[acgt]ga',
936 'Hpy99I' => 'cg[at]cg',
937 'KpnI' => 'ggtacc',
938 'Ksp632I' => 'ctcttc',
939 'MaeI' => 'ctag',
940 'MaeII' => 'acgt',
941 'MaeIII' => 'gt[acgt]ac',
942 'MboI' => 'gatc',
943 'MboII' => 'gaaga',
944 'McrI' => 'cg[ag][ct]cg',
945 'MfeI' => 'caattg',
946 'MjaIV' => 'gt[acgt][acgt]ac',
947 'MluI' => 'acgcgt',
948 'MmeI' => 'tcc[ag]ac',
949 'MnlI' => 'cctc',
950 'MseI' => 'ttaa',
951 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg',
952 'MstI' => 'tgcgca',
953 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc',
954 'NaeI' => 'gccggc',
955 'NarI' => 'ggcgcc',
956 'NcoI' => 'ccatgg',
957 'NdeI' => 'catatg',
958 'NheI' => 'gctagc',
959 'NlaIII' => 'catg',
960 'NlaIV' => 'gg[acgt][acgt]cc',
961 'NotI' => 'gcggccgc',
962 'NruI' => 'tcgcga',
963 'NspBII' => 'c[ac]gc[gt]g',
964 'NspI' => '[ag]catg[ct]',
965 'PacI' => 'ttaattaa',
966 'Pfl1108I' => 'tcgtag',
967 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg',
968 'PleI' => 'gagtc',
969 'PmaCI' => 'cacgtg',
970 'PmeI' => 'gtttaaac',
971 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc',
972 'PpuMI' => '[ag]gg[at]cc[ct]',
973 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc',
974 'PsiI' => 'ttataa',
975 'PstI' => 'ctgcag',
976 'PvuI' => 'cgatcg',
977 'PvuII' => 'cagctg',
978 'RleAI' => 'cccaca',
979 'RsaI' => 'gtac',
980 'RsrII' => 'cgg[at]ccg',
981 'SacI' => 'gagctc',
982 'SacII' => 'ccgcgg',
983 'SalI' => 'gtcgac',
984 'SanDI' => 'ggg[at]ccc',
985 'SapI' => 'gctcttc',
986 'SauI' => 'cct[acgt]agg',
987 'ScaI' => 'agtact',
988 'ScrFI' => 'cc[acgt]gg',
989 'SduI' => 'g[agt]gc[act]c',
990 'SecI' => 'cc[acgt][acgt]gg',
991 'SexAI' => 'acc[at]ggt',
992 'SfaNI' => 'gcatc',
993 'SfeI' => 'ct[ag][ct]ag',
994 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc',
995 'SgfI' => 'gcgatcgc',
996 'SgrAI' => 'c[ag]ccgg[ct]g',
997 'SimI' => 'gggtc',
998 'SmaI' => 'cccggg',
999 'SmlI' => 'ct[ct][ag]ag',
1000 'SnaBI' => 'tacgta',
1001 'SnaI' => 'gtatac',
1002 'SpeI' => 'actagt',
1003 'SphI' => 'gcatgc',
1004 'SplI' => 'cgtacg',
1005 'SrfI' => 'gcccgggc',
1006 'Sse232I' => 'cgccggcg',
1007 'Sse8387I' => 'cctgcagg',
1008 'Sse8647I' => 'agg[at]cct',
1009 'SspI' => 'aatatt',
1010 'Sth132I' => 'cccg',
1011 'StuI' => 'aggcct',
1012 'StyI' => 'cc[at][at]gg',
1013 'SwaI' => 'atttaaat',
1014 'TaqI' => 'tcga',
1015 'TaqII' => 'gaccga',
1016 'TatI' => '[at]gtac[at]',
1017 'TauI' => 'gc[cg]gc',
1018 'TfiI' => 'ga[at]tc',
1019 'TseI' => 'gc[at]gc',
1020 'Tsp45I' => 'gt[cg]ac',
1021 'Tsp4CI' => 'ac[acgt]gt',
1022 'TspEI' => 'aatt',
1023 'TspRI' => 'ca[cg]tg[acgt][acgt]',
1024 'Tth111I' => 'gac[acgt][acgt][acgt]gtc',
1025 'Tth111II' => 'caa[ag]ca',
1026 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg',
1027 'UbaPI' => 'cgaacg',
1028 'VspI' => 'attaat',
1029 'XbaI' => 'tctaga',
1030 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg',
1031 'XhoI' => 'ctcgag',
1032 'XhoII' => '[ag]gatc[ct]',
1033 'XmaIII' => 'cggccg',
1034 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc'
1037 return %enzymes;