tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Restriction / IO / base.pm
bloba917b6f40f83d51833ad74e270acb9a337131ee5
1 # $Id$
3 # BioPerl module for Bio::Restriction::IO::base
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Rob Edwards <redwards@utmem.edu>
9 # Copyright Rob Edwards
11 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
16 =head1 NAME
18 Bio::Restriction::IO::base - base enzyme set
20 =head1 SYNOPSIS
22 Do not use this module directly. Use it via the Bio::Restriction::IO class.
24 =head1 DESCRIPTION
27 This class defines some base methods for restriction enzyme input and
28 at the same time gives a base list of common enzymes.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to the
36 Bioperl mailing lists. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41 =head2 Support
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 responsive experts will be able to look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
52 =head2 Reporting Bugs
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 of the bugs and their resolution. Bug reports can be submitted via the
56 web:
58 http://bugzilla.open-bio.org/
60 =head1 AUTHOR
62 Rob Edwards, redwards@utmem.edu
64 =head1 CONTRIBUTORS
66 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
67 Mark A. Jensen, maj-at-fortinbras-dot-us
69 =head1 APPENDIX
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
74 =cut
76 # Let the code begin...
78 package Bio::Restriction::IO::base;
80 use strict;
82 use Bio::Restriction::Enzyme;
83 use Bio::Restriction::EnzymeCollection;
84 use Bio::Restriction::Enzyme::MultiCut;
85 use Bio::Restriction::Enzyme::MultiSite;
87 use base qw(Bio::Restriction::IO);
# Byte offset of the start of the DATA section; read() caches it here on
# first use so the built-in enzyme list can be re-read on later calls.
my $offset; # class variable

# Constructor.  Accepts either a class name or an existing object as the
# invocant, blesses an empty hash into that class and passes every
# argument on to _initialize().
sub new {
    my ($invocant, @args) = @_;
    my $class = ref($invocant) || $invocant;
    my $self  = bless {}, $class;
    $self->_initialize(@args);
    return $self;
}
# Formats that may be combined with -current.  Each key is a format name
# (or numeric format code) accepted in -format; the value is the file-name
# stem that _initialize() joins with the REBASE version string to build
# the download URL.
#
# Deliberately disabled, because these formats do not work with 'current':
#   itype2  : 'itype2', '8'
#   bairoch : 'bairoch', '19', 'macvector', 'vectorNTI'
my %FILE_FORMAT = (
    'withrefm'  => 'withrefm',
    '31'        => 'withrefm',
    'neo'       => 'neos',
    'prototype' => 'proto',
);
# Object initializer, called from new().
#
# Named arguments (extracted with _rearrange): -current, -url, -file,
# -fh, -format and -verbose.  When both -current and -format are given,
# the REBASE version string is read from the VERSION file on the NEB ftp
# server and a matching -url argument is appended before the arguments
# are handed to the parent class initializer.
#
# Throws if -current is combined with -url, -file or -fh, or if the
# requested format has no entry in %FILE_FORMAT.
sub _initialize {
    my ($self, @args) = @_;
    my ($current, $url, $file, $fh, $format, $verbose) =
        $self->_rearrange([qw(CURRENT URL FILE FH FORMAT VERBOSE)], @args);

    # Default the verbosity to 0.  This used to be the bare expression
    # "$verbose || 0;", which evaluated the default but never stored it.
    $verbose ||= 0;
    $self->verbose($verbose);

    if ($current && $format) {
        $self->throw("Can't use -current with file, fh, or url set")
            if ($url || $file || $fh);
        $self->throw("Format $format not retrievable using 'current'")
            if (!exists $FILE_FORMAT{$format});

        # The first line of the VERSION file is the suffix of the
        # current release's data files.
        my $io = $self->new(-url => 'ftp://ftp.neb.com/pub/rebase/VERSION');
        chomp (my $version = $io->_readline);
        push @args, (-url => "ftp://ftp.neb.com/pub/rebase/$FILE_FORMAT{$format}.$version");
    }

    $self->_companies;
    return unless $self->SUPER::_initialize(@args);
}
=head2 read

 Title   : read
 Usage   : $renzs = $stream->read
 Function: reads all the restriction enzymes from the built-in list
           stored in the DATA section of this module
 Returns : a Bio::Restriction::EnzymeCollection object
 Args    : none

=cut

sub read {
    my $self = shift;

    my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);

    # Remember where the DATA section starts on the first call and rewind
    # to that position every time, so read() can be called repeatedly.
    seek DATA, ($offset ||= tell DATA), 0;

    # Read into a lexical: a bare "while (<DATA>)" assigns to the global
    # $_ without localizing it and would clobber the caller's $_.
    while (my $line = <DATA>) {
        chomp $line;
        next if $line =~ /^\s*$/;

        # Each record is "name site cut"; split ' ' also tolerates
        # leading whitespace on the line.
        my ($name, $site, $cut) = split ' ', $line;
        my $re = Bio::Restriction::Enzyme->new(-name => $name,
                                               -site => $site,
                                               -cut  => $cut);
        $renzs->enzymes($re);
    }
    return $renzs;
}
=head2 _xln_sub

 Title   : _xln_sub
 Function: Translates withrefm coords to Bio::Restriction coords
 Args    : Bio::Restriction::Enzyme object, scalar integer (cut posn)
 Returns : the cut position, unchanged
 Note    : Used internally; pass as a coderef to the B:R::Enzyme
           constructor
 Note    : It is convenient for each format module to have its own
           version of this; not currently demanded by the interface.

=cut

sub _xln_sub { # for base.pm, a no-op
    # The enzyme argument is accepted but not needed here.
    my (undef, $cut_position) = @_;
    return $cut_position;
}
=head2 write

 Title   : write
 Usage   : $stream->write($renzs)
 Function: prints restriction enzymes, one per line, as tab separated
           name, site string and cut position. Output goes through
           printf to the currently selected output handle. Enzymes of
           a collection are printed sorted by name.
 Returns : 1
 Args    : one or more Bio::Restriction::Enzyme
           or Bio::Restriction::EnzymeCollection objects; objects of
           any other class are skipped

=cut

sub write {
    my $self = shift;

    foreach my $item (@_) {
        # Explicit loops instead of map in void context.
        if ($item->isa('Bio::Restriction::EnzymeCollection')) {
            foreach my $enz (sort { $a->name cmp $b->name } $item->each_enzyme) {
                printf "%s\t%s\t%s\n", $enz->name, $enz->string, $enz->cut;
            }
        }
        if ($item->isa('Bio::Restriction::Enzyme')) {
            printf "%s\t%s\t%s\n", $item->name, $item->string, $item->cut;
        }
    }

    # A sub whose last statement is a loop has an unspecified return
    # value, so return the success value explicitly.
    return 1;
}
=head2 verify_prototype

 Title    : verify_prototype
 Purpose  : checks enzyme against current prototype list (retrieved remotely)
 Returns  : returns TRUE if enzyme is prototype
 Argument : Bio::Restriction::EnzymeI
 Comments : This is an auxiliary method to retrieve and check an enzyme
            as a prototype. On first use it retrieves the current list
            and keeps it in a file-level variable shared by all
            callers, then uses it to check the enzyme and sets its
            is_prototype() to true or false. Use as follows:

              my $col = $io->read;
              for my $enz ($col->each_enzyme) {
                  print $enz->name.":".$enz->site."\n";
                  print "\t".$io->verify_prototype($enz)."\n";
              }

=cut

# Prototype collection, fetched once and then reused.
my $protodb;

sub verify_prototype {
    my ($self, $enz) = @_;

    unless ($enz && ref $enz && $enz->isa("Bio::Restriction::EnzymeI")) {
        $self->throw("Must pass a Bio::Restriction::EnzymeI");
    }

    # Fetch the prototype list only when no earlier call has done so.
    unless (defined $protodb) {
        $protodb = Bio::Restriction::IO->new(-format  => 'prototype',
                                             -current => 1)->read;
    }

    my $listed = $protodb->get_enzyme($enz->name);
    $enz->is_prototype($listed ? 1 : 0);
    return $enz->is_prototype;
}
239 =head2 Common REBASE parsing methods
241 The rest of the methods in this file are to be used by other REBASE
242 parsers. They are not to be used outside subclasses of this base
243 class. (They are 'protected' in the sense the word is used in Java.)
245 =cut
=head2 _cuts_from_site

 Title   : _cuts_from_site
 Usage   : ($site, $cut, $comp_cut) = $self->_cuts_from_site('ACGCGT(4/5)');
 Function: Separates cut positions from a single site string.
           Does nothing to site if it does not have the cut string
 Returns : array of site_string, forward_cut_position, reverse_cut_position
           (both positions are undef when the site has no cut string)
 Args    : recognition site string
 Note    : Not used in withrefm refactor/maj

=cut

sub _cuts_from_site {
    my ($self, $site) = @_;

    # Pick up the "(forward/reverse)" pair, if there is one.
    my ($forward_cut, $reverse_cut);
    if ($site =~ /\((-?\d+)\/(-?\d+)\)/) {
        ($forward_cut, $reverse_cut) = ($1, $2);
    }

    # Drop the trailing parenthesised part from the local copy of the site.
    $site =~ s/\(.*\)$//;

    return ($site, $forward_cut, $reverse_cut);
}
=head2 _meth

 Title   : _meth
 Usage   : ($pos, $meth) = $self->_meth($re, '2(5)');
 Function: Separates methylation position and code from a string.
           Adjusts the position depending on enzyme site length
           and symmetry: for a non-palindromic enzyme a non-zero
           position has the site length added to it. A position
           given as '?' is returned as 0.
 Returns : array of position and methylation code, or an empty list
           if the string is not in the 'position(code)' format
 Args    : 1. reference to Enzyme object
           2. methylation description string

=cut

sub _meth {
    my ($self, $re, $meth) = @_;

    # Take the captures straight from the match.  Reading $1/$2 after an
    # unchecked match would return leftovers from some earlier,
    # unrelated match whenever this one fails.
    my ($pos, $m);
    ($pos, $m) = $meth =~ /(\S+)\((\d+)\)/ if defined $meth;

    unless (defined $m) {
        # This warning used to sit after the return and never fired.
        $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
        return;
    }

    $pos = 0 if $pos eq '?';
    $pos = $re->seq->length + $pos if $pos and ! $re->palindromic;
    return ($pos, $m);
}
=head2 _coordinate_shift_to_cut

 Title   : _coordinate_shift_to_cut
 Usage   : $cut = $self->_coordinate_shift_to_cut($oricut, $site_length);
 Function: Adjust cut position coordinates to start from the
           first nucleotides of site
 Returns : Cut position in correct coordinates
 Args    : 1. Original cut position
           2. Length of the recognition site
 Note    : Not used in withrefm.pm refactor/maj

=cut

sub _coordinate_shift_to_cut {
    my $self = shift;
    my ($original_cut, $length_of_site) = @_;

    # The shifted coordinate is the sum of the two values.
    my $shifted = $original_cut + $length_of_site;
    return $shifted;
}
=head2 _make_multisites

 Title   : _make_multisites
 Usage   : $self->_make_multisites($first_enzyme, \@sites, \@mets, $xln_sub)
 Function: Bless a Bio::Restriction::Enzyme into
           Bio::Restriction::Enzyme::MultiSite and build one further
           MultiSite enzyme for each alternative site. All the
           enzymes are linked to each other through others().
 Returns : 1; the first enzyme is edited in place
 Args    : 1. a Bio::Restriction::Enzyme
           2. reference to an array of recognition site strings
           3. reference to an array of methylation code strings, optional
           4. code reference handed to each new enzyme as -xln_sub,
              optional

=cut

# removed the enzyme collection from arg list /maj

sub _make_multisites {
    my ($self, $re, $sites, $meths, $xln_sub) = @_;

    bless $re, 'Bio::Restriction::Enzyme::MultiSite';

    # One new enzyme per site string.  Sites and methylation strings are
    # paired by index.
    for my $i (0 .. $#{$sites}) {
        my $site = $sites->[$i];

        # A site string is the recognition sequence with an optional
        # "(x/y)" group before it and another after it.
        my ($precut, $recog, $postcut) =
            ( $site =~ m/^(?:\((\w+\/\w+)\))?([\w^]+)(?:\((\w+\/\w+)\))?/ );

        # The cut strings and the coordinate translation sub are passed
        # to the constructor as they are; no cut arithmetic is done here.
        my $re2 = Bio::Restriction::Enzyme::MultiSite->new(
            -name    => $re->name,
            -site    => $recog,
            -recog   => $recog,
            -precut  => $precut,
            -postcut => $postcut,
            -xln_sub => $xln_sub
        );

        if ($meths and @$meths) {
            $re2->purge_methylation_sites;
            $re2->methylation_sites($self->_meth($re2, $meths->[$i]));
        }

        $re->others($re2);
    }

    # Give every new enzyme the first enzyme plus all of its siblings.
    foreach my $enz ($re->others) {
        $enz->others($re, grep {$_ ne $enz} $re->others);
    }

    # Explicit value: a sub ending in a loop returns something unspecified.
    return 1;
}
=head2 _make_multicuts

 Title   : _make_multicuts
 Usage   : $self->_make_multicuts($first_enzyme, $precuts)
 Function:

 Bless a Bio::Restriction::Enzyme into
 Bio::Restriction::Enzyme::MultiCut and clone it. The precut
 string is processed to replace the cut sites in the cloned
 object. The first enzyme refers to the clone through its
 others() method.

 Returns : 1; the first enzyme is edited in place
 Args    : 1. a Bio::Restriction::Enzyme
           2. precut string, e.g. '12/7'

The examples we have of multiply cutting enzymes cut only four
times. This protected method deals only with a string of two
integers separated with a slash, e.g. '12/7'. The numbers represent
the positions BEFORE the start of the recognition site, i.e. negative
positions. Numbers that already carry a minus sign are used as they
are.

=cut

# removed the enzyme collection from arg list /maj

sub _make_multicuts {
    my ($self, $re, $precut) = @_;

    bless $re, 'Bio::Restriction::Enzyme::MultiCut';

    my ($cut, $comp_cut) = $precut =~ /(-?\d+)\/(-?\d+)/;

    # The pattern accepts an optional minus sign.  Blindly prefixing
    # another one would produce "--12", which is numerically 0, so only
    # add the sign when it is missing.
    my $as_negative = sub {
        my ($n) = @_;
        return (defined $n && $n =~ /^-/) ? $n : "-$n";
    };

    my $re2 = $re->clone;

    $re2->cut($as_negative->($cut));
    $re2->complementary_cut($as_negative->($comp_cut));

    $re->others($re2);

    return 1;
}
=head2 _companies

 Title    : _companies
 Purpose  : Defines the companies that we know about
 Returns  : A reference to the hash of companies, which is also stored
            in the object under the key 'company'
 Argument : Nothing
 Comments : An internal method to define the companies that we know about
            REBASE uses a code, and this converts the code to the real name
            (e.g. A = Amersham Pharmacia Biotech)

=cut

sub _companies {
    my ($self) = @_;

    # Keyed by the one-letter code used by REBASE.
    $self->{company} = {
        'A' => 'Amersham Pharmacia Biotech (1/03)',
        'C' => 'Minotech Biotechnology (6/01)',
        'E' => 'Stratagene (1/03)',
        'F' => 'Fermentas AB (1/03)',
        'G' => 'Qbiogene (1/03)',
        'H' => 'American Allied Biochemical, Inc. (10/98)',
        'I' => 'SibEnzyme Ltd. (1/03)',
        'J' => 'Nippon Gene Co., Ltd. (6/00)',
        'K' => 'Takara Shuzo Co. Ltd. (1/03)',
        'M' => 'Roche Applied Science (1/03)',
        'N' => 'New England Biolabs (1/03)',
        'O' => 'Toyobo Biochemicals (11/98)',
        'P' => 'Megabase Research Products (5/99)',
        'Q' => 'CHIMERx (1/03)',
        'R' => 'Promega Corporation (1/03)',
        'S' => 'Sigma Chemical Corporation (1/03)',
        'U' => 'Bangalore Genei (1/03)',
        'V' => 'MRC-Holland (1/03)',
        'X' => 'EURx Ltd. (1/03)',
    };

    return $self->{company};
}
470 __DATA__
471 AasI GACNNNNNNGTC 7
472 AatI AGGCCT 3
473 AccII CGCG 2
474 AatII GACGTC 5
475 AauI TGTACA 1
476 Acc113I AGTACT 3
477 Acc16I TGCGCA 3
478 Acc65I GGTACC 1
479 AccB1I GGYRCC 1
480 AccB7I CCANNNNNTGG 7
481 AccI GTMKAC 2
482 AccIII TCCGGA 1
483 AciI CCGC 1
484 AclI AACGTT 2
485 AcsI RAATTY 1
486 AcvI CACGTG 3
487 AcyI GRCGYC 2
488 AdeI CACNNNGTG 6
489 AfaI GTAC 2
490 AfeI AGCGCT 3
491 AflI GGWCC 1
492 AflII CTTAAG 1
493 AflIII ACRYGT 1
494 AgeI ACCGGT 1
495 AhaIII TTTAAA 3
496 AhdI GACNNNNNGTC 6
497 AhlI ACTAGT 1
498 AleI CACNNNNGTG 5
499 AluI AGCT 2
500 Alw21I GWGCWC 5
501 Alw44I GTGCAC 1
502 AlwNI CAGNNNCTG 6
503 Ama87I CYCGRG 1
504 AocI CCTNAGG 2
505 Aor51HI AGCGCT 3
506 ApaBI GCANNNNNTGC 8
507 ApaI GGGCCC 5
508 ApaLI GTGCAC 1
509 ApoI RAATTY 1
510 AscI GGCGCGCC 2
511 AseI ATTAAT 2
512 AsiAI ACCGGT 1
513 AsiSI GCGATCGC 5
514 AsnI ATTAAT 2
515 Asp700I GAANNNNTTC 5
516 Asp718I GGTACC 1
517 AspEI GACNNNNNGTC 6
518 AspHI GWGCWC 5
519 AspI GACNNNGTC 4
520 AspLEI GCGC 3
521 AspS9I GGNCC 1
522 AsuC2I CCSGG 2
523 AsuI GGNCC 1
524 AsuII TTCGAA 2
525 AsuNHI GCTAGC 1
526 AvaI CYCGRG 1
527 AvaII GGWCC 1
528 AviII TGCGCA 3
529 AvrII CCTAGG 1
530 AxyI CCTNAGG 2
531 BalI TGGCCA 3
532 BamHI GGATCC 1
533 BanI GGYRCC 1
534 BanII GRGCYC 5
535 BanIII ATCGAT 2
536 BbeI GGCGCC 5
537 BbrPI CACGTG 3
538 BbuI GCATGC 5
539 Bbv12I GWGCWC 5
540 BclI TGATCA 1
541 BcnI CCSGG 2
542 BcoI CYCGRG 1
543 BcuI ACTAGT 1
544 BetI WCCGGW 1
545 BfaI CTAG 1
546 BfmI CTRYAG 1
547 BfrBI ATGCAT 3
548 BfrI CTTAAG 1
549 BfuCI GATC 0
550 BglI GCCNNNNNGGC 7
551 BglII AGATCT 1
552 BlnI CCTAGG 1
553 BloHII CTGCAG 5
554 BlpI GCTNAGC 2
555 Bme1390I CCNGG 2
556 Bme1580I GKGCMC 5
557 Bme18I GGWCC 1
558 BmtI GCTAGC 5
559 BmyI GDGCHC 5
560 BoxI GACNNNNGTC 5
561 Bpu1102I GCTNAGC 2
562 Bpu14I TTCGAA 2
563 Bsa29I ATCGAT 2
564 BsaAI YACGTR 3
565 BsaBI GATNNNNATC 5
566 BsaHI GRCGYC 2
567 BsaJI CCNNGG 1
568 BsaOI CGRYCG 4
569 BsaWI WCCGGW 1
570 Bsc4I CCNNNNNNNGG 7
571 BscBI GGNNCC 3
572 BscFI GATC 0
573 BscI ATCGAT 2
574 Bse118I RCCGGY 1
575 Bse21I CCTNAGG 2
576 Bse8I GATNNNNATC 5
577 BseAI TCCGGA 1
578 BseBI CCWGG 2
579 BseCI ATCGAT 2
580 BseDI CCNNGG 1
581 BseJI GATNNNNATC 5
582 BseLI CCNNNNNNNGG 7
583 BsePI GCGCGC 1
584 BseSI GKGCMC 5
585 BseX3I CGGCCG 1
586 Bsh1236I CGCG 2
587 Bsh1285I CGRYCG 4
588 BshFI GGCC 2
589 BshI GGCC 2
590 BshNI GGYRCC 1
591 BshTI ACCGGT 1
592 BsiBI GATNNNNATC 5
593 BsiCI TTCGAA 2
594 BsiEI CGRYCG 4
595 BsiHKAI GWGCWC 5
596 BsiHKCI CYCGRG 1
597 BsiLI CCWGG 2
598 BsiMI TCCGGA 1
599 BsiQI TGATCA 1
600 BsiSI CCGG 1
601 BsiWI CGTACG 1
602 BsiXI ATCGAT 2
603 BsiYI CCNNNNNNNGG 7
604 BsiZI GGNCC 1
605 BslI CCNNNNNNNGG 7
606 BsoBI CYCGRG 1
607 Bsp106I ATCGAT 2
608 Bsp119I TTCGAA 2
609 Bsp120I GGGCCC 1
610 Bsp1286I GDGCHC 5
611 Bsp13I TCCGGA 1
612 Bsp1407I TGTACA 1
613 Bsp143I GATC 0
614 Bsp143II RGCGCY 5
615 Bsp1720I GCTNAGC 2
616 Bsp19I CCATGG 1
617 Bsp68I TCGCGA 3
618 BspA2I CCTAGG 1
619 BspCI CGATCG 4
620 BspDI ATCGAT 2
621 BspEI TCCGGA 1
622 BspHI TCATGA 1
623 BspLI GGNNCC 3
624 BspLU11I ACATGT 1
625 BspMII TCCGGA 1
626 BspT104I TTCGAA 2
627 BspT107I GGYRCC 1
628 BspTI CTTAAG 1
629 BspXI ATCGAT 2
630 BsrBRI GATNNNNATC 5
631 BsrFI RCCGGY 1
632 BsrGI TGTACA 1
633 BssAI RCCGGY 1
634 BssECI CCNNGG 1
635 BssHI CTCGAG 1
636 BssHII GCGCGC 1
637 BssKI CCNGG 0
638 BssNAI GTATAC 3
639 BssT1I CCWWGG 1
640 Bst1107I GTATAC 3
641 Bst2UI CCWGG 2
642 Bst4CI ACNGT 3
643 Bst98I CTTAAG 1
644 BstACI GRCGYC 2
645 BstAPI GCANNNNNTGC 7
646 BstBAI YACGTR 3
647 BstBI TTCGAA 2
648 BstC8I GCNNGC 3
649 BstDEI CTNAG 1
650 BstDSI CCRYGG 1
651 BstEII GGTNACC 1
652 BstENI CCTNNNNNAGG 5
653 BstENII GATC 0
654 BstFNI CGCG 2
655 BstH2I RGCGCY 5
656 BstHHI GCGC 3
657 BstHPI GTTAAC 3
658 BstKTI GATC 3
659 BstMAI CTGCAG 5
660 BstMCI CGRYCG 4
661 BstMWI GCNNNNNNNGC 7
662 BstNI CCWGG 2
663 BstNSI RCATGY 5
664 BstOI CCWGG 2
665 BstPAI GACNNNNGTC 5
666 BstPI GGTNACC 1
667 BstSCI CCNGG 0
668 BstSFI CTRYAG 1
669 BstSNI TACGTA 3
670 BstUI CGCG 2
671 BstX2I RGATCY 1
672 BstXI CCANNNNNNTGG 8
673 BstYI RGATCY 1
674 BstZ17I GTATAC 3
675 BstZI CGGCCG 1
676 Bsu15I ATCGAT 2
677 Bsu36I CCTNAGG 2
678 BsuRI GGCC 2
679 BsuTUI ATCGAT 2
680 BtgI CCRYGG 1
681 BthCI GCNGC 4
682 Cac8I GCNNGC 3
683 CaiI CAGNNNCTG 6
684 CauII CCSGG 2
685 CciNI GCGGCCGC 2
686 CelII GCTNAGC 2
687 CfoI GCGC 3
688 Cfr10I RCCGGY 1
689 Cfr13I GGNCC 1
690 Cfr42I CCGCGG 4
691 Cfr9I CCCGGG 1
692 CfrI YGGCCR 1
693 ChaI GATC 4
694 ClaI ATCGAT 2
695 CpoI CGGWCCG 2
696 Csp45I TTCGAA 2
697 Csp6I GTAC 1
698 CspAI ACCGGT 1
699 CspI CGGWCCG 2
700 CviAII CATG 1
701 CviJI RGCY 2
702 CviRI TGCA 2
703 CviTI RGCY 2
704 CvnI CCTNAGG 2
705 DdeI CTNAG 1
706 DpnI GATC 2
707 DpnII GATC 0
708 DraI TTTAAA 3
709 DraII RGGNCCY 2
710 DraIII CACNNNGTG 6
711 DrdI GACNNNNNNGTC 7
712 DsaI CCRYGG 1
713 DseDI GACNNNNNNGTC 7
714 EaeI YGGCCR 1
715 EagI CGGCCG 1
716 Eam1105I GACNNNNNGTC 6
717 Ecl136II GAGCTC 3
718 EclHKI GACNNNNNGTC 6
719 EclXI CGGCCG 1
720 Eco105I TACGTA 3
721 Eco130I CCWWGG 1
722 Eco147I AGGCCT 3
723 Eco24I GRGCYC 5
724 Eco32I GATATC 3
725 Eco47I GGWCC 1
726 Eco47III AGCGCT 3
727 Eco52I CGGCCG 1
728 Eco72I CACGTG 3
729 Eco81I CCTNAGG 2
730 Eco88I CYCGRG 1
731 Eco91I GGTNACC 1
732 EcoHI CCSGG 0
733 EcoICRI GAGCTC 3
734 EcoNI CCTNNNNNAGG 5
735 EcoO109I RGGNCCY 2
736 EcoO65I GGTNACC 1
737 EcoRI GAATTC 1
738 EcoRII CCWGG 0
739 EcoRV GATATC 3
740 EcoT14I CCWWGG 1
741 EcoT22I ATGCAT 5
742 EcoT38I GRGCYC 5
743 EgeI GGCGCC 3
744 EheI GGCGCC 3
745 ErhI CCWWGG 1
746 EsaBC3I TCGA 2
747 EspI GCTNAGC 2
748 FatI CATG 0
749 FauNDI CATATG 2
750 FbaI TGATCA 1
751 FblI GTMKAC 2
752 FmuI GGNCC 4
753 Fnu4HI GCNGC 2
754 FnuDII CGCG 2
755 FriOI GRGCYC 5
756 FseI GGCCGGCC 6
757 Fsp4HI GCNGC 2
758 FspAI RTGCGCAY 4
759 FspI TGCGCA 3
760 FunI AGCGCT 3
761 FunII GAATTC 1
762 HaeI WGGCCW 3
763 HaeII RGCGCY 5
764 HaeIII GGCC 2
765 HapII CCGG 1
766 HgiAI GWGCWC 5
767 HgiCI GGYRCC 1
768 HgiJII GRGCYC 5
769 HhaI GCGC 3
770 Hin1I GRCGYC 2
771 Hin6I GCGC 1
772 HinP1I GCGC 1
773 HincII GTYRAC 3
774 HindI CAC 2
775 HindII GTYRAC 3
776 HindIII AAGCTT 1
777 HinfI GANTC 1
778 HpaI GTTAAC 3
779 HpaII CCGG 1
780 Hpy178III TCNNGA 2
781 Hpy188I TCNGA 3
782 Hpy188III TCNNGA 2
783 Hpy8I GTNNAC 3
784 Hpy99I CGWCG 5
785 HpyCH4I CATG 3
786 HpyCH4III ACNGT 3
787 HpyCH4IV ACGT 1
788 HpyCH4V TGCA 2
789 HpyF10VI GCNNNNNNNGC 8
790 Hsp92I GRCGYC 2
791 Hsp92II CATG 4
792 HspAI GCGC 1
793 ItaI GCNGC 2
794 KasI GGCGCC 1
795 Kpn2I TCCGGA 1
796 KpnI GGTACC 5
797 Ksp22I TGATCA 1
798 KspAI GTTAAC 3
799 KspI CCGCGG 4
800 Kzo9I GATC 0
801 LpnI RGCGCY 3
802 LspI TTCGAA 2
803 MabI ACCWGGT 1
804 MaeI CTAG 1
805 MaeII ACGT 1
806 MaeIII GTNAC 0
807 MamI GATNNNNATC 5
808 MboI GATC 0
809 McrI CGRYCG 4
810 MfeI CAATTG 1
811 MflI RGATCY 1
812 MhlI GDGCHC 5
813 MlsI TGGCCA 3
814 MluI ACGCGT 1
815 MluNI TGGCCA 3
816 Mly113I GGCGCC 2
817 Mph1103I ATGCAT 5
818 MroI TCCGGA 1
819 MroNI GCCGGC 1
820 MroXI GAANNNNTTC 5
821 MscI TGGCCA 3
822 MseI TTAA 1
823 MslI CAYNNNNRTG 5
824 Msp20I TGGCCA 3
825 MspA1I CMGCKG 3
826 MspCI CTTAAG 1
827 MspI CCGG 1
828 MspR9I CCNGG 2
829 MssI GTTTAAAC 4
830 MstI TGCGCA 3
831 MunI CAATTG 1
832 MvaI CCWGG 2
833 MvnI CGCG 2
834 MwoI GCNNNNNNNGC 7
835 NaeI GCCGGC 3
836 NarI GGCGCC 2
837 NciI CCSGG 2
838 NcoI CCATGG 1
839 NdeI CATATG 2
840 NdeII GATC 0
841 NgoAIV GCCGGC 1
842 NgoMIV GCCGGC 1
843 NheI GCTAGC 1
844 NlaIII CATG 4
845 NlaIV GGNNCC 3
846 Nli3877I CYCGRG 5
847 NmuCI GTSAC 0
848 NotI GCGGCCGC 2
849 NruGI GACNNNNNGTC 6
850 NruI TCGCGA 3
851 NsbI TGCGCA 3
852 NsiI ATGCAT 5
853 NspBII CMGCKG 3
854 NspI RCATGY 5
855 NspIII CYCGRG 1
856 NspV TTCGAA 2
857 OliI CACNNNNGTG 5
858 PacI TTAATTAA 5
859 PaeI GCATGC 5
860 PaeR7I CTCGAG 1
861 PagI TCATGA 1
862 PalI GGCC 2
863 PauI GCGCGC 1
864 PceI AGGCCT 3
865 PciI ACATGT 1
866 PdiI GCCGGC 3
867 PdmI GAANNNNTTC 5
868 Pfl23II CGTACG 1
869 PflBI CCANNNNNTGG 7
870 PflFI GACNNNGTC 4
871 PflMI CCANNNNNTGG 7
872 PfoI TCCNGGA 1
873 PinAI ACCGGT 1
874 Ple19I CGATCG 4
875 PmaCI CACGTG 3
876 PmeI GTTTAAAC 4
877 PmlI CACGTG 3
878 Ppu10I ATGCAT 1
879 PpuMI RGGWCCY 2
880 PpuXI RGGWCCY 2
881 PshAI GACNNNNGTC 5
882 PshBI ATTAAT 2
883 PsiI TTATAA 3
884 Psp03I GGWCC 4
885 Psp124BI GAGCTC 5
886 Psp1406I AACGTT 2
887 Psp5II RGGWCCY 2
888 Psp6I CCWGG 0
889 PspAI CCCGGG 1
890 PspEI GGTNACC 1
891 PspGI CCWGG 0
892 PspLI CGTACG 1
893 PspN4I GGNNCC 3
894 PspOMI GGGCCC 1
895 PspPI GGNCC 1
896 PspPPI RGGWCCY 2
897 PssI RGGNCCY 5
898 PstI CTGCAG 5
899 PsuI RGATCY 1
900 PsyI GACNNNGTC 4
901 PvuI CGATCG 4
902 PvuII CAGCTG 3
903 RcaI TCATGA 1
904 RsaI GTAC 2
905 Rsr2I CGGWCCG 2
906 RsrII CGGWCCG 2
907 SacI GAGCTC 5
908 SacII CCGCGG 4
909 SalI GTCGAC 1
910 SanDI GGGWCCC 2
911 SatI GCNGC 2
912 Sau3AI GATC 0
913 Sau96I GGNCC 1
914 SauI CCTNAGG 2
915 SbfI CCTGCAGG 6
916 ScaI AGTACT 3
917 SciI CTCGAG 3
918 ScrFI CCNGG 2
919 SdaI CCTGCAGG 6
920 SduI GDGCHC 5
921 SecI CCNNGG 1
922 SelI CGCG 0
923 SexAI ACCWGGT 1
924 SfcI CTRYAG 1
925 SfeI CTRYAG 1
926 SfiI GGCCNNNNNGGCC 8
927 SfoI GGCGCC 3
928 Sfr274I CTCGAG 1
929 Sfr303I CCGCGG 4
930 SfuI TTCGAA 2
931 SgfI GCGATCGC 5
932 SgrAI CRCCGGYG 2
933 SgrBI CCGCGG 4
934 SinI GGWCC 1
935 SlaI CTCGAG 1
936 SmaI CCCGGG 3
937 SmiI ATTTAAAT 4
938 SmiMI CAYNNNNRTG 5
939 SmlI CTYRAG 1
940 SnaBI TACGTA 3
941 SpaHI GCATGC 5
942 SpeI ACTAGT 1
943 SphI GCATGC 5
944 SplI CGTACG 1
945 SrfI GCCCGGGC 4
946 Sse232I CGCCGGCG 2
947 Sse8387I CCTGCAGG 6
948 Sse8647I AGGWCCT 2
949 Sse9I AATT 0
950 SseBI AGGCCT 3
951 SspBI TGTACA 1
952 SspI AATATT 3
953 SstI GAGCTC 5
954 SstII CCGCGG 4
955 StuI AGGCCT 3
956 StyI CCWWGG 1
957 SunI CGTACG 1
958 SwaI ATTTAAAT 4
959 TaaI ACNGT 3
960 TaiI ACGT 4
961 TaqI TCGA 1
962 TasI AATT 0
963 TatI WGTACW 1
964 TauI GCSGC 4
965 TelI GACNNNGTC 4
966 TfiI GAWTC 1
967 ThaI CGCG 2
968 TliI CTCGAG 1
969 Tru1I TTAA 1
970 Tru9I TTAA 1
971 TscI ACGT 4
972 TseI GCWGC 1
973 Tsp45I GTSAC 0
974 Tsp4CI ACNGT 3
975 Tsp509I AATT 0
976 TspEI AATT 0
977 Tth111I GACNNNGTC 4
978 TthHB8I TCGA 1
979 UnbI GGNCC 0
980 Van91I CCANNNNNTGG 7
981 Vha464I CTTAAG 1
982 VneI GTGCAC 1
983 VpaK11AI GGWCC 0
984 VpaK11BI GGWCC 1
985 VspI ATTAAT 2
986 XagI CCTNNNNNAGG 5
987 XapI RAATTY 1
988 XbaI TCTAGA 1
989 XceI RCATGY 5
990 XcmI CCANNNNNNNNNTGG 8
991 XhoI CTCGAG 1
992 XhoII RGATCY 1
993 XmaCI CCCGGG 1
994 XmaI CCCGGG 1
995 XmaIII CGGCCG 1
996 XmaJI CCTAGG 1
997 XmiI GTMKAC 2
998 XmnI GAANNNNTTC 5
999 XspI CTAG 1
1000 ZhoI ATCGAT 2
1001 ZraI GACGTC 3
1002 Zsp2I ATGCAT 5