3 # BioPerl module for Bio::Restriction::IO::base
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Rob Edwards <redwards@utmem.edu>
9 # Copyright Rob Edwards
11 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
18 Bio::Restriction::IO::base - base enzyme set
22 Do not use this module directly. Use it via the Bio::Restriction::IO class.
27 This class defines some base methods for restriction enzyme input and
28 at the same time gives a base list of common enzymes.
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to the
36 Bioperl mailing lists. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 responsive experts will be able to look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 of the bugs and their resolution. Bug reports can be submitted via the
58 http://bugzilla.open-bio.org/
62 Rob Edwards, redwards@utmem.edu
66 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
67 Mark A. Jensen, maj-at-fortinbras-dot-us
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
76 # Let the code begin...
78 package Bio
::Restriction
::IO
::base
;
82 use Bio
::Restriction
::Enzyme
;
83 use Bio
::Restriction
::EnzymeCollection
;
84 use Bio
::Restriction
::Enzyme
::MultiCut
;
85 use Bio
::Restriction
::Enzyme
::MultiSite
;
87 use base
qw(Bio::Restriction::IO);
89 my $offset; # class variable
92 my($class, @args) = @_;
93 $class = ref $class ?
ref $class : $class;
94 my $self = bless {}, $class;
95 $self->_initialize(@args);
103 #'itype2' => 'itype2', # itype2 format doesn't work with 'current'
105 'withrefm' => 'withrefm',
107 #'bairoch' => 'bairoch', # bairoch format doesn't work with 'current'
109 #'macvector' => 'bairoch',
110 #'vectorNTI' => 'bairoch',
112 'prototype' => 'proto'
116 my($self,@args) = @_;
117 my ($current, $url, $file, $fh, $format, $verbose) =
118 $self->_rearrange([qw(CURRENT URL FILE FH FORMAT VERBOSE)],@args);
120 $self->verbose($verbose);
121 if ($current && $format) {
122 $self->throw("Can't use -current with file, fh, or url set") if ($url || $file || $fh);
123 $self->throw("Format $format not retrievable using 'current'") if (!exists $FILE_FORMAT{$format});
124 my $io = $self->new(-url
=> 'ftp://ftp.neb.com/pub/rebase/VERSION');
125 chomp (my $version = $io->_readline);
126 push @args, (-url
=> "ftp://ftp.neb.com/pub/rebase/$FILE_FORMAT{$format}.$version");
130 return unless $self->SUPER::_initialize
(@args);
138 Usage : $renzs = $stream->read
139 Function: reads all the restriction enzymes from the stream
140 Returns : a Bio::Restriction::EnzymeCollection object
148 my $renzs = Bio
::Restriction
::EnzymeCollection
->new(-empty
=> 1);
149 seek DATA
,($offset||=tell DATA
), 0;
153 my ($name, $site, $cut) = split /\s+/;
154 my $re = Bio
::Restriction
::Enzyme
->new(-name
=> $name,
157 $renzs->enzymes($re);
165 Function: Translates withrefm coords to Bio::Restriction coords
166 Args : Bio::Restriction::Enzyme object, scalar integer (cut posn)
167 Note : Used internally; pass as a coderef to the B:R::Enzyme
169 Note : It is convenient for each format module to have its own
170 version of this; not currently demanded by the interface.
173 sub _xln_sub
{ # for base.pm, a no-op
182 Usage : $stream->write($renzs)
183 Function: writes restriction enzymes into the stream
184 Returns : 1 for success and 0 for error
185 Args : a Bio::Restriction::Enzyme
186 or a Bio::Restriction::EnzymeCollection object
193 map { printf "%s\t%s\t%s\n", $_->name, $_->string, $_->cut
194 } sort {$a->name cmp $b->name} $_->each_enzyme
195 if $_->isa('Bio::Restriction::EnzymeCollection');
196 printf "%s\t%s\t%s\n", $_->name, $_->string, $_->cut
197 if $_->isa('Bio::Restriction::Enzyme');
201 =head2 verify_prototype
203 Title : verify_prototype
204 Purpose : checks enzyme against current prototype list (retrieved remotely)
205 Returns : returns TRUE if enzyme is prototype
206 Argument : Bio::Restriction::EnzymeI
207 Comments : This is an auxiliary method to retrieve and check an enzyme
208 as a prototype. It retrieves the current list, stores it
209 as a singleton instance, then uses it to check the prototype
210 and modify is_prototype() to true or false. Use as follows:
213 for my $enz ($col->each_enzyme) {
214 print $enz->name.":".$enz->site."\n";
215 print "\t".$io->verify_prototype($enz)."\n";
222 sub verify_prototype
{
223 my ($self, $enz) = @_;
224 $self->throw("Must pass a Bio::Restriction::EnzymeI") unless
225 $enz && ref $enz && $enz->isa("Bio::Restriction::EnzymeI");
226 if (!defined $protodb) {
227 my $io = Bio
::Restriction
::IO
->new(-format
=> 'prototype',
229 $protodb = $io->read;
231 if ($protodb->get_enzyme($enz->name)) {
232 $enz->is_prototype(1);
234 $enz->is_prototype(0);
239 =head2 Common REBASE parsing methods
241 The rest of the methods in this file are to be used by other REBASE
242 parsers. They are not to be used outside subclasses of this base
243 class. (They are 'protected' in the sense the word is used in Java.)
247 =head2 _cuts_from_site
249 Title : _cuts_from_site
250 Usage : ($site, $cut, $comp_cut) = _cuts_from_site('ACGCGT(4/5)');
251 Function: Separates cut positions from a single site string.
252 Does nothing to site if it does not have the cut string
253 Returns : array of site_string, forward_cut_position, reverse_cut_position
254 Args : recognition site string
255 Note : Not used in withrefm refactor/maj
# Split a site string such as 'ACGCGT(4/5)' into the bare site and its two
# cut positions. Called as a method; returns the list ($site, $cut, $comp_cut).
259 sub _cuts_from_site
{
260 my ($self, $site) = @_;
# Capture the two (optionally negative) integers of a "(N/M)" group. When the
# match fails the list assignment leaves both $cut and $comp_cut undefined.
261 my ($cut, $comp_cut) = $site =~ /\((-?\d+)\/(-?\d
+)\
)/;
# Remove everything from the first '(' through a closing ')' at the end of
# the string, leaving $site untouched when no such suffix is present.
262 $site =~ s/\(.*\)$//;
263 return ($site, $cut, $comp_cut);
270 Usage : ($pos, $meth) = $self->_meth('2(5)');
271 Function: Separates methylation position and code from a string.
272 Adjusts the position depending on enzyme site length
274 Returns : array of position and methylation code
275 Args : 1. reference to Enzyme object
276 2. methylation description string
281 my ($self, $re, $meth) = @_;
283 $meth =~ /(\S+)\((\d+)\)/;
284 my ($pos, $m) = ($1, $2);
285 $pos = 0 if $pos eq '?';
286 $pos = $re->seq->length + $pos if $pos and ! $re->palindromic;
289 $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
293 =head2 _coordinate_shift_to_cut
295 Title : _coordinate_shift_to_cut
296 Usage : $cut = $self->_coordinate_shift_to_cut($oricut, $site_length);
297 Function: Adjust cut position coordinates to start from the
298 first nucleotides of site
299 Returns : Cut position in correct coordinates
300 Args : 1. Original cut position
301 2. Length of the recognition site
302 Note : Not used in withrefm.pm refactor/maj
# Shift a cut position by the recognition-site length.
# Called as a method with ($cut, $site_length); returns their sum.
306 sub _coordinate_shift_to_cut
{
307 my ($self, $cut, $site_length) = @_;
308 return $cut + $site_length;
312 =head2 _make_multisites
314 Title : _make_multisites
315 Usage : $self->_make_multisites($first_enzyme, \@sites, \@mets)
316 Function: Bless a Bio::Restriction::Enzyme into
317 Bio::Restriction::Enzyme::MultiSite and clone it as many
318 times as there are alternative sites.
319 Returns : nothing, does in place editing
320 Args : 1. a Bio::Restriction::Enzyme
321 2. reference to an array of recognition site strings
322 3. reference to an array of methylation code strings, optional
326 # removed the enzyme collection from arg list /maj
328 sub _make_multisites
{
329 my ($self, $re, $sites, $meths, $xln_sub) = @_;
331 bless $re, 'Bio::Restriction::Enzyme::MultiSite';
334 while ($count < scalar @
{$sites}) {
335 # this should probably be refactored to use the constructor
336 # too, rather than the clone/accessor method /maj
337 # my $re2 = $re->clone;
340 my $site = @
{$sites}[$count];
341 my ($precut, $recog, $postcut) = ( $site =~ m/^(?:\((\w+\/\w
+)\
))?
([\w
^]+)(?
:\
((\w
+\
/\w+)\))?/ );
343 # set the site attribute
344 # $re2->site($recog);
346 # set the recog attribute (which will make the regexp transformation
348 # $re2->recog($recog);
349 # $recog = $re2->string;
351 # no warnings; # avoid 'uninitialized value' warning against $postcut
352 # my ($cut, $comp_cut) = ( $postcut =~ /(-?\d+)\/(-?\d+)/ );
355 # note the following hard codes the coordinate transformation
356 # used for rebase/itype2 : this method will break on the
359 # $re2->cut($cut + length $recog);
360 # $re2->complementary_cut($comp_cut + length $recog);
363 my $re2 = Bio
::Restriction
::Enzyme
::MultiSite
->new(
368 -postcut
=> $postcut,
372 if ($meths and @
$meths) {
373 $re2->purge_methylation_sites;
374 $re2->methylation_sites($self->_meth($re2, @
{$meths}[$count]));
381 foreach my $enz ($re->others) {
382 $enz->others($re, grep {$_ ne $enz} $re->others);
388 =head2 _make_multicuts
390 Title : _make_multicuts
391 Usage : $self->_make_multicuts($first_enzyme, $precuts)
394 Bless a Bio::Restriction::Enzyme into
395 Bio::Restriction::Enzyme::MultiCut and clone it. The precut
396 string is processed to replace the cut sites in the cloned
397 object. Both objects refer to each other through others() method.
399 Returns : nothing, does in place editing
400 Args : 1. a Bio::Restriction::Enzyme
401 2. precut string, e.g. '12/7'
404 The examples we have of multiply cutting enzymes cut only four
405 times. This protected method deals only with a string of two
406 integers separated with a slash, e.g. '12/7'. The numbers represent the positions
407 BEFORE the start of the recognition site, i.e. negative positions.
411 # removed the enzyme collection from arg list /maj
413 sub _make_multicuts
{
414 my ($self, $re, $precut) = @_;
416 bless $re, 'Bio::Restriction::Enzyme::MultiCut';
418 my ($cut, $comp_cut) = $precut =~ /(-?\d+)\/(-?\d
+)/;
420 my $re2 = $re->clone;
423 $re2->complementary_cut("-$comp_cut");
433 Purpose : Defines the companies that we know about
436 Comments : An internal method to define the companies that we know about
437 REBASE uses a code, and this converts the code to the real name
438 (e.g. A = Amersham Pharmacia Biotech)
443 # this is just so it is easy to set up the codes that REBASE uses
446 'A'=>'Amersham Pharmacia Biotech (1/03)',
447 'C'=>'Minotech Biotechnology (6/01)',
448 'E'=>'Stratagene (1/03)',
449 'F'=>'Fermentas AB (1/03)',
450 'G'=>'Qbiogene (1/03)',
451 'H'=>'American Allied Biochemical, Inc. (10/98)',
452 'I'=>'SibEnzyme Ltd. (1/03)',
453 'J'=>'Nippon Gene Co., Ltd. (6/00)',
454 'K'=>'Takara Shuzo Co. Ltd. (1/03)',
455 'M'=>'Roche Applied Science (1/03)',
456 'N'=>'New England Biolabs (1/03)',
457 'O'=>'Toyobo Biochemicals (11/98)',
458 'P'=>'Megabase Research Products (5/99)',
459 'Q'=>'CHIMERx (1/03)',
460 'R'=>'Promega Corporation (1/03)',
461 'S'=>'Sigma Chemical Corporation (1/03)',
462 'U'=>'Bangalore Genei (1/03)',
463 'V'=>'MRC-Holland (1/03)',
464 'X'=>'EURx Ltd. (1/03)');
465 $self->{company
}=\
%companies;
716 Eam1105I GACNNNNNGTC
6
789 HpyF10VI GCNNNNNNNGC
8
990 XcmI CCANNNNNNNNNTGG
8