bump rc version
[bioperl-live.git] / Bio / Restriction / EnzymeCollection.pm
blob1344855c3e0779ecdfffe117b4f16157c1e0089d
1 #-------------------------------------------------------------------------------
3 # BioPerl module Bio::Restriction::EnzymeCollection
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Rob Edwards <redwards@utmem.edu>
9 # You may distribute this module under the same terms as perl itself
10 #-------------------------------------------------------------------------------
12 ## POD Documentation:
14 =head1 NAME
16 Bio::Restriction::EnzymeCollection - Set of restriction endonucleases
18 =head1 SYNOPSIS
20 use Bio::Restriction::EnzymeCollection;
22 # Create a collection with the default enzymes.
23 my $default_collection = Bio::Restriction::EnzymeCollection->new();
25 # Or create a collection from a REBASE 'withrefm' file obtained from
26 # ftp://ftp.neb.com/pub/rebase/. (See Bio::Restriction::IO for more
27 # information.)
28 my $rebase = Bio::Restriction::IO->new(
29 -file => 'withrefm.610',
30 -format => 'withrefm' );
31 my $rebase_collection = $rebase->read();
33 # Or create an empty collection and set the enzymes later. See
34 # 'CUSTOM COLLECTIONS' below for more information.
35 my $empty_collection =
36 Bio::Restriction::EnzymeCollection->new( -empty => 1 );
38 # Get an array of Bio::Restriction::Enzyme objects from the collection.
39 my @enzymes = $default_collection->each_enzyme();
41 # Get a Bio::Restriction::Enzyme object for a particular enzyme by name.
42 my $enz = $default_collection->get_enzyme( 'EcoRI' );
44 # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
45 # that have the equivalent of 6-bp recognition sequences.
46 my $six_cutters = $default_collection->cutters( 6 );
48 # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
49 # that are rare cutters.
50 my $rare_cutters = $default_collection->cutters( -start => 6, -end => 8 );
52 # Get a Bio::Restriction::EnzymeCollection object that contains enzymes
53 # that generate blunt ends:
54 my $blunt_cutters = $default_collection->blunt_enzymes();
56 # See 'CUSTOM COLLECTIONS' below for an example of creating a
57 # Bio::Restriction::EnzymeCollection object with a specified subset of
58 # enzymes using methods provided by the Bio::Restriction::Enzyme class.
60 =head1 DESCRIPTION
62 Bio::Restriction::EnzymeCollection represents a collection of
63 restriction enzymes.
65 If you create a new collection directly rather than from a REBASE
66 file using L<Bio::Restriction::IO>, it will be populated by a
67 default set of enzymes with site and cut information
68 only.
70 Use L<Bio::Restriction::Analysis> to figure out which enzymes are
71 available and where they cut your sequence.
73 =head1 CUSTOM COLLECTIONS
75 Note that the underlying L<Bio::Restriction::Enzyme> objects have a rich
76 variety of methods that allow more complicated selections than the methods
77 that are defined by Bio::Restriction::EnzymeCollection.
79 For example, the way to create a custom collection of Type II enzymes
80 is as follows:
82 my $complete_collection =
83 Bio::Restriction::EnzymeCollection->new();
84 my $type_ii_collection =
85 Bio::Restriction::EnzymeCollection->new( -empty => 1 );
86 $type_ii_collection->enzymes(
87 grep { $_->type() eq 'II' } $complete_collection->each_enzyme() );
89 =head1 SEE ALSO
91 L<Bio::Restriction::IO> - read in enzymes from REBASE files
93 L<Bio::Restriction::Analysis> - figure out what enzymes cut a sequence
95 L<Bio::Restriction::Enzyme> - define a single restriction enzyme
97 =head1 FEEDBACK
99 =head2 Mailing Lists
101 User feedback is an integral part of the evolution of this and other
102 Bioperl modules. Send your comments and suggestions preferably to one
103 of the Bioperl mailing lists. Your participation is much appreciated.
105 bioperl-l@bioperl.org - General discussion
106 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
108 =head2 Support
110 Please direct usage questions or support issues to the mailing list:
112 I<bioperl-l@bioperl.org>
114 rather than to the module maintainer directly. Many experienced and
115 responsive experts will be able to look at the problem and quickly
116 address it. Please include a thorough description of the problem
117 with code and data examples if at all possible.
119 =head2 Reporting Bugs
121 Report bugs to the Bioperl bug tracking system to help us keep track
122 of the bugs and their resolution. Bug reports can be submitted via the
123 web:
125 https://github.com/bioperl/bioperl-live/issues
127 =head1 AUTHOR
129 Rob Edwards, redwards@utmem.edu
131 =head1 CONTRIBUTORS
133 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
135 =head1 COPYRIGHT
137 Copyright (c) 2003 Rob Edwards.
139 Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
140 Rights Reserved.
142 This module is free software; you can redistribute it and/or modify it
143 under the same terms as Perl itself.
145 =head1 APPENDIX
147 Methods beginning with a leading underscore are considered private and
148 are intended for internal use by this module. They are not considered
149 part of the public interface and are described here for documentation
150 purposes only.
152 =cut
155 package Bio::Restriction::EnzymeCollection;
156 use strict;
158 use Bio::Restriction::Enzyme;
159 use Bio::Restriction::IO;
161 use Data::Dumper;
163 use base qw(Bio::Root::Root);
165 =head2 new
167 Title : new
168 Function : Initializes the Restriction::EnzymeCollection object
169 Returns : The Restriction::EnzymeCollection object
170 Arguments : optional named parameter -empty
172 Set parameter -empty to true if you do NOT want the collection to be
173 populated by the default set of prototype type II enzymes.
175 Alternatively, pass an array of enzymes to -enzymes parameter.
177 =cut
# Constructor.
#
# Named arguments:
#   -empty   : when true, return a collection that holds no enzymes.
#   -enzymes : array reference of enzyme objects used to fill the collection.
#
# With neither argument, the default set of enzymes is read through
# Bio::Restriction::IO and the result of that read() call is returned
# (not the object built here).
#
# Throws if -enzymes is given but is not an array reference.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($empty, $enzymes) = $self->_rearrange([qw(EMPTY ENZYMES)], @args);

    # Both containers always exist, even for an empty collection:
    # an ordered list of enzymes and a name => enzyme lookup.
    $self->{'_all_enzymes'} = [];
    $self->{'_enzymes'}     = {};

    # -empty takes precedence over -enzymes.
    return $self if $empty;

    if (!$enzymes) {
        # the default set of enzymes
        return Bio::Restriction::IO->new(-verbose => $self->verbose)->read;
    }

    # as advertised in pod/maj
    ref($enzymes) eq 'ARRAY'
        or $self->throw( "Arg to -enzymes must be an arrayref to Bio::Restriction::Enzyme objects");

    $self->enzymes(@$enzymes);
    return $self;
}
208 =head2 Manipulate the enzymes within the collection
210 =cut
212 =head2 enzymes
214 Title : enzymes
215 Function : add/get method for enzymes and enzyme collections
216 Returns : object itself
217 Arguments : array of Bio::Restriction::Enzyme and
218 Bio::Restriction::EnzymeCollection objects
220 =cut
# Add enzymes to the collection.
#
# Arguments : a list whose elements are objects that are either
#             Bio::Restriction::EnzymeI implementations (added directly and
#             indexed by their name) or Bio::Restriction::EnzymeCollection
#             objects (all of their enzymes are added).
# Returns   : the collection itself, so calls can be chained.
#
# Anything else (undef, plain scalars, unblessed references, objects of
# other classes) is reported through $self->warn and skipped. Plain values
# used to be echoed to STDOUT by a leftover debugging print, and unblessed
# references used to die inside ->isa; both are now handled like any other
# unsupported argument.
sub enzymes {
    my ($self, @enzs) = @_;
    use Scalar::Util ();

    foreach my $e (@enzs) {
        if ( !Scalar::Util::blessed($e) ) {
            # Not an object at all: ->isa cannot be called on it.
            my $what = !defined $e ? 'an undefined value'
                     : ref $e      ? 'an unblessed ' . ref($e) . ' reference'
                     :               "the plain value '$e'";
            $self->warn("EnzymeCollection can not deal with $what");
        }
        elsif ($e->isa('Bio::Restriction::EnzymeI')) {
            # Keep insertion order in the list; the hash gives lookup by
            # name (a later enzyme with the same name replaces the entry).
            push @{ $self->{'_all_enzymes'} }, $e;
            $self->{'_enzymes'}->{ $e->name } = $e;
        }
        elsif ($e->isa('Bio::Restriction::EnzymeCollection')) {
            # Flatten nested collections into this one.
            $self->enzymes($e->each_enzyme);
        }
        else {
            $self->warn("EnzymeCollection can not deal with ".
                        ref($e)." objects");
        }
    }
    return $self;
}
244 # method to remove duplicates?
247 =head2 each_enzyme
249 Title : each_enzyme
250 Function : get an array of enzymes
251 Returns : array of Bio::Restriction::Enzyme objects
252 Arguments : -
254 =cut
# Return every enzyme stored in the collection, in the order in which they
# were added (in scalar context this yields the number of enzymes).
sub each_enzyme {
    my ($self) = @_;
    my $all = $self->{'_all_enzymes'};
    return @$all;
}
261 =head2 get_enzyme
263 Title : get_enzyme
264 Function : Gets a Bio::Restriction::Enzyme object for the enzyme name
265 Returns : A Bio::Restriction::Enzyme object or undef
266 Arguments : An enzyme name that is in the collection
268 =cut
# Look up a single enzyme by name.
# Returns the stored enzyme, or undef when no enzyme of that name is held.
sub get_enzyme {
    my $self = shift;
    my $name = shift;
    return $self->{'_enzymes'}{$name};
}
276 =head2 available_list
278 Title : available_list
279 Function : Gets a list of all the enzymes that we know about
280 Returns : A sorted list of the names of all enzymes in the
281 collection (an empty list if there are none)
282 Arguments : Nothing
283 Comments : Note, I maintain this for backwards compatibility,
284 but I don't like the name as it is very ambiguous
286 =cut
# Return the names of all enzymes in the collection as a sorted list
# (string sort). In scalar context the number of names is returned.
# Any extra argument is ignored.
sub available_list {
    my $self  = shift;
    my @names = sort keys %{ $self->{'_enzymes'} };
    return @names;
}
294 =head2 longest_cutter
296 Title : longest_cutter
297 Function : Gets the enzyme with the longest recognition site
298 Returns : A Bio::Restriction::Enzyme object
299 Arguments : Nothing
300 Comments : Note, this is used by Bio::Restriction::Analysis
301 to figure out what to do with circular sequences
303 =cut
# Find the enzyme with the longest recognition site.
#
# Returns the first enzyme (in collection order) whose recognition_length
# is the largest seen. If the collection is empty, or no enzyme reports a
# length above 0, the placeholder string '.' is returned instead of an
# object.
sub longest_cutter {
    my $self = shift;
    my ($best_len, $best_enz) = (0, '.');
    for my $candidate ($self->each_enzyme) {
        my $site_len = $candidate->recognition_length;
        # Strictly greater: on ties the earliest enzyme is kept.
        next unless $site_len > $best_len;
        ($best_len, $best_enz) = ($site_len, $candidate);
    }
    return $best_enz;
}
315 =head2 Filter enzymes
317 =cut
319 =head2 blunt_enzymes
321 Title : blunt_enzymes
322 Function : Gets a list of all the enzymes that are blunt cutters
323 Returns : A new Bio::Restriction::EnzymeCollection holding the
324 enzymes that are blunt cutters (empty if there are none)
325 Arguments : Nothing
326 Comments :
328 This is an example of the kind of filtering better done by the scripts
329 using the rich collection of methods in Bio::Restriction::Enzyme.
331 =cut
# Build a new, separate collection containing only the enzymes of this
# collection whose overhang is reported as 'blunt'.
# Returns that new Bio::Restriction::EnzymeCollection.
sub blunt_enzymes {
    my ($self) = @_;
    my $subset = Bio::Restriction::EnzymeCollection->new(-empty => 1);
    my @blunt  = grep { $_->overhang eq 'blunt' } $self->each_enzyme;
    return $subset->enzymes(@blunt);
}
340 =head2 cutters
342 Title : cutters
343 Function : Gets a list of all the enzymes that recognize a
344 certain size, e.g. 6-cutters
345 Usage : $cutters = $collection->cutters(6);
346 Returns : A new Bio::Restriction::EnzymeCollection holding the
347 enzymes that are x cutters (empty if there are none)
348 Arguments : A positive number for the size of cutters to return
350 A range: (-start => 6, -end => 8,
351 -inclusive => 1, -exclusive => 0 )
353 The default for a range is 'inclusive'
356 =cut
# Select enzymes by their cutter size.
#
# Usage:
#   $collection->cutters(6);                      # one size
#   $collection->cutters([4, 6]);                 # several sizes
#   $collection->cutters(-start => 6, -end => 8); # range, bounds included
#   $collection->cutters(-start => 6, -end => 8, -exclusive => 1);
#
# Returns a new Bio::Restriction::EnzymeCollection with the matching
# enzymes, or nothing when called without arguments.
#
# Throws when a size, -start or -end is not a positive number. The check is
# anchored, so values such as '-5' or 'abc1' are rejected instead of passing
# because they merely contain a digit.
sub cutters {
    my $self = shift;

    return unless @_;    # no argument

    # Optional leading '+', then digits with an optional fractional part
    # (or a bare fraction such as '.5'); the whole value must match.
    my $positive_number = qr/\A\+?(?:\d+(?:\.\d*)?|\.\d+)\z/;

    if (@_ == 1) {
        my $size  = shift;
        my @sizes = ref $size eq 'ARRAY' ? @{$size} : ($size);

        my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
        for my $wanted (@sizes) {
            my $shown = defined $wanted ? $wanted : '';
            $self->throw("Need a positive number [$shown]")
                unless $shown =~ $positive_number;
            foreach my $e ($self->each_enzyme) {
                $bs->enzymes($e) if $e->cutter == $wanted;
            }
        }
        return $bs;
    }

    # named arguments
    # -inclusive is accepted for compatibility but carries no information:
    # the range is inclusive unless -exclusive is true.
    my ($start, $end, undef, $exclusive) =
        $self->_rearrange([qw(
                              START
                              END
                              INCLUSIVE
                              EXCLUSIVE
                          )], @_);

    my $shown_start = defined $start ? $start : '';
    my $shown_end   = defined $end   ? $end   : '';
    $self->throw("Start needs a positive number [$shown_start]")
        unless $shown_start =~ $positive_number;
    $self->throw("End needs a positive number [$shown_end]")
        unless $shown_end =~ $positive_number;

    my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
    foreach my $e ($self->each_enzyme) {
        my $cut = $e->cutter;
        my $in_range = $exclusive
            ? ( $cut >  $start && $cut <  $end )
            : ( $cut >= $start && $cut <= $end );
        $bs->enzymes($e) if $in_range;
    }
    return $bs;
}