sync with trunk (to r15946)
[bioperl-live.git] / Bio / Restriction / EnzymeCollection.pm
blob028eaa6aa77940a1428dcde1cc02aea4583bb029
1 # $Id$
2 #-------------------------------------------------------------------------------
4 # BioPerl module Bio::Restriction::EnzymeCollection
6 # Please direct questions and support issues to <bioperl-l@bioperl.org>
8 # Cared for by Rob Edwards <redwards@utmem.edu>
10 # You may distribute this module under the same terms as perl itself
11 #-------------------------------------------------------------------------------
13 ## POD Documentation:
15 =head1 NAME
17 Bio::Restriction::EnzymeCollection - Set of restriction endonucleases
19 =head1 SYNOPSIS
21 use Bio::Restriction::EnzymeCollection;
23 # Create a collection with the default enzymes.
24 my $default_collection = Bio::Restriction::EnzymeCollection->new();
26 # Or create a collection from a REBASE 'withrefm' file obtained from
27 # ftp://ftp.neb.com/pub/rebase/. (See Bio::Restriction::IO for more
28 # information.)
29 my $rebase = Bio::Restriction::IO->new(
30 -file => 'withrefm.610',
31 -format => 'withrefm' );
32 my $rebase_collection = $rebase->read();
34 # Or create an empty collection and set the enzymes later. See
35 # 'CUSTOM COLLECTIONS' below for more information.
36 my $empty_collection =
37 Bio::Restriction::EnzymeCollection->new( -empty => 1 );
39 # Get an array of Bio::Restriction::Enzyme objects from the collection.
40 my @enzymes = $default_collection->each_enzyme();
42 # Get a Bio::Restriction::Enzyme object for a particular enzyme by name.
43 my $enz = $default_collection->get_enzyme( 'EcoRI' );
45 # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
46 # that have the equivalent of 6-bp recognition sequences.
47 my $six_cutters = $default_collection->cutters( 6 );
49 # Get a Bio::Restriction::EnzymeCollection object containing the enzymes
50 # that are rare cutters.
51 my $rare_cutters = $default_collection->cutters( -start => 6, -end => 8 );
53 # Get a Bio::Restriction::EnzymeCollection object that contains enzymes
54 # that generate blunt ends:
55 my $blunt_cutters = $default_collection->blunt_enzymes();
57 # See 'CUSTOM COLLECTIONS' below for an example of creating a
58 # Bio::Restriction::EnzymeCollection object with a specified subset of
59 # enzymes using methods provided by the Bio::RestrictionEnzyme class.
61 =head1 DESCRIPTION
63 Bio::Restriction::EnzymeCollection represents a collection of
64 restriction enzymes.
66 If you create a new collection directly rather than from a REBASE
67 file using L<Bio::Restriction::IO>, it will be populated by a
68 default set of enzymes with site and cut information
69 only.
71 Use L<Bio::Restriction::Analysis> to figure out which enzymes are
72 available and where they cut your sequence.
74 =head1 CUSTOM COLLECTIONS
76 Note that the underlying L<Bio::Restriction::Enzyme> objects have a rich
77 variety of methods that allow more complicated selections than the methods
78 that are defined by Bio::Restriction::EnzymeCollection.
80 For example, the way to create a custom collection of Type II enzymes
81 is as follows:
83 my $complete_collection =
84 Bio::Restriction::EnzymeCollection->new();
85 my $type_ii_collection =
86 Bio::Restriction::EnzymeCollection->new( -empty => 1 );
87 $type_ii_collection->enzymes(
88 grep { $_->type() eq 'II' } $complete_collection->each_enzyme() );
90 =head1 SEE ALSO
92 L<Bio::Restriction::IO> - read in enzymes from REBASE files
94 L<Bio::Restriction::Analysis> - figure out what enzymes cut a sequence
96 L<Bio::Restriction::Enzyme> - define a single restriction enzyme
98 =head1 FEEDBACK
100 =head2 Mailing Lists
102 User feedback is an integral part of the evolution of this and other
103 Bioperl modules. Send your comments and suggestions preferably to one
104 of the Bioperl mailing lists. Your participation is much appreciated.
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
109 =head2 Support
111 Please direct usage questions or support issues to the mailing list:
113 L<bioperl-l@bioperl.org>
115 rather than to the module maintainer directly. Many experienced and
116 reponsive experts will be able look at the problem and quickly
117 address it. Please include a thorough description of the problem
118 with code and data examples if at all possible.
120 =head2 Reporting Bugs
122 Report bugs to the Bioperl bug tracking system to help us keep track
123 the bugs and their resolution. Bug reports can be submitted via the
124 web:
126 http://bugzilla.open-bio.org/
128 =head1 AUTHOR
130 Rob Edwards, redwards@utmem.edu
132 =head1 CONTRIBUTORS
134 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
136 =head1 COPYRIGHT
138 Copyright (c) 2003 Rob Edwards.
140 Some of this work is Copyright (c) 1997-2002 Steve A. Chervitz. All
141 Rights Reserved.
143 This module is free software; you can redistribute it and/or modify it
144 under the same terms as Perl itself.
146 =head1 APPENDIX
148 Methods beginning with a leading underscore are considered private and
149 are intended for internal use by this module. They are not considered
150 part of the public interface and are described here for documentation
151 purposes only.
153 =cut
156 package Bio::Restriction::EnzymeCollection;
157 use strict;
159 use Bio::Restriction::Enzyme;
160 use Bio::Restriction::IO;
161 use UNIVERSAL qw(isa);
163 use Data::Dumper;
165 use base qw(Bio::Root::Root);
167 =head2 new
169 Title : new
170 Function : Initializes the Restriction::EnzymeCollection object
171 Returns : The Restriction::EnzymeCollection object
172 Arguments : optional named parameter -empty
174 Set parameter -empty to true if you do NOT want the collection be
175 populated by the default set of prototype type II enzymes.
177 Alternatively, pass an array of enzymes to -enzymes parameter.
179 =cut
181 sub new {
182 my($class, @args) = @_;
183 my $self = $class->SUPER::new(@args);
185 my ($empty, $enzymes) =
186 $self->_rearrange([qw(
187 EMPTY
188 ENZYMES
189 )], @args);
191 $self->{'_all_enzymes'} = [];
192 $self->{'_enzymes'} = {};
194 return $self if $empty;
197 if ($enzymes) {
198 # as advertised in pod/maj
199 $self->throw( "Arg to -enzymes must be an arrayref to Bio::Restriction::Enzyme objects") unless ref($enzymes) eq 'ARRAY';
200 $self->enzymes(@$enzymes);
201 return $self;
203 else {
204 # the default set of enzymes
205 my $in = Bio::Restriction::IO->new(-verbose => $self->verbose);
206 return $in->read;
210 =head2 Manipulate the enzymes within the collection
212 =cut
214 =head2 enzymes
216 Title : enzyme
217 Function : add/get method for enzymes and enzyme collections
218 Returns : object itself
219 Arguments : array of Bio::Restriction::Enzyme and
220 Bio::Restriction::EnzymeCollection objects
222 =cut
224 sub enzymes {
225 my ($self, @enzs)=@_;
226 foreach my $e (@enzs) {
227 if ( ref $e eq '') {
228 print "|$e|\n";
230 elsif ($e->isa('Bio::Restriction::EnzymeI')) {
231 push(@{$self->{'_all_enzymes'}},$e);
232 $self->{'_enzymes'}->{$e->name} = $e;
234 elsif ($e->isa('Bio::Restriction::EnzymeCollection')) {
235 $self->enzymes($e->each_enzyme);
236 } else {
237 my $r = 1;
238 $self->warn("EnzymeCollection can not deal with ".
239 ref($e)." objects");
242 return $self;
246 # method to remove duplicates?
249 =head2 each_enzyme
251 Title : each_enzyme
252 Function : get an array of enzymes
253 Returns : array of Bio::Restriction::Enzyme objects
254 Arguments : -
256 =cut
258 sub each_enzyme {
259 my $self = shift;
260 return @{$self->{'_all_enzymes'}};
263 =head2 get_enzyme
265 Title : get_enzyme
266 Function : Gets a Bio::Restriction::Enzyme object for the enzyme name
267 Returns : A Bio::Restriction::Enzyme object or undef
268 Arguments : An enzyme name that is in the collection
270 =cut
272 sub get_enzyme {
273 my ($self, $name)=@_;
274 return $self->{'_enzymes'}->{$name};
278 =head2 available_list
280 Title : available_list
281 Function : Gets a list of all the enzymes that we know about
282 Returns : A reference to an array with all the enzyme names
283 that we have defined or 0 if none are defined
284 Arguments : Nothing
285 Comments : Note, I maintain this for backwards compatibility,
286 but I don't like the name as it is very ambiguous
288 =cut
290 sub available_list {
291 my ($self, $size)=@_;
292 my @keys = sort keys %{$self->{'_enzymes'}};
293 return @keys;
296 =head2 longest_cutter
298 Title : longest_cutter
299 Function : Gets the enzyme with the longest recognition site
300 Returns : A Bio::Restriction::Enzyme object
301 Arguments : Nothing
302 Comments : Note, this is used by Bio::Restriction::Analysis
303 to figure out what to do with circular sequences
305 =cut
307 sub longest_cutter {
308 my ($self)=@_;
309 my $longest=0; my $longest_enz='.';
310 foreach my $enz ($self->each_enzyme) {
311 my $len=$enz->recognition_length;
312 if ($len > $longest) {$longest=$len; $longest_enz=$enz}
314 return $longest_enz;
317 =head2 Filter enzymes
319 =cut
321 =head2 blunt_enzymes
323 Title : blunt_enzymes
324 Function : Gets a list of all the enzymes that are blunt cutters
325 Returns : A reference to an array with all the enzyme names that
326 are blunt cutters or 0 if none are defined
327 Arguments : Nothing
328 Comments :
330 This is an example of the kind of filtering better done by the scripts
331 using the rich collection of methods in Bio::Restriction::Enzyme.
333 =cut
335 sub blunt_enzymes {
336 my $self=shift;
337 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
338 return $bs->enzymes( grep { $_->overhang eq 'blunt' } $self->each_enzyme );
342 =head2 cutters
344 Title : cutters
345 Function : Gets a list of all the enzymes that recognize a
346 certain size, e.g. 6-cutters
347 Usage : $cutters = $collection->cutters(6);
348 Returns : A reference to an array with all the enzyme names
349 that are x cutters or 0 if none are defined
350 Arguments : A positive number for the size of cutters to return
352 A range: (-start => 6, -end => 8,
353 -inclusive => 1, -exclusive = 0 )
355 The default for a range is 'inclusive'
358 =cut
360 sub cutters {
361 my ($self) = shift;
363 return unless @_; # no argument
365 if (scalar @_ == 1 ) {
366 my $size = shift;
367 my @sizes;
368 (ref $size eq 'ARRAY') ? push @sizes, @{$size} : push @sizes, $size;
369 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
370 for my $size (@sizes) {
371 $self->throw("Need a positive number [$size]")
372 unless $size =~ /[+]?[\d\.]+/;
373 foreach my $e ($self->each_enzyme) {
374 ##print $e->name, ": ", $e->cutter, "\n" if $e->cutter == $size;
375 $bs->enzymes($e) if $e->cutter == $size;
378 return $bs;
380 } else { # named arguments
382 my ($start, $end, $inclusive, $exclusive ) =
383 $self->_rearrange([qw(
384 START
386 INCLUSIVE
387 EXCLUSIVE
388 )], @_);
390 $self->throw("Start needs a positive number [$start]")
391 unless $start =~ /[+]?[\d\.]+/;
392 $self->throw("End needs a positive number [$end]")
393 unless $end =~ /[+]?[\d\.]+/;
395 my $limits;
396 $inclusive = 1 if $inclusive or not $exclusive;
397 $inclusive = 0 if $exclusive;
399 my $bs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
400 if ($inclusive) {
401 foreach my $e ($self->each_enzyme) {
402 $bs->enzymes($e) if $e->cutter >= $start and $e->cutter <= $end;
404 } else {
405 foreach my $e ($self->each_enzyme) {
406 $bs->enzymes($e) if $e->cutter > $start and $e->cutter < $end;
409 return $bs;