3 # BioPerl module for Bio::RangeI
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Matthew Pocock
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::RangeI - Range interface
21 #Do not run this module directly
25 This provides a standard BioPerl range interface that should be
26 implemented by any object that wants to be treated as a range. This
27 serves purely as an abstract base class for implementers and can not
30 Ranges are modeled as having (start, end, length, strand). They use
31 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
32 range. End is always greater-than or equal-to start, and length is
33 greater than or equal to 1. The behaviour of a range is undefined if
34 ranges with negative numbers or zero are used.
38 length = end - start + 1
40 strand = (-1 | 0 | +1)
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to one
48 of the Bioperl mailing lists. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 the bugs and their resolution. Bug reports can be submitted via the
70 http://bugzilla.bioperl.org/
72 =head1 AUTHOR - Heikki Lehvaslaiho
74 Email: heikki-at-bioperl-dot-org
78 Juha Muilu (muilu@ebi.ac.uk)
79 Sendu Bala (bix@sendu.me.uk)
80 Malcolm Cook (mec@stowers-institute.org)
81 Stephen Montgomery (sm8 at sanger.ac.uk)
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
95 use vars
qw(%STRAND_OPTIONS);
97 use base qw(Bio::Root::RootI);
100 # STRAND_OPTIONS contains the legal values for the strand-testing options
101 %STRAND_OPTIONS = map { $_, '_' . $_ }
103 'strong', # ranges must have the same strand
104 'weak', # ranges must have the same strand or no strand
105 'ignore', # ignore strand information
112 # returns true if strands are equal and non-zero
115 my ($s1, $s2) = ($r1->strand(), $r2->strand());
117 return 1 if $s1 != 0 && $s1 == $s2;
120 # returns true if strands are equal or either is zero
123 my ($s1, $s2) = ($r1->strand(), $r2->strand());
124 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
127 # returns true for any strandedness
132 # works out what test to use for the strictness and returns true/false
133 # e.g. $r1->_testStrand($r2, 'strong')
135 my ($r1, $r2, $comp) = @_;
136 return 1 unless $comp;
137 my $func = $STRAND_OPTIONS{$comp};
138 return $r1->$func($r2);
141 =head1 Abstract methods
143 These methods must be implemented in all subclasses.
148 Usage : $start = $range->start();
149 Function: get/set the start of this range
150 Returns : the start of this range
151 Args : optionally allows the start to be set
152 using $range->start($start)
157 shift->throw_not_implemented();
163 Usage : $end = $range->end();
164 Function: get/set the end of this range
165 Returns : the end of this range
166 Args : optionally allows the end to be set
167 using $range->end($end)
172 shift->throw_not_implemented();
178 Usage : $length = $range->length();
179 Function: get/set the length of this range
180 Returns : the length of this range
181 Args : optionally allows the length to be set
182 using $range->length($length)
187 shift->throw_not_implemented();
193 Usage : $strand = $range->strand();
194 Function: get/set the strand of this range
195 Returns : the strandedness (-1, 0, +1)
196 Args : optionally allows the strand to be set
197 using $range->strand($strand)
202 shift->throw_not_implemented();
205 =head1 Boolean Methods
207 These methods return true or false. They throw an error if start and
210 $range->overlaps($otherRange) && print "Ranges overlap\n";
215 Usage : if($r1->overlaps($r2)) { do stuff }
216 Function: tests if $r2 overlaps $r1
217 Args : arg #1 = a range to compare this one to (mandatory)
218 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
219 Returns : true if the ranges overlap, false otherwise
224 my ($self, $other, $so) = @_;
226 $self->throw("start is undefined") unless defined $self->start;
227 $self->throw("end is undefined") unless defined $self->end;
228 $self->throw("not a Bio::RangeI object") unless defined $other &&
229 $other->isa('Bio::RangeI');
230 $other->throw("start is undefined") unless defined $other->start;
231 $other->throw("end is undefined") unless defined $other->end;
234 ($self->_testStrand($other, $so)
236 ($self->start() > $other->end() or
237 $self->end() < $other->start() )
244 Usage : if($r1->contains($r2) { do stuff }
245 Function: tests whether $r1 totally contains $r2
246 Args : arg #1 = a range to compare this one to (mandatory)
247 alternatively, integer scalar to test
248 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
249 Returns : true if the argument is totally contained within this range
254 my ($self, $other, $so) = @_;
255 $self->throw("start is undefined") unless defined $self->start;
256 $self->throw("end is undefined") unless defined $self->end;
258 if(defined $other && ref $other) { # a range object?
259 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
260 $other->throw("start is undefined") unless defined $other->start;
261 $other->throw("end is undefined") unless defined $other->end;
263 return ($self->_testStrand($other, $so) and
264 $other->start() >= $self->start() and
265 $other->end() <= $self->end());
267 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
268 return ($other >= $self->start() and $other <= $self->end());
275 Usage : if($r1->equals($r2))
276 Function: test whether $r1 has the same start, end, length as $r2
277 Args : arg #1 = a range to compare this one to (mandatory)
278 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
279 Returns : true if they are describing the same range
284 my ($self, $other, $so) = @_;
286 $self->throw("start is undefined") unless defined $self->start;
287 $self->throw("end is undefined") unless defined $self->end;
288 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
289 $other->throw("start is undefined") unless defined $other->start;
290 $other->throw("end is undefined") unless defined $other->end;
292 return ($self->_testStrand($other, $so) and
293 $self->start() == $other->start() and
294 $self->end() == $other->end() );
297 =head1 Geometrical methods
299 These methods do things to the geometry of ranges, and return
300 Bio::RangeI compliant objects or triplets (start, stop, strand) from
301 which new ranges could be built.
306 Usage : ($start, $stop, $strand) = $r1->intersection($r2); OR
307 ($start, $stop, $strand) = Bio::Range->intersection(\@ranges); OR
308 my $containing_range = $r1->intersection($r2); OR
309 my $containing_range = Bio::Range->intersection(\@ranges);
310 Function: gives the range that is contained by all ranges
311 Returns : undef if they do not overlap, or
312 the range that they do overlap (in the form of an object
313 like the calling one, OR a three element array)
314 Args : arg #1 = [REQUIRED] a range to compare this one to,
315 or an array ref of ranges
316 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
321 my ($self, $given, $so) = @_;
322 $self->throw("missing arg: you need to pass in another feature") unless $given;
325 if ($self eq "Bio::RangeI") {
326 $self = "Bio::Range";
327 $self->warn("calling static methods of an interface is deprecated; use $self instead");
330 push(@ranges, $self);
332 ref($given) eq 'ARRAY' ?
push(@ranges, @
{$given}) : push(@ranges, $given);
333 $self->throw("Need at least 2 ranges") unless @ranges >= 2;
336 while (@ranges > 0) {
337 unless ($intersect) {
338 $intersect = shift(@ranges);
339 $self->throw("Not an object: $intersect") unless ref($intersect);
340 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
341 $self->throw("start is undefined") unless defined $intersect->start;
342 $self->throw("end is undefined") unless defined $intersect->end;
345 my $compare = shift(@ranges);
346 $self->throw("Not an object: $compare") unless ref($compare);
347 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
348 $self->throw("start is undefined") unless defined $compare->start;
349 $self->throw("end is undefined") unless defined $compare->end;
350 return unless $compare->_testStrand($intersect, $so);
352 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
353 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
355 my $start = pop @starts; # larger of the 2 starts
356 my $end = shift @ends; # smaller of the 2 ends
358 my $intersect_strand; # strand for the intersection
359 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
360 $intersect_strand = $compare->strand;
363 $intersect_strand = 0;
370 $intersect = $self->new(-start
=> $start,
372 -strand
=> $intersect_strand);
377 return ($intersect->start, $intersect->end, $intersect->strand);
387 Usage : ($start, $stop, $strand) = $r1->union($r2);
388 : ($start, $stop, $strand) = Bio::Range->union(@ranges);
389 my $newrange = Bio::Range->union(@ranges);
390 Function: finds the minimal Range that contains all of the Ranges
391 Args : a Range or list of Range objects
392 Returns : the range containing all of the range
393 (in the form of an object like the calling one, OR
394 a three element array)
401 if ($self eq "Bio::RangeI") {
402 $self = "Bio::Range";
403 $self->warn("calling static methods of an interface is deprecated; use $self instead");
406 unshift @ranges, $self;
409 my @start = sort {$a<=>$b}
410 map( { $_->start() } @ranges);
411 my @end = sort {$a<=>$b}
412 map( { $_->end() } @ranges);
414 my $start = shift @start;
415 while( !defined $start ) {
416 $start = shift @start;
421 my $union_strand; # Strand for the union range object.
424 if(! defined $union_strand) {
425 $union_strand = $_->strand;
428 if(not defined $_->strand or $union_strand ne $_->strand) {
434 return unless $start or $end;
436 return ( $start,$end,$union_strand);
438 return $self->new('-start' => $start,
440 '-strand' => $union_strand
445 =head2 overlap_extent
447 Title : overlap_extent
448 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
449 Function: Provides actual amount of overlap between two different
452 Returns : array of values containing the length unique to the calling
453 range, the length common to both, and the length unique to
462 $a->throw("start is undefined") unless defined $a->start;
463 $a->throw("end is undefined") unless defined $a->end;
464 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
465 $b->throw("start is undefined") unless defined $b->start;
466 $b->throw("end is undefined") unless defined $b->end;
468 if( ! $a->overlaps($b) ) {
469 return ($a->length,0,$b->length);
472 my ($au,$bu) = (0, 0);
473 if( $a->start < $b->start ) {
474 $au = $b->start - $a->start;
476 $bu = $a->start - $b->start;
479 if( $a->end > $b->end ) {
480 $au += $a->end - $b->end;
482 $bu += $b->end - $a->end;
485 my $intersect = $a->intersection($b);
487 warn("no intersection\n");
488 return ($au, 0, $bu);
490 my $ie = $intersect->end;
491 my $is = $intersect->start;
492 return ($au,$ie-$is+1,$bu);
496 =head2 disconnected_ranges
498 Title : disconnected_ranges
499 Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
500 Function: finds the minimal set of ranges such that each input range
501 is fully contained by at least one output range, and none of
502 the output ranges overlap
503 Args : a list of ranges
504 Returns : a list of objects of the same type as the input
509 sub disconnected_ranges
{
511 if ($self eq "Bio::RangeI") {
512 $self = "Bio::Range";
513 $self->warn("calling static methods of an interface is deprecated; use $self instead");
517 unshift @inranges, $self;
520 my @outranges = (); # disconnected ranges
522 # iterate through all input ranges $inrange,
523 # adding each input range to the set of output ranges @outranges,
524 # provided $inrange does not overlap ANY range in @outranges
525 # - if it does overlap an outrange, then merge it
526 foreach my $inrange (@inranges) {
528 my @outranges_new = ();
529 my @intersecting_ranges = ();
531 # iterate through all @outranges, testing if it intersects
532 # current $inrange; if it does, merge and add to list
533 # of @intersecting_ranges, otherwise add $outrange to
534 # the new list of outranges that do NOT intersect
535 for (my $i=0; $i<@outranges; $i++) {
536 my $outrange = $outranges[$i];
537 my $intersection = $inrange->intersection($outrange);
540 my $union = $inrange->union($outrange);
541 push(@intersecting_ranges, $union);
544 push(@outranges_new, $outrange);
547 @outranges = @outranges_new;
548 # @outranges now contains a list of non-overlapping ranges
549 # that do not intersect the current $inrange
551 if (@intersecting_ranges) {
552 if (@intersecting_ranges > 1) {
553 # this sf intersected > 1 range, which means that
554 # all the ranges it intersects should be joined
555 # together in a new range
557 $self->union(@intersecting_ranges);
558 push(@outranges, $merged_range);
562 # exactly 1 intersecting range
563 push(@outranges, @intersecting_ranges);
567 # no intersections found - new range
569 $self->new('-start'=>$inrange->start,
570 '-end'=>$inrange->end,
571 '-strand'=>$inrange->strand,
578 =head2 offsetStranded
580 Title : offsetStranded
581 Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
582 Function : destructively modifies RangeI implementing object to
583 offset its start and stop coordinates by values $fiveprime_offset and
584 $threeprime_offset (positive values being in the strand direction).
585 Args : two integer offsets: $fiveprime_offset and $threeprime_offset
586 Returns : $self, offset accordingly.
591 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
592 my ($offset_start, $offset_end) = $self->strand() eq -1 ?
(- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
593 $self->start($self->start + $offset_start);
594 $self->end($self->end + $offset_end);
601 Usage : my @subtracted = $r1->subtract($r2)
602 Function: Subtract range r2 from range r1
603 Args : arg #1 = a range to subtract from this one (mandatory)
604 arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
605 Returns : undef if they do not overlap or r2 contains this RangeI,
606 or an arrayref of Range objects (this is an array since some
607 instances where the subtract range is enclosed within this range
608 will result in the creation of two new disjoint ranges)
613 my ($self, $range, $so) = @_;
614 $self->throw("missing arg: you need to pass in another feature")
616 return unless $self->_testStrand($range, $so);
618 if ($self eq "Bio::RangeI") {
619 $self = "Bio::Range";
620 $self->warn("calling static methods of an interface is
621 deprecated; use $self instead");
623 $range->throw("Input a Bio::RangeI object") unless
624 $range->isa('Bio::RangeI');
627 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
628 @sub_locations = $self->location->sub_Location;
630 @sub_locations = $self;
634 foreach my $sl (@sub_locations) {
635 if (!$sl->overlaps($range)) {
637 $self->new('-start' =>$sl->start,
639 '-strand'=>$sl->strand,
644 ##Subtracts everything
645 if ($range->contains($sl)) {
649 my ($start, $end, $strand) = $sl->intersection($range, $so);
650 ##Subtract intersection from $self range
652 if ($sl->start < $start) {
654 $self->new('-start' =>$sl->start,
656 '-strand'=>$sl->strand,
659 if ($sl->end > $end) {
661 $self->new('-start' =>$end + 1,
663 '-strand'=>$sl->strand,