2 # BioPerl module for Bio::RangeI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Matthew Pocock
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::RangeI - Range interface
20 #Do not run this module directly
24 This provides a standard BioPerl range interface that should be
25 implemented by any object that wants to be treated as a range. This
26 serves purely as an abstract base class for implementers and cannot be instantiated.
29 Ranges are modeled as having (start, end, length, strand). They use
30 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
31 range. End is always greater-than or equal-to start, and length is
32 greater than or equal to 1. The behaviour of a range is undefined if
33 ranges with negative numbers or zero are used.
37 length = end - start + 1
39 strand = (-1 | 0 | +1)
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to one
47 of the Bioperl mailing lists. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 responsive experts will be able to look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via the web:
69 https://github.com/bioperl/bioperl-live/issues
71 =head1 AUTHOR - Heikki Lehvaslaiho
73 Email: heikki-at-bioperl-dot-org
77 Juha Muilu (muilu@ebi.ac.uk)
78 Sendu Bala (bix@sendu.me.uk)
79 Malcolm Cook (mec@stowers-institute.org)
80 Stephen Montgomery (sm8 at sanger.ac.uk)
84 The rest of the documentation details each of the object
85 methods. Internal methods are usually preceded with a _
94 use vars
qw(%STRAND_OPTIONS);
96 use base qw(Bio::Root::RootI);
99 # STRAND_OPTIONS contains the legal values for the strand-testing options
# Each option name is mapped to the same name with a leading underscore
# ('strong' => '_strong', 'weak' => '_weak', 'ignore' => '_ignore'); the
# mapped value is used elsewhere in this file as a method name.
100 %STRAND_OPTIONS = map { $_, '_' . $_ }
102 'strong', # ranges must have the same strand
103 'weak', # ranges must have the same strand or no strand
104 'ignore', # ignore strand information
111 # returns true if strands are equal and non-zero
# Both strands are fetched once and compared numerically.
# NOTE(review): an undefined strand compares as 0 here (and warns under
# 'use warnings') - confirm implementers always return a defined strand.
114 my ($s1, $s2) = ($r1->strand(), $r2->strand());
# Only the true case is returned explicitly on this line.
116 return 1 if $s1 != 0 && $s1 == $s2;
119 # returns true if strands are equal or either is zero
# A zero strand on either side is accepted unconditionally; otherwise the
# two strands must be numerically equal.
122 my ($s1, $s2) = ($r1->strand(), $r2->strand());
123 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
126 # returns true for any strandedness
131 # works out what test to use for the strictness and returns true/false
132 # e.g. $r1->_testStrand($r2, 'strong')
134 my ($r1, $r2, $comp) = @_;
# No (or a false) strictness option means the strands are not tested.
135 return 1 unless $comp;
# Look up the comparison method name for this option and invoke it on $r1
# with $r2 as its argument.
# NOTE(review): an option that is not a key of %STRAND_OPTIONS leaves $func
# undefined, so the call below dies with a generic Perl error rather than a
# descriptive one - consider validating $comp.
136 my $func = $STRAND_OPTIONS{$comp};
137 return $r1->$func($r2);
140 =head1 Abstract methods
142 These methods must be implemented in all subclasses.
147 Usage : $start = $range->start();
148 Function: get/set the start of this range
149 Returns : the start of this range
150 Args : optionally allows the start to be set
151 using $range->start($start)
# No implementation is provided for start in this interface: the invocant
# is taken off the argument list and throw_not_implemented() is called on it.
156 shift->throw_not_implemented();
162 Usage : $end = $range->end();
163 Function: get/set the end of this range
164 Returns : the end of this range
165 Args : optionally allows the end to be set
166 using $range->end($end)
# No implementation is provided for end in this interface: the invocant
# is taken off the argument list and throw_not_implemented() is called on it.
171 shift->throw_not_implemented();
177 Usage : $length = $range->length();
178 Function: get/set the length of this range
179 Returns : the length of this range
180 Args : optionally allows the length to be set
181 using $range->length($length)
# No implementation is provided for length in this interface: the invocant
# is taken off the argument list and throw_not_implemented() is called on it.
186 shift->throw_not_implemented();
192 Usage : $strand = $range->strand();
193 Function: get/set the strand of this range
194 Returns : the strandedness (-1, 0, +1)
195 Args : optionally allows the strand to be set
196 using $range->strand($strand)
# No implementation is provided for strand in this interface: the invocant
# is taken off the argument list and throw_not_implemented() is called on it.
201 shift->throw_not_implemented();
204 =head1 Boolean Methods
206 These methods return true or false. They throw an error if start and end are not defined.
209 $range->overlaps($otherRange) && print "Ranges overlap\n";
214 Usage : if($r1->overlaps($r2)) { do stuff }
215 Function: tests if $r2 overlaps $r1
216 Args : arg #1 = a range to compare this one to (mandatory)
217 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
218 Returns : true if the ranges overlap, false otherwise
223 my ($self, $other, $so) = @_;
# Validate both operands first: each needs a defined start and end, and
# $other must be defined and a Bio::RangeI.
225 $self->throw("start is undefined") unless defined $self->start;
226 $self->throw("end is undefined") unless defined $self->end;
227 $self->throw("not a Bio::RangeI object") unless defined $other &&
228 $other->isa('Bio::RangeI');
229 $other->throw("start is undefined") unless defined $other->start;
230 $other->throw("end is undefined") unless defined $other->end;
# Strand compatibility is evaluated with the optional strand option $so.
233 ($self->_testStrand($other, $so)
# The parenthesised test below is true when the ranges are disjoint:
# $self starts after $other ends, or $self ends before $other starts.
# NOTE(review): the operator joining it to the strand test is on a line
# not visible here - presumably a negation; confirm.
235 ($self->start() > $other->end() or
236 $self->end() < $other->start() )
243 Usage : if($r1->contains($r2)) { do stuff }
244 Function: tests whether $r1 totally contains $r2
245 Args : arg #1 = a range to compare this one to (mandatory)
246 alternatively, integer scalar to test
247 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
248 Returns : true if the argument is totally contained within this range
253 my ($self, $other, $so) = @_;
# This range needs defined endpoints before anything can be compared.
254 $self->throw("start is undefined") unless defined $self->start;
255 $self->throw("end is undefined") unless defined $self->end;
# A defined reference is handled as a range object.
257 if(defined $other && ref $other) { # a range object?
# NOTE(review): throw() is invoked on $other right after $other failed the
# isa('Bio::RangeI') test, so $other may not provide throw() at all;
# raising through $self would be safer - confirm.
258 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
259 $other->throw("start is undefined") unless defined $other->start;
260 $other->throw("end is undefined") unless defined $other->end;
# Contained when the strand test passes and $other lies inside this range,
# both bounds inclusive.
262 return ($self->_testStrand($other, $so) and
263 $other->start() >= $self->start() and
264 $other->end() <= $self->end());
# Scalar form: the argument must match an optionally signed run of digits
# and is then tested as a single position against the inclusive bounds.
# NOTE(review): '$' in the pattern also accepts a trailing newline; the
# branch keyword leading here is on a line not visible in this listing.
266 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
267 return ($other >= $self->start() and $other <= $self->end());
274 Usage : if($r1->equals($r2))
275 Function: test whether $r1 has the same start, end, length as $r2
276 Args : arg #1 = a range to compare this one to (mandatory)
277 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
278 Returns : true if they are describing the same range
283 my ($self, $other, $so) = @_;
# This range needs defined endpoints before anything can be compared.
285 $self->throw("start is undefined") unless defined $self->start;
286 $self->throw("end is undefined") unless defined $self->end;
# NOTE(review): $other is not checked with defined()/ref() before isa() is
# called on it, so an undefined or non-object argument dies with a generic
# Perl error instead of the message below; throw() is also invoked on the
# very object that failed the isa() test.
287 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
288 $other->throw("start is undefined") unless defined $other->start;
289 $other->throw("end is undefined") unless defined $other->end;
# Equal when the strand test passes and start and end match numerically;
# length is not compared separately here.
291 return ($self->_testStrand($other, $so) and
292 $self->start() == $other->start() and
293 $self->end() == $other->end() );
296 =head1 Geometrical methods
298 These methods do things to the geometry of ranges, and return
299 Bio::RangeI compliant objects or triplets (start, stop, strand) from
300 which new ranges could be built.
305 Usage : ($start, $end, $strand) = $r1->intersection($r2); OR
306 ($start, $end, $strand) = Bio::Range->intersection(\@ranges); OR
307 my $containing_range = $r1->intersection($r2); OR
308 my $containing_range = Bio::Range->intersection(\@ranges);
309 Function: gives the range that is contained by all ranges
310 Returns : undef if they do not overlap or if @ranges has only a
311 single range, else returns the range that they do
312 overlap. In scalar context, the return value is an object of
313 the same class as the calling one. In array context the
314 return value is a three element array.
315 Args : arg #1 = [REQUIRED] a Bio::RangeI to compare this one to,
316 or an array ref of ranges
317 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
322 my ($self, $given, $so) = @_;
# A second range (or an array ref of ranges) is mandatory.
323 $self->throw("missing arg: you need to pass in another feature") unless $given;
# When invoked on the interface class name itself, $self is rebound to the
# string "Bio::Range" and a deprecation warning is issued through it.
326 if ($self eq "Bio::RangeI") {
327 $self = "Bio::Range";
328 $self->warn("calling static methods of an interface is deprecated; use $self instead");
# Collect the ranges to intersect: $self plus either every element of the
# array ref or the single range that was passed in.
331 push(@ranges, $self);
333 ref($given) eq 'ARRAY' ?
push(@ranges, @
{$given}) : push(@ranges, $given);
334 #$self->throw("Need at least 2 ranges") unless @ranges >= 2;
335 # Rather than the above, I think the following is more consistent
# NOTE(review): 'return undef' yields a one-element list (undef) in list
# context, whereas the bare 'return' used further down yields an empty
# list - callers using the three-value form see different results.
336 return undef unless @ranges >= 2;
# Fold the list pairwise: $intersect holds the running intersection and
# $compare the next range taken from the list.
339 while (@ranges > 0) {
# First pass only: seed $intersect with the first range after validating it.
340 unless ($intersect) {
341 $intersect = shift(@ranges);
342 $self->throw("Not an object: $intersect") unless ref($intersect);
343 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
344 $self->throw("start is undefined") unless defined $intersect->start;
345 $self->throw("end is undefined") unless defined $intersect->end;
348 my $compare = shift(@ranges);
349 $self->throw("Not an object: $compare") unless ref($compare);
350 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
351 $self->throw("start is undefined") unless defined $compare->start;
352 $self->throw("end is undefined") unless defined $compare->end;
# Incompatible strands under option $so: give up with an empty return.
353 return unless $compare->_testStrand($intersect, $so);
# Candidate intersection runs from the larger start to the smaller end.
355 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
356 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
358 my $start = pop @starts; # larger of the 2 starts
359 my $end = shift @ends; # smaller of the 2 ends
# The result keeps the strand only when both inputs define the same one;
# the fallback value assigned below is 0.
361 my $intersect_strand; # strand for the intersection
362 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
363 $intersect_strand = $compare->strand;
366 $intersect_strand = 0;
# Build the new running intersection through $self->new.
373 $intersect = $self->new(-start
=> $start,
375 -strand
=> $intersect_strand);
# Three-element (start, end, strand) form of the result.
380 return ($intersect->start, $intersect->end, $intersect->strand);
390 Usage : ($start, $end, $strand) = $r1->union($r2);
391 : ($start, $end, $strand) = Bio::Range->union(@ranges);
392 my $newrange = Bio::Range->union(@ranges);
393 Function: finds the minimal Range that contains all of the Ranges
394 Args : a Range or list of Range objects
396 Returns : the range containing all of the ranges. In scalar context,
397 the return value is an object of the same class as the
398 calling one. In array context the return value is a three element array.
# When invoked on the interface class name itself, $self is rebound to the
# string "Bio::Range" and a deprecation warning is issued through it.
406 if ($self eq "Bio::RangeI") {
407 $self = "Bio::Range";
408 $self->warn("calling static methods of an interface is deprecated; use $self instead");
# $self is placed at the front of the list of ranges to merge.
411 unshift @ranges, $self;
# Numerically sorted starts and ends of every range in the list.
414 my @start = sort {$a<=>$b}
415 map( { $_->start() } @ranges);
416 my @end = sort {$a<=>$b}
417 map( { $_->end() } @ranges);
# Take the smallest start, skipping undefined entries.
# NOTE(review): shift on an empty array returns undef, so this loop never
# terminates when no range has a defined start - consider bounding it by
# the size of @start.
419 my $start = shift @start;
420 while( !defined $start ) {
421 $start = shift @start;
426 my $union_strand; # Strand for the union range object.
# The first strand seen seeds $union_strand.
429 if(! defined $union_strand) {
430 $union_strand = $_->strand;
# NOTE(review): 'ne' compares strands as strings, so numerically equal
# values with different spellings (e.g. "+1" and "1") count as different.
433 if(not defined $_->strand or $union_strand ne $_->strand) {
# NOTE(review): this is a truthiness test, not a defined() test; $end is
# assigned on a line not visible here.
439 return unless $start or $end;
# Three-element (start, end, strand) form of the result.
441 return ( $start,$end,$union_strand);
# Object form of the result, built through $self->new.
443 return $self->new('-start' => $start,
445 '-strand' => $union_strand
450 =head2 overlap_extent
452 Title : overlap_extent
453 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
454 Function: Provides actual amount of overlap between two different ranges
457 Returns : array of values containing the length unique to the calling
458 range, the length common to both, and the length unique to the argument range
# NOTE(review): $a and $b are also the names of Perl's sort variables;
# their declaration is on a line not visible here - consider renaming.
# Both ranges need defined endpoints and $b must be a Bio::RangeI.
467 $a->throw("start is undefined") unless defined $a->start;
468 $a->throw("end is undefined") unless defined $a->end;
469 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
470 $b->throw("start is undefined") unless defined $b->start;
471 $b->throw("end is undefined") unless defined $b->end;
# No overlap: everything is unique, nothing is shared.
473 if( ! $a->overlaps($b) ) {
474 return ($a->length,0,$b->length);
# $au / $bu accumulate the lengths unique to $a and to $b respectively.
477 my ($au,$bu) = (0, 0);
# Left side: whichever range starts first owns the leading stretch.
478 if( $a->start < $b->start ) {
479 $au = $b->start - $a->start;
481 $bu = $a->start - $b->start;
# Right side: whichever range ends last owns the trailing stretch.
484 if( $a->end > $b->end ) {
485 $au += $a->end - $b->end;
487 $bu += $b->end - $a->end;
# Scalar-context call, so the result is used as an object below.
490 my $intersect = $a->intersection($b);
# NOTE(review): this uses Perl's builtin warn rather than the object's
# warn() method used elsewhere in this file; the condition guarding this
# branch is on a line not visible here.
492 warn("no intersection\n");
493 return ($au, 0, $bu);
# Shared length is the inclusive size of the intersection.
495 my $ie = $intersect->end;
496 my $is = $intersect->start;
497 return ($au,$ie-$is+1,$bu);
501 =head2 disconnected_ranges
503 Title : disconnected_ranges
504 Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
505 Function: finds the minimal set of ranges such that each input range
506 is fully contained by at least one output range, and none of
507 the output ranges overlap
508 Args : a list of ranges
509 Returns : a list of objects of the same type as the input
# Merges the input ranges into a set of mutually non-overlapping ranges:
# each input range is either merged (via union) with the output ranges it
# intersects or added as a new output range built with $self->new.
514 sub disconnected_ranges
{
# When invoked on the interface class name itself, $self is rebound to the
# string "Bio::Range" and a deprecation warning is issued through it.
516 if ($self eq "Bio::RangeI") {
517 $self = "Bio::Range";
518 $self->warn("calling static methods of an interface is deprecated; use $self instead");
# $self is placed at the front of the input list.
522 unshift @inranges, $self;
525 my @outranges = (); # disconnected ranges
527 # iterate through all input ranges $inrange,
528 # adding each input range to the set of output ranges @outranges,
529 # provided $inrange does not overlap ANY range in @outranges
530 # - if it does overlap an outrange, then merge it
531 foreach my $inrange (@inranges) {
533 my @outranges_new = ();
534 my @intersecting_ranges = ();
536 # iterate through all @outranges, testing if it intersects
537 # current $inrange; if it does, merge and add to list
538 # of @intersecting_ranges, otherwise add $outrange to
539 # the new list of outranges that do NOT intersect
540 for (my $i=0; $i<@outranges; $i++) {
541 my $outrange = $outranges[$i];
# Scalar-context call with no strand option passed.
542 my $intersection = $inrange->intersection($outrange);
545 my $union = $inrange->union($outrange);
546 push(@intersecting_ranges, $union);
549 push(@outranges_new, $outrange);
552 @outranges = @outranges_new;
553 # @outranges now contains a list of non-overlapping ranges
554 # that do not intersect the current $inrange
556 if (@intersecting_ranges) {
557 if (@intersecting_ranges > 1) {
558 # this sf intersected > 1 range, which means that
559 # all the ranges it intersects should be joined
560 # together in a new range
# $merged_range is declared on a line not visible in this listing.
562 $self->union(@intersecting_ranges);
563 push(@outranges, $merged_range);
567 # exactly 1 intersecting range
568 push(@outranges, @intersecting_ranges);
572 # no intersections found - new range
# The new range copies start, end and strand from $inrange.
574 $self->new('-start'=>$inrange->start,
575 '-end'=>$inrange->end,
576 '-strand'=>$inrange->strand,
583 =head2 offsetStranded
585 Title : offsetStranded
586 Usage : $rnge->offsetStranded($fiveprime_offset, $threeprime_offset)
587 Function : destructively modifies RangeI implementing object to
588 offset its start and stop coordinates by values $fiveprime_offset and
589 $threeprime_offset (positive values being in the strand direction).
590 Args : two integer offsets: $fiveprime_offset and $threeprime_offset
591 Returns : $self, offset accordingly.
596 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
# Translate strand-relative offsets into start/end offsets: for strand -1
# the two offsets are swapped and negated, otherwise they are used as given.
# NOTE(review): 'eq' is a string comparison, so only a strand that
# stringifies exactly as "-1" takes the reverse branch; an undefined strand
# takes the forward branch (with a warning under 'use warnings').
597 my ($offset_start, $offset_end) = $self->strand() eq -1 ?
(- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
# Apply the offsets in place through the start/end accessors.
598 $self->start($self->start + $offset_start);
599 $self->end($self->end + $offset_end);
606 Usage : my @subtracted = $r1->subtract($r2)
607 Function: Subtract range r2 from range r1
608 Args : arg #1 = a range to subtract from this one (mandatory)
609 arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
610 Returns : undef if they do not overlap or r2 contains this RangeI,
611 or an arrayref of Range objects (this is an array since some
612 instances where the subtract range is enclosed within this range
613 will result in the creation of two new disjoint ranges)
618 my ($self, $range, $so) = @_;
619 $self->throw("missing arg: you need to pass in another feature")
621 return unless $self->_testStrand($range, $so);
623 if ($self eq "Bio::RangeI") {
624 $self = "Bio::Range";
625 $self->warn("calling static methods of an interface is
626 deprecated; use $self instead");
628 $range->throw("Input a Bio::RangeI object") unless
629 $range->isa('Bio::RangeI');
632 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
633 @sub_locations = $self->location->sub_Location;
635 @sub_locations = $self;
639 foreach my $sl (@sub_locations) {
640 if (!$sl->overlaps($range)) {
642 $self->new('-start' =>$sl->start,
644 '-strand'=>$sl->strand,
649 ##Subtracts everything
650 if ($range->contains($sl)) {
654 my ($start, $end, $strand) = $sl->intersection($range, $so);
655 ##Subtract intersection from $self range
657 if ($sl->start < $start) {
659 $self->new('-start' =>$sl->start,
661 '-strand'=>$sl->strand,
664 if ($sl->end > $end) {
666 $self->new('-start' =>$end + 1,
668 '-strand'=>$sl->strand,