Updated INSTALL.WIN to the most recent document from the Wiki
[bioperl-live.git] / Bio / RangeI.pm
blob4882881a91b0c070d2327304253f6267e22519ef
2 # BioPerl module for Bio::RangeI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Matthew Pocock
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::RangeI - Range interface
18 =head1 SYNOPSIS
20 #Do not run this module directly
22 =head1 DESCRIPTION
24 This provides a standard BioPerl range interface that should be
25 implemented by any object that wants to be treated as a range. This
26 serves purely as an abstract base class for implementers and can not
27 be instantiated.
29 Ranges are modeled as having (start, end, length, strand). They use
30 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
31 range. End is always greater-than or equal-to start, and length is
32 greater than or equal to 1. The behaviour of a range is undefined if
33 ranges with negative numbers or zero are used.
35 So, in summary:
37 length = end - start + 1
38 end >= start
39 strand = (-1 | 0 | +1)
41 =head1 FEEDBACK
43 =head2 Mailing Lists
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to one
47 of the Bioperl mailing lists. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 =head2 Support
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 reponsive experts will be able look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
63 =head2 Reporting Bugs
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 the bugs and their resolution. Bug reports can be submitted via the
67 web:
69 https://github.com/bioperl/bioperl-live/issues
71 =head1 AUTHOR - Heikki Lehvaslaiho
73 Email: heikki-at-bioperl-dot-org
75 =head1 CONTRIBUTORS
77 Juha Muilu (muilu@ebi.ac.uk)
78 Sendu Bala (bix@sendu.me.uk)
79 Malcolm Cook (mec@stowers-institute.org)
80 Stephen Montgomery (sm8 at sanger.ac.uk)
82 =head1 APPENDIX
84 The rest of the documentation details each of the object
85 methods. Internal methods are usually preceded with a _
87 =cut
89 package Bio::RangeI;
91 use strict;
92 use Carp;
93 use integer;
94 use vars qw(%STRAND_OPTIONS);
96 use base qw(Bio::Root::RootI);
98 BEGIN {
99 # STRAND_OPTIONS contains the legal values for the strand-testing options
100 %STRAND_OPTIONS = map { $_, '_' . $_ }
102 'strong', # ranges must have the same strand
103 'weak', # ranges must have the same strand or no strand
104 'ignore', # ignore strand information
108 # utility methods
111 # returns true if strands are equal and non-zero
112 sub _strong {
113 my ($r1, $r2) = @_;
114 my ($s1, $s2) = ($r1->strand(), $r2->strand());
116 return 1 if $s1 != 0 && $s1 == $s2;
119 # returns true if strands are equal or either is zero
120 sub _weak {
121 my ($r1, $r2) = @_;
122 my ($s1, $s2) = ($r1->strand(), $r2->strand());
123 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
126 # returns true for any strandedness
127 sub _ignore {
128 return 1;
131 # works out what test to use for the strictness and returns true/false
132 # e.g. $r1->_testStrand($r2, 'strong')
133 sub _testStrand() {
134 my ($r1, $r2, $comp) = @_;
135 return 1 unless $comp;
136 my $func = $STRAND_OPTIONS{$comp};
137 return $r1->$func($r2);
140 =head1 Abstract methods
142 These methods must be implemented in all subclasses.
144 =head2 start
146 Title : start
147 Usage : $start = $range->start();
148 Function: get/set the start of this range
149 Returns : the start of this range
150 Args : optionally allows the start to be set
151 using $range->start($start)
153 =cut
155 sub start {
156 shift->throw_not_implemented();
159 =head2 end
161 Title : end
162 Usage : $end = $range->end();
163 Function: get/set the end of this range
164 Returns : the end of this range
165 Args : optionally allows the end to be set
166 using $range->end($end)
168 =cut
170 sub end {
171 shift->throw_not_implemented();
174 =head2 length
176 Title : length
177 Usage : $length = $range->length();
178 Function: get/set the length of this range
179 Returns : the length of this range
180 Args : optionally allows the length to be set
181 using $range->length($length)
183 =cut
185 sub length {
186 shift->throw_not_implemented();
189 =head2 strand
191 Title : strand
192 Usage : $strand = $range->strand();
193 Function: get/set the strand of this range
194 Returns : the strandedness (-1, 0, +1)
195 Args : optionally allows the strand to be set
196 using $range->strand($strand)
198 =cut
200 sub strand {
201 shift->throw_not_implemented();
204 =head1 Boolean Methods
206 These methods return true or false. They throw an error if start and
207 end are not defined.
209 $range->overlaps($otherRange) && print "Ranges overlap\n";
211 =head2 overlaps
213 Title : overlaps
214 Usage : if($r1->overlaps($r2)) { do stuff }
215 Function: tests if $r2 overlaps $r1
216 Args : arg #1 = a range to compare this one to (mandatory)
217 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
218 Returns : true if the ranges overlap, false otherwise
220 =cut
222 sub overlaps {
223 my ($self, $other, $so) = @_;
225 $self->throw("start is undefined") unless defined $self->start;
226 $self->throw("end is undefined") unless defined $self->end;
227 $self->throw("not a Bio::RangeI object") unless defined $other &&
228 $other->isa('Bio::RangeI');
229 $other->throw("start is undefined") unless defined $other->start;
230 $other->throw("end is undefined") unless defined $other->end;
232 return
233 ($self->_testStrand($other, $so)
234 and not (
235 ($self->start() > $other->end() or
236 $self->end() < $other->start() )
240 =head2 contains
242 Title : contains
243 Usage : if($r1->contains($r2) { do stuff }
244 Function: tests whether $r1 totally contains $r2
245 Args : arg #1 = a range to compare this one to (mandatory)
246 alternatively, integer scalar to test
247 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
248 Returns : true if the argument is totally contained within this range
250 =cut
252 sub contains {
253 my ($self, $other, $so) = @_;
254 $self->throw("start is undefined") unless defined $self->start;
255 $self->throw("end is undefined") unless defined $self->end;
257 if(defined $other && ref $other) { # a range object?
258 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
259 $other->throw("start is undefined") unless defined $other->start;
260 $other->throw("end is undefined") unless defined $other->end;
262 return ($self->_testStrand($other, $so) and
263 $other->start() >= $self->start() and
264 $other->end() <= $self->end());
265 } else { # a scalar?
266 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
267 return ($other >= $self->start() and $other <= $self->end());
271 =head2 equals
273 Title : equals
274 Usage : if($r1->equals($r2))
275 Function: test whether $r1 has the same start, end, length as $r2
276 Args : arg #1 = a range to compare this one to (mandatory)
277 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
278 Returns : true if they are describing the same range
280 =cut
282 sub equals {
283 my ($self, $other, $so) = @_;
285 $self->throw("start is undefined") unless defined $self->start;
286 $self->throw("end is undefined") unless defined $self->end;
287 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
288 $other->throw("start is undefined") unless defined $other->start;
289 $other->throw("end is undefined") unless defined $other->end;
291 return ($self->_testStrand($other, $so) and
292 $self->start() == $other->start() and
293 $self->end() == $other->end() );
296 =head1 Geometrical methods
298 These methods do things to the geometry of ranges, and return
299 Bio::RangeI compliant objects or triplets (start, stop, strand) from
300 which new ranges could be built.
302 =head2 intersection
304 Title : intersection
305 Usage : ($start, $end, $strand) = $r1->intersection($r2); OR
306 ($start, $end, $strand) = Bio::Range->intersection(\@ranges); OR
307 my $containing_range = $r1->intersection($r2); OR
308 my $containing_range = Bio::Range->intersection(\@ranges);
309 Function: gives the range that is contained by all ranges
310 Returns : undef if they do not overlap or if @ranges has only a
311 single range, else returns the range that they do
312 overlap. In scalar contex, the return value is an object of
313 the same class as the calling one. In array context the
314 return value is a three element array.
315 Args : arg #1 = [REQUIRED] a Bio::RangeI to compare this one to,
316 or an array ref of ranges
317 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
319 =cut
321 sub intersection {
322 my ($self, $given, $so) = @_;
323 $self->throw("missing arg: you need to pass in another feature") unless $given;
325 my @ranges;
326 if ($self eq "Bio::RangeI") {
327 $self = "Bio::Range";
328 $self->warn("calling static methods of an interface is deprecated; use $self instead");
330 if (ref $self) {
331 push(@ranges, $self);
333 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
334 #$self->throw("Need at least 2 ranges") unless @ranges >= 2;
335 # Rather than the above, I think the following is more consistent
336 return undef unless @ranges >= 2;
338 my $intersect;
339 while (@ranges > 0) {
340 unless ($intersect) {
341 $intersect = shift(@ranges);
342 $self->throw("Not an object: $intersect") unless ref($intersect);
343 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
344 $self->throw("start is undefined") unless defined $intersect->start;
345 $self->throw("end is undefined") unless defined $intersect->end;
348 my $compare = shift(@ranges);
349 $self->throw("Not an object: $compare") unless ref($compare);
350 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
351 $self->throw("start is undefined") unless defined $compare->start;
352 $self->throw("end is undefined") unless defined $compare->end;
353 return unless $compare->_testStrand($intersect, $so);
355 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
356 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
358 my $start = pop @starts; # larger of the 2 starts
359 my $end = shift @ends; # smaller of the 2 ends
361 my $intersect_strand; # strand for the intersection
362 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
363 $intersect_strand = $compare->strand;
365 else {
366 $intersect_strand = 0;
369 if ($start > $end) {
370 return;
372 else {
373 $intersect = $self->new(-start => $start,
374 -end => $end,
375 -strand => $intersect_strand);
379 if (wantarray()) {
380 return ($intersect->start, $intersect->end, $intersect->strand);
382 else {
383 return $intersect;
387 =head2 union
389 Title : union
390 Usage : ($start, $end, $strand) = $r1->union($r2);
391 : ($start, $end, $strand) = Bio::Range->union(@ranges);
392 my $newrange = Bio::Range->union(@ranges);
393 Function: finds the minimal Range that contains all of the Ranges
394 Args : a Range or list of Range objects
396 Returns : the range containing all of the range. In scalar contex,
397 the return value is an object of the same class as the
398 calling one. In array context the return value is a
399 three element array.
401 =cut
403 sub union {
404 my $self = shift;
405 my @ranges = @_;
406 if ($self eq "Bio::RangeI") {
407 $self = "Bio::Range";
408 $self->warn("calling static methods of an interface is deprecated; use $self instead");
410 if(ref $self) {
411 unshift @ranges, $self;
414 my @start = sort {$a<=>$b}
415 map( { $_->start() } @ranges);
416 my @end = sort {$a<=>$b}
417 map( { $_->end() } @ranges);
419 my $start = shift @start;
420 while( !defined $start ) {
421 $start = shift @start;
424 my $end = pop @end;
426 my $union_strand; # Strand for the union range object.
428 foreach(@ranges) {
429 if(! defined $union_strand) {
430 $union_strand = $_->strand;
431 next;
432 } else {
433 if(not defined $_->strand or $union_strand ne $_->strand) {
434 $union_strand = 0;
435 last;
439 return unless $start or $end;
440 if( wantarray() ) {
441 return ( $start,$end,$union_strand);
442 } else {
443 return $self->new('-start' => $start,
444 '-end' => $end,
445 '-strand' => $union_strand
450 =head2 overlap_extent
452 Title : overlap_extent
453 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
454 Function: Provides actual amount of overlap between two different
455 ranges
456 Example :
457 Returns : array of values containing the length unique to the calling
458 range, the length common to both, and the length unique to
459 the argument range
460 Args : a range
462 =cut
464 sub overlap_extent{
465 my ($a,$b) = @_;
467 $a->throw("start is undefined") unless defined $a->start;
468 $a->throw("end is undefined") unless defined $a->end;
469 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
470 $b->throw("start is undefined") unless defined $b->start;
471 $b->throw("end is undefined") unless defined $b->end;
473 if( ! $a->overlaps($b) ) {
474 return ($a->length,0,$b->length);
477 my ($au,$bu) = (0, 0);
478 if( $a->start < $b->start ) {
479 $au = $b->start - $a->start;
480 } else {
481 $bu = $a->start - $b->start;
484 if( $a->end > $b->end ) {
485 $au += $a->end - $b->end;
486 } else {
487 $bu += $b->end - $a->end;
490 my $intersect = $a->intersection($b);
491 if( ! $intersect ) {
492 warn("no intersection\n");
493 return ($au, 0, $bu);
494 } else {
495 my $ie = $intersect->end;
496 my $is = $intersect->start;
497 return ($au,$ie-$is+1,$bu);
501 =head2 disconnected_ranges
503 Title : disconnected_ranges
504 Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
505 Function: finds the minimal set of ranges such that each input range
506 is fully contained by at least one output range, and none of
507 the output ranges overlap
508 Args : a list of ranges
509 Returns : a list of objects of the same type as the input
510 (conforms to RangeI)
512 =cut
514 sub disconnected_ranges {
515 my $self = shift;
516 if ($self eq "Bio::RangeI") {
517 $self = "Bio::Range";
518 $self->warn("calling static methods of an interface is deprecated; use $self instead");
520 my @inranges = @_;
521 if(ref $self) {
522 unshift @inranges, $self;
525 my @outranges = (); # disconnected ranges
527 # iterate through all input ranges $inrange,
528 # adding each input range to the set of output ranges @outranges,
529 # provided $inrange does not overlap ANY range in @outranges
530 # - if it does overlap an outrange, then merge it
531 foreach my $inrange (@inranges) {
532 my $intersects = 0;
533 my @outranges_new = ();
534 my @intersecting_ranges = ();
536 # iterate through all @outranges, testing if it intersects
537 # current $inrange; if it does, merge and add to list
538 # of @intersecting_ranges, otherwise add $outrange to
539 # the new list of outranges that do NOT intersect
540 for (my $i=0; $i<@outranges; $i++) {
541 my $outrange = $outranges[$i];
542 my $intersection = $inrange->intersection($outrange);
543 if ($intersection) {
544 $intersects = 1;
545 my $union = $inrange->union($outrange);
546 push(@intersecting_ranges, $union);
548 else {
549 push(@outranges_new, $outrange);
552 @outranges = @outranges_new;
553 # @outranges now contains a list of non-overlapping ranges
554 # that do not intersect the current $inrange
556 if (@intersecting_ranges) {
557 if (@intersecting_ranges > 1) {
558 # this sf intersected > 1 range, which means that
559 # all the ranges it intersects should be joined
560 # together in a new range
561 my $merged_range =
562 $self->union(@intersecting_ranges);
563 push(@outranges, $merged_range);
566 else {
567 # exactly 1 intersecting range
568 push(@outranges, @intersecting_ranges);
571 else {
572 # no intersections found - new range
573 push(@outranges,
574 $self->new('-start'=>$inrange->start,
575 '-end'=>$inrange->end,
576 '-strand'=>$inrange->strand,
580 return @outranges;
583 =head2 offsetStranded
585 Title : offsetStranded
586 Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
587 Function : destructively modifies RangeI implementing object to
588 offset its start and stop coordinates by values $fiveprime_offset and
589 $threeprime_offset (positive values being in the strand direction).
590 Args : two integer offsets: $fiveprime_offset and $threeprime_offset
591 Returns : $self, offset accordingly.
593 =cut
595 sub offsetStranded {
596 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
597 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
598 $self->start($self->start + $offset_start);
599 $self->end($self->end + $offset_end);
600 return $self;
603 =head2 subtract
605 Title : subtract
606 Usage : my @subtracted = $r1->subtract($r2)
607 Function: Subtract range r2 from range r1
608 Args : arg #1 = a range to subtract from this one (mandatory)
609 arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
610 Returns : undef if they do not overlap or r2 contains this RangeI,
611 or an arrayref of Range objects (this is an array since some
612 instances where the subtract range is enclosed within this range
613 will result in the creation of two new disjoint ranges)
615 =cut
617 sub subtract() {
618 my ($self, $range, $so) = @_;
619 $self->throw("missing arg: you need to pass in another feature")
620 unless $range;
621 return unless $self->_testStrand($range, $so);
623 if ($self eq "Bio::RangeI") {
624 $self = "Bio::Range";
625 $self->warn("calling static methods of an interface is
626 deprecated; use $self instead");
628 $range->throw("Input a Bio::RangeI object") unless
629 $range->isa('Bio::RangeI');
631 my @sub_locations;
632 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
633 @sub_locations = $self->location->sub_Location;
634 } else {
635 @sub_locations = $self;
638 my @outranges;
639 foreach my $sl (@sub_locations) {
640 if (!$sl->overlaps($range)) {
641 push(@outranges,
642 $self->new('-start' =>$sl->start,
643 '-end' =>$sl->end,
644 '-strand'=>$sl->strand,
646 next;
649 ##Subtracts everything
650 if ($range->contains($sl)) {
651 next;
654 my ($start, $end, $strand) = $sl->intersection($range, $so);
655 ##Subtract intersection from $self range
657 if ($sl->start < $start) {
658 push(@outranges,
659 $self->new('-start' =>$sl->start,
660 '-end' =>$start - 1,
661 '-strand'=>$sl->strand,
662 ));
664 if ($sl->end > $end) {
665 push(@outranges,
666 $self->new('-start' =>$end + 1,
667 '-end' =>$sl->end,
668 '-strand'=>$sl->strand,
669 ));
673 if (@outranges) {
674 return \@outranges;
676 return;