more tests for the orf finder
[bioperl-live.git] / Bio / RangeI.pm
blobee5ffdddbb6efe2320d7efde1f7e8d790969056c
1 # $Id$
3 # BioPerl module for Bio::RangeI
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
9 # Copyright Matthew Pocock
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::RangeI - Range interface
19 =head1 SYNOPSIS
21 #Do not run this module directly
23 =head1 DESCRIPTION
25 This provides a standard BioPerl range interface that should be
26 implemented by any object that wants to be treated as a range. This
27 serves purely as an abstract base class for implementers and can not
28 be instantiated.
30 Ranges are modeled as having (start, end, length, strand). They use
31 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
32 range. End is always greater-than or equal-to start, and length is
33 greater than or equal to 1. The behaviour of a range is undefined if
34 ranges with negative numbers or zero are used.
36 So, in summary:
38 length = end - start + 1
39 end >= start
40 strand = (-1 | 0 | +1)
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to one
48 of the Bioperl mailing lists. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 the bugs and their resolution. Bug reports can be submitted via the
68 web:
70 http://bugzilla.bioperl.org/
72 =head1 AUTHOR - Heikki Lehvaslaiho
74 Email: heikki-at-bioperl-dot-org
76 =head1 CONTRIBUTORS
78 Juha Muilu (muilu@ebi.ac.uk)
79 Sendu Bala (bix@sendu.me.uk)
80 Malcolm Cook (mec@stowers-institute.org)
81 Stephen Montgomery (sm8 at sanger.ac.uk)
83 =head1 APPENDIX
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
88 =cut
90 package Bio::RangeI;
92 use strict;
93 use Carp;
94 use integer;
95 use vars qw(%STRAND_OPTIONS);
97 use base qw(Bio::Root::RootI);
99 BEGIN {
100 # STRAND_OPTIONS contains the legal values for the strand-testing options
101 %STRAND_OPTIONS = map { $_, '_' . $_ }
103 'strong', # ranges must have the same strand
104 'weak', # ranges must have the same strand or no strand
105 'ignore', # ignore strand information
109 # utility methods
112 # returns true if strands are equal and non-zero
113 sub _strong {
114 my ($r1, $r2) = @_;
115 my ($s1, $s2) = ($r1->strand(), $r2->strand());
117 return 1 if $s1 != 0 && $s1 == $s2;
120 # returns true if strands are equal or either is zero
121 sub _weak {
122 my ($r1, $r2) = @_;
123 my ($s1, $s2) = ($r1->strand(), $r2->strand());
124 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
127 # returns true for any strandedness
128 sub _ignore {
129 return 1;
132 # works out what test to use for the strictness and returns true/false
133 # e.g. $r1->_testStrand($r2, 'strong')
134 sub _testStrand() {
135 my ($r1, $r2, $comp) = @_;
136 return 1 unless $comp;
137 my $func = $STRAND_OPTIONS{$comp};
138 return $r1->$func($r2);
141 =head1 Abstract methods
143 These methods must be implemented in all subclasses.
145 =head2 start
147 Title : start
148 Usage : $start = $range->start();
149 Function: get/set the start of this range
150 Returns : the start of this range
151 Args : optionally allows the start to be set
152 using $range->start($start)
154 =cut
156 sub start {
157 shift->throw_not_implemented();
160 =head2 end
162 Title : end
163 Usage : $end = $range->end();
164 Function: get/set the end of this range
165 Returns : the end of this range
166 Args : optionally allows the end to be set
167 using $range->end($end)
169 =cut
171 sub end {
172 shift->throw_not_implemented();
175 =head2 length
177 Title : length
178 Usage : $length = $range->length();
179 Function: get/set the length of this range
180 Returns : the length of this range
181 Args : optionally allows the length to be set
182 using $range->length($length)
184 =cut
186 sub length {
187 shift->throw_not_implemented();
190 =head2 strand
192 Title : strand
193 Usage : $strand = $range->strand();
194 Function: get/set the strand of this range
195 Returns : the strandedness (-1, 0, +1)
196 Args : optionally allows the strand to be set
197 using $range->strand($strand)
199 =cut
201 sub strand {
202 shift->throw_not_implemented();
205 =head1 Boolean Methods
207 These methods return true or false. They throw an error if start and
208 end are not defined.
210 $range->overlaps($otherRange) && print "Ranges overlap\n";
212 =head2 overlaps
214 Title : overlaps
215 Usage : if($r1->overlaps($r2)) { do stuff }
216 Function: tests if $r2 overlaps $r1
217 Args : arg #1 = a range to compare this one to (mandatory)
218 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
219 Returns : true if the ranges overlap, false otherwise
221 =cut
223 sub overlaps {
224 my ($self, $other, $so) = @_;
226 $self->throw("start is undefined") unless defined $self->start;
227 $self->throw("end is undefined") unless defined $self->end;
228 $self->throw("not a Bio::RangeI object") unless defined $other &&
229 $other->isa('Bio::RangeI');
230 $other->throw("start is undefined") unless defined $other->start;
231 $other->throw("end is undefined") unless defined $other->end;
233 return
234 ($self->_testStrand($other, $so)
235 and not (
236 ($self->start() > $other->end() or
237 $self->end() < $other->start() )
241 =head2 contains
243 Title : contains
244 Usage : if($r1->contains($r2) { do stuff }
245 Function: tests whether $r1 totally contains $r2
246 Args : arg #1 = a range to compare this one to (mandatory)
247 alternatively, integer scalar to test
248 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
249 Returns : true if the argument is totally contained within this range
251 =cut
253 sub contains {
254 my ($self, $other, $so) = @_;
255 $self->throw("start is undefined") unless defined $self->start;
256 $self->throw("end is undefined") unless defined $self->end;
258 if(defined $other && ref $other) { # a range object?
259 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
260 $other->throw("start is undefined") unless defined $other->start;
261 $other->throw("end is undefined") unless defined $other->end;
263 return ($self->_testStrand($other, $so) and
264 $other->start() >= $self->start() and
265 $other->end() <= $self->end());
266 } else { # a scalar?
267 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
268 return ($other >= $self->start() and $other <= $self->end());
272 =head2 equals
274 Title : equals
275 Usage : if($r1->equals($r2))
276 Function: test whether $r1 has the same start, end, length as $r2
277 Args : arg #1 = a range to compare this one to (mandatory)
278 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
279 Returns : true if they are describing the same range
281 =cut
283 sub equals {
284 my ($self, $other, $so) = @_;
286 $self->throw("start is undefined") unless defined $self->start;
287 $self->throw("end is undefined") unless defined $self->end;
288 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
289 $other->throw("start is undefined") unless defined $other->start;
290 $other->throw("end is undefined") unless defined $other->end;
292 return ($self->_testStrand($other, $so) and
293 $self->start() == $other->start() and
294 $self->end() == $other->end() );
297 =head1 Geometrical methods
299 These methods do things to the geometry of ranges, and return
300 Bio::RangeI compliant objects or triplets (start, stop, strand) from
301 which new ranges could be built.
303 =head2 intersection
305 Title : intersection
306 Usage : ($start, $stop, $strand) = $r1->intersection($r2); OR
307 ($start, $stop, $strand) = Bio::Range->intersection(\@ranges); OR
308 my $containing_range = $r1->intersection($r2); OR
309 my $containing_range = Bio::Range->intersection(\@ranges);
310 Function: gives the range that is contained by all ranges
311 Returns : undef if they do not overlap, or
312 the range that they do overlap (in the form of an object
313 like the calling one, OR a three element array)
314 Args : arg #1 = [REQUIRED] a range to compare this one to,
315 or an array ref of ranges
316 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
318 =cut
320 sub intersection {
321 my ($self, $given, $so) = @_;
322 $self->throw("missing arg: you need to pass in another feature") unless $given;
324 my @ranges;
325 if ($self eq "Bio::RangeI") {
326 $self = "Bio::Range";
327 $self->warn("calling static methods of an interface is deprecated; use $self instead");
329 if (ref $self) {
330 push(@ranges, $self);
332 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
333 $self->throw("Need at least 2 ranges") unless @ranges >= 2;
335 my $intersect;
336 while (@ranges > 0) {
337 unless ($intersect) {
338 $intersect = shift(@ranges);
339 $self->throw("Not an object: $intersect") unless ref($intersect);
340 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
341 $self->throw("start is undefined") unless defined $intersect->start;
342 $self->throw("end is undefined") unless defined $intersect->end;
345 my $compare = shift(@ranges);
346 $self->throw("Not an object: $compare") unless ref($compare);
347 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
348 $self->throw("start is undefined") unless defined $compare->start;
349 $self->throw("end is undefined") unless defined $compare->end;
350 return unless $compare->_testStrand($intersect, $so);
352 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
353 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
355 my $start = pop @starts; # larger of the 2 starts
356 my $end = shift @ends; # smaller of the 2 ends
358 my $intersect_strand; # strand for the intersection
359 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
360 $intersect_strand = $compare->strand;
362 else {
363 $intersect_strand = 0;
366 if ($start > $end) {
367 return;
369 else {
370 $intersect = $self->new(-start => $start,
371 -end => $end,
372 -strand => $intersect_strand);
376 if (wantarray()) {
377 return ($intersect->start, $intersect->end, $intersect->strand);
379 else {
380 return $intersect;
384 =head2 union
386 Title : union
387 Usage : ($start, $stop, $strand) = $r1->union($r2);
388 : ($start, $stop, $strand) = Bio::Range->union(@ranges);
389 my $newrange = Bio::Range->union(@ranges);
390 Function: finds the minimal Range that contains all of the Ranges
391 Args : a Range or list of Range objects
392 Returns : the range containing all of the range
393 (in the form of an object like the calling one, OR
394 a three element array)
396 =cut
398 sub union {
399 my $self = shift;
400 my @ranges = @_;
401 if ($self eq "Bio::RangeI") {
402 $self = "Bio::Range";
403 $self->warn("calling static methods of an interface is deprecated; use $self instead");
405 if(ref $self) {
406 unshift @ranges, $self;
409 my @start = sort {$a<=>$b}
410 map( { $_->start() } @ranges);
411 my @end = sort {$a<=>$b}
412 map( { $_->end() } @ranges);
414 my $start = shift @start;
415 while( !defined $start ) {
416 $start = shift @start;
419 my $end = pop @end;
421 my $union_strand; # Strand for the union range object.
423 foreach(@ranges) {
424 if(! defined $union_strand) {
425 $union_strand = $_->strand;
426 next;
427 } else {
428 if(not defined $_->strand or $union_strand ne $_->strand) {
429 $union_strand = 0;
430 last;
434 return unless $start or $end;
435 if( wantarray() ) {
436 return ( $start,$end,$union_strand);
437 } else {
438 return $self->new('-start' => $start,
439 '-end' => $end,
440 '-strand' => $union_strand
445 =head2 overlap_extent
447 Title : overlap_extent
448 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
449 Function: Provides actual amount of overlap between two different
450 ranges
451 Example :
452 Returns : array of values containing the length unique to the calling
453 range, the length common to both, and the length unique to
454 the argument range
455 Args : a range
457 =cut
459 sub overlap_extent{
460 my ($a,$b) = @_;
462 $a->throw("start is undefined") unless defined $a->start;
463 $a->throw("end is undefined") unless defined $a->end;
464 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
465 $b->throw("start is undefined") unless defined $b->start;
466 $b->throw("end is undefined") unless defined $b->end;
468 if( ! $a->overlaps($b) ) {
469 return ($a->length,0,$b->length);
472 my ($au,$bu) = (0, 0);
473 if( $a->start < $b->start ) {
474 $au = $b->start - $a->start;
475 } else {
476 $bu = $a->start - $b->start;
479 if( $a->end > $b->end ) {
480 $au += $a->end - $b->end;
481 } else {
482 $bu += $b->end - $a->end;
485 my $intersect = $a->intersection($b);
486 if( ! $intersect ) {
487 warn("no intersection\n");
488 return ($au, 0, $bu);
489 } else {
490 my $ie = $intersect->end;
491 my $is = $intersect->start;
492 return ($au,$ie-$is+1,$bu);
496 =head2 disconnected_ranges
498 Title : disconnected_ranges
499 Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
500 Function: finds the minimal set of ranges such that each input range
501 is fully contained by at least one output range, and none of
502 the output ranges overlap
503 Args : a list of ranges
504 Returns : a list of objects of the same type as the input
505 (conforms to RangeI)
507 =cut
509 sub disconnected_ranges {
510 my $self = shift;
511 if ($self eq "Bio::RangeI") {
512 $self = "Bio::Range";
513 $self->warn("calling static methods of an interface is deprecated; use $self instead");
515 my @inranges = @_;
516 if(ref $self) {
517 unshift @inranges, $self;
520 my @outranges = (); # disconnected ranges
522 # iterate through all input ranges $inrange,
523 # adding each input range to the set of output ranges @outranges,
524 # provided $inrange does not overlap ANY range in @outranges
525 # - if it does overlap an outrange, then merge it
526 foreach my $inrange (@inranges) {
527 my $intersects = 0;
528 my @outranges_new = ();
529 my @intersecting_ranges = ();
531 # iterate through all @outranges, testing if it intersects
532 # current $inrange; if it does, merge and add to list
533 # of @intersecting_ranges, otherwise add $outrange to
534 # the new list of outranges that do NOT intersect
535 for (my $i=0; $i<@outranges; $i++) {
536 my $outrange = $outranges[$i];
537 my $intersection = $inrange->intersection($outrange);
538 if ($intersection) {
539 $intersects = 1;
540 my $union = $inrange->union($outrange);
541 push(@intersecting_ranges, $union);
543 else {
544 push(@outranges_new, $outrange);
547 @outranges = @outranges_new;
548 # @outranges now contains a list of non-overlapping ranges
549 # that do not intersect the current $inrange
551 if (@intersecting_ranges) {
552 if (@intersecting_ranges > 1) {
553 # this sf intersected > 1 range, which means that
554 # all the ranges it intersects should be joined
555 # together in a new range
556 my $merged_range =
557 $self->union(@intersecting_ranges);
558 push(@outranges, $merged_range);
561 else {
562 # exactly 1 intersecting range
563 push(@outranges, @intersecting_ranges);
566 else {
567 # no intersections found - new range
568 push(@outranges,
569 $self->new('-start'=>$inrange->start,
570 '-end'=>$inrange->end,
571 '-strand'=>$inrange->strand,
575 return @outranges;
578 =head2 offsetStranded
580 Title : offsetStranded
581 Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
582 Function : destructively modifies RangeI implementing object to
583 offset its start and stop coordinates by values $fiveprime_offset and
584 $threeprime_offset (positive values being in the strand direction).
585 Args : two integer offsets: $fiveprime_offset and $threeprime_offset
586 Returns : $self, offset accordingly.
588 =cut
590 sub offsetStranded {
591 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
592 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
593 $self->start($self->start + $offset_start);
594 $self->end($self->end + $offset_end);
595 return $self;
598 =head2 subtract
600 Title : subtract
601 Usage : my @subtracted = $r1->subtract($r2)
602 Function: Subtract range r2 from range r1
603 Args : arg #1 = a range to subtract from this one (mandatory)
604 arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
605 Returns : undef if they do not overlap or r2 contains this RangeI,
606 or an arrayref of Range objects (this is an array since some
607 instances where the subtract range is enclosed within this range
608 will result in the creation of two new disjoint ranges)
610 =cut
612 sub subtract() {
613 my ($self, $range, $so) = @_;
614 $self->throw("missing arg: you need to pass in another feature")
615 unless $range;
616 return unless $self->_testStrand($range, $so);
618 if ($self eq "Bio::RangeI") {
619 $self = "Bio::Range";
620 $self->warn("calling static methods of an interface is
621 deprecated; use $self instead");
623 $range->throw("Input a Bio::RangeI object") unless
624 $range->isa('Bio::RangeI');
626 my @sub_locations;
627 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
628 @sub_locations = $self->location->sub_Location;
629 } else {
630 @sub_locations = $self;
633 my @outranges;
634 foreach my $sl (@sub_locations) {
635 if (!$sl->overlaps($range)) {
636 push(@outranges,
637 $self->new('-start' =>$sl->start,
638 '-end' =>$sl->end,
639 '-strand'=>$sl->strand,
641 next;
644 ##Subtracts everything
645 if ($range->contains($sl)) {
646 next;
649 my ($start, $end, $strand) = $sl->intersection($range, $so);
650 ##Subtract intersection from $self range
652 if ($sl->start < $start) {
653 push(@outranges,
654 $self->new('-start' =>$sl->start,
655 '-end' =>$start - 1,
656 '-strand'=>$sl->strand,
657 ));
659 if ($sl->end > $end) {
660 push(@outranges,
661 $self->new('-start' =>$end + 1,
662 '-end' =>$sl->end,
663 '-strand'=>$sl->strand,
664 ));
668 if (@outranges) {
669 return \@outranges;
671 return;