bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / RangeI.pm
blob1863c9d410778916c67c105c008d3fd674ccdbf0
1 # $Id$
3 # BioPerl module for Bio::RangeI
5 # Cared for by Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Matthew Pocock
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::RangeI - Range interface
17 =head1 SYNOPSIS
19 #Do not run this module directly
21 =head1 DESCRIPTION
23 This provides a standard BioPerl range interface that should be
24 implemented by any object that wants to be treated as a range. This
25 serves purely as an abstract base class for implementers and can not
26 be instantiated.
28 Ranges are modeled as having (start, end, length, strand). They use
29 Bio-coordinates - all points E<gt>= start and E<lt>= end are within the
30 range. End is always greater-than or equal-to start, and length is
31 greater than or equal to 1. The behaviour of a range is undefined if
32 ranges with negative numbers or zero are used.
34 So, in summary:
36 length = end - start + 1
37 end >= start
38 strand = (-1 | 0 | +1)
40 =head1 FEEDBACK
42 =head2 Mailing Lists
44 User feedback is an integral part of the evolution of this and other
45 Bioperl modules. Send your comments and suggestions preferably to one
46 of the Bioperl mailing lists. Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51 =head2 Reporting Bugs
53 Report bugs to the Bioperl bug tracking system to help us keep track
54 the bugs and their resolution. Bug reports can be submitted via the
55 web:
57 http://bugzilla.bioperl.org/
59 =head1 AUTHOR - Heikki Lehvaslaiho
61 Email: heikki-at-bioperl-dot-org
63 =head1 CONTRIBUTORS
65 Juha Muilu (muilu@ebi.ac.uk)
66 Sendu Bala (bix@sendu.me.uk)
67 Malcolm Cook (mec@stowers-institute.org)
68 Stephen Montgomery (sm8 at sanger.ac.uk)
70 =head1 APPENDIX
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
75 =cut
77 package Bio::RangeI;
79 use strict;
80 use Carp;
81 use integer;
82 use vars qw(%STRAND_OPTIONS);
84 use base qw(Bio::Root::RootI);
86 BEGIN {
87 # STRAND_OPTIONS contains the legal values for the strand-testing options
88 %STRAND_OPTIONS = map { $_, '_' . $_ }
90 'strong', # ranges must have the same strand
91 'weak', # ranges must have the same strand or no strand
92 'ignore', # ignore strand information
96 # utility methods
99 # returns true if strands are equal and non-zero
100 sub _strong {
101 my ($r1, $r2) = @_;
102 my ($s1, $s2) = ($r1->strand(), $r2->strand());
104 return 1 if $s1 != 0 && $s1 == $s2;
107 # returns true if strands are equal or either is zero
108 sub _weak {
109 my ($r1, $r2) = @_;
110 my ($s1, $s2) = ($r1->strand(), $r2->strand());
111 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
114 # returns true for any strandedness
115 sub _ignore {
116 return 1;
119 # works out what test to use for the strictness and returns true/false
120 # e.g. $r1->_testStrand($r2, 'strong')
121 sub _testStrand() {
122 my ($r1, $r2, $comp) = @_;
123 return 1 unless $comp;
124 my $func = $STRAND_OPTIONS{$comp};
125 return $r1->$func($r2);
128 =head1 Abstract methods
130 These methods must be implemented in all subclasses.
132 =head2 start
134 Title : start
135 Usage : $start = $range->start();
136 Function: get/set the start of this range
137 Returns : the start of this range
138 Args : optionally allows the start to be set
139 using $range->start($start)
141 =cut
143 sub start {
144 shift->throw_not_implemented();
147 =head2 end
149 Title : end
150 Usage : $end = $range->end();
151 Function: get/set the end of this range
152 Returns : the end of this range
153 Args : optionally allows the end to be set
154 using $range->end($end)
156 =cut
158 sub end {
159 shift->throw_not_implemented();
162 =head2 length
164 Title : length
165 Usage : $length = $range->length();
166 Function: get/set the length of this range
167 Returns : the length of this range
168 Args : optionally allows the length to be set
169 using $range->length($length)
171 =cut
173 sub length {
174 shift->throw_not_implemented();
177 =head2 strand
179 Title : strand
180 Usage : $strand = $range->strand();
181 Function: get/set the strand of this range
182 Returns : the strandedness (-1, 0, +1)
183 Args : optionally allows the strand to be set
184 using $range->strand($strand)
186 =cut
188 sub strand {
189 shift->throw_not_implemented();
192 =head1 Boolean Methods
194 These methods return true or false. They throw an error if start and
195 end are not defined.
197 $range->overlaps($otherRange) && print "Ranges overlap\n";
199 =head2 overlaps
201 Title : overlaps
202 Usage : if($r1->overlaps($r2)) { do stuff }
203 Function: tests if $r2 overlaps $r1
204 Args : arg #1 = a range to compare this one to (mandatory)
205 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
206 Returns : true if the ranges overlap, false otherwise
208 =cut
210 sub overlaps {
211 my ($self, $other, $so) = @_;
213 $self->throw("start is undefined") unless defined $self->start;
214 $self->throw("end is undefined") unless defined $self->end;
215 $self->throw("not a Bio::RangeI object") unless defined $other &&
216 $other->isa('Bio::RangeI');
217 $other->throw("start is undefined") unless defined $other->start;
218 $other->throw("end is undefined") unless defined $other->end;
220 return
221 ($self->_testStrand($other, $so)
222 and not (
223 ($self->start() > $other->end() or
224 $self->end() < $other->start() )
228 =head2 contains
230 Title : contains
231 Usage : if($r1->contains($r2) { do stuff }
232 Function: tests whether $r1 totally contains $r2
233 Args : arg #1 = a range to compare this one to (mandatory)
234 alternatively, integer scalar to test
235 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
236 Returns : true if the argument is totally contained within this range
238 =cut
240 sub contains {
241 my ($self, $other, $so) = @_;
242 $self->throw("start is undefined") unless defined $self->start;
243 $self->throw("end is undefined") unless defined $self->end;
245 if(defined $other && ref $other) { # a range object?
246 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
247 $other->throw("start is undefined") unless defined $other->start;
248 $other->throw("end is undefined") unless defined $other->end;
250 return ($self->_testStrand($other, $so) and
251 $other->start() >= $self->start() and
252 $other->end() <= $self->end());
253 } else { # a scalar?
254 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
255 return ($other >= $self->start() and $other <= $self->end());
259 =head2 equals
261 Title : equals
262 Usage : if($r1->equals($r2))
263 Function: test whether $r1 has the same start, end, length as $r2
264 Args : arg #1 = a range to compare this one to (mandatory)
265 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
266 Returns : true if they are describing the same range
268 =cut
270 sub equals {
271 my ($self, $other, $so) = @_;
273 $self->throw("start is undefined") unless defined $self->start;
274 $self->throw("end is undefined") unless defined $self->end;
275 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
276 $other->throw("start is undefined") unless defined $other->start;
277 $other->throw("end is undefined") unless defined $other->end;
279 return ($self->_testStrand($other, $so) and
280 $self->start() == $other->start() and
281 $self->end() == $other->end() );
284 =head1 Geometrical methods
286 These methods do things to the geometry of ranges, and return
287 Bio::RangeI compliant objects or triplets (start, stop, strand) from
288 which new ranges could be built.
290 =head2 intersection
292 Title : intersection
293 Usage : ($start, $stop, $strand) = $r1->intersection($r2); OR
294 ($start, $stop, $strand) = Bio::Range->intersection(\@ranges); OR
295 my $containing_range = $r1->intersection($r2); OR
296 my $containing_range = Bio::Range->intersection(\@ranges);
297 Function: gives the range that is contained by all ranges
298 Returns : undef if they do not overlap, or
299 the range that they do overlap (in the form of an object
300 like the calling one, OR a three element array)
301 Args : arg #1 = [REQUIRED] a range to compare this one to,
302 or an array ref of ranges
303 arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
305 =cut
307 sub intersection {
308 my ($self, $given, $so) = @_;
309 $self->throw("missing arg: you need to pass in another feature") unless $given;
311 my @ranges;
312 if ($self eq "Bio::RangeI") {
313 $self = "Bio::Range";
314 $self->warn("calling static methods of an interface is deprecated; use $self instead");
316 if (ref $self) {
317 push(@ranges, $self);
319 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
320 $self->throw("Need at least 2 ranges") unless @ranges >= 2;
322 my $intersect;
323 while (@ranges > 0) {
324 unless ($intersect) {
325 $intersect = shift(@ranges);
326 $self->throw("Not an object: $intersect") unless ref($intersect);
327 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
328 $self->throw("start is undefined") unless defined $intersect->start;
329 $self->throw("end is undefined") unless defined $intersect->end;
332 my $compare = shift(@ranges);
333 $self->throw("Not an object: $compare") unless ref($compare);
334 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
335 $self->throw("start is undefined") unless defined $compare->start;
336 $self->throw("end is undefined") unless defined $compare->end;
337 return unless $compare->_testStrand($intersect, $so);
339 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
340 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
342 my $start = pop @starts; # larger of the 2 starts
343 my $end = shift @ends; # smaller of the 2 ends
345 my $intersect_strand; # strand for the intersection
346 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
347 $intersect_strand = $compare->strand;
349 else {
350 $intersect_strand = 0;
353 if ($start > $end) {
354 return;
356 else {
357 $intersect = $self->new(-start => $start,
358 -end => $end,
359 -strand => $intersect_strand);
363 if (wantarray()) {
364 return ($intersect->start, $intersect->end, $intersect->strand);
366 else {
367 return $intersect;
371 =head2 union
373 Title : union
374 Usage : ($start, $stop, $strand) = $r1->union($r2);
375 : ($start, $stop, $strand) = Bio::Range->union(@ranges);
376 my $newrange = Bio::Range->union(@ranges);
377 Function: finds the minimal Range that contains all of the Ranges
378 Args : a Range or list of Range objects
379 Returns : the range containing all of the range
380 (in the form of an object like the calling one, OR
381 a three element array)
383 =cut
385 sub union {
386 my $self = shift;
387 my @ranges = @_;
388 if ($self eq "Bio::RangeI") {
389 $self = "Bio::Range";
390 $self->warn("calling static methods of an interface is deprecated; use $self instead");
392 if(ref $self) {
393 unshift @ranges, $self;
396 my @start = sort {$a<=>$b}
397 map( { $_->start() } @ranges);
398 my @end = sort {$a<=>$b}
399 map( { $_->end() } @ranges);
401 my $start = shift @start;
402 while( !defined $start ) {
403 $start = shift @start;
406 my $end = pop @end;
408 my $union_strand; # Strand for the union range object.
410 foreach(@ranges) {
411 if(! defined $union_strand) {
412 $union_strand = $_->strand;
413 next;
414 } else {
415 if(not defined $_->strand or $union_strand ne $_->strand) {
416 $union_strand = 0;
417 last;
421 return unless $start or $end;
422 if( wantarray() ) {
423 return ( $start,$end,$union_strand);
424 } else {
425 return $self->new('-start' => $start,
426 '-end' => $end,
427 '-strand' => $union_strand
432 =head2 overlap_extent
434 Title : overlap_extent
435 Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
436 Function: Provides actual amount of overlap between two different
437 ranges
438 Example :
439 Returns : array of values containing the length unique to the calling
440 range, the length common to both, and the length unique to
441 the argument range
442 Args : a range
444 =cut
446 sub overlap_extent{
447 my ($a,$b) = @_;
449 $a->throw("start is undefined") unless defined $a->start;
450 $a->throw("end is undefined") unless defined $a->end;
451 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
452 $b->throw("start is undefined") unless defined $b->start;
453 $b->throw("end is undefined") unless defined $b->end;
455 if( ! $a->overlaps($b) ) {
456 return ($a->length,0,$b->length);
459 my ($au,$bu) = (0, 0);
460 if( $a->start < $b->start ) {
461 $au = $b->start - $a->start;
462 } else {
463 $bu = $a->start - $b->start;
466 if( $a->end > $b->end ) {
467 $au += $a->end - $b->end;
468 } else {
469 $bu += $b->end - $a->end;
472 my $intersect = $a->intersection($b);
473 my $ie = $intersect->end;
474 my $is = $intersect->start;
476 return ($au,$ie-$is+1,$bu);
479 =head2 disconnected_ranges
481 Title : disconnected_ranges
482 Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
483 Function: finds the minimal set of ranges such that each input range
484 is fully contained by at least one output range, and none of
485 the output ranges overlap
486 Args : a list of ranges
487 Returns : a list of objects of the same type as the input
488 (conforms to RangeI)
490 =cut
492 sub disconnected_ranges {
493 my $self = shift;
494 if ($self eq "Bio::RangeI") {
495 $self = "Bio::Range";
496 $self->warn("calling static methods of an interface is deprecated; use $self instead");
498 my @inranges = @_;
499 if(ref $self) {
500 unshift @inranges, $self;
503 my @outranges = (); # disconnected ranges
505 # iterate through all input ranges $inrange,
506 # adding each input range to the set of output ranges @outranges,
507 # provided $inrange does not overlap ANY range in @outranges
508 # - if it does overlap an outrange, then merge it
509 foreach my $inrange (@inranges) {
510 my $intersects = 0;
511 my @outranges_new = ();
512 my @intersecting_ranges = ();
514 # iterate through all @outranges, testing if it intersects
515 # current $inrange; if it does, merge and add to list
516 # of @intersecting_ranges, otherwise add $outrange to
517 # the new list of outranges that do NOT intersect
518 for (my $i=0; $i<@outranges; $i++) {
519 my $outrange = $outranges[$i];
520 my $intersection = $inrange->intersection($outrange);
521 if ($intersection) {
522 $intersects = 1;
523 my $union = $inrange->union($outrange);
524 push(@intersecting_ranges, $union);
526 else {
527 push(@outranges_new, $outrange);
530 @outranges = @outranges_new;
531 # @outranges now contains a list of non-overlapping ranges
532 # that do not intersect the current $inrange
534 if (@intersecting_ranges) {
535 if (@intersecting_ranges > 1) {
536 # this sf intersected > 1 range, which means that
537 # all the ranges it intersects should be joined
538 # together in a new range
539 my $merged_range =
540 $self->union(@intersecting_ranges);
541 push(@outranges, $merged_range);
544 else {
545 # exactly 1 intersecting range
546 push(@outranges, @intersecting_ranges);
549 else {
550 # no intersections found - new range
551 push(@outranges,
552 $self->new('-start'=>$inrange->start,
553 '-end'=>$inrange->end,
554 '-strand'=>$inrange->strand,
558 return @outranges;
561 =head2 offsetStranded
563 Title : offsetStranded
564 Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
565 Function : destructively modifies RangeI implementing object to
566 offset its start and stop coordinates by values $fiveprime_offset and
567 $threeprime_offset (positive values being in the strand direction).
568 Args : two integer offsets: $fiveprime_offset and $threeprime_offset
569 Returns : $self, offset accordingly.
571 =cut
573 sub offsetStranded {
574 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
575 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
576 $self->start($self->start + $offset_start);
577 $self->end($self->end + $offset_end);
578 return $self;
581 =head2 subtract
583 Title : subtract
584 Usage : my @subtracted = $r1->subtract($r2)
585 Function: Subtract range r2 from range r1
586 Args : arg #1 = a range to subtract from this one (mandatory)
587 arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
588 Returns : undef if they do not overlap or r2 contains this RangeI,
589 or an arrayref of Range objects (this is an array since some
590 instances where the subtract range is enclosed within this range
591 will result in the creation of two new disjoint ranges)
593 =cut
595 sub subtract() {
596 my ($self, $range, $so) = @_;
597 $self->throw("missing arg: you need to pass in another feature")
598 unless $range;
599 return unless $self->_testStrand($range, $so);
601 if ($self eq "Bio::RangeI") {
602 $self = "Bio::Range";
603 $self->warn("calling static methods of an interface is
604 deprecated; use $self instead");
606 $range->throw("Input a Bio::RangeI object") unless
607 $range->isa('Bio::RangeI');
609 if (!$self->overlaps($range)) {
610 return undef;
613 ##Subtracts everything
614 if ($range->contains($self)) {
615 return undef;
618 my ($start, $end, $strand) = $self->intersection($range, $so);
619 ##Subtract intersection from $self range
621 my @outranges = ();
622 if ($self->start < $start) {
623 push(@outranges,
624 $self->new('-start'=>$self->start,
625 '-end'=>$start - 1,
626 '-strand'=>$self->strand,
627 ));
629 if ($self->end > $end) {
630 push(@outranges,
631 $self->new('-start'=>$end + 1,
632 '-end'=>$self->end,
633 '-strand'=>$self->strand,
634 ));
636 return \@outranges;