tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Location / Split.pm
blob11f093887a139d4fa6bb93b6fdb9680771089f0e
1 # $Id$
3 # BioPerl module for Bio::Location::Split
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Location::Split - Implementation of a Location on a Sequence
16 which has multiple locations (start/end points)
18 =head1 SYNOPSIS
20 use Bio::Location::Split;
22 my $splitlocation = Bio::Location::Split->new();
23 $splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>1,
24 -end=>30,
25 -strand=>1));
26 $splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>50,
27 -end=>61,
28 -strand=>1));
29 my @sublocs = $splitlocation->sub_Location();
31 my $count = 1;
32 # print the start/end points of the sub locations
33 foreach my $location ( sort { $a->start <=> $b->start }
34 @sublocs ) {
35 printf "sub feature %d [%d..%d]\n",
36 $count, $location->start,$location->end, "\n";
37 $count++;
40 =head1 DESCRIPTION
42 This implementation handles locations which span more than one
43 start/end location, or and/or lie on different sequences, and can
44 work with split locations that depend on the specific order of the
45 sublocations ('join') or don't have a specific order but represent
46 a feature spanning discontiguous sublocations ('order', 'bond').
48 Note that the order in which sublocations are added may be very important,
49 depending on the specific split location type. For instance, a 'join'
50 must have the sublocations added in the order that one expects to
51 join the sublocations, whereas all other types are sorted based on the
52 sequence location.
54 =head1 FEEDBACK
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to one
58 of the Bioperl mailing lists. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 =head2 Support
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
74 =head2 Reporting Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
78 web:
80 http://bugzilla.open-bio.org/
82 =head1 AUTHOR - Jason Stajich
84 Email jason-AT-bioperl_DOT_org
86 =head1 APPENDIX
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
91 =cut
93 # Let the code begin...
95 package Bio::Location::Split;
97 # as defined by BSANE 0.03
98 our @CORBALOCATIONOPERATOR = ('NONE','JOIN', undef, 'ORDER');;
100 use Bio::Root::Root;
102 use base qw(Bio::Location::Atomic Bio::Location::SplitLocationI);
104 sub new {
105 my ($class, @args) = @_;
106 my $self = $class->SUPER::new(@args);
107 # initialize
108 $self->{'_sublocations'} = [];
109 my ( $type, $seqid, $locations ) =
110 $self->_rearrange([qw(SPLITTYPE
111 SEQ_ID
112 LOCATIONS
113 )], @args);
114 if( defined $locations && ref($locations) =~ /array/i ) {
115 $self->add_sub_Location(@$locations);
117 $seqid && $self->seq_id($seqid);
118 $type = lc ($type);
119 $self->splittype($type || 'JOIN');
120 return $self;
123 =head2 each_Location
125 Title : each_Location
126 Usage : @locations = $locObject->each_Location($order);
127 Function: Conserved function call across Location:: modules - will
128 return an array containing the component Location(s) in
129 that object, regardless if the calling object is itself a
130 single location or one containing sublocations.
131 Returns : an array of Bio::LocationI implementing objects
132 Args : Optional sort order to be passed to sub_Location()
134 =cut
136 sub each_Location {
137 my ($self, $order) = @_;
138 my @locs = ();
139 foreach my $subloc ($self->sub_Location($order)) {
140 # Recursively check to get hierarchical split locations:
141 push @locs, $subloc->each_Location($order);
143 return @locs;
146 =head2 sub_Location
148 Title : sub_Location
149 Usage : @sublocs = $splitloc->sub_Location();
150 Function: Returns the array of sublocations making up this compound (split)
151 location. Those sublocations referring to the same sequence as
152 the root split location will be sorted by start position (forward
153 sort) or end position (reverse sort) and come first (before
154 those on other sequences).
156 The sort order can be optionally specified or suppressed by the
157 value of the first argument. The default is no sort.
159 Returns : an array of Bio::LocationI implementing objects
160 Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
161 sort order
163 =cut
165 sub sub_Location {
166 my ($self, $order) = @_;
167 $order = 0 unless defined $order;
168 if( defined($order) && ($order !~ /^-?\d+$/) ) {
169 $self->throw("value $order passed in to sub_Location is $order, an invalid value");
171 $order = 1 if($order > 1);
172 $order = -1 if($order < -1);
173 my @sublocs = defined $self->{'_sublocations'} ? @{$self->{'_sublocations'}} : ();
175 # return the array if no ordering requested
176 return @sublocs if( ($order == 0) || (! @sublocs) );
178 # sort those locations that are on the same sequence as the top (`master')
179 # if the top seq is undefined, we take the first defined in a sublocation
180 my $seqid = $self->seq_id();
181 my $i = 0;
182 while((! defined($seqid)) && ($i <= $#sublocs)) {
183 $seqid = $sublocs[$i++]->seq_id();
185 if((! $self->seq_id()) && $seqid) {
186 $self->warn("sorted sublocation array requested but ".
187 "root location doesn't define seq_id ".
188 "(at least one sublocation does!)");
190 my @locs = ($seqid ?
191 grep { $_->seq_id() eq $seqid; } @sublocs :
192 @sublocs);
193 if(@locs) {
194 if($order == 1) {
195 # Schwartzian transforms for performance boost
196 @locs = map { $_->[0] }
197 sort {
198 (defined $a && defined $b) ? $a->[1] <=> $b->[1] :
199 $a ? -1 : 1
201 map {
202 [$_, (defined $_->start ? $_->start : $_->end)]
203 } @locs;;
204 } else { # $order == -1
205 @locs = map { $_->[0]}
206 sort {
207 (defined $a && defined $b) ? $b->[1] <=> $a->[1] :
208 $a ? -1 : 1
210 map {
211 [$_, (defined $_->end ? $_->end : $_->start)]
212 } @locs;
215 # push the rest unsorted
216 if($seqid) {
217 push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
219 # done!
221 return @locs;
224 =head2 add_sub_Location
226 Title : add_sub_Location
227 Usage : $splitloc->add_sub_Location(@locationIobjs);
228 Function: add an additional sublocation
229 Returns : number of current sub locations
230 Args : list of Bio::LocationI implementing object(s) to add
232 =cut
234 sub add_sub_Location {
235 my ($self,@args) = @_;
236 my @locs;
237 foreach my $loc ( @args ) {
238 if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
239 $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
240 next;
242 push @{$self->{'_sublocations'}}, $loc;
245 return scalar @{$self->{'_sublocations'}};
248 =head2 splittype
250 Title : splittype
251 Usage : $splittype = $location->splittype();
252 Function: get/set the split splittype
253 Returns : the splittype of split feature (join, order)
254 Args : splittype to set
256 =cut
258 sub splittype {
259 my ($self, $value) = @_;
260 if( defined $value || ! defined $self->{'_splittype'} ) {
261 $value = 'JOIN' unless( defined $value );
262 $self->{'_splittype'} = uc ($value);
264 return $self->{'_splittype'};
267 =head2 is_single_sequence
269 Title : is_single_sequence
270 Usage : if($splitloc->is_single_sequence()) {
271 print "Location object $splitloc is split ".
272 "but only across a single sequence\n";
274 Function: Determine whether this location is split across a single or
275 multiple sequences.
277 This implementation ignores (sub-)locations that do not define
278 seq_id(). The same holds true for the root location.
280 Returns : TRUE if all sublocations lie on the same sequence as the root
281 location (feature), and FALSE otherwise.
282 Args : none
284 =cut
286 sub is_single_sequence {
287 my ($self) = @_;
289 my $seqid = $self->seq_id();
290 foreach my $loc ($self->sub_Location(0)) {
291 $seqid = $loc->seq_id() if(! $seqid);
292 if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) {
293 return 0;
296 return 1;
299 =head2 guide_strand
301 Title : guide_strand
302 Usage : $str = $loc->guide_strand();
303 Function: Get/Set the guide strand. Of use only if the split type is
304 a 'join' (this helps determine the order of sublocation
305 retrieval)
306 Returns : value of guide strand (1, -1, or undef)
307 Args : new value (-1 or 1, optional)
309 =cut
311 sub guide_strand {
312 my $self = shift;
313 return $self->{'strand'} = shift if @_;
314 return $self->{'strand'};
317 =head1 LocationI methods
319 =head2 strand
321 Title : strand
322 Usage : $obj->strand($newval)
323 Function: For SplitLocations, setting the strand of the container
324 (this object) is a short-cut for setting the strand of all
325 sublocations.
327 In get-mode, checks if no sub-location is remote, and if
328 all have the same strand. If so, it returns that shared
329 strand value. Otherwise it returns undef.
331 Example :
332 Returns : on get, value of strand if identical between sublocations
333 (-1, 1, or undef)
334 Args : new value (-1 or 1, optional)
337 =cut
339 sub strand{
340 my ($self,$value) = @_;
341 if( defined $value) {
342 $self->{'strand'} = $value;
343 # propagate to all sublocs
344 foreach my $loc ($self->sub_Location(0)) {
345 $loc->strand($value);
347 } else {
348 my ($strand, $lstrand);
349 foreach my $loc ($self->sub_Location(0)) {
350 # we give up upon any location that's remote or doesn't have
351 # the strand specified, or has a differing one set than
352 # previously seen.
353 # calling strand() is potentially expensive if the subloc is also
354 # a split location, so we cache it
355 $lstrand = $loc->strand();
356 if((! $lstrand) ||
357 ($strand && ($strand != $lstrand)) ||
358 $loc->is_remote()) {
359 $strand = undef;
360 last;
361 } elsif(! $strand) {
362 $strand = $lstrand;
365 return $strand;
369 =head2 flip_strand
371 Title : flip_strand
372 Usage : $location->flip_strand();
373 Function: Flip-flop a strand to the opposite. Also switch Split strand
374 from undef to -1 or -1 to undef
375 Returns : None
376 Args : None
378 =cut
380 sub flip_strand {
381 my $self = shift;
382 for my $loc ( $self->sub_Location(0) ) {
383 $loc->flip_strand;
384 if ($loc->isa('Bio::Location::SplitLocationI')) {
385 my $gs = ($self->guide_strand == -1) ? undef : -1;
386 $loc->guide_strand($gs);
391 =head2 start
393 Title : start
394 Usage : $start = $location->start();
395 Function: get the starting point of the first (sorted) sublocation
396 Returns : integer
397 Args : none
399 =cut
401 sub start {
402 my ($self,$value) = @_;
403 if( defined $value ) {
404 $self->throw("Trying to set the starting point of a split location, ".
405 "that is not possible, try manipulating the sub Locations");
407 return $self->SUPER::start();
410 =head2 end
412 Title : end
413 Usage : $end = $location->end();
414 Function: get the ending point of the last (sorted) sublocation
415 Returns : integer
416 Args : none
418 =cut
420 sub end {
421 my ($self,$value) = @_;
422 if( defined $value ) {
423 $self->throw("Trying to set the ending point of a split location, ".
424 "that is not possible, try manipulating the sub Locations");
426 return $self->SUPER::end();
429 =head2 min_start
431 Title : min_start
432 Usage : $min_start = $location->min_start();
433 Function: get the minimum starting point
434 Returns : the minimum starting point from the contained sublocations
435 Args : none
437 =cut
439 sub min_start {
440 my ($self, $value) = @_;
442 if( defined $value ) {
443 $self->throw("Trying to set the minimum starting point of a split ".
444 "location, that is not possible, try manipulating the sub Locations");
446 my @locs = $self->sub_Location(1);
447 return $locs[0]->min_start() if @locs;
448 return;
451 =head2 max_start
453 Title : max_start
454 Usage : my $maxstart = $location->max_start();
455 Function: Get maximum starting location of feature startpoint
456 Returns : integer or undef if no maximum starting point.
457 Args : none
459 =cut
461 sub max_start {
462 my ($self,$value) = @_;
464 if( defined $value ) {
465 $self->throw("Trying to set the maximum starting point of a split ".
466 "location, that is not possible, try manipulating the sub Locations");
468 my @locs = $self->sub_Location(1);
469 return $locs[0]->max_start() if @locs;
470 return;
473 =head2 start_pos_type
475 Title : start_pos_type
476 Usage : my $start_pos_type = $location->start_pos_type();
477 Function: Get start position type (ie <,>, ^)
478 Returns : type of position coded as text
479 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
480 Args : none
482 =cut
484 sub start_pos_type {
485 my ($self,$value) = @_;
487 if( defined $value ) {
488 $self->throw("Trying to set the start_pos_type of a split location, ".
489 "that is not possible, try manipulating the sub Locations");
491 my @locs = $self->sub_Location();
492 return ( @locs ) ? $locs[0]->start_pos_type() : undef;
495 =head2 min_end
497 Title : min_end
498 Usage : my $minend = $location->min_end();
499 Function: Get minimum ending location of feature endpoint
500 Returns : integer or undef if no minimum ending point.
501 Args : none
503 =cut
505 sub min_end {
506 my ($self,$value) = @_;
508 if( defined $value ) {
509 $self->throw("Trying to set the minimum end point of a split location, ".
510 "that is not possible, try manipulating the sub Locations");
512 # reverse sort locations by largest ending to smallest ending
513 my @locs = $self->sub_Location(-1);
514 return $locs[0]->min_end() if @locs;
515 return;
518 =head2 max_end
520 Title : max_end
521 Usage : my $maxend = $location->max_end();
522 Function: Get maximum ending location of feature endpoint
523 Returns : integer or undef if no maximum ending point.
524 Args : none
526 =cut
528 sub max_end {
529 my ($self,$value) = @_;
531 if( defined $value ) {
532 $self->throw("Trying to set the maximum end point of a split location, ".
533 "that is not possible, try manipulating the sub Locations");
535 # reverse sort locations by largest ending to smallest ending
536 my @locs = $self->sub_Location(-1);
537 return $locs[0]->max_end() if @locs;
538 return;
541 =head2 end_pos_type
543 Title : end_pos_type
544 Usage : my $end_pos_type = $location->end_pos_type();
545 Function: Get end position type (ie <,>, ^)
546 Returns : type of position coded as text
547 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
548 Args : none
550 =cut
552 sub end_pos_type {
553 my ($self,$value) = @_;
555 if( defined $value ) {
556 $self->throw("Trying to set end_pos_type of a split location, ".
557 "that is not possible, try manipulating the sub Locations");
559 my @locs = $self->sub_Location();
560 return ( @locs ) ? $locs[0]->end_pos_type() : undef;
564 =head2 seq_id
566 Title : seq_id
567 Usage : my $seqid = $location->seq_id();
568 Function: Get/Set seq_id that location refers to
570 We override this here in order to propagate to all sublocations
571 which are not remote (provided this root is not remote either)
572 Returns : seq_id
573 Args : [optional] seq_id value to set
576 =cut
578 sub seq_id {
579 my $self = shift;
581 if(@_ && !$self->is_remote()) {
582 foreach my $subloc ($self->sub_Location(0)) {
583 $subloc->seq_id(@_) if !$subloc->is_remote();
586 return $self->SUPER::seq_id(@_);
589 =head2 coordinate_policy
591 Title : coordinate_policy
592 Usage : $policy = $location->coordinate_policy();
593 $location->coordinate_policy($mypolicy); # set may not be possible
594 Function: Get the coordinate computing policy employed by this object.
596 See Bio::Location::CoordinatePolicyI for documentation about
597 the policy object and its use.
599 The interface *does not* require implementing classes to accept
600 setting of a different policy. The implementation provided here
601 does, however, allow to do so.
603 Implementors of this interface are expected to initialize every
604 new instance with a CoordinatePolicyI object. The implementation
605 provided here will return a default policy object if none has
606 been set yet. To change this default policy object call this
607 method as a class method with an appropriate argument. Note that
608 in this case only subsequently created Location objects will be
609 affected.
611 Returns : A Bio::Location::CoordinatePolicyI implementing object.
612 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
614 =head2 to_FTstring
616 Title : to_FTstring
617 Usage : my $locstr = $location->to_FTstring()
618 Function: returns the FeatureTable string of this location
619 Returns : string
620 Args : none
622 =cut
624 sub to_FTstring {
625 my ($self) = @_;
626 my @strs;
627 my $strand = $self->strand() || 0;
628 my $stype = lc($self->splittype());
629 my $guide = $self->guide_strand();
631 if( $strand < 0 ) {
632 $self->flip_strand; # this will recursively set the strand
633 # to +1 for all the sub locations
635 # If the split type is join, the order is important;
636 # otherwise must be 5'->3' regardless
638 my @locs = ($stype eq 'join' && (!$guide && $strand == -1)) ?
639 reverse $self->sub_Location() : $self->sub_Location() ;
641 foreach my $loc ( @locs ) {
642 $loc->verbose($self->verbose);
643 my $str = $loc->to_FTstring();
644 # we only append the remote seq_id if it hasn't been done already
645 # by the sub-location (which it should if it knows it's remote)
646 # (and of course only if it's necessary)
647 if( (! $loc->is_remote) &&
648 defined($self->seq_id) && defined($loc->seq_id) &&
649 ($loc->seq_id ne $self->seq_id) ) {
650 $str = sprintf("%s:%s", $loc->seq_id, $str);
652 push @strs, $str;
654 $self->flip_strand if $strand < 0;
655 my $str;
656 if( @strs == 1 ) {
657 ($str) = @strs;
658 } elsif( @strs == 0 ) {
659 $self->warn("no Sublocations for this splitloc, so not returning anything\n");
660 } else {
661 $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
663 if( $strand < 0 ) { # wrap this in a complement if it was unrolled
664 $str = sprintf("%s(%s)",'complement',$str);
667 return $str;
670 =head2 valid_Location
672 Title : valid_Location
673 Usage : if ($location->valid_location) {...};
674 Function: boolean method to determine whether location is considered valid
675 (has minimum requirements for Simple implementation)
676 Returns : Boolean value: true if location is valid, false otherwise
677 Args : none
679 =cut
681 # we'll probably need to override the RangeI methods since our locations will
682 # not be contiguous.