2 # BioPerl module for Bio::Location::Split
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Jason Stajich <jason@bioperl.org>
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Location::Split - Implementation of a Location on a Sequence
15 which has multiple locations (start/end points)
19 use Bio::Location::Split;
21 my $splitlocation = Bio::Location::Split->new();
22 $splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>1,
25 $splitlocation->add_sub_Location(Bio::Location::Simple->new(-start=>50,
28 my @sublocs = $splitlocation->sub_Location();
31 # print the start/end points of the sub locations
32 foreach my $location ( sort { $a->start <=> $b->start }
34 printf "sub feature %d [%d..%d]\n",
35 $count, $location->start,$location->end, "\n";
41 This implementation handles locations which span more than one
42 start/end location, or and/or lie on different sequences, and can
43 work with split locations that depend on the specific order of the
44 sublocations ('join') or don't have a specific order but represent
45 a feature spanning noncontiguous sublocations ('order', 'bond').
47 Note that the order in which sublocations are added may be very important,
48 depending on the specific split location type. For instance, a 'join'
49 must have the sublocations added in the order that one expects to
50 join the sublocations, whereas all other types are sorted based on the
55 User feedback is an integral part of the evolution of this and other
56 Bioperl modules. Send your comments and suggestions preferably to one
57 of the Bioperl mailing lists. Your participation is much appreciated.
59 bioperl-l@bioperl.org - General discussion
60 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 Please direct usage questions or support issues to the mailing list:
66 I<bioperl-l@bioperl.org>
68 rather than to the module maintainer directly. Many experienced and
69 reponsive experts will be able look at the problem and quickly
70 address it. Please include a thorough description of the problem
71 with code and data examples if at all possible.
75 Report bugs to the Bioperl bug tracking system to help us keep track
76 the bugs and their resolution. Bug reports can be submitted via the
79 https://github.com/bioperl/bioperl-live/issues
81 =head1 AUTHOR - Jason Stajich
83 Email jason-AT-bioperl_DOT_org
87 The rest of the documentation details each of the object
88 methods. Internal methods are usually preceded with a _
92 # Let the code begin...
94 package Bio
::Location
::Split
;
96 # as defined by BSANE 0.03
97 our @CORBALOCATIONOPERATOR = ('NONE','JOIN', undef, 'ORDER');;
101 use base
qw(Bio::Location::Atomic Bio::Location::SplitLocationI);
104 my ($class, @args) = @_;
105 my $self = $class->SUPER::new
(@args);
107 $self->{'_sublocations'} = [];
108 my ( $type, $seqid, $locations ) =
109 $self->_rearrange([qw(SPLITTYPE
113 if( defined $locations && ref($locations) =~ /array/i ) {
114 $self->add_sub_Location(@
$locations);
116 $seqid && $self->seq_id($seqid);
119 $self->splittype($type);
125 Title : each_Location
126 Usage : @locations = $locObject->each_Location($order);
127 Function: Conserved function call across Location:: modules - will
128 return an array containing the component Location(s) in
129 that object, regardless if the calling object is itself a
130 single location or one containing sublocations.
131 Returns : an array of Bio::LocationI implementing objects
132 Args : Optional sort order to be passed to sub_Location()
137 my ($self, $order) = @_;
139 foreach my $subloc ($self->sub_Location($order)) {
140 # Recursively check to get hierarchical split locations:
141 push @locs, $subloc->each_Location($order);
149 Usage : @sublocs = $splitloc->sub_Location();
150 Function: Returns the array of sublocations making up this compound (split)
151 location. Those sublocations referring to the same sequence as
152 the root split location will be sorted by start position (forward
153 sort) or end position (reverse sort) and come first (before
154 those on other sequences).
156 The sort order can be optionally specified or suppressed by the
157 value of the first argument. The default is no sort.
159 Returns : an array of Bio::LocationI implementing objects
160 Args : Optionally 1, 0, or -1 for specifying a forward, no, or reverse
166 my ($self, $order) = @_;
167 $order = 0 unless defined $order;
168 if( defined($order) && ($order !~ /^-?\d+$/) ) {
169 $self->throw("value $order passed in to sub_Location is $order, an invalid value");
171 $order = 1 if($order > 1);
172 $order = -1 if($order < -1);
173 my @sublocs = defined $self->{'_sublocations'} ? @
{$self->{'_sublocations'}} : ();
175 # return the array if no ordering requested
176 return @sublocs if( ($order == 0) || (! @sublocs) );
178 # sort those locations that are on the same sequence as the top (`master')
179 # if the top seq is undefined, we take the first defined in a sublocation
180 my $seqid = $self->seq_id();
182 while((! defined($seqid)) && ($i <= $#sublocs)) {
183 $seqid = $sublocs[$i++]->seq_id();
185 if((! $self->seq_id()) && $seqid) {
186 $self->warn("sorted sublocation array requested but ".
187 "root location doesn't define seq_id ".
188 "(at least one sublocation does!)");
191 grep { $_->seq_id() eq $seqid; } @sublocs :
195 # Schwartzian transforms for performance boost
196 @locs = map { $_->[0] }
198 (defined $a && defined $b) ?
$a->[1] <=> $b->[1] :
202 [$_, (defined $_->start ?
$_->start : $_->end)]
204 } else { # $order == -1
205 @locs = map { $_->[0]}
207 (defined $a && defined $b) ?
$b->[1] <=> $a->[1] :
211 [$_, (defined $_->end ?
$_->end : $_->start)]
215 # push the rest unsorted
217 push(@locs, grep { $_->seq_id() ne $seqid; } @sublocs);
224 =head2 add_sub_Location
226 Title : add_sub_Location
227 Usage : $splitloc->add_sub_Location(@locationIobjs);
228 Function: add an additional sublocation
229 Returns : number of current sub locations
230 Args : list of Bio::LocationI implementing object(s) to add
234 sub add_sub_Location
{
235 my ($self,@args) = @_;
237 foreach my $loc ( @args ) {
238 if( !ref($loc) || ! $loc->isa('Bio::LocationI') ) {
239 $self->throw("Trying to add $loc as a sub Location but it doesn't implement Bio::LocationI!");
242 push @
{$self->{'_sublocations'}}, $loc;
245 return scalar @
{$self->{'_sublocations'}};
251 Usage : $splittype = $location->splittype();
252 Function: get/set the split splittype
253 Returns : the splittype of split feature (join, order)
254 Args : splittype to set
259 my ($self, $value) = @_;
260 if( defined $value || ! defined $self->{'_splittype'} ) {
261 $value = 'JOIN' unless( defined $value );
262 $self->{'_splittype'} = uc ($value);
264 return $self->{'_splittype'};
267 =head2 is_single_sequence
269 Title : is_single_sequence
270 Usage : if($splitloc->is_single_sequence()) {
271 print "Location object $splitloc is split ".
272 "but only across a single sequence\n";
274 Function: Determine whether this location is split across a single or
277 This implementation ignores (sub-)locations that do not define
278 seq_id(). The same holds true for the root location.
280 Returns : TRUE if all sublocations lie on the same sequence as the root
281 location (feature), and FALSE otherwise.
286 sub is_single_sequence
{
289 my $seqid = $self->seq_id();
290 foreach my $loc ($self->sub_Location(0)) {
291 $seqid = $loc->seq_id() if(! $seqid);
292 if(defined($loc->seq_id()) && ($loc->seq_id() ne $seqid)) {
302 Usage : $str = $loc->guide_strand();
303 Function: Get/Set the guide strand. Of use only if the split type is
304 a 'join' (this helps determine the order of sublocation
306 Returns : value of guide strand (1, -1, or undef)
307 Args : new value (-1 or 1, optional)
313 return $self->{'strand'} = shift if @_;
315 # Sublocations strand values consistency check to set Guide Strand
317 foreach my $loc ($self->sub_Location(0)) {
318 push @subloc_strands, $loc->strand || 1;
320 if ($self->isa('Bio::Location::SplitLocationI')) {
322 my $first_value = $subloc_strands[0];
323 foreach my $strand (@subloc_strands) {
324 $identical++ if ($strand == $first_value);
327 if ($identical == scalar @subloc_strands) {
328 $self->{'strand'} = $first_value;
331 $self->{'strand'} = undef;
334 return $self->{'strand'};
337 =head1 LocationI methods
342 Usage : $obj->strand($newval)
343 Function: For SplitLocations, setting the strand of the container
344 (this object) is a short-cut for setting the strand of all
347 In get-mode, checks if no sub-location is remote, and if
348 all have the same strand. If so, it returns that shared
349 strand value. Otherwise it returns undef.
352 Returns : on get, value of strand if identical between sublocations
354 Args : new value (-1 or 1, optional)
360 my ($self,$value) = @_;
361 if( defined $value) {
362 $self->{'strand'} = $value;
363 # propagate to all sublocs
364 foreach my $loc ($self->sub_Location(0)) {
365 $loc->strand($value);
368 my ($strand, $lstrand);
369 foreach my $loc ($self->sub_Location(0)) {
370 # we give up upon any location that's remote or doesn't have
371 # the strand specified, or has a differing one set than
373 # calling strand() is potentially expensive if the subloc is also
374 # a split location, so we cache it
375 $lstrand = $loc->strand();
377 ($strand && ($strand != $lstrand)) ||
392 Usage : $location->flip_strand();
393 Function: Flip-flop a strand to the opposite. Also sets Split strand
394 to be consistent with the sublocation strands
395 (1, -1 or undef for mixed strand values)
406 for my $loc ( $self->sub_Location(0) ) {
407 # Atomic "flip_strand" now initialize strand if necessary
408 my $new_strand = $loc->flip_strand;
410 # Store strand values for later consistency check
412 push @subloc_strands, $new_strand;
415 # Sublocations strand values consistency check to set Guide Strand
416 if ($self->isa('Bio::Location::SplitLocationI')) {
418 my $first_value = $subloc_strands[0];
419 foreach my $strand (@subloc_strands) {
420 $identical++ if ($strand == $first_value);
423 if ($identical == scalar @subloc_strands) {
424 $self->guide_strand($first_value);
427 # Mixed strand values, must reverse the sublocations order
428 $self->guide_strand(undef);
429 @
{ $self->{_sublocations
} } = reverse @sublocs;
437 Usage : $start = $location->start();
438 Function: get the starting point of the first (sorted) sublocation
445 my ($self,$value) = @_;
446 if( defined $value ) {
447 $self->throw("Trying to set the starting point of a split location, ".
448 "that is not possible, try manipulating the sub Locations");
450 return $self->SUPER::start
();
456 Usage : $end = $location->end();
457 Function: get the ending point of the last (sorted) sublocation
464 my ($self,$value) = @_;
465 if( defined $value ) {
466 $self->throw("Trying to set the ending point of a split location, ".
467 "that is not possible, try manipulating the sub Locations");
469 return $self->SUPER::end
();
475 Usage : $min_start = $location->min_start();
476 Function: get the minimum starting point
477 Returns : the minimum starting point from the contained sublocations
483 my ($self, $value) = @_;
485 if( defined $value ) {
486 $self->throw("Trying to set the minimum starting point of a split ".
487 "location, that is not possible, try manipulating the sub Locations");
489 my @locs = $self->sub_Location(1);
490 return $locs[0]->min_start() if @locs;
497 Usage : my $maxstart = $location->max_start();
498 Function: Get maximum starting location of feature startpoint
499 Returns : integer or undef if no maximum starting point.
505 my ($self,$value) = @_;
507 if( defined $value ) {
508 $self->throw("Trying to set the maximum starting point of a split ".
509 "location, that is not possible, try manipulating the sub Locations");
511 my @locs = $self->sub_Location(1);
512 return $locs[0]->max_start() if @locs;
516 =head2 start_pos_type
518 Title : start_pos_type
519 Usage : my $start_pos_type = $location->start_pos_type();
520 Function: Get start position type (ie <,>, ^)
521 Returns : type of position coded as text
522 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
528 my ($self,$value) = @_;
530 if( defined $value ) {
531 $self->throw("Trying to set the start_pos_type of a split location, ".
532 "that is not possible, try manipulating the sub Locations");
534 my @locs = $self->sub_Location();
535 return ( @locs ) ?
$locs[0]->start_pos_type() : undef;
541 Usage : my $minend = $location->min_end();
542 Function: Get minimum ending location of feature endpoint
543 Returns : integer or undef if no minimum ending point.
549 my ($self,$value) = @_;
551 if( defined $value ) {
552 $self->throw("Trying to set the minimum end point of a split location, ".
553 "that is not possible, try manipulating the sub Locations");
555 # reverse sort locations by largest ending to smallest ending
556 my @locs = $self->sub_Location(-1);
557 return $locs[0]->min_end() if @locs;
564 Usage : my $maxend = $location->max_end();
565 Function: Get maximum ending location of feature endpoint
566 Returns : integer or undef if no maximum ending point.
572 my ($self,$value) = @_;
574 if( defined $value ) {
575 $self->throw("Trying to set the maximum end point of a split location, ".
576 "that is not possible, try manipulating the sub Locations");
578 # reverse sort locations by largest ending to smallest ending
579 my @locs = $self->sub_Location(-1);
580 return $locs[0]->max_end() if @locs;
587 Usage : my $end_pos_type = $location->end_pos_type();
588 Function: Get end position type (ie <,>, ^)
589 Returns : type of position coded as text
590 ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
596 my ($self,$value) = @_;
598 if( defined $value ) {
599 $self->throw("Trying to set end_pos_type of a split location, ".
600 "that is not possible, try manipulating the sub Locations");
602 my @locs = $self->sub_Location();
603 return ( @locs ) ?
$locs[0]->end_pos_type() : undef;
610 Usage : my $seqid = $location->seq_id();
611 Function: Get/Set seq_id that location refers to
613 We override this here in order to propagate to all sublocations
614 which are not remote (provided this root is not remote either)
616 Args : [optional] seq_id value to set
624 if(@_ && !$self->is_remote()) {
625 foreach my $subloc ($self->sub_Location(0)) {
626 $subloc->seq_id(@_) if !$subloc->is_remote();
629 return $self->SUPER::seq_id
(@_);
632 =head2 coordinate_policy
634 Title : coordinate_policy
635 Usage : $policy = $location->coordinate_policy();
636 $location->coordinate_policy($mypolicy); # set may not be possible
637 Function: Get the coordinate computing policy employed by this object.
639 See Bio::Location::CoordinatePolicyI for documentation about
640 the policy object and its use.
642 The interface *does not* require implementing classes to accept
643 setting of a different policy. The implementation provided here
644 does, however, allow to do so.
646 Implementors of this interface are expected to initialize every
647 new instance with a CoordinatePolicyI object. The implementation
648 provided here will return a default policy object if none has
649 been set yet. To change this default policy object call this
650 method as a class method with an appropriate argument. Note that
651 in this case only subsequently created Location objects will be
654 Returns : A Bio::Location::CoordinatePolicyI implementing object.
655 Args : On set, a Bio::Location::CoordinatePolicyI implementing object.
660 Usage : my $locstr = $location->to_FTstring()
661 Function: returns the FeatureTable string of this location
670 my $strand = $self->strand() || 0;
671 my $stype = lc($self->splittype());
674 $self->flip_strand; # this will recursively set the strand
675 # to +1 for all the sub locations
678 foreach my $loc ( $self->sub_Location(0) ) {
679 $loc->verbose($self->verbose);
680 my $str = $loc->to_FTstring();
681 # we only append the remote seq_id if it hasn't been done already
682 # by the sub-location (which it should if it knows it's remote)
683 # (and of course only if it's necessary)
684 if( (! $loc->is_remote) &&
685 defined($self->seq_id) && defined($loc->seq_id) &&
686 ($loc->seq_id ne $self->seq_id) ) {
687 $str = sprintf("%s:%s", $loc->seq_id, $str);
691 $self->flip_strand if $strand < 0;
695 } elsif( @strs == 0 ) {
696 $self->warn("no Sublocations for this splitloc, so not returning anything\n");
698 $str = sprintf("%s(%s)",lc $self->splittype, join(",", @strs));
700 if( $strand < 0 ) { # wrap this in a complement if it was unrolled
701 $str = sprintf("%s(%s)",'complement',$str);
707 =head2 valid_Location
709 Title : valid_Location
710 Usage : if ($location->valid_location) {...};
711 Function: boolean method to determine whether location is considered valid
712 (has minimum requirements for Simple implementation)
713 Returns : Boolean value: true if location is valid, false otherwise
718 # we'll probably need to override the RangeI methods since our locations will