Merge pull request #181 from bioperl/limit-dockerhub-trigger
[bioperl-live.git] / Bio / DB / GFF / Segment.pm
blob364591b2f3a402e0a474d238866d9484267af5d5
1 =head1 NAME
3 Bio::DB::GFF::Segment -- Simple DNA segment object
5 =head1 SYNOPSIS
7 See L<Bio::DB::GFF>.
9 =head1 DESCRIPTION
11 Bio::DB::GFF::Segment provides the basic representation of a range of
12 DNA contained in a GFF database. It is the base class from which the
13 Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
14 derived.
16 Generally, you will not create or manipulate Bio::DB::GFF::Segment
17 objects directly, but use those that are returned by the Bio::DB::GFF
18 module.
20 =cut
22 package Bio::DB::GFF::Segment;
24 use strict;
25 use Bio::Annotation::Collection;
27 use base qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
29 use overload
30 '""' => 'asString',
31 eq => 'equals',
32 fallback => 1;
34 =head1 API
36 The remainder of this document describes the API for
37 Bio::DB::GFF::Segment.
39 =cut
41 =head2 new
43 Title : new
44 Usage : $s = Bio::DB::GFF::Segment->new(@args)
45 Function: create a new segment
46 Returns : a new Bio::DB::GFF::Segment object
47 Args : see below
48 Status : Public
50 This method creates a new Bio::DB::GFF::Segment object. Generally
51 this is called automatically by the Bio::DB::GFF module and
52 derivatives.
54 There are five positional arguments:
56 $factory a Bio::DB::GFF::Adaptor to use for database access
57 $sourceseq ID of the source sequence
58 $sourceclass class of the source sequence
59 $start start of the desired segment relative to source sequence
60 $stop stop of the desired segment relative to source sequence
62 =cut
64 sub new {
65 my $class = shift;
66 my ($factory,$segclass,$segname,$start,$stop) = @_;
67 $segclass = $segname->class if ref($segname) && $segname->can('class');
68 $segclass ||= 'Sequence';
70 $factory or $class->throw("->new(): provide a factory argument");
71 $class = ref $class if ref $class;
72 return bless { factory => $factory,
73 sourceseq => $segname,
74 class => $segclass,
75 start => $start,
76 stop => $stop,
77 strand => 0,
78 },$class;
81 # read-only accessors
83 =head2 factory
85 Title : factory
86 Usage : $s->factory
87 Function: get the factory object
88 Returns : a Bio::DB::GFF::Adaptor
89 Args : none
90 Status : Public
92 This is a read-only accessor for the Bio::DB::GFF::Adaptor object used
93 to create the segment.
95 =cut
97 sub factory { shift->{factory} }
99 # start, stop, length
101 =head2 start
103 Title : start
104 Usage : $s->start
105 Function: start of segment
106 Returns : integer
107 Args : none
108 Status : Public
110 This is a read-only accessor for the start of the segment.
112 =cut
114 sub start { shift->{start} }
116 =head2 end
118 Title : end
119 Usage : $s->end
120 Function: end of segment
121 Returns : integer
122 Args : none
123 Status : Public
125 This is a read-only accessor for the end of the segment.
127 =cut
129 sub end { shift->{stop} }
131 =head2 stop
133 Title : stop
134 Usage : $s->stop
135 Function: stop of segment
136 Returns : integer
137 Args : none
138 Status : Public
140 This is an alias for end(), provided for AcePerl compatibility.
142 =cut
144 *stop = \&end;
146 =head2 length
148 Title : length
149 Usage : $s->length
150 Function: length of segment
151 Returns : integer
152 Args : none
153 Status : Public
155 Returns the length of the segment. Always a positive number.
157 =cut
159 sub length { abs($_[0]->{start} - $_[0]->{stop})+1 }
162 =head2 strand
164 Title : strand
165 Usage : $s->strand
166 Function: strand of segment
167 Returns : +1,0,-1
168 Args : none
169 Status : Public
171 Returns the strand on which the segment resides, either +1, 0 or -1.
173 =cut
175 sub strand {
176 my $self = shift;
180 =head2 low
182 Title : low
183 Usage : $s->low
184 Function: return lower coordinate
185 Returns : lower coordinate
186 Args : none
187 Status : Public
189 Returns the lower coordinate, either start or end.
191 =cut
193 sub low {
194 my $self = shift;
195 my ($start,$stop) = ($self->start,$self->stop);
196 return $start < $stop ? $start : $stop;
198 *abs_low = \&low;
200 =head2 high
202 Title : high
203 Usage : $s->high
204 Function: return higher coordinate
205 Returns : higher coordinate
206 Args : none
207 Status : Public
209 Returns the higher coordinate, either start or end.
211 =cut
213 sub high {
214 my $self = shift;
215 my ($start,$stop) = ($self->start,$self->stop);
216 return $start > $stop ? $start : $stop;
218 *abs_high = \&high;
220 =head2 sourceseq
222 Title : sourceseq
223 Usage : $s->sourceseq
224 Function: get the segment source
225 Returns : a string
226 Args : none
227 Status : Public
229 Returns the name of the source sequence for this segment.
231 =cut
233 sub sourceseq { shift->{sourceseq} }
235 =head2 class
237 Title : class
238 Usage : $s->class([$newclass])
239 Function: get the source sequence class
240 Returns : a string
241 Args : new class (optional)
242 Status : Public
244 Gets or sets the class for the source sequence for this segment.
246 =cut
248 sub class {
249 my $self = shift;
250 my $d = $self->{class};
251 $self->{class} = shift if @_;
255 =head2 subseq
257 Title : subseq
258 Usage : $s->subseq($start,$stop)
259 Function: generate a subsequence
260 Returns : a Bio::DB::GFF::Segment object
261 Args : start and end of subsequence
262 Status : Public
264 This method generates a new segment from the start and end positions
265 given in the arguments. If stop E<lt> start, then the strand is reversed.
267 =cut
269 sub subseq {
270 my $self = shift;
271 my ($newstart,$newstop) = @_;
272 my ($refseq,$start,$stop,$class) = ($self->{sourceseq},
273 $self->{start},$self->{stop},
274 $self->class);
276 # We deliberately force subseq to return objects of type RelSegment
277 # Otherwise, when we get a subsequence from a Feature object,
278 # its method and source go along for the ride, which is incorrect.
279 my $new = $self->new_from_segment($self);
280 if ($start <= $stop) {
281 @{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop - 1);
282 } else {
283 @{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop - 1)),
287 $new;
290 =head2 seq
292 Title : seq
293 Usage : $s->seq
294 Function: get the sequence string for this segment
295 Returns : a Bio::PrimarySeq
296 Args : none
297 Status : Public
299 Returns the sequence for this segment as a Bio::PrimarySeq. (-)
300 strand segments are automatically reverse complemented
302 The method is called dna() return the data as a simple sequence
303 string.
305 =cut
307 sub seq {
308 my $self = shift;
309 my $dna = $self->dna;
310 require Bio::PrimarySeq unless Bio::PrimarySeq->can('new');
311 return Bio::PrimarySeq->new(-id => $self->display_name) unless $dna;
312 return Bio::PrimarySeq->new(-seq => $dna,
313 -id => $self->display_name);
316 =head2 dna
318 Title : dna
319 Usage : $s->dna
320 Function: get the DNA string for this segment
321 Returns : a string
322 Args : none
323 Status : Public
325 Returns the sequence for this segment as a simple string. (-) strand
326 segments are automatically reverse complemented
328 The method is also called protein().
330 =cut
332 sub dna {
333 my $self = shift;
334 my ($ref,$class,$start,$stop,$strand)
335 = @{$self}{qw(sourceseq class start stop strand)};
336 return $self->factory->dna($ref,$start,$stop,$class);
339 *protein = \&dna;
342 =head2 primary_seq
344 Title : primary_seq
345 Usage : $s->primary_seq
346 Function: returns a Bio::PrimarySeqI compatible object
347 Returns : a Bio::PrimarySeqI object
348 Args : none
349 Status : Public
351 This is for compatibility with BioPerl's separation of SeqI
352 from PrimarySeqI. It just returns itself.
354 =cut
358 sub primary_seq { shift }
360 =head2 type
362 Title : type
363 Usage : $s->type
364 Function: return the string "feature"
365 Returns : the string "feature"
366 Args : none
367 Status : Public
369 This is for future sequence ontology-compatibility and
370 represents the default type of a feature on the genome
372 =cut
374 sub type { "feature" }
376 =head2 equals
378 Title : equals
379 Usage : $s->equals($d)
380 Function: segment equality
381 Returns : true, if two segments are equal
382 Args : another segment
383 Status : Public
385 Returns true if the two segments have the same source sequence, start and stop.
387 =cut
389 sub equals {
390 my $self = shift;
391 my $peer = shift;
392 return unless defined $peer;
393 return $self->asString eq $peer unless ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
394 return $self->{start} eq $peer->{start}
395 && $self->{stop} eq $peer->{stop}
396 && $self->{sourceseq} eq $peer->{sourceseq};
399 =head2 asString
401 Title : asString
402 Usage : $s->asString
403 Function: human-readable string for segment
404 Returns : a string
405 Args : none
406 Status : Public
408 Returns a human-readable string representing this sequence. Format
411 sourceseq/start,stop
413 =cut
415 sub asString {
416 my $self = shift;
417 my $label = $self->refseq;
418 my $start = $self->start;
419 my $stop = $self->stop;
420 return "$label:$start,$stop";
423 =head2 clone
425 Title : clone
426 Usage : $copy = $s->clone
427 Function: make a copy of this segment
428 Returns : a Bio::DB::GFF::Segment object
429 Args : none
430 Status : Public
432 This method creates a copy of the segment and returns it.
434 =cut
436 # deep copy of the thing
437 sub clone {
438 my $self = shift;
439 my %h = %$self;
440 return bless \%h,ref($self);
443 =head2 error
445 Title : error
446 Usage : $error = $s->error([$new_error])
447 Function: get or set the last error
448 Returns : a string
449 Args : an error message (optional)
450 Status : Public
452 In case of a fault, this method can be used to obtain the last error
453 message. Internally it is called to set the error message.
455 =cut
457 sub error {
458 my $self = shift;
459 my $g = $self->{error};
460 $self->{error} = shift if @_;
464 =head1 Relative Addressing Methods
466 The following methods are provided for compatibility with
467 Bio::DB::GFF::RelSegment, which provides relative addressing
468 functions.
470 =head2 abs_start
472 Title : abs_start
473 Usage : $s->abs_start
474 Function: the absolute start of the segment
475 Returns : an integer
476 Args : none
477 Status : Public
479 This is an alias to start(), and provided for API compatibility with
480 Bio::DB::GFF::RelSegment.
482 =cut
484 *abs_start = \&start;
486 =head2 abs_end
488 Title : abs_end
489 Usage : $s->abs_end
490 Function: the absolute stop of the segment
491 Returns : an integer
492 Args : none
493 Status : Public
495 This is an alias to stop(), and provided for API compatibility with
496 Bio::DB::GFF::RelSegment.
498 =cut
500 *abs_stop = \&stop;
501 *abs_end = \&stop;
503 =head2 abs_strand
505 Title : abs_strand
506 Usage : $s->abs_strand
507 Function: the absolute strand of the segment
508 Returns : +1,0,-1
509 Args : none
510 Status : Public
512 This is an alias to strand(), and provided for API compatibility with
513 Bio::DB::GFF::RelSegment.
515 =cut
517 sub abs_strand {
518 my $self = shift;
519 return $self->abs_end <=> $self->abs_start;
522 =head2 abs_ref
524 Title : abs_ref
525 Usage : $s->abs_ref
526 Function: the reference sequence for this segment
527 Returns : a string
528 Args : none
529 Status : Public
531 This is an alias to sourceseq(), and is here to provide API
532 compatibility with Bio::DB::GFF::RelSegment.
534 =cut
536 *abs_ref = \&sourceseq;
538 =head2 refseq
540 Title : refseq
541 Usage : $s->refseq
542 Function: get or set the reference sequence
543 Returns : a string
544 Args : none
545 Status : Public
547 Examine or change the reference sequence. This is an alias to
548 sourceseq(), provided here for API compatibility with
549 Bio::DB::GFF::RelSegment.
551 =cut
553 *refseq = \&sourceseq;
555 =head2 ref
557 Title : ref
558 Usage : $s->refseq
559 Function: get or set the reference sequence
560 Returns : a string
561 Args : none
562 Status : Public
564 An alias for refseq()
566 =cut
568 sub ref { shift->refseq(@_) }
570 =head2 seq_id
572 Title : seq_id
573 Usage : $ref = $s->seq_id
574 Function: get the reference sequence in a LocationI-compatible way
575 Returns : a string
576 Args : none
577 Status : Public
579 An alias for refseq() but only allows reading.
581 =cut
583 sub seq_id { shift->refseq }
584 *seqname = \&seq_id;
586 =head2 truncated
588 Title : truncated
589 Usage : $truncated = $s->truncated
590 Function: Flag indicating that the segment was truncated during creation
591 Returns : A boolean flag
592 Args : none
593 Status : Public
595 This indicates that the sequence was truncated during creation. The
596 returned flag is undef if no truncation occured. If truncation did
597 occur, the flag is actually an array ref in which the first element is
598 true if truncation occurred on the left, and the second element
599 occurred if truncation occurred on the right.
601 =cut
603 sub truncated {
604 my $self = shift;
605 my $hash = $self->{truncated} or return;
606 CORE::ref($hash) eq 'HASH' or return [1,1]; # paranoia -- not that this would ever happen ;-)
607 return [$hash->{start},$hash->{stop}];
610 =head2 Bio::RangeI Methods
612 The following Bio::RangeI methods are supported:
614 overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
616 =cut
618 sub overlaps {
619 my $self = shift;
620 my($other,$so) = @_;
621 if ($other->isa('Bio::DB::GFF::RelSegment')) {
622 return if $self->abs_ref ne $other->abs_ref;
624 $self->SUPER::overlaps(@_);
627 sub contains {
628 my $self = shift;
629 my($other,$so) = @_;
630 if ($other->isa('Bio::DB::GFF::RelSegment')) {
631 return if $self->abs_ref ne $other->abs_ref;
633 $self->SUPER::contains(@_);
635 #sub equals {
636 # my $self = shift;
637 # my($other,$so) = @_;
638 # if ($other->isa('Bio::DB::GFF::RelSegment')) {
639 # return if $self->abs_ref ne $other->abs_ref;
641 # $self->SUPER::equals(@_);
643 sub intersection {
644 my $self = shift;
645 my($other,$so) = @_;
646 if ($other->isa('Bio::DB::GFF::RelSegment')) {
647 return if $self->abs_ref ne $other->abs_ref;
649 $self->SUPER::intersection(@_);
651 sub union {
652 my $self = shift;
653 my($other) = @_;
654 if ($other->isa('Bio::DB::GFF::RelSegment')) {
655 return if $self->abs_ref ne $other->abs_ref;
657 $self->SUPER::union(@_);
660 sub overlap_extent {
661 my $self = shift;
662 my($other) = @_;
663 if ($other->isa('Bio::DB::GFF::RelSegment')) {
664 return if $self->abs_ref ne $other->abs_ref;
666 $self->SUPER::overlap_extent(@_);
670 =head2 Bio::SeqI implementation
672 =cut
674 =head2 primary_id
676 Title : primary_id
677 Usage : $unique_implementation_key = $obj->primary_id;
678 Function: Returns the unique id for this object in this
679 implementation. This allows implementations to manage their
680 own object ids in a way the implementaiton can control
681 clients can expect one id to map to one object.
683 For sequences with no accession number, this method should
684 return a stringified memory location.
686 Returns : A string
687 Args : None
688 Status : Virtual
691 =cut
693 sub primary_id {
694 my ($obj,$value) = @_;
696 if( defined $value) {
697 $obj->{'primary_id'} = $value;
699 if( ! exists $obj->{'primary_id'} ) {
700 return "$obj";
702 return $obj->{'primary_id'};
706 =head2 display_name
708 Title : display_name
709 Usage : $id = $obj->display_name or $obj->display_name($newid);
710 Function: Gets or sets the display id, also known as the common name of
711 the Seq object.
713 The semantics of this is that it is the most likely string
714 to be used as an identifier of the sequence, and likely to
715 have "human" readability. The id is equivalent to the LOCUS
716 field of the GenBank/EMBL databanks and the ID field of the
717 Swissprot/sptrembl database. In fasta format, the >(\S+) is
718 presumed to be the id, though some people overload the id
719 to embed other information. Bioperl does not use any
720 embedded information in the ID field, and people are
721 encouraged to use other mechanisms (accession field for
722 example, or extending the sequence object) to solve this.
724 Notice that $seq->id() maps to this function, mainly for
725 legacy/convenience issues.
726 Returns : A string
727 Args : None or a new id
729 Note, this used to be called display_id(), and this name is preserved for
730 backward compatibility. The default is to return the seq_id().
732 =cut
734 sub display_name { shift->seq_id }
735 *display_id = \&display_name;
737 =head2 accession_number
739 Title : accession_number
740 Usage : $unique_biological_key = $obj->accession_number;
741 Function: Returns the unique biological id for a sequence, commonly
742 called the accession_number. For sequences from established
743 databases, the implementors should try to use the correct
744 accession number. Notice that primary_id() provides the
745 unique id for the implemetation, allowing multiple objects
746 to have the same accession number in a particular implementation.
748 For sequences with no accession number, this method should return
749 "unknown".
750 Returns : A string
751 Args : None
754 =cut
756 sub accession_number {
757 return 'unknown';
760 =head2 alphabet
762 Title : alphabet
763 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
764 Function: Returns the type of sequence being one of
765 'dna', 'rna' or 'protein'. This is case sensitive.
767 This is not called <type> because this would cause
768 upgrade problems from the 0.5 and earlier Seq objects.
770 Returns : a string either 'dna','rna','protein'. NB - the object must
771 make a call of the type - if there is no type specified it
772 has to guess.
773 Args : none
774 Status : Virtual
777 =cut
779 sub alphabet{
780 return 'dna'; # no way this will be anything other than dna!
783 =head2 desc
785 Title : desc
786 Usage : $seqobj->desc($string) or $seqobj->desc()
787 Function: Sets or gets the description of the sequence
788 Example :
789 Returns : The description
790 Args : The description or none
793 =cut
795 sub desc { shift->asString }
797 *description = \&desc;
799 =head2 species
801 Title : species
802 Usage : $species = $seq->species() or $seq->species($species)
803 Function: Gets or sets the species
804 Example :
805 Returns : Bio::Species object
806 Args : None or Bio::Species object
808 See L<Bio::Species> for more information
810 =cut
812 sub species {
813 my ($self, $species) = @_;
814 if ($species) {
815 $self->{'species'} = $species;
816 } else {
817 return $self->{'species'};
821 =head2 annotation
823 Title : annotation
824 Usage : $ann = $seq->annotation or $seq->annotation($annotation)
825 Function: Gets or sets the annotation
826 Example :
827 Returns : Bio::Annotation object
828 Args : None or Bio::Annotation object
830 See L<Bio::Annotation> for more information
832 =cut
834 sub annotation {
835 my ($obj,$value) = @_;
836 if( defined $value || ! defined $obj->{'annotation'} ) {
837 $value = Bio::Annotation::Collection->new() unless defined $value;
838 $obj->{'annotation'} = $value;
840 return $obj->{'annotation'};
844 =head2 is_circular
846 Title : is_circular
847 Usage : if( $obj->is_circular) { /Do Something/ }
848 Function: Returns true if the molecule is circular
849 Returns : Boolean value
850 Args : none
852 =cut
854 sub is_circular{
855 return 0;
860 __END__
862 =head1 BUGS
864 Report them please.
866 =head1 SEE ALSO
868 L<bioperl>
870 =head1 AUTHOR
872 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
874 Copyright (c) 2001 Cold Spring Harbor Laboratory.
876 This library is free software; you can redistribute it and/or modify
877 it under the same terms as Perl itself.
879 =head1 CONTRIBUTORS
881 Jason Stajich E<lt>jason@bioperl.orgE<gt>.
883 =cut