3 Bio::DB::GFF::Segment -- Simple DNA segment object
11 Bio::DB::GFF::Segment provides the basic representation of a range of
12 DNA contained in a GFF database. It is the base class from which the
13 Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
16 Generally, you will not create or manipulate Bio::DB::GFF::Segment
17 objects directly, but use those that are returned by the Bio::DB::GFF
22 package Bio
::DB
::GFF
::Segment
;
25 use Bio
::Annotation
::Collection
;
27 use base
qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
36 The remainder of this document describes the API for
37 Bio::DB::GFF::Segment.
44 Usage : $s = Bio::DB::GFF::Segment->new(@args)
45 Function: create a new segment
46 Returns : a new Bio::DB::GFF::Segment object
50 This method creates a new Bio::DB::GFF::Segment object. Generally
51 this is called automatically by the Bio::DB::GFF module and
54 There are five positional arguments:
56 $factory a Bio::DB::GFF::Adaptor to use for database access
57 $sourceclass class of the source sequence (defaults to 'Sequence')
58 $sourceseq ID of the source sequence
59 $start start of the desired segment relative to source sequence
60 $stop stop of the desired segment relative to source sequence
66 my ($factory,$segclass,$segname,$start,$stop) = @_;
67 $segclass = $segname->class if ref($segname) && $segname->can('class');
68 $segclass ||= 'Sequence';
70 $factory or $class->throw("->new(): provide a factory argument");
71 $class = ref $class if ref $class;
72 return bless { factory
=> $factory,
73 sourceseq
=> $segname,
87 Function: get the factory object
88 Returns : a Bio::DB::GFF::Adaptor
92 This is a read-only accessor for the Bio::DB::GFF::Adaptor object used
93 to create the segment.
# Read-only accessor: hands back whatever is stored in the object's
# 'factory' slot (per the POD above, the Bio::DB::GFF::Adaptor that
# created this segment).
sub factory {
    my $self = shift;
    return $self->{factory};
}
105 Function: start of segment
110 This is a read-only accessor for the start of the segment.
# Read-only accessor for the value kept in the object's 'start' slot.
sub start {
    my $self = shift;
    return $self->{start};
}
120 Function: end of segment
125 This is a read-only accessor for the end of the segment.
# Read-only accessor for the end of the segment.  Note that the value
# lives in the 'stop' slot of the object, not in a slot named 'end'.
sub end {
    my $self = shift;
    return $self->{stop};
}
135 Function: stop of segment
140 This is an alias for end(), provided for AcePerl compatibility.
150 Function: length of segment
155 Returns the length of the segment. Always a positive number.
# Length of the segment: the absolute difference between the stored
# 'start' and 'stop' coordinates plus one (both endpoints are counted),
# so the result is positive regardless of coordinate order.
sub length {
    my $self = $_[0];
    my $span = $self->{start} - $self->{stop};
    return abs($span) + 1;
}
166 Function: strand of segment
171 Returns the strand on which the segment resides, either +1, 0 or -1.
184 Function: return lower coordinate
185 Returns : lower coordinate
189 Returns the lower coordinate, either start or end.
195 my ($start,$stop) = ($self->start,$self->stop);
196 return $start < $stop ?
$start : $stop;
204 Function: return higher coordinate
205 Returns : higher coordinate
209 Returns the higher coordinate, either start or end.
215 my ($start,$stop) = ($self->start,$self->stop);
216 return $start > $stop ?
$start : $stop;
223 Usage : $s->sourceseq
224 Function: get the segment source
229 Returns the name of the source sequence for this segment.
# Accessor returning the contents of the object's 'sourceseq' slot,
# i.e. the name of the source sequence this segment was built on.
# Any extra arguments are ignored; nothing is ever stored.
sub sourceseq {
    my $self = shift;
    return $self->{sourceseq};
}
238 Usage : $s->class([$newclass])
239 Function: get the source sequence class
241 Args : new class (optional)
244 Gets or sets the class for the source sequence for this segment.
250 my $d = $self->{class};
251 $self->{class} = shift if @_;
258 Usage : $s->subseq($start,$stop)
259 Function: generate a subsequence
260 Returns : a Bio::DB::GFF::Segment object
261 Args : start and end of subsequence
264 This method generates a new segment from the start and end positions
265 given in the arguments. If stop E<lt> start, then the strand is reversed.
271 my ($newstart,$newstop) = @_;
272 my ($refseq,$start,$stop,$class) = ($self->{sourceseq
},
273 $self->{start
},$self->{stop
},
276 # We deliberately force subseq to return objects of type RelSegment
277 # Otherwise, when we get a subsequence from a Feature object,
278 # its method and source go along for the ride, which is incorrect.
279 my $new = $self->new_from_segment($self);
280 if ($start <= $stop) {
281 @
{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop - 1);
283 @
{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop - 1)),
294 Function: get the sequence string for this segment
295 Returns : a Bio::PrimarySeq
299 Returns the sequence for this segment as a Bio::PrimarySeq. (-)
300 strand segments are automatically reverse complemented
302 The method is called dna() return the data as a simple sequence
309 my $dna = $self->dna;
310 require Bio
::PrimarySeq
unless Bio
::PrimarySeq
->can('new');
311 return Bio
::PrimarySeq
->new(-id
=> $self->display_name) unless $dna;
312 return Bio
::PrimarySeq
->new(-seq
=> $dna,
313 -id
=> $self->display_name);
320 Function: get the DNA string for this segment
325 Returns the sequence for this segment as a simple string. (-) strand
326 segments are automatically reverse complemented
328 The method is also called protein().
334 my ($ref,$class,$start,$stop,$strand)
335 = @
{$self}{qw(sourceseq class start stop strand)};
336 return $self->factory->dna($ref,$start,$stop,$class);
345 Usage : $s->primary_seq
346 Function: returns a Bio::PrimarySeqI compatible object
347 Returns : a Bio::PrimarySeqI object
351 This is for compatibility with BioPerl's separation of SeqI
352 from PrimarySeqI. It just returns itself.
# SeqI/PrimarySeqI compatibility shim: the segment acts as its own
# primary sequence, so the invocant is returned unchanged.
sub primary_seq {
    my $self = shift;
    return $self;
}
364 Function: return the string "feature"
365 Returns : the string "feature"
369 This is for future sequence ontology-compatibility and
370 represents the default type of a feature on the genome
# Constant type label for a plain segment; always the string "feature".
sub type {
    return "feature";
}
379 Usage : $s->equals($d)
380 Function: segment equality
381 Returns : true, if two segments are equal
382 Args : another segment
385 Returns true if the two segments have the same source sequence, start and stop.
392 return unless defined $peer;
393 return $self->asString eq $peer unless ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
394 return $self->{start
} eq $peer->{start
}
395 && $self->{stop
} eq $peer->{stop
}
396 && $self->{sourceseq
} eq $peer->{sourceseq
};
403 Function: human-readable string for segment
408 Returns a human-readable string representing this sequence. Format
417 my $label = $self->refseq;
418 my $start = $self->start;
419 my $stop = $self->stop;
420 return "$label:$start,$stop";
426 Usage : $copy = $s->clone
427 Function: make a copy of this segment
428 Returns : a Bio::DB::GFF::Segment object
432 This method creates a copy of the segment and returns it.
436 # deep copy of the thing
440 return bless \
%h,ref($self);
446 Usage : $error = $s->error([$new_error])
447 Function: get or set the last error
449 Args : an error message (optional)
452 In case of a fault, this method can be used to obtain the last error
453 message. Internally it is called to set the error message.
459 my $g = $self->{error
};
460 $self->{error
} = shift if @_;
464 =head1 Relative Addressing Methods
466 The following methods are provided for compatibility with
467 Bio::DB::GFF::RelSegment, which provides relative addressing
473 Usage : $s->abs_start
474 Function: the absolute start of the segment
479 This is an alias to start(), and provided for API compatibility with
480 Bio::DB::GFF::RelSegment.
484 *abs_start
= \
&start
;
490 Function: the absolute stop of the segment
495 This is an alias to stop(), and provided for API compatibility with
496 Bio::DB::GFF::RelSegment.
506 Usage : $s->abs_strand
507 Function: the absolute strand of the segment
512 This is an alias to strand(), and provided for API compatibility with
513 Bio::DB::GFF::RelSegment.
519 return $self->abs_end <=> $self->abs_start;
526 Function: the reference sequence for this segment
531 This is an alias to sourceseq(), and is here to provide API
532 compatibility with Bio::DB::GFF::RelSegment.
536 *abs_ref
= \
&sourceseq
;
542 Function: get or set the reference sequence
547 Examine or change the reference sequence. This is an alias to
548 sourceseq(), provided here for API compatibility with
549 Bio::DB::GFF::RelSegment.
553 *refseq
= \
&sourceseq
;
559 Function: get or set the reference sequence
564 An alias for refseq()
# Alias for refseq(): forwards the call, together with any arguments,
# to the invocant's refseq() method and returns its result.
# Because this sub shadows the builtin name, code in this package that
# needs the builtin must spell it CORE::ref.
sub ref {
    my $self = shift;
    return $self->refseq(@_);
}
573 Usage : $ref = $s->seq_id
574 Function: get the reference sequence in a LocationI-compatible way
579 An alias for refseq() but only allows reading.
# Read-only alias for refseq(): calls the invocant's refseq() with no
# arguments, so anything passed to seq_id() is discarded.
sub seq_id {
    my $self = shift;
    return $self->refseq;
}
589 Usage : $truncated = $s->truncated
590 Function: Flag indicating that the segment was truncated during creation
591 Returns : A boolean flag
595 This indicates that the sequence was truncated during creation. The
596 returned flag is undef if no truncation occurred. If truncation did
597 occur, the flag is actually an array ref in which the first element is
598 true if truncation occurred on the left, and the second element
599 is true if truncation occurred on the right.
605 my $hash = $self->{truncated
} or return;
606 CORE
::ref($hash) eq 'HASH' or return [1,1]; # paranoia -- not that this would ever happen ;-)
607 return [$hash->{start
},$hash->{stop
}];
610 =head2 Bio::RangeI Methods
612 The following Bio::RangeI methods are supported:
614 overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
621 if ($other->isa('Bio::DB::GFF::RelSegment')) {
622 return if $self->abs_ref ne $other->abs_ref;
624 $self->SUPER::overlaps
(@_);
630 if ($other->isa('Bio::DB::GFF::RelSegment')) {
631 return if $self->abs_ref ne $other->abs_ref;
633 $self->SUPER::contains
(@_);
637 # my($other,$so) = @_;
638 # if ($other->isa('Bio::DB::GFF::RelSegment')) {
639 # return if $self->abs_ref ne $other->abs_ref;
641 # $self->SUPER::equals(@_);
646 if ($other->isa('Bio::DB::GFF::RelSegment')) {
647 return if $self->abs_ref ne $other->abs_ref;
649 $self->SUPER::intersection
(@_);
654 if ($other->isa('Bio::DB::GFF::RelSegment')) {
655 return if $self->abs_ref ne $other->abs_ref;
657 $self->SUPER::union
(@_);
663 if ($other->isa('Bio::DB::GFF::RelSegment')) {
664 return if $self->abs_ref ne $other->abs_ref;
666 $self->SUPER::overlap_extent
(@_);
670 =head2 Bio::SeqI implementation
677 Usage : $unique_implementation_key = $obj->primary_id;
678 Function: Returns the unique id for this object in this
679 implementation. This allows implementations to manage their
680 own object ids in a way the implementation can control;
681 clients can expect one id to map to one object.
683 For sequences with no accession number, this method should
684 return a stringified memory location.
694 my ($obj,$value) = @_;
696 if( defined $value) {
697 $obj->{'primary_id'} = $value;
699 if( ! exists $obj->{'primary_id'} ) {
702 return $obj->{'primary_id'};
709 Usage : $id = $obj->display_name or $obj->display_name($newid);
710 Function: Gets or sets the display id, also known as the common name of
713 The semantics of this is that it is the most likely string
714 to be used as an identifier of the sequence, and likely to
715 have "human" readability. The id is equivalent to the LOCUS
716 field of the GenBank/EMBL databanks and the ID field of the
717 Swissprot/sptrembl database. In fasta format, the >(\S+) is
718 presumed to be the id, though some people overload the id
719 to embed other information. Bioperl does not use any
720 embedded information in the ID field, and people are
721 encouraged to use other mechanisms (accession field for
722 example, or extending the sequence object) to solve this.
724 Notice that $seq->id() maps to this function, mainly for
725 legacy/convenience issues.
727 Args : None or a new id
729 Note, this used to be called display_id(), and this name is preserved for
730 backward compatibility. The default is to return the seq_id().
# Display name of the segment.  This implementation is read-only: it
# simply returns the invocant's seq_id() and ignores any argument.
sub display_name {
    my $self = shift;
    return $self->seq_id;
}

# Legacy name kept for backward compatibility; same subroutine.
*display_id = \&display_name;
737 =head2 accession_number
739 Title : accession_number
740 Usage : $unique_biological_key = $obj->accession_number;
741 Function: Returns the unique biological id for a sequence, commonly
742 called the accession_number. For sequences from established
743 databases, the implementors should try to use the correct
744 accession number. Notice that primary_id() provides the
745 unique id for the implementation, allowing multiple objects
746 to have the same accession number in a particular implementation.
748 For sequences with no accession number, this method should return
756 sub accession_number
{
763 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
764 Function: Returns the type of sequence being one of
765 'dna', 'rna' or 'protein'. This is case sensitive.
767 This is not called <type> because this would cause
768 upgrade problems from the 0.5 and earlier Seq objects.
770 Returns : a string either 'dna','rna','protein'. NB - the object must
771 make a call of the type - if there is no type specified it
780 return 'dna'; # no way this will be anything other than dna!
786 Usage : $seqobj->desc($string) or $seqobj->desc()
787 Function: Sets or gets the description of the sequence
789 Returns : The description
790 Args : The description or none
# Description of the segment.  Read-only here: the result is whatever
# the invocant's asString() method returns; arguments are ignored.
sub desc {
    my $self = shift;
    return $self->asString;
}

# Long-form name for the same subroutine.
*description = \&desc;
802 Usage : $species = $seq->species() or $seq->species($species)
803 Function: Gets or sets the species
805 Returns : Bio::Species object
806 Args : None or Bio::Species object
808 See L<Bio::Species> for more information
813 my ($self, $species) = @_;
815 $self->{'species'} = $species;
817 return $self->{'species'};
824 Usage : $ann = $seq->annotation or $seq->annotation($annotation)
825 Function: Gets or sets the annotation
827 Returns : Bio::Annotation object
828 Args : None or Bio::Annotation object
830 See L<Bio::Annotation> for more information
835 my ($obj,$value) = @_;
836 if( defined $value || ! defined $obj->{'annotation'} ) {
837 $value = Bio
::Annotation
::Collection
->new() unless defined $value;
838 $obj->{'annotation'} = $value;
840 return $obj->{'annotation'};
847 Usage : if( $obj->is_circular) { /Do Something/ }
848 Function: Returns true if the molecule is circular
849 Returns : Boolean value
872 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
874 Copyright (c) 2001 Cold Spring Harbor Laboratory.
876 This library is free software; you can redistribute it and/or modify
877 it under the same terms as Perl itself.
881 Jason Stajich E<lt>jason@bioperl.orgE<gt>.