3 # BioPerl module for Bio::SeqFeature::Generic
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Ewan Birney <birney@sanger.ac.uk>
9 # Copyright Ewan Birney
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::SeqFeature::Generic - Generic SeqFeature
21 $feat = Bio::SeqFeature::Generic->new(
25 -primary => 'repeat', # -primary_tag is a synonym
26 -source_tag => 'repeatmasker',
27 -display_name => 'alu family',
31 sillytag => 'this is silly!' } );
33 $feat = Bio::SeqFeature::Generic->new( -gff_string => $string );
34 # if you want explicitly GFF1
35 $feat = Bio::SeqFeature::Generic->new( -gff1_string => $string );
37 # add it to an annotated sequence
39 $annseq->add_SeqFeature($feat);
43 Bio::SeqFeature::Generic is a generic implementation for the
44 Bio::SeqFeatureI interface, providing a simple object to provide all
45 the information for a feature on a sequence.
47 For many Features, this is all you will need to use (for example, this
48 is fine for Repeats in DNA sequence or Domains in protein
49 sequence). For other features, which have more structure, this is a
50 good base class to extend using inheritance to have new things: this
51 is what is done in the L<Bio::SeqFeature::Gene>,
52 L<Bio::SeqFeature::Transcript> and L<Bio::SeqFeature::Exon>, which provide
53 well coordinated classes to represent genes on DNA sequence (for
54 example, you can get the protein sequence out from a transcript
57 For many Features, you want to add some piece of information, for
58 example a common one is that this feature is 'new' whereas other
59 features are 'old'. The tag system, which here is implemented using a
60 hash can be used here. You can use the tag system to extend the
61 L<Bio::SeqFeature::Generic> programmatically: that is, you know that you have
62 read in more information into the tag 'mytag' which you can then
63 retrieve. This means you do not need to know how to write inherited
64 Perl to provide more complex information on a feature, and/or, if you
65 do know but you do not want to write a new class every time you need
66 some extra piece of information, you can use the tag system to easily
67 store and then retrieve information.
69 The tag system can be written in/out of GFF format, and also into EMBL
70 format via the L<Bio::SeqIO> system
72 =head1 Implemented Interfaces
74 This class implements the following interfaces.
78 =item L<Bio::SeqFeatureI>
80 Note that this includes implementing Bio::RangeI.
82 =item L<Bio::AnnotatableI>
84 =item L<Bio::FeatureHolderI>
86 Features held by a feature are essentially sub-features.
94 User feedback is an integral part of the evolution of this and other
95 Bioperl modules. Send your comments and suggestions preferably to one
96 of the Bioperl mailing lists. Your participation is much appreciated.
98 bioperl-l@bioperl.org - General discussion
99 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
103 Please direct usage questions or support issues to the mailing list:
105 I<bioperl-l@bioperl.org>
107 rather than to the module maintainer directly. Many experienced and
108 responsive experts will be able to look at the problem and quickly
109 address it. Please include a thorough description of the problem
110 with code and data examples if at all possible.
112 =head2 Reporting Bugs
114 Report bugs to the Bioperl bug tracking system to help us keep track
115 of the bugs and their resolution. Bug reports can be submitted via
118 http://bugzilla.open-bio.org/
120 =head1 AUTHOR - Ewan Birney
122 Ewan Birney E<lt>birney@sanger.ac.ukE<gt>
126 This class has been written with an eye out for inheritance. The fields
127 of the actual object hash are:
129 _gsf_tag_hash = reference to a hash for the tags
130 _gsf_sub_array = reference to an array for subfeatures
134 The rest of the documentation details each of the object
135 methods. Internal methods are usually preceded with a _
140 # Let the code begin...
143 package Bio
::SeqFeature
::Generic
;
146 use Bio
::Annotation
::Collection
;
147 use Bio
::Location
::Simple
;
148 use Bio
::Location
::Split
;
152 use base
qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI Bio::AnnotatableI);
155 my ( $caller, @args) = @_;
156 my ($self) = $caller->SUPER::new
(@args);
157 $self->_register_for_cleanup(\
&cleanup_generic
);
158 $self->{'_parse_h'} = {};
159 $self->{'_gsf_tag_hash'} = {};
161 # bulk-set attributes
162 $self->set_attributes(@args);
168 =head2 set_attributes
170 Title : set_attributes
172 Function: Sets a whole array of parameters at once.
175 Args : Named parameters, in the form as they would otherwise be passed
176 to new(). Currently recognized are:
178 -start start position
181 -primary_tag primary tag
182 -primary (synonym for -primary_tag)
186 -tag a reference to a tag/value hash
187 -gff_string GFF v.2 string to initialize from
188 -gff1_string GFF v.1 string to initialize from
189 -seq_id the display name of the sequence
190 -annotation the AnnotationCollectionI object
191 -location the LocationI object
196 my ($self,@args) = @_;
197 my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
198 $source, $frame, $score, $tag, $gff_string, $gff1_string,
199 $seqname, $seqid, $annot, $location,$display_name) =
200 $self->_rearrange([qw(START
218 $location && $self->location($location);
219 $gff_string && $self->_from_gff_string($gff_string);
221 $self->gff_format(Bio
::Tools
::GFF
->new('-gff_version' => 1));
222 $self->_from_gff_stream($gff1_string);
224 $primary_tag && $self->primary_tag($primary_tag);
225 $source_tag && $self->source_tag($source_tag);
226 $primary && $self->primary_tag($primary);
227 $source && $self->source_tag($source);
228 defined $start && $self->start($start);
229 defined $end && $self->end($end);
230 defined $strand && $self->strand($strand);
231 defined $frame && $self->frame($frame);
232 defined $display_name && $self->display_name($display_name);
233 defined $score && $self->score($score);
234 $annot && $self->annotation($annot);
236 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
237 $seqid = $seqname unless $seqid;
239 $seqid && $self->seq_id($seqid);
241 foreach my $t ( keys %$tag ) {
242 $self->add_tag_value($t, UNIVERSAL
::isa
($tag->{$t}, "ARRAY") ? @
{$tag->{$t}} : $tag->{$t});
251 Usage : my $obj = Bio::SeqFeature::Generic->direct_new
252 Function: create a blessed hash - for performance improvement in
254 Returns : Bio::SeqFeature::Generic object
272 Usage : my $location = $seqfeature->location()
273 Function: returns a location object suitable for identifying location
274 of feature on sequence or parent feature
275 Returns : Bio::LocationI object
276 Args : [optional] Bio::LocationI object to set the value to.
282 my($self, $value ) = @_;
284 if (defined($value)) {
285 unless (ref($value) and $value->isa('Bio::LocationI')) {
286 $self->throw("object $value pretends to be a location but ".
287 "does not implement Bio::LocationI");
289 $self->{'_location'} = $value;
291 elsif (! $self->{'_location'}) {
292 # guarantees a real location object is returned every time
293 $self->{'_location'} = Bio
::Location
::Simple
->new();
295 return $self->{'_location'};
302 Usage : $start = $feat->start
304 Function: Get/set on the start coordinate of the feature
312 my ($self,$value) = @_;
313 return $self->location->start($value);
319 Usage : $end = $feat->end
321 Function: get/set on the end coordinate of the feature
329 my ($self,$value) = @_;
330 return $self->location->end($value);
336 Usage : my $len = $feature->length
337 Function: Get the feature length computed as
338 $feat->end - $feat->start + 1
347 return $self->end - $self->start() + 1;
353 Usage : $strand = $feat->strand()
354 $feat->strand($strand)
355 Function: get/set on strand information, being 1,-1 or 0
364 return $self->location->strand(@_);
370 Usage : $score = $feat->score()
372 Function: get/set on score information
374 Args : none if get, the new value if set
385 if ( defined $value && $value && $value !~ /^[A-Za-z]+$/ &&
386 $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {
387 $self->throw(-class=>'Bio::Root::BadParameter',
388 -text
=>"'$value' is not a valid score",
391 if ($self->has_tag('score')) {
392 $self->warn("Removing score value(s)");
393 $self->remove_tag('score');
395 $self->add_tag_value('score',$value);
397 my ($score) = $self->has_tag('score') ?
$self->get_tag_values('score') : undef;
404 Usage : $frame = $feat->frame()
406 Function: get/set on frame information
408 Args : none if get, the new value if set
418 if ( defined $value &&
419 $value !~ /^[0-2.]$/ ) {
420 $self->throw("'$value' is not a valid frame");
422 if( defined $value && $value eq '.' ) { $value = '.' }
423 return $self->{'_gsf_frame'} = $value;
425 return $self->{'_gsf_frame'};
431 Usage : $tag = $feat->primary_tag()
432 $feat->primary_tag('exon')
433 Function: get/set on the primary tag for a feature,
443 return $self->{'_primary_tag'} = shift if @_;
444 return $self->{'_primary_tag'};
450 Usage : $tag = $feat->source_tag()
451 $feat->source_tag('genscan');
452 Function: Returns the source tag for a feature,
462 return $self->{'_source_tag'} = shift if @_;
463 return $self->{'_source_tag'};
469 Usage : $value = $self->has_tag('some_tag')
470 Function: Tests whether a feature contains a tag
471 Returns : TRUE if the SeqFeature has the tag,
473 Args : The name of a tag
479 my ($self, $tag) = @_;
480 return exists $self->{'_gsf_tag_hash'}->{$tag};
485 Title : add_tag_value
486 Usage : $self->add_tag_value('note',"this is a note");
487 Returns : TRUE on success
488 Args : tag (string) and one or more values (any scalar(s))
496 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
497 push(@
{$self->{'_gsf_tag_hash'}->{$tag}},@_);
501 =head2 get_tag_values
503 Title : get_tag_values
504 Usage : @values = $gsf->get_tag_values('note');
505 Function: Returns a list of all the values stored
506 under a particular tag.
507 Returns : A list of scalars
508 Args : The name of the tag
514 my ($self, $tag) = @_;
516 if( ! defined $tag ) { return (); }
517 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
518 $self->throw("asking for tag value that does not exist $tag");
520 return @
{$self->{'_gsf_tag_hash'}->{$tag}};
527 Usage : @tags = $feat->get_all_tags()
528 Function: Get a list of all the tags in a feature
529 Returns : An array of tag names
532 # added a sort so that tags will be returned in a predictable order
533 # I still think we should be able to specify a sort function
534 # to the object at some point
540 my ($self, @args) = @_;
541 return sort keys %{ $self->{'_gsf_tag_hash'}};
547 Usage : $feat->remove_tag('some_tag')
548 Function: removes a tag from this feature
549 Returns : the array of values for this tag before removing it
556 my ($self, $tag) = @_;
558 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
559 $self->throw("trying to remove a tag that does not exist: $tag");
561 my @vals = @
{$self->{'_gsf_tag_hash'}->{$tag}};
562 delete $self->{'_gsf_tag_hash'}->{$tag};
569 Usage : $sf->attach_seq($seq)
570 Function: Attaches a Bio::Seq object to this feature. This
571 Bio::Seq object is for the *entire* sequence: ie
574 Returns : TRUE on success
575 Args : a Bio::PrimarySeqI compliant object
581 my ($self, $seq) = @_;
583 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
584 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
587 $self->{'_gsf_seq'} = $seq;
589 # attach to sub features if they want it
590 foreach ( $self->sub_SeqFeature() ) {
591 $_->attach_seq($seq);
599 Usage : $tseq = $sf->seq()
600 Function: returns the truncated sequence (if there) for this
602 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
603 bounded by start & end, or undef if there is no sequence attached
610 my ($self, $arg) = @_;
612 if ( defined $arg ) {
613 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
616 if ( ! exists $self->{'_gsf_seq'} ) {
620 # assuming our seq object is sensible, it should not have to yank
621 # the entire sequence out here.
623 my $seq = $self->{'_gsf_seq'}->trunc($self->start(), $self->end());
626 if ( defined $self->strand &&
627 $self->strand == -1 ) {
629 # ok. this does not work well (?)
630 #print STDERR "Before revcom", $seq->str, "\n";
632 #print STDERR "After revcom", $seq->str, "\n";
641 Usage : $whole_seq = $sf->entire_seq()
642 Function: gives the entire sequence that this seqfeature is attached to
644 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
652 return shift->{'_gsf_seq'};
659 Usage : $obj->seq_id($newval)
660 Function: There are many cases when you make a feature that you
661 do know the sequence name, but do not know its actual
662 sequence. This is an attribute such that you can store
663 the ID (e.g., display_id) of the sequence.
665 This attribute should *not* be used in GFF dumping, as
666 that should come from the collection in which the seq
668 Returns : value of seq_id
669 Args : newvalue (optional)
676 return $obj->{'_gsf_seq_id'} = shift if @_;
677 return $obj->{'_gsf_seq_id'};
683 Usage : $featname = $obj->display_name
684 Function: Implements the display_name() method, which is a human-readable
685 name for the feature.
686 Returns : value of display_name (a string)
687 Args : Optionally, on set the new value or undef
693 return $self->{'display_name'} = shift if @_;
694 return $self->{'display_name'};
697 =head1 Methods for implementing Bio::AnnotatableI
704 Usage : $obj->annotation($annot_obj)
705 Function: Get/set the annotation collection object for annotating this
709 Returns : A Bio::AnnotationCollectionI object
710 Args : newvalue (optional)
716 my ($obj,$value) = @_;
718 # we are smart if someone references the object and there hasn't been
720 if(defined $value || ! defined $obj->{'annotation'} ) {
721 $value = Bio
::Annotation
::Collection
->new() unless ( defined $value );
722 $obj->{'annotation'} = $value;
724 return $obj->{'annotation'};
727 =head1 Methods to implement Bio::FeatureHolderI
729 This includes methods for retrieving, adding, and removing
730 features. Since this is already a feature, features held by this
731 feature holder are essentially sub-features.
735 =head2 get_SeqFeatures
737 Title : get_SeqFeatures
738 Usage : @feats = $feat->get_SeqFeatures();
739 Function: Returns an array of sub Sequence Features
746 sub get_SeqFeatures
{
747 return @
{ shift->{'_gsf_sub_array'} || []};
750 =head2 add_SeqFeature
752 Title : add_SeqFeature
753 Usage : $feat->add_SeqFeature($subfeat);
754 $feat->add_SeqFeature($subfeat,'EXPAND')
755 Function: Adds a SeqFeature into the subSeqFeature array.
756 With no 'EXPAND' qualifier, subfeat will be tested
757 as to whether it lies inside the parent, and throw
760 If EXPAND is used, the parent's start/end/strand will
761 be adjusted so that it grows to accommodate the new
764 Args : An object which has the SeqFeatureI interface
771 my ($self,$feat,$expand) = @_;
772 unless( defined $feat ) {
773 $self->warn("Called add_SeqFeature with no feature, ignoring");
776 if ( !$feat->isa('Bio::SeqFeatureI') ) {
777 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
780 if($expand && ($expand eq 'EXPAND')) {
781 $self->_expand_region($feat);
783 if ( !$self->contains($feat) ) {
784 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
788 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
789 push(@
{$self->{'_gsf_sub_array'}},$feat);
793 =head2 remove_SeqFeatures
795 Title : remove_SeqFeatures
796 Usage : $sf->remove_SeqFeatures
797 Function: Removes all SeqFeatures
799 If you want to remove only a subset of features then remove that
800 subset from the returned array, and add back the rest.
802 Returns : The array of Bio::SeqFeatureI implementing features that was
809 sub remove_SeqFeatures
{
812 my @subfeats = @
{$self->{'_gsf_sub_array'} || []};
813 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
817 =head1 GFF-related methods
825 $gffio = $feature->gff_format();
826 # set (change the default version of GFF2):
827 $feature->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
828 Function: Get/set the GFF format interpreter. This object is supposed to
829 format and parse GFF. See Bio::Tools::GFF for the interface.
831 If this method is called as class method, the default for all
832 newly created instances will be changed. Otherwise only this
833 instance will be affected.
835 Returns : a Bio::Tools::GFF compliant object
836 Args : On set, an instance of Bio::Tools::GFF or a derived object.
842 my ($self, $gffio) = @_;
844 if(defined($gffio)) {
846 $self->{'_gffio'} = $gffio;
848 $Bio::SeqFeatureI
::static_gff_formatter
= $gffio;
851 return (ref($self) && exists($self->{'_gffio'}) ?
852 $self->{'_gffio'} : $self->_static_gff_formatter);
858 Usage : $str = $feat->gff_string;
859 $str = $feat->gff_string($gff_formatter);
860 Function: Provides the feature information in GFF format.
862 We override this here from Bio::SeqFeatureI in order to use the
863 formatter returned by gff_format().
866 Args : Optionally, an object implementing gff_string().
872 my ($self,$formatter) = @_;
874 $formatter = $self->gff_format() unless $formatter;
875 return $formatter->gff_string($self);
878 =head2 slurp_gff_file
881 Usage : @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
882 Function: Sneaky function to load an entire file as in memory objects.
885 This method is deprecated. Use Bio::Tools::GFF instead, which can
886 also handle large files.
898 Bio
::Root
::Root
->throw("Must have a filehandle");
901 Bio
::Root
::Root
->deprecated( -message
=> "deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.",
902 -warn_version
=> '1.005',
903 -throw_version
=> '1.007',
907 my $sf = Bio
::SeqFeature
::Generic
->new('-gff_string' => $_);
914 =head2 _from_gff_string
916 Title : _from_gff_string
918 Function: Set feature properties from GFF string.
920 This method uses the object returned by gff_format() for the
921 actual interpretation of the string. Set a different GFF format
922 interpreter first if you need a specific version, like GFF1. (The
926 Args : a GFF-formatted string
931 sub _from_gff_string
{
932 my ($self, $string) = @_;
934 $self->gff_format()->from_gff_string($self, $string);
938 =head2 _expand_region
940 Title : _expand_region
941 Usage : $self->_expand_region($feature);
942 Function: Expand the total region covered by this feature to
943 accommodate the given feature.
945 May be called whenever any kind of subfeature is added to this
946 feature. add_sub_SeqFeature() already does this.
948 Args : A Bio::SeqFeatureI implementing object.
954 my ($self, $feat) = @_;
955 if(! $feat->isa('Bio::SeqFeatureI')) {
956 $self->warn("$feat does not implement Bio::SeqFeatureI");
958 # if this doesn't have start set - forget it!
959 # changed to reflect sanity checks for LocationI
960 if(!$self->location->valid_Location) {
961 $self->start($feat->start);
962 $self->end($feat->end);
963 $self->strand($feat->strand) unless $self->strand;
965 my ($start,$end,$strand) = $self->union($feat);
966 $self->start($start);
968 $self->strand($strand);
976 Function: Parsing hints
986 return $self->{'_parse_h'};
993 Function: For internal use only. Convenience method for those tags that
994 may only have a single value.
995 Returns : The first value under the given tag as a scalar (string)
996 Args : The tag as a string. Optionally, the value on set.
1005 if(@_ || (! $self->has_tag($tag))) {
1006 $self->remove_tag($tag) if($self->has_tag($tag));
1007 $self->add_tag_value($tag, @_);
1009 return ($self->get_tag_values($tag))[0];
1012 #######################################################################
1013 # aliases for methods that changed their names in an attempt to make #
1014 # bioperl names more consistent #
1015 #######################################################################
1019 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
1020 return $self->seq_id(@_);
1025 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
1026 return $self->display_name(@_);
1029 # # this is towards consistent naming
# Alias retained for the older method name: hands every argument after the
# invocant to get_tag_values() and returns whatever that call returns.
sub each_tag_value {
    my ($self, @args) = @_;
    return $self->get_tag_values(@args);
}
# Alias retained for the older method name: hands every argument after the
# invocant to get_all_tags() and returns whatever that call returns.
sub all_tags {
    my ($self, @args) = @_;
    return $self->get_all_tags(@args);
}
# The feature-containing behaviour was reworked to implement
# Bio::FeatureHolderI. The older method names below are installed as
# typeglob aliases, so each one shares the code of the method it points at.
*sub_SeqFeature        = \&get_SeqFeatures;
*add_sub_SeqFeature    = \&add_SeqFeature;
*flush_sub_SeqFeatures = \&remove_SeqFeatures;
# Singular spelling of the flush alias, present because of inconsistent naming.
*flush_sub_SeqFeature  = \&remove_SeqFeatures;
1042 sub cleanup_generic
{
1044 foreach my $f ( @
{$self->{'_gsf_sub_array'} || []} ) {
1047 $self->{'_gsf_seq'} = undef;
1048 foreach my $t ( keys %{$self->{'_gsf_tag_hash'} } ) {
1049 $self->{'_gsf_tag_hash'}->{$t} = undef;
1050 delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix