squash waffling test
[bioperl-live.git] / Bio / SeqFeature / Generic.pm
blobd0e6d5b50f586c52ce71fd38a3511af7e55c15fe
2 # BioPerl module for Bio::SeqFeature::Generic
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@sanger.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SeqFeature::Generic - Generic SeqFeature
18 =head1 SYNOPSIS
20 $feat = Bio::SeqFeature::Generic->new(
21 -start => 10,
22 -end => 100,
23 -strand => -1,
24 -primary => 'repeat', # -primary_tag is a synonym
25 -source_tag => 'repeatmasker',
26 -display_name => 'alu family',
27 -score => 1000,
28 -tag => { new => 1,
29 author => 'someone',
30 sillytag => 'this is silly!' } );
32 $feat = Bio::SeqFeature::Generic->new( -gff_string => $string );
33 # if you want explicitly GFF1
34 $feat = Bio::SeqFeature::Generic->new( -gff1_string => $string );
36 # add it to an annotated sequence
38 $annseq->add_SeqFeature($feat);
40 =head1 DESCRIPTION
42 Bio::SeqFeature::Generic is a generic implementation for the
43 Bio::SeqFeatureI interface, providing a simple object to provide all
44 the information for a feature on a sequence.
46 For many Features, this is all you will need to use (for example, this
47 is fine for Repeats in DNA sequence or Domains in protein
48 sequence). For other features, which have more structure, this is a
49 good base class to extend using inheritence to have new things: this
50 is what is done in the L<Bio::SeqFeature::Gene>,
51 L<Bio::SeqFeature::Transcript> and L<Bio::SeqFeature::Exon>, which provide
52 well coordinated classes to represent genes on DNA sequence (for
53 example, you can get the protein sequence out from a transcript
54 class).
56 For many Features, you want to add some piece of information, for
57 example a common one is that this feature is 'new' whereas other
58 features are 'old'. The tag system, which here is implemented using a
59 hash can be used here. You can use the tag system to extend the
60 L<Bio::SeqFeature::Generic> programmatically: that is, you know that you have
61 read in more information into the tag 'mytag' which you can then
62 retrieve. This means you do not need to know how to write inherited
63 Perl to provide more complex information on a feature, and/or, if you
64 do know but you do not want to write a new class every time you need
65 some extra piece of information, you can use the tag system to easily
66 store and then retrieve information.
68 The tag system can be written in/out of GFF format, and also into EMBL
69 format via the L<Bio::SeqIO> system
71 =head1 Implemented Interfaces
73 This class implements the following interfaces.
75 =over 4
77 =item L<Bio::SeqFeatureI>
79 Note that this includes implementing Bio::RangeI.
81 =item L<Bio::AnnotatableI>
83 =item L<Bio::FeatureHolderI>
85 Features held by a feature are essentially sub-features.
87 =back
89 =head1 FEEDBACK
91 =head2 Mailing Lists
93 User feedback is an integral part of the evolution of this and other
94 Bioperl modules. Send your comments and suggestions preferably to one
95 of the Bioperl mailing lists. Your participation is much appreciated.
97 bioperl-l@bioperl.org - General discussion
98 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
100 =head2 Support
102 Please direct usage questions or support issues to the mailing list:
104 I<bioperl-l@bioperl.org>
106 rather than to the module maintainer directly. Many experienced and
107 reponsive experts will be able look at the problem and quickly
108 address it. Please include a thorough description of the problem
109 with code and data examples if at all possible.
111 =head2 Reporting Bugs
113 Report bugs to the Bioperl bug tracking system to help us keep track
114 the bugs and their resolution. Bug reports can be submitted via
115 the web:
117 https://github.com/bioperl/bioperl-live/issues
119 =head1 AUTHOR - Ewan Birney
121 Ewan Birney E<lt>birney@sanger.ac.ukE<gt>
123 =head1 DEVELOPERS
125 This class has been written with an eye out for inheritance. The fields
126 the actual object hash are:
128 _gsf_tag_hash = reference to a hash for the tags
129 _gsf_sub_array = reference to an array for subfeatures
131 =head1 APPENDIX
133 The rest of the documentation details each of the object
134 methods. Internal methods are usually preceded with a _
136 =cut
139 # Let the code begin...
142 package Bio::SeqFeature::Generic;
143 use strict;
145 use Bio::Annotation::Collection;
146 use Bio::Location::Simple;
147 use Bio::Location::Split;
148 use Bio::Tools::GFF;
149 #use Tie::IxHash;
151 use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI Bio::AnnotatableI);
153 sub new {
154 my ( $caller, @args) = @_;
155 my ($self) = $caller->SUPER::new(@args);
156 $self->_register_for_cleanup(\&cleanup_generic);
157 $self->{'_parse_h'} = {};
158 $self->{'_gsf_tag_hash'} = {};
160 # bulk-set attributes
161 $self->set_attributes(@args);
163 # done - we hope
164 return $self;
167 =head2 set_attributes
169 Title : set_attributes
170 Usage :
171 Function: Sets a whole array of parameters at once.
172 Example :
173 Returns : none
174 Args : Named parameters, in the form as they would otherwise be passed
175 to new(). Currently recognized are:
177 -start start position
178 -end end position
179 -strand strand
180 -phase the phase of the feature (0..2)
181 -primary_tag primary tag
182 -primary (synonym for -primary_tag)
183 -source_tag source tag
184 -source (synonym for -source_tag)
185 -frame frame
186 -score score value
187 -tag a reference to a tag/value hash
188 -gff_string GFF v.2 string to initialize from
189 -gff1_string GFF v.1 string to initialize from
190 -seq_id the display name of the sequence
191 -annotation the AnnotationCollectionI object
192 -location the LocationI object
194 =cut
196 sub set_attributes {
197 my ($self,@args) = @_;
198 my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
199 $source, $frame, $score, $tag, $gff_string, $gff1_string,
200 $seqname, $seqid, $annot, $location, $display_name, $pid, $phase) =
201 $self->_rearrange([qw(START
203 STRAND
204 PRIMARY_TAG
205 SOURCE_TAG
206 PRIMARY
207 SOURCE
208 FRAME
209 SCORE
211 GFF_STRING
212 GFF1_STRING
213 SEQNAME
214 SEQ_ID
215 ANNOTATION
216 LOCATION
217 DISPLAY_NAME
218 PRIMARY_ID
219 PHASE
220 )], @args);
221 $location && $self->location($location);
222 $gff_string && $self->_from_gff_string($gff_string);
223 $gff1_string && do {
224 $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
225 $self->_from_gff_stream($gff1_string);
228 $pid && $self->primary_id($pid);
229 $primary_tag && $self->primary_tag($primary_tag);
230 $source_tag && $self->source_tag($source_tag);
231 $primary && $self->primary_tag($primary);
232 $source && $self->source_tag($source);
233 $annot && $self->annotation($annot);
234 defined $start && $self->start($start);
235 defined $end && $self->end($end);
236 defined $strand && $self->strand($strand);
237 defined $frame && $self->frame($frame);
238 defined $display_name && $self->display_name($display_name);
239 defined $score && $self->score($score);
240 defined $phase && $self->phase($phase);
242 if($seqname) {
243 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
244 $seqid = $seqname unless $seqid;
246 $self->seq_id($seqid) if (defined($seqid));
247 $tag && do {
248 foreach my $t ( keys %$tag ) {
249 $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? @{$tag->{$t}} : $tag->{$t});
255 =head2 direct_new
257 Title : direct_new
258 Usage : my $feat = Bio::SeqFeature::Generic->direct_new;
259 Function: create a blessed hash - for performance improvement in
260 object creation
261 Returns : Bio::SeqFeature::Generic object
262 Args : none
264 =cut
266 sub direct_new {
267 my ( $class) = @_;
268 my ($self) = {};
270 bless $self,$class;
272 return $self;
276 =head2 location
278 Title : location
279 Usage : my $location = $feat->location();
280 Function: returns a location object suitable for identifying location
281 of feature on sequence or parent feature
282 Returns : Bio::LocationI object
283 Args : [optional] Bio::LocationI object to set the value to.
285 =cut
287 sub location {
288 my($self, $value ) = @_;
290 if (defined($value)) {
291 unless (ref($value) and $value->isa('Bio::LocationI')) {
292 $self->throw("object $value pretends to be a location but ".
293 "does not implement Bio::LocationI");
295 $self->{'_location'} = $value;
297 elsif (! $self->{'_location'}) {
298 # guarantees a real location object is returned every time
299 $self->{'_location'} = Bio::Location::Simple->new();
301 return $self->{'_location'};
305 =head2 start
307 Title : start
308 Usage : my $start = $feat->start;
309 $feat->start(20);
310 Function: Get/set on the start coordinate of the feature
311 Returns : integer
312 Args : none
314 =cut
316 sub start {
317 my ($self, $value) = @_;
318 # Return soon if setting value
319 if (defined $value) {
320 return $self->location->start($value);
323 return $self->location->start() if not defined $self->{'_gsf_seq'};
324 # Check circular sequences cut by origin
325 my $start;
326 if ( $self->{'_gsf_seq'}->is_circular
327 and $self->location->isa('Bio::Location::SplitLocationI')
329 my $primary_seq_length = $self->{'_gsf_seq'}->length;
330 my @sublocs = $self->location->sub_Location;
332 my $cut_by_origin = 0;
333 my ($a_end, $a_strand) = (0, 0);
334 my ($b_start, $b_strand) = (0, 0);
335 for (my $i = 1; $i < scalar @sublocs; $i++) {
336 $a_end = $sublocs[$i-1]->end;
337 $a_strand = $sublocs[$i-1]->strand;
338 $b_start = $sublocs[$i]->start;
339 $b_strand = $sublocs[$i]->strand;
340 # cut by origin condition
341 if ( $a_end == $primary_seq_length
342 and $b_start == 1
343 and $a_strand == $b_strand
345 $cut_by_origin = 1;
346 last;
349 $start = ($cut_by_origin == 1) ? ($sublocs[0]->start) : ($self->location->start);
351 else {
352 $start = $self->location->start;
354 return $start;
358 =head2 end
360 Title : end
361 Usage : my $end = $feat->end;
362 $feat->end($end);
363 Function: get/set on the end coordinate of the feature
364 Returns : integer
365 Args : none
367 =cut
369 sub end {
370 my ($self, $value) = @_;
371 # Return soon if setting value
372 if (defined $value) {
373 return $self->location->end($value);
376 return $self->location->end() if not defined $self->{'_gsf_seq'};
377 # Check circular sequences cut by origin
378 my $end;
379 if ( $self->{'_gsf_seq'}->is_circular
380 and $self->location->isa('Bio::Location::SplitLocationI')
382 my $primary_seq_length = $self->{'_gsf_seq'}->length;
383 my @sublocs = $self->location->sub_Location;
385 my $cut_by_origin = 0;
386 my ($a_end, $a_strand) = (0, 0);
387 my ($b_start, $b_strand) = (0, 0);
388 for (my $i = 1; $i < scalar @sublocs; $i++) {
389 $a_end = $sublocs[$i-1]->end;
390 $a_strand = $sublocs[$i-1]->strand;
391 $b_start = $sublocs[$i]->start;
392 $b_strand = $sublocs[$i]->strand;
393 # cut by origin condition
394 if ( $a_end == $primary_seq_length
395 and $b_start == 1
396 and $a_strand == $b_strand
398 $cut_by_origin = 1;
399 last;
402 $end = ($cut_by_origin == 1) ? ($sublocs[-1]->end) : ($self->location->end);
404 else {
405 $end = $self->location->end;
407 return $end;
411 =head2 length
413 Title : length
414 Usage : my $len = $feat->length;
415 Function: Get the feature length computed as:
416 $feat->end - $feat->start + 1
417 Returns : integer
418 Args : none
420 =cut
422 sub length {
423 my $self = shift;
424 my $length = $self->end() - $self->start() + 1;
426 # In circular sequences cut by origin $start > $end,
427 # e.g., join(5075..5386,1..51)), $start = 5075, $end = 51,
428 # then adjust using the primary_seq length (5386)
429 if ($length < 0 and defined $self->{'_gsf_seq'}) {
430 $length += $self->{'_gsf_seq'}->length;
432 return $length;
436 =head2 strand
438 Title : strand
439 Usage : my $strand = $feat->strand();
440 $feat->strand($strand);
441 Function: get/set on strand information, being 1,-1 or 0
442 Returns : -1,1 or 0
443 Args : none
445 =cut
447 sub strand {
448 my $self = shift;
449 return $self->location->strand(@_);
453 =head2 score
455 Title : score
456 Usage : my $score = $feat->score();
457 $feat->score($score);
458 Function: get/set on score information
459 Returns : float
460 Args : none if get, the new value if set
462 =cut
464 sub score {
465 my $self = shift;
467 if (@_) {
468 my $value = shift;
470 if ( defined $value && $value && $value !~ /^[A-Za-z]+$/ &&
471 $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {
472 $self->throw(-class=>'Bio::Root::BadParameter',
473 -text=>"'$value' is not a valid score",
474 -value=>$value);
476 if ($self->has_tag('score')) {
477 $self->warn("Removing score value(s)");
478 $self->remove_tag('score');
480 $self->add_tag_value('score',$value);
482 my ($score) = $self->has_tag('score') ? $self->get_tag_values('score') : undef;
483 return $score;
487 =head2 frame
489 Title : frame
490 Usage : my $frame = $feat->frame();
491 $feat->frame($frame);
492 Function: get/set on frame information
493 Returns : 0,1,2, '.'
494 Args : none if get, the new value if set
496 =cut
498 sub frame {
499 my $self = shift;
501 if ( @_ ) {
502 my $value = shift;
503 if ( defined $value &&
504 $value !~ /^[0-2.]$/ ) {
505 $self->throw("'$value' is not a valid frame");
507 if( defined $value && $value eq '.' ) { $value = '.' }
508 return $self->{'_gsf_frame'} = $value;
510 return $self->{'_gsf_frame'};
514 =head2 primary_tag
516 Title : primary_tag
517 Usage : my $tag = $feat->primary_tag();
518 $feat->primary_tag('exon');
519 Function: get/set on the primary tag for a feature,
520 eg 'exon'
521 Returns : a string
522 Args : none
524 =cut
526 sub primary_tag {
527 my $self = shift;
528 return $self->{'_primary_tag'} = shift if @_;
529 return $self->{'_primary_tag'} || '';
533 =head2 source_tag
535 Title : source_tag
536 Usage : my $tag = $feat->source_tag();
537 $feat->source_tag('genscan');
538 Function: Returns the source tag for a feature,
539 eg, 'genscan'
540 Returns : a string
541 Args : none
543 =cut
545 sub source_tag {
546 my $self = shift;
547 return $self->{'_source_tag'} = shift if @_;
548 return $self->{'_source_tag'} || '';
552 =head2 has_tag
554 Title : has_tag
555 Usage : my $value = $feat->has_tag('some_tag');
556 Function: Tests wether a feature contaings a tag
557 Returns : TRUE if the SeqFeature has the tag,
558 and FALSE otherwise.
559 Args : The name of a tag
561 =cut
563 sub has_tag {
564 my ($self, $tag) = @_;
565 return exists $_[0]->{'_gsf_tag_hash'}->{$tag};
569 =head2 add_tag_value
571 Title : add_tag_value
572 Usage : $feat->add_tag_value('note',"this is a note");
573 Returns : TRUE on success
574 Args : tag (string) and one or more values (any scalar(s))
576 =cut
578 sub add_tag_value {
579 my $self = shift;
580 my $tag = shift;
581 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
582 push(@{$self->{'_gsf_tag_hash'}->{$tag}},@_);
586 =head2 get_tag_values
588 Title : get_tag_values
589 Usage : my @values = $feat->get_tag_values('note');
590 Function: Returns a list of all the values stored
591 under a particular tag.
592 Returns : A list of scalars
593 Args : The name of the tag
595 =cut
597 sub get_tag_values {
598 my ($self, $tag) = @_;
600 if( ! defined $tag ) { return (); }
601 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
602 $self->throw("asking for tag value that does not exist $tag");
604 return @{$self->{'_gsf_tag_hash'}->{$tag}};
608 =head2 get_all_tags
610 Title : get_all_tags
611 Usage : my @tags = $feat->get_all_tags();
612 Function: Get a list of all the tags in a feature
613 Returns : An array of tag names
614 Args : none
616 # added a sort so that tags will be returned in a predictable order
617 # I still think we should be able to specify a sort function
618 # to the object at some point
619 # -js
621 =cut
623 sub get_all_tags {
624 my ($self, @args) = @_;
625 return sort keys %{ $self->{'_gsf_tag_hash'}};
629 =head2 remove_tag
631 Title : remove_tag
632 Usage : $feat->remove_tag('some_tag');
633 Function: removes a tag from this feature
634 Returns : the array of values for this tag before removing it
635 Args : tag (string)
637 =cut
639 sub remove_tag {
640 my ($self, $tag) = @_;
642 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
643 $self->throw("trying to remove a tag that does not exist: $tag");
645 my @vals = @{$self->{'_gsf_tag_hash'}->{$tag}};
646 delete $self->{'_gsf_tag_hash'}->{$tag};
647 return @vals;
651 =head2 attach_seq
653 Title : attach_seq
654 Usage : $feat->attach_seq($seq);
655 Function: Attaches a Bio::Seq object to this feature. This
656 Bio::Seq object is for the *entire* sequence: ie
657 from 1 to 10000
658 Example :
659 Returns : TRUE on success
660 Args : a Bio::PrimarySeqI compliant object
662 =cut
664 sub attach_seq {
665 my ($self, $seq) = @_;
667 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
668 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures but got '".ref($seq)."'");
671 $self->{'_gsf_seq'} = $seq;
673 # attach to sub features if they want it
674 foreach ( $self->sub_SeqFeature() ) {
675 $_->attach_seq($seq);
677 return 1;
681 =head2 seq
683 Title : seq
684 Usage : my $tseq = $feat->seq();
685 Function: returns the truncated sequence (if there) for this
686 Example :
687 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
688 bounded by start & end, or undef if there is no sequence attached
689 Args : none
691 =cut
693 sub seq {
694 my ($self, $arg) = @_;
696 if ( defined $arg ) {
697 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
700 if ( ! exists $self->{'_gsf_seq'} ) {
701 return;
704 # assumming our seq object is sensible, it should not have to yank
705 # the entire sequence out here.
706 my $seq;
707 my $start = $self->start;
708 my $end = $self->end;
709 # Check circular sequences cut by origin (e.g. join(2006035..2007700,1..257))
710 if ( $self->{'_gsf_seq'}->is_circular
711 and $self->location->isa('Bio::Location::SplitLocationI')
712 and $start > $end
714 my $primary_seq_length = $self->{'_gsf_seq'}->length;
716 # Get duplicate object with the first sequence piece using trunc()
717 $seq = $self->{'_gsf_seq'}->trunc($start, $primary_seq_length);
719 # Get post-origin sequence and build the complete sequence
720 my $post_origin = $self->{'_gsf_seq'}->subseq(1, $end);
721 my $complete_seq = $seq->seq() . $post_origin;
723 # Add complete sequence to object
724 $seq->seq($complete_seq);
726 else {
727 $seq = $self->{'_gsf_seq'}->trunc($start, $end);
730 if ( defined $self->strand && $self->strand == -1 ) {
731 $seq = $seq->revcom;
734 return $seq;
738 =head2 entire_seq
740 Title : entire_seq
741 Usage : my $whole_seq = $feat->entire_seq();
742 Function: gives the entire sequence that this seqfeature is attached to
743 Example :
744 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
745 sequence attached
746 Args :
748 =cut
750 sub entire_seq {
751 return shift->{'_gsf_seq'};
755 =head2 seq_id
757 Title : seq_id
758 Usage : $feat->seq_id($newval)
759 Function: There are many cases when you make a feature that you
760 do know the sequence name, but do not know its actual
761 sequence. This is an attribute such that you can store
762 the ID (e.g., display_id) of the sequence.
764 This attribute should *not* be used in GFF dumping, as
765 that should come from the collection in which the seq
766 feature was found.
767 Returns : value of seq_id
768 Args : newvalue (optional)
770 =cut
772 sub seq_id {
773 my $obj = shift;
774 return $obj->{'_gsf_seq_id'} = shift if @_;
775 return $obj->{'_gsf_seq_id'};
779 =head2 display_name
781 Title : display_name
782 Usage : my $featname = $feat->display_name;
783 Function: Implements the display_name() method, which is a human-readable
784 name for the feature.
785 Returns : value of display_name (a string)
786 Args : Optionally, on set the new value or undef
788 =cut
790 sub display_name {
791 my $self = shift;
792 return $self->{'display_name'} = shift if @_;
793 return $self->{'display_name'} || '';
797 =head1 Methods for implementing Bio::AnnotatableI
799 =head2 annotation
801 Title : annotation
802 Usage : $feat->annotation($annot_obj);
803 Function: Get/set the annotation collection object for annotating this
804 feature.
806 Example :
807 Returns : A Bio::AnnotationCollectionI object
808 Args : newvalue (optional)
810 =cut
812 sub annotation {
813 my ($obj,$value) = @_;
815 # we are smart if someone references the object and there hasn't been
816 # one set yet
817 if(defined $value || ! defined $obj->{'annotation'} ) {
818 $value = Bio::Annotation::Collection->new() unless ( defined $value );
819 $obj->{'annotation'} = $value;
821 return $obj->{'annotation'};
825 =head1 Methods to implement Bio::FeatureHolderI
827 This includes methods for retrieving, adding, and removing
828 features. Since this is already a feature, features held by this
829 feature holder are essentially sub-features.
831 =head2 get_SeqFeatures
833 Title : get_SeqFeatures
834 Usage : my @feats = $feat->get_SeqFeatures();
835 Function: Returns an array of sub Sequence Features
836 Returns : An array
837 Args : none
839 =cut
841 sub get_SeqFeatures {
842 return @{ shift->{'_gsf_sub_array'} || []};
846 =head2 add_SeqFeature
848 Title : add_SeqFeature
849 Usage : $feat->add_SeqFeature($subfeat);
850 $feat->add_SeqFeature($subfeat,'EXPAND');
851 Function: Adds a SeqFeature into the subSeqFeature array.
852 With no 'EXPAND' qualifer, subfeat will be tested
853 as to whether it lies inside the parent, and throw
854 an exception if not.
856 If EXPAND is used, the parent's start/end/strand will
857 be adjusted so that it grows to accommodate the new
858 subFeature
860 !IMPORTANT! The coordinates of the subfeature should not be relative
861 to the parent feature it is attached to, but relative to the sequence
862 the parent feature is located on.
864 Returns : nothing
865 Args : An object which has the SeqFeatureI interface
867 =cut
869 sub add_SeqFeature {
870 my ($self,$feat,$expand) = @_;
871 unless( defined $feat ) {
872 $self->warn("Called add_SeqFeature with no feature, ignoring");
873 return;
875 if ( !$feat->isa('Bio::SeqFeatureI') ) {
876 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
879 if($expand && ($expand eq 'EXPAND')) {
880 $self->_expand_region($feat);
881 } else {
882 if ( !$self->contains($feat) ) {
883 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
887 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
888 push(@{$self->{'_gsf_sub_array'}},$feat);
893 =head2 remove_SeqFeatures
895 Title : remove_SeqFeatures
896 Usage : $feat->remove_SeqFeatures;
897 Function: Removes all SeqFeatures
899 If you want to remove only a subset of features then remove that
900 subset from the returned array, and add back the rest.
901 Example :
902 Returns : The array of Bio::SeqFeatureI implementing features that was
903 deleted.
904 Args : none
906 =cut
908 sub remove_SeqFeatures {
909 my ($self) = @_;
910 my @subfeats = @{$self->{'_gsf_sub_array'} || []};
911 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
912 return @subfeats;
916 =head1 GFF-related methods
918 =head2 gff_format
920 Title : gff_format
921 Usage : # get:
922 my $gffio = $feat->gff_format();
923 # set (change the default version of GFF2):
924 $feat->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
925 Function: Get/set the GFF format interpreter. This object is supposed to
926 format and parse GFF. See Bio::Tools::GFF for the interface.
928 If this method is called as class method, the default for all
929 newly created instances will be changed. Otherwise only this
930 instance will be affected.
931 Example :
932 Returns : a Bio::Tools::GFF compliant object
933 Args : On set, an instance of Bio::Tools::GFF or a derived object.
935 =cut
937 sub gff_format {
938 my ($self, $gffio) = @_;
939 if(defined($gffio)) {
940 if(ref($self)) {
941 $self->{'_gffio'} = $gffio;
942 } else {
943 $Bio::SeqFeatureI::static_gff_formatter = $gffio;
946 return (ref($self) && exists($self->{'_gffio'}) ?
947 $self->{'_gffio'} : $self->_static_gff_formatter);
951 =head2 gff_string
953 Title : gff_string
954 Usage : my $str = $feat->gff_string;
955 my $str = $feat->gff_string($gff_formatter);
956 Function: Provides the feature information in GFF format.
958 We override this here from Bio::SeqFeatureI in order to use the
959 formatter returned by gff_format().
961 Returns : A string
962 Args : Optionally, an object implementing gff_string().
964 =cut
966 sub gff_string {
967 my ($self,$formatter) = @_;
968 $formatter = $self->gff_format() unless $formatter;
969 return $formatter->gff_string($self);
973 =head2 slurp_gff_file
975 Title : slurp_file
976 Usage : my @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
977 Function: Sneaky function to load an entire file as in memory objects.
978 Beware of big files.
980 This method is deprecated. Use Bio::Tools::GFF instead, which can
981 also handle large files.
983 Example :
984 Returns :
985 Args :
987 =cut
989 sub slurp_gff_file {
990 my ($f) = @_;
991 my @out;
992 if ( !defined $f ) {
993 Bio::Root::Root->throw("Must have a filehandle");
996 Bio::Root::Root->deprecated( -message => "deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.",
997 -warn_version => '1.005',
998 -throw_version => '1.007',
1001 while(<$f>) {
1002 my $sf = Bio::SeqFeature::Generic->new('-gff_string' => $_);
1003 push(@out, $sf);
1006 return @out;
1010 =head2 _from_gff_string
1012 Title : _from_gff_string
1013 Usage :
1014 Function: Set feature properties from GFF string.
1016 This method uses the object returned by gff_format() for the
1017 actual interpretation of the string. Set a different GFF format
1018 interpreter first if you need a specific version, like GFF1. (The
1019 default is GFF2.)
1020 Example :
1021 Returns :
1022 Args : a GFF-formatted string
1024 =cut
1026 sub _from_gff_string {
1027 my ($self, $string) = @_;
1028 $self->gff_format()->from_gff_string($self, $string);
1032 =head2 _expand_region
1034 Title : _expand_region
1035 Usage : $feat->_expand_region($feature);
1036 Function: Expand the total region covered by this feature to
1037 accommodate for the given feature.
1039 May be called whenever any kind of subfeature is added to this
1040 feature. add_SeqFeature() already does this.
1041 Returns :
1042 Args : A Bio::SeqFeatureI implementing object.
1044 =cut
1046 sub _expand_region {
1047 my ($self, $feat) = @_;
1048 if(! $feat->isa('Bio::SeqFeatureI')) {
1049 $self->warn("$feat does not implement Bio::SeqFeatureI");
1051 # if this doesn't have start set - forget it!
1052 # changed to reflect sanity checks for LocationI
1053 if(!$self->location->valid_Location) {
1054 $self->start($feat->start);
1055 $self->end($feat->end);
1056 $self->strand($feat->strand) unless $self->strand;
1057 } else {
1058 my ($start,$end,$strand) = $self->union($feat);
1059 $self->start($start);
1060 $self->end($end);
1061 $self->strand($strand);
1066 =head2 _parse
1068 Title : _parse
1069 Usage :
1070 Function: Parsing hints
1071 Example :
1072 Returns :
1073 Args :
1075 =cut
1077 sub _parse {
1078 my ($self) = @_;
1079 return $self->{'_parse_h'};
1083 =head2 _tag_value
1085 Title : _tag_value
1086 Usage :
1087 Function: For internal use only. Convenience method for those tags that
1088 may only have a single value.
1089 Returns : The first value under the given tag as a scalar (string)
1090 Args : The tag as a string. Optionally, the value on set.
1092 =cut
1094 sub _tag_value {
1095 my $self = shift;
1096 my $tag = shift;
1098 if(@_ || (! $self->has_tag($tag))) {
1099 $self->remove_tag($tag) if($self->has_tag($tag));
1100 $self->add_tag_value($tag, @_);
1102 return ($self->get_tag_values($tag))[0];
1106 #######################################################################
1107 # aliases for methods that changed their names in an attempt to make #
1108 # bioperl names more consistent #
1109 #######################################################################
1111 sub seqname {
1112 my $self = shift;
1113 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
1114 return $self->seq_id(@_);
1117 sub display_id {
1118 my $self = shift;
1119 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
1120 return $self->display_name(@_);
1123 # this is towards consistent naming
1124 sub each_tag_value { return shift->get_tag_values(@_); }
1125 sub all_tags { return shift->get_all_tags(@_); }
1127 # we revamped the feature containing property to implementing
1128 # Bio::FeatureHolderI
1129 *sub_SeqFeature = \&get_SeqFeatures;
1130 *add_sub_SeqFeature = \&add_SeqFeature;
1131 *flush_sub_SeqFeatures = \&remove_SeqFeatures;
1132 # this one is because of inconsistent naming ...
1133 *flush_sub_SeqFeature = \&remove_SeqFeatures;
1135 sub cleanup_generic {
1136 my $self = shift;
1137 foreach my $f ( @{$self->{'_gsf_sub_array'} || []} ) {
1138 $f = undef;
1140 $self->{'_gsf_seq'} = undef;
1141 foreach my $t ( keys %{$self->{'_gsf_tag_hash'} } ) {
1142 $self->{'_gsf_tag_hash'}->{$t} = undef;
1143 delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix