1 package Bio
::SeqFeature
::Lite
;
5 Bio::SeqFeature::Lite - Lightweight Bio::SeqFeatureI class
9 # create a simple feature with no internal structure
10 $f = Bio::SeqFeature::Lite->new(-start => 1000,
12 -type => 'transcript',
13 -name => 'alpha-1 antitrypsin',
14 -desc => 'an enzyme inhibitor',
17 # create a feature composed of multiple segments, all of type "similarity"
18 $f = Bio::SeqFeature::Lite->new(-segments => [[1000,1100],[1500,1550],[1800,2000]],
20 -type => 'gapped_alignment',
21 -subtype => 'similarity');
23 # build up a gene exon by exon
24 $e1 = Bio::SeqFeature::Lite->new(-start=>1,-stop=>100,-type=>'exon');
25 $e2 = Bio::SeqFeature::Lite->new(-start=>150,-stop=>200,-type=>'exon');
26 $e3 = Bio::SeqFeature::Lite->new(-start=>300,-stop=>500,-type=>'exon');
27 $f = Bio::SeqFeature::Lite->new(-segments=>[$e1,$e2,$e3],-type=>'gene');
31 This is a simple Bio::SeqFeatureI-compliant object that is compatible
32 with Bio::Graphics::Panel. With it you can create lightweight feature
35 All methods are as described in L<Bio::SeqFeatureI> with the following additions:
37 =head2 The new() Constructor
39 $feature = Bio::SeqFeature::Lite->new(@args);
41 This method creates a new feature object. You can create a simple
42 feature that contains no subfeatures, or a hierarchically nested object.
44 Arguments are as follows:
46 -seq_id the reference sequence
47 -start the start position of the feature
48 -end the stop position of the feature
49 -stop an alias for end
50 -name the feature name (returned by seqname())
51 -type the feature type (returned by primary_tag())
52 -primary_tag the same as -type
53 -source the source tag
54 -score the feature score (for GFF compatibility)
55 -desc a description of the feature
56 -segments a list of subfeatures (see below)
57 -subtype the type to use when creating subfeatures
58 -strand the strand of the feature (one of -1, 0 or +1)
59 -phase the phase of the feature (0..2)
60 -seq a dna or protein sequence string to attach to feature
61 -id an alias for -name
62 -seqname an alias for -name
63 -display_id an alias for -name
64 -display_name an alias for -name (do you get the idea the API has changed?)
65 -primary_id unique database ID
66 -url a URL to link to when rendered with Bio::Graphics
67 -attributes a hashref of tag value attributes, in which the key is the tag
68 and the value is an array reference of values
69 -factory a reference to a feature factory, used for compatibility with
70 more obscure parts of Bio::DB::GFF
72 The subfeatures passed in -segments may be an array of
73 Bio::SeqFeature::Lite objects, or an array of [$start,$stop]
74 pairs. Each pair should be a two-element array reference. In the
75 latter case, the feature type passed in -subtype will be used when
76 creating the subfeatures.
78 If no feature type is passed, then it defaults to "feature".
80 =head2 Non-SeqFeatureI methods
82 A number of new methods are provided for compatibility with
83 Ace::Sequence, which has a slightly different API from SeqFeatureI:
89 Get/set the URL that the graphical rendering of this feature will link to.
91 =item add_segment(@segments)
93 Add one or more segments (a subfeature). Segments can either be
94 Feature objects, or [start,stop] arrays, as in the -segments argument
95 to new(). The feature endpoints are automatically adjusted.
99 An alias for sub_SeqFeature().
101 =item get_SeqFeatures()
103 Alias for sub_SeqFeature()
105 =item get_all_SeqFeatures()
107 Alias for sub_SeqFeature()
109 =item merged_segments()
111 Another alias for sub_SeqFeature().
119 An alias for seqname().
123 An alias for sub_SeqFeature() (you don't want to know why!)
131 use base
qw(Bio::Root::Root Bio::SeqFeatureI Bio::LocationI Bio::SeqI Bio::RangeI);
136 *exons
= *sub_SeqFeature
= *merged_segments
= \
&segments
;
137 *get_all_SeqFeatures
= *get_SeqFeatures
= \
&segments
;
138 *method
= \
&primary_tag
;
139 *source
= \
&source_tag
;
140 *get_tag_values
= \
&each_tag_value
;
141 *add_SeqFeature
= \
&add_segment
;
142 *get_all_tags
= \
&all_tags
;
145 # implement Bio::SeqI and FeatureHolderI interface
147 sub primary_seq
{ return $_[0] }
149 my ($obj,$value) = @_;
150 if( defined $value ) {
151 $obj->throw("object of class ".ref($value)." does not implement ".
152 "Bio::AnnotationCollectionI. Too bad.")
153 unless $value->isa("Bio::AnnotationCollectionI");
154 $obj->{'_annotation'} = $value;
155 } elsif( ! defined $obj->{'_annotation'}) {
156 $obj->{'_annotation'} = Bio
::Annotation
::Collection
->new();
158 return $obj->{'_annotation'};
161 my ($self, $species) = @_;
163 $self->{'species'} = $species;
165 return $self->{'species'};
168 sub is_remote
{ return }
169 sub feature_count
{ return scalar @
{shift->{segments
} || []} }
171 sub target
{ return; }
172 sub hit
{ shift->target }
176 my $method = $self->primary_tag;
177 my $source = $self->source_tag;
178 return $source ne '' ?
"$method:$source" : $method;
182 # Bio::SeqFeature::Lite->new(
185 # -name => 'fred feature',
188 # Alternatively, use -segments => [ [start,stop],[start,stop]...]
189 # to create a multisegmented feature.
192 $class = ref($class) if ref $class;
195 my $self = bless {},$class;
198 if ($arg{-strand
} =~ /^[\+\-\.]$/){
199 $arg{-strand
} = "+" && $self->{strand
} ='1';
200 $arg{-strand
} = "-" && $self->{strand
} = '-1';
201 $arg{-strand
} = "." && $self->{strand
} = '0';
203 $self->{strand
} = $arg{-strand
} ?
($arg{-strand
} >= 0 ?
+1 : -1) : 0;
205 $self->{name
} = $arg{-name
} || $arg{-seqname
} || $arg{-display_id
}
206 || $arg{-display_name
} || $arg{-id
};
207 $self->{type
} = $arg{-type
} || $arg{-primary_tag
} || 'feature';
208 $self->{subtype
} = $arg{-subtype
} if exists $arg{-subtype
};
209 $self->{source
} = $arg{-source
} || $arg{-source_tag
} || '';
210 $self->{score
} = $arg{-score
} if exists $arg{-score
};
211 $self->{start
} = $arg{-start
};
212 $self->{stop
} = exists $arg{-end
} ?
$arg{-end
} : $arg{-stop
};
213 $self->{ref} = $arg{-seq_id
} || $arg{-ref};
214 for my $option (qw(class url seq phase desc attributes primary_id)) {
215 $self->{$option} = $arg{"-$option"} if exists $arg{"-$option"};
218 # is_circular is needed for Bio::PrimarySeqI compliance
219 $self->{is_circular
} = $arg{-is_circular
} || 0;
222 if (defined $self->{stop
} && defined $self->{start
}
223 && $self->{stop
} < $self->{start
}) {
224 @
{$self}{'start','stop'} = @
{$self}{'stop','start'};
225 $self->{strand
} *= -1;
229 if (my $s = $arg{-segments
}) {
230 # NB: when $self ISA Bio::DB::SeqFeature the following invokes
231 # Bio::DB::SeqFeature::add_segment and not
232 # Bio::DB::SeqFeature::add_segment (as might be expected?)
233 $self->add_segment(@
$s);
241 my $type = $self->{subtype
} || $self->{type
};
242 $self->{segments
} ||= [];
243 my $ref = $self->seq_id;
244 my $name = $self->name;
245 my $class = $self->class;
246 my $source_tag = $self->source_tag;
248 my $min_start = $self->start || 999_999_999_999
;
249 my $max_stop = $self->end || -999_999_999_999
;
251 my @segments = @
{$self->{segments
}};
254 if (ref($seg) eq 'ARRAY') {
255 my ($start,$stop) = @
{$seg};
256 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
257 my $strand = $self->{strand
};
259 if ($start > $stop) {
260 ($start,$stop) = ($stop,$start);
264 push @segments,$self->new(-start
=> $start,
271 -phase
=> $self->{phase
},
272 -score
=> $self->{score
},
273 -source_tag
=> $source_tag,
274 -attributes
=> $self->{attributes
},
276 $min_start = $start if $start < $min_start;
277 $max_stop = $stop if $stop > $max_stop;
282 $min_start = $seg->start if ($seg->start && $seg->start < $min_start);
283 $max_stop = $seg->end if ($seg->end && $seg->end > $max_stop);
287 local $^W
= 0; # some warning of an uninitialized variable...
288 $self->{segments
} = \
@segments;
289 $self->{ref} ||= $self->{segments
}[0]->seq_id;
290 $self->{start
} = $min_start;
291 $self->{stop
} = $max_stop;
297 my $s = $self->{segments
} or return wantarray ?
() : 0;
302 my $d = $self->{score
};
303 $self->{score
} = shift if @_;
308 my $d = $self->{type
};
309 $self->{type
} = shift if @_;
314 my $d = $self->{name
};
315 $self->{name
} = shift if @_;
318 sub seq_id
{ shift->ref(@_) }
321 my $d = $self->{ref};
322 $self->{ref} = shift if @_;
327 my $d = $self->{start
};
328 $self->{start
} = shift if @_;
329 if (my $rs = $self->{refseq
}) {
330 my $strand = $rs->strand || 1;
331 return $strand >= 0 ?
($d - $rs->start + 1) : ($rs->end - $d + 1);
338 my $d = $self->{stop
};
339 $self->{stop
} = shift if @_;
340 if (my $rs = $self->{refseq
}) {
341 my $strand = $rs->strand || 1;
342 return $strand >= 0 ?
($d - $rs->start + 1) : ($rs->end - $d + 1);
348 my $d = $self->{strand
};
349 $self->{strand
} = shift if @_;
350 if (my $rs = $self->{refseq
}) {
351 my $rstrand = $rs->strand;
353 return 1 if $rstrand == $d;
354 return -1 if $rstrand != $d;
359 # this does nothing, but it is here for compatibility reasons
362 my $d = $self->{absolute
};
363 $self->{absolute
} = shift if @_;
369 local $self->{refseq
} = undef;
374 local $self->{refseq
} = undef;
379 local $self->{refseq
} = undef;
385 return $self->end - $self->start + 1;
388 #is_circular is needed for Bio::PrimarySeqI
391 my $d = $self->{is_circular
};
392 $self->{is_circular
} = shift if @_;
399 my $seq = exists $self->{seq
} ?
$self->{seq
} : '';
404 my $seq = shift->seq;
405 $seq = $seq->seq if CORE
::ref($seq);
412 Usage : $id = $obj->display_name or $obj->display_name($newid);
413 Function: Gets or sets the display id, also known as the common name of
416 The semantics of this is that it is the most likely string
417 to be used as an identifier of the sequence, and likely to
418 have "human" readability. The id is equivalent to the LOCUS
419 field of the GenBank/EMBL databanks and the ID field of the
420 Swissprot/sptrembl database. In fasta format, the >(\S+) is
421 presumed to be the id, though some people overload the id
422 to embed other information. Bioperl does not use any
423 embedded information in the ID field, and people are
424 encouraged to use other mechanisms (accession field for
425 example, or extending the sequence object) to solve this.
427 Notice that $seq->id() maps to this function, mainly for
428 legacy/convenience issues.
430 Args : None or a new id
435 sub display_name
{ shift->name }
437 *display_id
= \
&display_name
;
439 =head2 accession_number
441 Title : accession_number
442 Usage : $unique_biological_key = $obj->accession_number;
443 Function: Returns the unique biological id for a sequence, commonly
444 called the accession_number. For sequences from established
445 databases, the implementors should try to use the correct
446 accession number. Notice that primary_id() provides the
447 unique id for the implemetation, allowing multiple objects
448 to have the same accession number in a particular implementation.
450 For sequences with no accession number, this method should return
458 sub accession_number
{
465 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
466 Function: Returns the type of sequence being one of
467 'dna', 'rna' or 'protein'. This is case sensitive.
469 This is not called <type> because this would cause
470 upgrade problems from the 0.5 and earlier Seq objects.
472 Returns : a string either 'dna','rna','protein'. NB - the object must
473 make a call of the type - if there is no type specified it
482 return 'dna'; # no way this will be anything other than dna!
490 Usage : $seqobj->desc($string) or $seqobj->desc()
491 Function: Sets or gets the description of the sequence
493 Returns : The description
494 Args : The description or none
501 my $d = $self->notes;
502 $self->{desc
} = shift if @_;
509 return $self->each_tag_value(@_);
511 return $self->{attributes
} ?
%{$self->{attributes
}} : ();
517 my $d = $self->{primary_id
};
518 $self->{primary_id
} = shift if @_;
520 # return $d if defined $d;
521 # return (overload::StrVal($self) =~ /0x([a-f0-9]+)/)[0];
526 my $notes = $self->{desc
};
527 return $notes if defined $notes;
528 return $self->attributes('Note');
533 return $self->attributes('Alias');
538 return $self->start < $self->end ?
$self->start : $self->end;
543 return $self->start > $self->end ?
$self->start : $self->end;
549 Usage : my $location = $seqfeature->location()
550 Function: returns a location object suitable for identifying location
551 of feature on sequence or parent feature
552 Returns : Bio::LocationI object
559 require Bio
::Location
::Split
unless Bio
::Location
::Split
->can('new');
561 if (my @segments = $self->segments) {
562 $location = Bio
::Location
::Split
->new();
563 foreach (@segments) {
564 $location->add_sub_Location($_);
574 require Bio
::Location
::Simple
unless Bio
::Location
::Simple
->can('new');
575 if (my @segments = $self->segments) {
577 Bio
::Location
::Simple
->new(-start
=> $_->start,
579 -strand
=> $_->strand);
582 return Bio
::Location
::Simple
->new(-start
=> $self->start,
584 -strand
=> $self->strand);
588 =head2 location_string
590 Title : location_string
591 Usage : my $string = $seqfeature->location_string()
592 Function: Returns a location string in a format recognized by gbrowse
596 This is a convenience function used by the generic genome browser. It
597 returns the location of the feature and its subfeatures in the compact
598 form "start1..end1,start2..end2,...". Use
599 $seqfeature-E<gt>location()-E<gt>toFTString() to obtain a standard
600 GenBank/EMBL location representation.
604 sub location_string
{
606 my @segments = $self->segments or return $self->to_FTstring;
607 join ',',map {$_->to_FTstring} @segments;
610 sub coordinate_policy
{
611 require Bio
::Location
::WidestCoordPolicy
unless Bio
::Location
::WidestCoordPolicy
->can('new');
612 return Bio
::Location
::WidestCoordPolicy
->new();
615 sub min_start
{ shift->low }
616 sub max_start
{ shift->low }
617 sub min_end
{ shift->high }
618 sub max_end
{ shift->high}
619 sub start_pos_type
{ 'EXACT' }
620 sub end_pos_type
{ 'EXACT' }
623 my $low = $self->min_start;
624 my $high = $self->max_end;
625 my $strand = $self->strand;
626 my $str = defined $strand && $strand<0 ?
"complement($low..$high)" : "$low..$high";
627 if (my $id = $self->seq_id()) {
628 $str = $id . ":" . $str;
634 my $d = $self->{phase
};
635 $self->{phase
} = shift if @_;
641 my $d = $self->{class};
642 $self->{class} = shift if @_;
643 return defined($d) ?
$d : 'Sequence'; # acedb is still haunting me - LS
646 # set GFF dumping version
649 my $d = $self->{gff3_version
} || 2;
650 $self->{gff3_version
} = shift if @_;
657 if ($self->version == 3) {
658 return $self->gff3_string(@_);
662 my $name = $self->name;
663 my $class = $self->class;
664 my $group = "$class $name" if $name;
665 my $strand = ('-','.','+')[$self->strand+1];
667 $string .= join("\t",
668 $self->ref||'.',$self->source||'.',$self->method||'.',
669 $self->start||'.',$self->stop||'.',
670 defined($self->score) ?
$self->score : '.',
672 defined($self->phase) ?
$self->phase : '.',
677 foreach ($self->sub_SeqFeature) {
678 $string .= $_->gff_string($recurse);
684 # Suggested strategy for dealing with the multiple parentage issue.
685 # First recurse through object tree and record parent tree.
686 # Then recurse again, skipping objects we've seen before.
688 my ($self,$recurse,$parent_tree,$seenit,$force_id) = @_;
695 $self->_traverse($parent_tree) unless %$parent_tree; # this will record parents of all children
696 my $primary_id = defined $force_id ?
$force_id : $self->_real_or_dummy_id;
698 return if $seenit->{$primary_id}++;
700 @rsf = $self->get_SeqFeatures;
702 # Detect case in which we have a split location feature. In this case we
703 # skip to the grandchildren and trick them into thinking that our parent is theirs.
704 my %types = map {$_->primary_tag=>1} @rsf;
705 my @types = keys %types;
706 if (@types == 1 && $types[0] eq $self->primary_tag) {
707 return join ("\n",map {$_->gff3_string(1,$parent_tree,{},$primary_id)} @rsf);
711 @parent_ids = keys %{$parent_tree->{$primary_id}};
714 my $group = $self->format_attributes(\
@parent_ids,$force_id);
715 my $name = $self->name;
717 my $class = $self->class;
718 my $strand = ('-','.','+')[$self->strand+1];
725 defined($self->score) ?
$self->score : '.',
727 defined($self->phase) ?
$self->phase : '.',
731 map {$_->gff3_string(1,$parent_tree,$seenit)} @rsf);
734 sub _real_or_dummy_id
{
736 my $id = $self->primary_id;
737 return $id if defined $id;
738 return return (overload
::StrVal
($self) =~ /0x([a-f0-9]+)/)[0];
743 my $tree = shift; # tree => {$child}{$parent} = 1
745 my $id = $self->_real_or_dummy_id;
746 defined $id or return;
747 $tree->{$id}{$parent->_real_or_dummy_id}++ if $parent;
748 $_->_traverse($tree,$self) foreach $self->get_SeqFeatures;
755 my $d = $self->{source
};
756 $self->{source
} = shift if @_;
760 # This probably should be deleted. Not sure why it's here, but might
761 # have been added for Ace::Sequence::Feature-compliance.
767 sub has_tag
{ exists shift->{attributes
}{shift()} }
771 my $toencode = shift;
772 $toencode =~ s/([^a-zA-Z0-9_.:?^*\(\)\[\]@!+-])/uc sprintf("%%%02x",ord($1))/eg;
778 return keys %{$self->{attributes
}};
783 my ($tag_name,@tag_values) = @_;
784 push @
{$self->{attributes
}{$tag_name}},@tag_values;
789 my $tag_name = shift;
790 delete $self->{attributes
}{$tag_name};
796 my $value = $self->{attributes
}{$tag} or return;
797 my $ref = CORE
::ref $value;
798 return $ref && $ref eq 'ARRAY' ? @
{$self->{attributes
}{$tag}}
799 : $self->{attributes
}{$tag};
802 sub get_Annotations
{
805 my @values = $self->get_tag_values($tag);
806 return $values[0] if @values == 1;
810 sub format_attributes
{
813 my $fallback_id = shift;
815 my @tags = $self->all_tags;
818 my @values = $self->each_tag_value($t);
819 push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
821 # my $id = $self->escape($self->_real_or_dummy_id) || $fallback_id;
822 my $id = $fallback_id || $self->escape($self->_real_or_dummy_id);
826 $parent_id = join (',',map {$self->escape($_)} @
$parent);
829 my $name = $self->display_name;
830 unshift @result,"ID=".$id if defined $id;
831 unshift @result,"Parent=".$parent_id if defined $parent_id;
832 unshift @result,"Name=".$self->escape($name) if defined $name;
833 return join ';',@result;
839 Usage : my $feature = $seqfeature->clone
840 Function: Create a deep copy of the feature
841 Returns : A copy of the feature
849 # overwrite attributes
850 my $clone = bless \
%clone,CORE
::ref($self);
851 $clone{attributes
} = {};
852 for my $k (keys %{$self->{attributes
}}) {
853 @
{$clone{attributes
}{$k}} = @
{$self->{attributes
}{$k}};
861 Usage : $ref = $s->refseq([$newseq] [,$newseqclass])
862 Function: get/set reference sequence
863 Returns : current reference sequence
864 Args : new reference sequence and class (optional)
867 This method will get or set the reference sequence. Called with no
868 arguments, it returns the current reference sequence. Called with any
869 Bio::SeqFeatureI object that provides the seq_id(), start(), end() and
872 The method will generate an exception if you attempt to set the
873 reference sequence to a sequence that has a different seq_id from the
880 my $d = $self->{refseq
};
883 $self->throw("attempt to set refseq using a feature that does not share the same seq_id")
884 unless $newref->seq_id eq $self->seq_id;
885 $self->{refseq
} = $newref;
898 L<Bio::Graphics::Feature>
902 Lincoln Stein E<lt>lstein@cshl.eduE<gt>.
904 Copyright (c) 2006 Cold Spring Harbor Laboratory
906 This library is free software; you can redistribute it and/or modify
907 it under the same terms as Perl itself. See DISCLAIMER.txt for
908 disclaimers of warranty.