1 package Bio
::DB
::SeqFeature
::NormalizedFeature
;
7 Bio::DB::SeqFeature::NormalizedFeature -- Normalized feature for use with Bio::DB::SeqFeature::Store
11 use Bio::DB::SeqFeature::Store;
12 # Open the sequence database
13 my $db = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
14 -dsn => 'dbi:mysql:test');
15 my ($feature) = $db->get_features_by_name('ZK909');
16 my @subfeatures = $feature->get_SeqFeatures();
17 my @exons_only = $feature->get_SeqFeatures('exon');
20 $db->seqfeature_class('Bio::DB::SeqFeature::NormalizedFeature');
21 my $new = $db->new_feature(-primary_tag=>'gene',
27 $feature->add_SeqFeature($db->new_feature(-primary_tag=>'exon',
34 The Bio::DB::SeqFeature::NormalizedFeature object is an alternative
35 representation of SeqFeatures for use with Bio::DB::SeqFeature::Store
36 database system. It is identical to Bio::DB::SeqFeature, except that
37 instead of storing feature/subfeature relationships in a database
38 table, the information is stored in the object itself. This actually
39 makes the objects somewhat inconvenient to work with from SQL, but
40 does speed up access somewhat.
42 To use this class, pass the name of the class to the
43 Bio::DB::SeqFeature::Store object's seqfeature_class() method. After
44 this, $db-E<gt>new_feature() will create objects of type
45 Bio::DB::SeqFeature::NormalizedFeature. If you are using the GFF3
46 loader, pass Bio::DB::SeqFeature::Store::GFF3Loader-E<gt>new() the
47 -seqfeature_class argument:
49 use Bio::DB::SeqFeature::Store::GFF3Loader;
51 my $store = connect_to_db_somehow();
52 my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(
54 -seqfeature_class => 'Bio::DB::SeqFeature::NormalizedFeature'
61 use base
'Bio::SeqFeature::Lite';
62 use base
'Bio::DB::SeqFeature::NormalizedFeatureI';
63 use overload
'""' => \
&as_string
,
70 my $USE_OVERLOADED_NAMES = 1;
72 # some of this is my fault and some of it is changing bioperl API
73 *get_all_SeqFeatures
= *sub_SeqFeature
= *merged_segments
= \
&segments
;
75 ##### CLASS METHODS ####
80 Usage : $feature = Bio::DB::SeqFeature::NormalizedFeature->new(@args)
81 Function: create a new feature
82 Returns : the new seqfeature
86 This method creates and, if possible stores into a database, a new
87 Bio::DB::SeqFeature::NormalizedFeature object using the specialized
88 Bio::DB::SeqFeature class.
90 The arguments are the same to Bio::SeqFeature::Generic-E<gt>new() and
91 Bio::Graphics::Feature-E<gt>new(). The most important difference is the
92 B<-store> option, which if present creates the object in a
93 Bio::DB::SeqFeature::Store database, and he B<-index> option, which
94 controls whether the feature will be indexed for retrieval (default is
95 true). Ordinarily, you would only want to turn indexing on when
96 creating top level features, and off only when storing
97 subfeatures. The default is on.
99 Arguments are as follows:
101 -seq_id the reference sequence
102 -start the start position of the feature
103 -end the stop position of the feature
104 -display_name the feature name (returned by seqname)
105 -primary_tag the feature type (returned by primary_tag)
106 -source the source tag
107 -score the feature score (for GFF compatibility)
108 -desc a description of the feature
109 -segments a list of subfeatures (see Bio::Graphics::Feature)
110 -subtype the type to use when creating subfeatures
111 -strand the strand of the feature (one of -1, 0 or +1)
112 -phase the phase of the feature (0..2)
113 -url a URL to link to when rendered with Bio::Graphics
114 -attributes a hashref of tag value attributes, in which the key is the tag
115 and the value is an array reference of values
116 -store a previously-opened Bio::DB::SeqFeature::Store object
117 -index index this feature if true
121 -id an alias for -display_name
122 -seqname an alias for -display_name
123 -display_id an alias for -display_name
124 -name an alias for -display_name
125 -stop an alias for end
126 -type an alias for primary_tag
133 my $db = $args{-store
} || $args{-factory
};
134 my $index = exists $args{-index} ?
$args{-index} : 1;
135 my $self = $class->SUPER::new
(@_);
139 $db->store($self); # this will set the primary_id
141 $db->store_noindex($self); # this will set the primary_id
143 $self->object_store($db);
148 =head2 Bio::SeqFeatureI methods
150 The following Bio::SeqFeatureI methods are supported:
152 seq_id(), start(), end(), strand(), get_SeqFeatures(),
153 display_name(), primary_tag(), source_tag(), seq(),
154 location(), primary_id(), overlaps(), contains(), equals(),
155 intersection(), union(), has_tag(), remove_tag(),
156 add_tag_value(), get_tag_values(), get_all_tags()
158 Some methods that do not make sense in the context of a genome
159 annotation database system, such as attach_seq(), are not supported.
161 Please see L<Bio::SeqFeatureI> for more details.
168 require Bio
::PrimarySeq
unless Bio
::PrimarySeq
->can('new');
170 my ($start,$end) = ($self->start,$self->end);
171 if ($self->strand < 0) {
172 ($start,$end) = ($end,$start);
175 if (my $store = $self->object_store) {
176 return Bio
::PrimarySeq
->new(-seq
=> $store->fetch_sequence($self->seq_id,$start,$end) || '',
177 -id
=> $self->display_name);
179 return $self->SUPER::seq
($self->seq_id,$start,$end);
185 my ($newstart,$newstop) = @_;
186 my $store = $self->object_store or return;
187 my ($start,$stop) = ($self->start+$newstart-1,$self->end+$newstop-1);
188 if ($self->strand < 0) {
189 ($start,$stop) = ($stop,$start);
191 my $seq = $store->fetch_sequence($self->seq_id,$start,$stop);
192 return Bio
::PrimarySeq
->new($seq);
195 =head2 add_SeqFeature
197 Title : add_SeqFeature
198 Usage : $flag = $feature->add_SeqFeature(@features)
199 Function: Add subfeatures to the feature
200 Returns : true if successful
201 Args : list of Bio::SeqFeatureI objects
204 Add one or more subfeatures to the feature. For best results,
205 subfeatures should be of the same class as the parent feature
206 (i.e. don't try mixing Bio::DB::SeqFeature::NormalizedFeature with
207 other feature types).
209 An alias for this method is add_segment().
215 $self->_add_segment(1,@_);
221 Usage : $flag = $feature->update()
222 Function: Update feature in the database
223 Returns : true if successful
227 After changing any fields in the feature, call update() to write it to
228 the database. This is not needed for add_SeqFeature() as update() is
229 invoked automatically.
235 my $store = $self->object_store or return;
236 $store->store($self);
239 =head2 get_SeqFeatures
241 Title : get_SeqFeature
242 Usage : @subfeatures = $feature->get_SeqFeatures([@types])
243 Function: return subfeatures of this feature
244 Returns : list of subfeatures
245 Args : list of subfeature primary_tags (optional)
248 This method extends the Bio::SeqFeatureI get_SeqFeatures() slightly by
249 allowing you to pass a list of primary_tags, in which case only
250 subfeatures whose primary_tag is contained on the list will be
251 returned. Without any types passed all subfeatures are returned.
256 # segments can be either normalized IDs or ordinary feature objects
257 sub get_SeqFeatures
{
261 my $s = $self->{segments
} or return;
262 my $store = $self->object_store;
271 my @r = grep {$_->type_match(@types)} (@ordinary,$store->fetch_many(\
@ids));
273 eval {$r->object_store($store) };
281 Usage : $store = $feature->object_store([$new_store])
282 Function: get or set the database handle
283 Returns : current database handle
284 Args : new database handle (optional)
287 This method will get or set the Bio::DB::SeqFeature::Store object that
288 is associated with the feature. After changing the store, you should
289 probably unset the feature's primary_id() and call update() to ensure
290 that the object is written into the database as a new feature.
296 my $d = $self->{store
};
297 $self->{store
} = shift if @_;
302 =head2 overloaded_names
304 Title : overloaded_names
305 Usage : $overload = $feature->overloaded_names([$new_overload])
306 Function: get or set overloading of object strings
307 Returns : current flag
308 Args : new flag (optional)
311 For convenience, when objects of this class are stringified, they are
312 represented in the form "primary_tag(display_name)". To turn this
313 feature off, call overloaded_names() with a false value. You can
314 invoke this on an individual feature object or on the class:
316 Bio::DB::SeqFeature::NormalizedFeature->overloaded_names(0);
321 sub overloaded_names
{
323 my $d = $USE_OVERLOADED_NAMES;
324 $USE_OVERLOADED_NAMES = shift if @_;
331 Usage : $segment = $feature->segment
332 Function: return a Segment object corresponding to feature
333 Returns : a Bio::DB::SeqFeature::Segment
337 This turns the feature into a Bio::DB::SeqFeature::Segment object,
338 which you can then use to query for overlapping features. See
339 L<Bio::DB::SeqFeature::Segment>.
345 return Bio
::DB
::SeqFeature
::Segment
->new($self);
350 =head2 AUTOLOADED methods
352 @subfeatures = $feature->Exon;
354 If you use an unknown method that begins with a capital letter, then
355 the feature autogenerates a call to get_SeqFeatures() using the
356 lower-cased method name as the primary_tag. In other words
357 $feature-E<gt>Exon is equivalent to:
359 @subfeature s= $feature->get_SeqFeatures('exon')
361 If you use an unknown method that begins with Tag_(tagname),
362 Att_(tagname) Is_(tagname), then it will be the same as calling the
363 each_tag_value() method with the tagname. In a list context, these
364 autogenerated procedures return the list of results. In scalar
365 context, they return the first item in the list!!
371 my($pack,$func_name) = $AUTOLOAD=~/(.+)::([^:]+)$/;
375 # ignore DESTROY calls
376 return if $func_name eq 'DESTROY';
378 # call attributes if func_name begins with "Tag_" or "Att_":
379 if ($func_name =~ /^(Tag|Att|Is)_(\w+)/) {
380 my @result = $self->each_tag_value($2);
381 return wantarray ?
@result : $result[0];
384 # fetch subfeatures if func_name has an initial cap
385 if ($func_name =~ /^[A-Z]/) {
386 return $self->get_SeqFeatures(lc $func_name);
389 # error message of last resort
390 $self->throw(qq(Can
't locate object method "$func_name" via package "$pack"));
396 $self->_add_segment(0,@_);
399 # This adds subfeatures. It has the property of converting the
400 # provided features into an object like itself and storing them
401 # into the database. If the feature already has a primary id and
402 # an object_store() method, then it is not stored into the database,
403 # but its primary id is reused.
406 my $normalized = shift;
407 my $store = $self->object_store;
409 my @segments = $self->_create_subfeatures($normalized,@_);
412 $self->_fix_boundaries(\
@segments);
414 # freakish fixing of our non-standard Target attribute
415 $self->_fix_target(\
@segments);
417 for my $seg (@segments) {
418 my $id = $normalized ?
$seg->primary_id : $seg;
419 defined $id or $self->throw("No primary ID when there should be");
420 push @
{$self->{segments
}},$id;
423 $self->update if $self->primary_id; # write us back to disk
426 sub _fix_boundaries
{
428 my $segments = shift;
429 my $normalized = shift;
431 my $min_start = $self->start || 999_999_999_999
;
432 my $max_stop = $self->end || -999_999_999_999
;
434 for my $seg (@
$segments) {
435 $min_start = $seg->start if $seg->start < $min_start;
436 $max_stop = $seg->end if $seg->end > $max_stop;
439 # adjust our boundaries, etc.
440 $self->start($min_start) if $min_start < $self->start;
441 $self->end($max_stop) if $max_stop > $self->end;
442 $self->{ref} ||= $segments->[0]->seq_id;
443 $self->{strand
} ||= $segments->[0]->strand;
449 my $normalized = shift; # ignored for now
451 # freakish fixing of our non-standard Target attribute
452 if (my $t = ($self->attributes('Target'))[0]) {
453 my ($seqid,$tstart,$tend,$strand) = split /\s+/,$t;
454 if (defined $tstart && defined $tend) {
455 my $min_tstart = $tstart;
456 my $max_tend = $tend;
457 for my $seg (@
$segs) {
458 my $st = ($seg->attributes('Target'))[0] or next;
459 (undef,$tstart,$tend) = split /\s+/,$st;
460 next unless defined $tstart && defined $tend;
461 $min_tstart = $tstart if $tstart < $min_tstart;
462 $max_tend = $tend if $tend > $max_tend;
464 if ($min_tstart < $tstart or $max_tend > $tend) {
465 $self->{attributes
}{Target
}[0] = join ' ',($seqid,$min_tstart,$max_tend,$strand||'');
471 # undo the load_id and Target hacks on the way out
472 sub format_attributes
{
475 my $fallback_id = shift;
477 my $load_id = $self->load_id || '';
479 my $targobj = ($self->attributes('Target'))[0];
480 # was getting an 'Use of uninitialized value with split' here, changed to cooperate -cjf 7/10/07
481 my ($target) = $targobj ?
split /\s+/,($self->attributes('Target'))[0] : ('');
482 my @tags = $self->all_tags;
485 my @values = $self->each_tag_value($t);
487 # This line prevents Alias from showing up if it matches the load id, but this is not good
488 # @values = grep {$_ ne $load_id && $_ ne $target} @values if $t eq 'Alias';
490 # these are hacks, which we don't want to appear in the file
491 next if $t eq 'load_id';
492 next if $t eq 'parent_id';
494 foreach (@values) { s/\s+$// } # get rid of trailing whitespace
495 push @result,join '=',$self->escape($t),join(',', map {$self->escape($_)} @values) if @values;
497 my $id = $self->primary_id || $fallback_id;
500 $parent_id = join (',',map {$self->escape($_)} @
$parent);
502 my $name = $self->display_name;
503 unshift @result,"ID=".$self->escape($id) if defined $id;
504 unshift @result,"Parent=".$parent_id if defined $parent_id;
505 unshift @result,"Name=".$self->escape($name) if defined $name;
506 return join ';',@result;
509 sub _create_subfeatures
{
511 my $normalized = shift;
513 my $type = $self->{subtype
} || $self->{type
};
514 my $ref = $self->seq_id;
515 my $name = $self->name;
516 my $class = $self->class;
517 my $store = $self->object_store;
518 my $source = $self->source;
521 $store or $self->throw("Feature must be associated with a Bio::DB::SeqFeature::Store database before attempting to add subfeatures to a normalized object");
524 my $index_subfeatures_policy = eval{$store->index_subfeatures};
530 if (UNIVERSAL
::isa
($seg,ref $self)) {
532 if (!$normalized) { # make sure the object has no lazy behavior
533 $seg->primary_id(undef);
534 $seg->object_store(undef);
539 elsif (ref($seg) eq 'ARRAY') {
540 my ($start,$stop) = @
{$seg};
541 next unless defined $start && defined $stop; # fixes an obscure bug somewhere above us
542 my $strand = $self->{strand
};
544 if ($start > $stop) {
545 ($start,$stop) = ($stop,$start);
548 push @segments,$self->new(-start
=> $start,
560 elsif (UNIVERSAL
::isa
($seg,'Bio::SeqFeatureI')) {
561 my $score = $seg->score if $seg->can('score');
562 my $f = $self->new(-start
=> $seg->start,
564 -strand
=> $seg->strand,
565 -seq_id
=> $seg->seq_id,
566 -name
=> $seg->display_name,
567 -primary_tag
=> $seg->primary_tag,
568 -source_tag
=> $seg->source,
572 for my $tag ($seg->get_all_tags) {
573 my @values = $seg->get_tag_values($tag);
574 $f->{attributes
}{$tag} = \
@values;
580 croak
"$seg is neither a Bio::SeqFeatureI object nor an arrayref";
584 return unless @segments;
586 if ($normalized && $store) { # parent/child data is going to be stored in the database
588 my @need_loading = grep {!defined $_->primary_id || $_->object_store ne $store} @segments;
591 if ($index_subfeatures_policy) {
592 $result = $store->store(@need_loading);
594 $result = $store->store_noindex(@need_loading);
596 $result or croak
"Couldn't store one or more subseqfeatures";
606 Usage : $id = $feature->load_id
607 Function: get the GFF3 load ID
608 Returns : the GFF3 load ID (string)
612 For features that were originally loaded by the GFF3 loader, this
613 method returns the GFF3 load ID. This method may not be supported in
614 future versions of the module.
619 return (shift->attributes('load_id'))[0];
626 Usage : @notes = $feature->notes
627 Function: get contents of the GFF3 Note tag
628 Returns : List of GFF3 Note tags
632 For features that were originally loaded by the GFF3 loader, this
633 method returns the contents of the Note tag as a list. This is a
634 convenience for Bio::Graphics, which looks for notes() when it
635 constructs a default description line.
640 return shift->attributes('Note');
646 Usage : $id = $feature->primary_id([$new_id])
647 Function: get/set the feature's database ID
648 Returns : the current primary ID
652 This method gets or sets the primary ID of the feature in the
653 underlying Bio::DB::SeqFeature::Store database. If you change this
654 field and then call update(), it will have the effect of making a copy
655 of the feature in the database under a new ID.
661 my $d = $self->{primary_id
};
662 $self->{primary_id
} = shift if @_;
669 Usage : $segment = $feature->target
670 Function: return the segment correspondent to the "Target" attribute
671 Returns : a Bio::DB::SeqFeature::Segment object
675 For features that are aligned with others via the GFF3 Target
676 attribute, this returns a segment corresponding to the aligned
677 region. The CIGAR gap string is not yet supported.
683 my @targets = $self->attributes('Target');
685 for my $t (@targets) {
686 my ($seqid,$start,$end,$strand) = split /\s+/,$t;
688 $strand = $strand eq '+' ?
1
689 : $strand eq '-' ?
-1
691 push @result,Bio
::DB
::SeqFeature
::Segment
->new($self->object_store,
697 return wantarray ?
@result : $result[0];
700 =head2 Internal methods
704 =item $feature-E<gt>as_string()
706 Internal method used to implement overloaded stringification.
708 =item $boolean = $feature-E<gt>type_match(@list_of_types)
710 Internal method that will return true if the feature's primary_tag and
711 source_tag match any of the list of types (in primary_tag:source_tag
720 return overload
::StrVal
($self) unless $self->overloaded_names;
721 my $name = $self->display_name || $self->load_id;
722 $name ||= "id=".$self->primary_id if $self->primary_id;
723 $name ||= "<unnamed>";
724 my $method = $self->primary_tag;
725 my $source= $self->source_tag;
726 my $type = $source ?
"$method:$source" : $method;
727 return "$type($name)";
733 my $store1 = $self->object_store;
734 my $store2 = eval {$b->object_store} || '';
735 return $store1 eq $store2 && $self->primary_id eq $b->primary_id;
740 return !$self->eq(shift);
743 # completely case insensitive
747 my $method = lc $self->primary_tag;
748 my $source = lc $self->source_tag;
750 my ($m,$s) = map {lc $_} split /:/,$t;
751 return 1 if $method eq $m && (!defined $s || $source eq $s);
756 sub segments
{ shift->get_SeqFeatures(@_) }
765 This is an early version, so there are certainly some bugs. Please
766 use the BioPerl bug tracking system to report bugs.
771 L<Bio::DB::SeqFeature>,
772 L<Bio::DB::SeqFeature::Store>,
773 L<Bio::DB::SeqFeature::Segment>,
774 L<Bio::DB::SeqFeature::GFF3Loader>,
775 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
776 L<Bio::DB::SeqFeature::Store::bdb>
780 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
782 Copyright (c) 2006 Cold Spring Harbor Laboratory.
784 This library is free software; you can redistribute it and/or modify
785 it under the same terms as Perl itself.