tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / DB / SeqFeature / Store.pm
blob2a87c7e3f7e38d7e0c9a0819b7486de378f6a0af
1 package Bio::DB::SeqFeature::Store;
3 # $Id$
5 =head1 NAME
7 Bio::DB::SeqFeature::Store -- Storage and retrieval of sequence annotation data
9 =head1 SYNOPSIS
11 use Bio::DB::SeqFeature::Store;
13 # Open the feature database
14 my $db = Bio::DB::SeqFeature::Store->new( -adaptor => 'DBI::mysql',
15 -dsn => 'dbi:mysql:test',
16 -create => 1 );
18 # get a feature from somewhere
19 my $feature = Bio::SeqFeature::Generic->new(...);
21 # store it
22 $db->store($feature) or die "Couldn't store!";
24 # primary ID of the feature is changed to indicate its primary ID
25 # in the database...
26 my $id = $feature->primary_id;
28 # get the feature back out
29 my $f = $db->fetch($id);
31 # change the feature and update it
32 $f->start(100);
33 $db->update($f) or die "Couldn't update!";
35 # searching...
36 # ...by id
37 my @features = $db->fetch_many(@list_of_ids);
39 # ...by name
40 @features = $db->get_features_by_name('ZK909');
42 # ...by alias
43 @features = $db->get_features_by_alias('sma-3');
45 # ...by type
46 @features = $db->get_features_by_type('gene');
48 # ...by location
49 @features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
51 # ...by attribute
52 @features = $db->get_features_by_attribute({description => 'protein kinase'});
54 # ...by primary id
55 @features = $db->get_feature_by_primary_id(42); # note no plural!!!
57 # ...by the GFF "Note" field
58 @result_list = $db->search_notes('kinase');
60 # ...by arbitrary combinations of selectors
61 @features = $db->features(-name => $name,
62 -type => $types,
63 -seq_id => $seqid,
64 -start => $start,
65 -end => $end,
66 -attributes => $attributes);
68 # ...using an iterator
69 my $iterator = $db->get_seq_stream(-name => $name,
70 -type => $types,
71 -seq_id => $seqid,
72 -start => $start,
73 -end => $end,
74 -attributes => $attributes);
76 while (my $feature = $iterator->next_seq) {
77 # do something with the feature
80 # ...limiting the search to a particular region
81 my $segment = $db->segment('Chr1',5000=>6000);
82 my @features = $segment->features(-type=>['mRNA','match']);
84 # getting & storing sequence information
85 # Warning: this returns a string, and not a PrimarySeq object
86 $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
87 my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
89 # what feature types are defined in the database?
90 my @types = $db->types;
92 # create a new feature in the database
93 my $feature = $db->new_feature(-primary_tag => 'mRNA',
94 -seq_id => 'chr3',
95 -start => 10000,
96 -end => 11000);
98 # load an entire GFF3 file, using the GFF3 loader...
99 my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store => $db,
100 -verbose => 1,
101 -fast => 1);
103 $loader->load('./my_genome.gff3');
107 =head1 DESCRIPTION
109 Bio::DB::SeqFeature::Store implements the Bio::SeqFeature::CollectionI
110 interface to allow you to persistently store Bio::SeqFeatureI objects
111 in a database and later to retrieve them by a variety of
112 searches. This module is similar to the older Bio::DB::GFF module,
113 with the following differences:
115 =over 4
117 =item 1.
119 No limitation on Bio::SeqFeatureI implementations
121 Unlike Bio::DB::GFF, Bio::DB::SeqFeature::Store works with
122 any Bio::SeqFeatureI object.
124 =item 2.
126 No limitation on nesting of features & subfeatures
128 Bio::DB::GFF is limited to features that have at most one
129 level of subfeature. Bio::DB::SeqFeature::Store can work with features
130 that have unlimited levels of nesting.
132 =item 3.
134 No aggregators
136 The aggregator architecture, which was necessary to impose order on
137 the GFF2 files that Bio::DB::GFF works with, does not apply to
138 Bio::DB::SeqFeature::Store. It is intended to store features that obey
139 well-defined ontologies, such as the Sequence Ontology
140 (http://song.sourceforge.net).
142 =item 4.
144 No relative locations
146 All locations defined by this module are relative to an absolute
147 sequence ID, unlike Bio::DB::GFF which allows you to define the
148 location of one feature relative to another.
150 =back
152 We'll discuss major concepts in Bio::DB::SeqFeature::Store and then
153 describe how to use the module.
155 =head2 Adaptors
157 Bio::DB::SeqFeature::Store is designed to work with a variety of
158 storage back ends called "adaptors." Adaptors are subclasses of
159 Bio::DB::SeqFeature::Store and provide the interface between the
160 store() and fetch() methods and the physical database. Currently the
161 number of adaptors is quite limited, but the number will grow soon.
163 =over 4
165 =item memory
167 An implementation that stores all data in memory. This is useful for
168 small data sets of no more than 10,000 features (more or less,
169 depending on system memory).
171 =item DBI::mysql
173 A full-featured implementation on top of the MySQL relational database
174 system.
176 =item berkeleydb
178 A full-featured implementation that runs on top of the BerkeleyDB
179 database. See L<Bio::DB::SeqFeature::Store::berkeleydb>.
182 =back
184 If you do not explicitly specify the adaptor, then DBI::mysql will be
185 used by default.
187 =head2 Serializers
189 When Bio::DB::SeqFeature::Store stores a Bio::SeqFeatureI object into
190 the database, it serializes it into binary or text form. When it later
191 fetches the feature from the database, it unserializes it. Two
192 serializers are available:
194 =over 4
196 =item Storable
198 This is a fast binary serializer. It is available in Perl versions
199 5.8.7 and higher and is used when available.
201 =item Data::Dumper
203 This is a slow text serializer that is available in Perl 5.8.0 and
204 higher. It is used when Storable is unavailable.
206 =back
208 If you do not specify the serializer, then Storable will be used if
209 available; otherwise Data::Dumper.
211 =head2 Loaders and Normalized Features
213 The Bio::DB::SeqFeature::Store::GFF3Loader parses a GFF3-format file
214 and loads the annotations and sequence data into the database of your
215 choice. The script bp_seqfeature_load.pl (found in the
216 scripts/Bio-SeqFeature-Store/ subdirectory) is a thin front end to the
217 GFF3Loader. Other loaders may be written later.
219 Although Bio::DB::SeqFeature::Store should work with any
220 Bio::SeqFeatureI object, there are some disadvantages to using
221 Bio::SeqFeature::Generic and other vanilla implementations. The major
222 issue is that if two vanilla features share the same subfeature
223 (e.g. two transcripts sharing an exon), the shared subfeature will be
224 cloned when stored into the database.
226 The special-purpose L<Bio::DB::SeqFeature> class is able to normalize
227 its subfeatures in the database, so that shared subfeatures are stored
228 only once. This minimizes wasted storage space. In addition, when
229 in-memory caching is turned on, each shared subfeature will usually
230 occupy only a single memory location upon restoration.
232 =cut
235 use strict;
236 use warnings;
238 use base 'Bio::SeqFeature::CollectionI';
239 use Carp 'croak';
240 use Bio::DB::GFF::Util::Rearrange;
241 use Bio::DB::SeqFeature::Segment;
242 use Scalar::Util 'blessed';
244 # this probably shouldn't be here
245 use Bio::DB::SeqFeature;
247 *dna = *get_dna = *get_sequence = \&fetch_sequence;
248 *get_SeqFeatures = \&fetch_SeqFeatures;
250 # local version
# Version number of the store API implemented by this module.
sub api_version {
  return 1.2;
}
253 =head1 Methods for Connecting and Initializing a Database
255 ## TODO: http://iowg.brcdevel.org/gff3.html#a_fasta is a dead link
257 =head2 new
259 Title : new
260 Usage : $db = Bio::DB::SeqFeature::Store->new(@options)
261 Function: connect to a database
262 Returns : A descendent of Bio::DB::SeqFeature::Store
263 Args : several - see below
264 Status : public
266 This class method creates a new database connection. The following
267 -name=E<gt>$value arguments are accepted:
269 Name Value
270 ---- -----
272 -adaptor The name of the Adaptor class (default DBI::mysql)
274 -serializer The name of the serializer class (default Storable)
276 -index_subfeatures Whether or not to make subfeatures searchable
277 (default false)
279 -cache Activate LRU caching feature -- size of cache
281 -compress Compresses features before storing them in database
282 using Compress::Zlib
284 -create (Re)initialize the database.
286 The B<-index_subfeatures> argument, if true, tells the module to
287 create indexes for a feature and all its subfeatures (and its
288 subfeatures' subfeatures). Indexing subfeatures means that you will be
289 able to search for the gene, its mRNA subfeatures and the exons inside
290 each mRNA. It also means when you search the database for all features
291 contained within a particular location, you will get the gene, the
292 mRNAs and all the exons as individual objects as well as subfeatures
293 of each other. NOTE: this option is only honored when working with a
294 normalized feature class such as Bio::DB::SeqFeature.
296 The B<-cache> argument, if true, tells the module to try to create a
297 LRU (least-recently-used) object cache using the Tie::Cacher
298 module. Caching will cause two objects that share the same primary_id
299 to (often, but not always) share the same memory location, and may
300 improve performance modestly. The argument is taken as the desired
301 size for the cache. If you pass "1" as the cache value, a reasonable
302 default cache size will be chosen. Caching requires the Tie::Cacher
303 module to be installed. If the module is not installed, then caching
304 will silently be disabled.
306 The B<-compress> argument, if true, will cause the feature data to be
307 compressed before storing it. This will make the database somewhat
308 smaller at the cost of decreasing performance.
310 The B<-create> argument, if true, will either initialize or
311 reinitialize the database. It is needed the first time a database is
312 used.
314 The new() method of individual adaptors recognizes additional
315 arguments. The default DBI::mysql adaptor recognizes the following
316 ones:
318 Name Value
319 ---- -----
321 -dsn DBI data source (default dbi:mysql:test)
323 -autoindex A flag that controls whether or not to update
324 all search indexes whenever a feature is stored
325 or updated (default true).
327 -namespace A string that will be used to qualify each table,
328 thereby allowing you to store several independent
329 sequence feature databases in a single Mysql
330 database.
332 -dumpdir The path to a temporary directory that will be
333 used during "fast" loading. See
334 L<Bio::DB::SeqFeature::Store::GFF3Loader> for a
335 description of this. Default is the current
336 directory.
337 -write Make the database writeable (implied by -create)
339 =cut
341 ###
342 # object constructor
# Class-method factory: loads the requested adaptor subclass and returns an
# initialized instance of it.
#
# Called with exactly one argument, that argument is taken as the DSN and all
# other options keep their defaults. Otherwise the -name=>$value pairs are
# parsed with rearrange(); whatever rearrange() returns after the named
# options is passed on to the adaptor's init() and post_init() as $args.
#
# Croaks if the adaptor class cannot be loaded.
sub new {
  my $self = shift;  # invoking class; the object itself is built by the adaptor class
  my ($adaptor,$serializer,$index_subfeatures,$cache,$compress,$debug,$create,$args);
  if (@_ == 1) {
    $args = {DSN => shift};
  }
  else {
    ($adaptor,$serializer,$index_subfeatures,$cache,$compress,$debug,$create,$args) =
      rearrange(['ADAPTOR',
                 'SERIALIZER',
                 'INDEX_SUBFEATURES',
                 'CACHE',
                 'COMPRESS',
                 'DEBUG',
                 'CREATE',
                ],@_);
  }
  $adaptor ||= 'DBI::mysql';

  # -create implies a writeable database.
  $args->{WRITE}++  if $create;
  $args->{CREATE}++ if $create;

  # NOTE(review): $adaptor is interpolated into a string eval; it must never
  # come from untrusted input.
  my $class = "Bio::DB::SeqFeature::Store::$adaptor";
  eval "require $class " or croak $@;

  # Caching is silently disabled when Tie::Cacher is not installed. When it is
  # installed, keep the caller's value: it is the requested cache size, and
  # the earlier `$cache &&= eval "...; 1"` form replaced it with 1.
  undef $cache if $cache && !eval { require Tie::Cacher; 1 };

  my $obj = $class->new_instance();
  $obj->debug($debug) if defined $debug;
  $obj->init($args);
  $obj->init_cache($cache) if $cache;
  $obj->do_compress($compress);
  $obj->serializer($serializer) if defined $serializer;
  $obj->index_subfeatures($index_subfeatures) if defined $index_subfeatures;
  $obj->seqfeature_class('Bio::DB::SeqFeature');
  $obj->post_init($args);
  return $obj;
}
380 =head2 init_database
382 Title : init_database
383 Usage : $db->init_database([$erase_flag])
384 Function: initialize a database
385 Returns : true
386 Args : (optional) flag to erase current data
387 Status : public
389 Call this after Bio::DB::SeqFeature::Store-E<gt>new() to initialize a
390 new database. In the case of a DBI database, this method installs the
391 schema but does B<not> create the database. You have to do this
392 offline using the appropriate command-line tool. In the case of the
393 "berkeleydb" adaptor, this creates an empty BTREE database.
395 If there is any data already in the database, init_database() called
396 with no arguments will have no effect. To permanently erase the data
397 already there and prepare to receive a fresh set of data, pass a true
398 argument.
400 =cut
403 # wipe database clean and reinstall schema
# Public entry point for schema initialization; every argument (such as the
# optional erase flag) is passed unchanged to the adaptor's _init_database().
sub init_database {
  my $db = shift;
  return $db->_init_database(@_);
}
410 =head2 post_init
412 This method is invoked after init_database for use by certain adaptors
413 (currently only the memory adaptor) to do automatic data loading after
414 initialization. It is passed a copy of the init_database() args.
416 =cut
# Deliberate no-op hook; adaptors override it to run work after init().
sub post_init {
  return;
}
420 =head2 store
422 Title : store
423 Usage : $success = $db->store(@features)
424 Function: store one or more features into the database
425 Returns : true if successful
426 Args : list of Bio::SeqFeatureI objects
427 Status : public
429 This method stores a list of features into the database. Each feature
430 is updated so that its primary_id becomes the primary ID of the
431 serialized feature stored in the database. If all features were
432 successfully stored, the method returns true. In the DBI
433 implementation, the store is performed as a single transaction and the
434 transaction is rolled back if one or more store operations failed.
436 You can find out what the primary ID of the feature has become by
437 calling the feature's primary_id() method:
439 $db->store($my_feature) or die "Oh darn";
440 my $id = $my_feature->primary_id;
442 If the feature contains subfeatures, they will all be stored
443 recursively. In the case of Bio::DB::SeqFeature and
444 Bio::DB::SeqFeature::Store::NormalizedFeature, the subfeatures will be
445 stored in a normalized way so that each subfeature appears just once
446 in the database.
448 Subfeatures will be indexed for separate retrieval based on the
449 current value of index_subfeatures().
451 If you call store() with one or more features that already have valid
452 primary_ids, then an existing object(s) will be B<replaced>. Note that
453 when using normalized features such as Bio::DB::SeqFeature, the
454 subfeatures are not recursively updated when you update the parent
455 feature. You must manually update each subfeature that has changed.
457 =cut
460 # store one or more Bio::SeqFeatureI objects
461 # if they already have a primary_id will replace into the database
462 # otherwise will insert and primary_id will be added
465 # this version stores the object and flags it to be indexed
466 # for search via attributes, name, type or location
# Store the given features with the index flag switched on (first argument
# to store_and_cache() is 1). The result is always taken in scalar context.
sub store {
  my $self = shift;
  return scalar $self->store_and_cache(1, @_);
}
473 =head2 store_noindex
475 Title : store_noindex
476 Usage : $success = $db->store_noindex(@features)
477 Function: store one or more features into the database without indexing
478 Returns : true if successful
479 Args : list of Bio::SeqFeatureI objects
480 Status : public
482 This method stores a list of features into the database but does not
483 make them searchable. The only way to access the features is via their
484 primary IDs. This method is ordinarily only used internally to store
485 subfeatures that are not indexed.
487 =cut
489 # this version stores the object and flags it so that it is
490 # not searchable via attributes, name, type or location
491 # (typically used only for subfeatures)
# Store the given features with the index flag switched off (first argument
# to store_and_cache() is 0).
sub store_noindex {
  my $db = shift;
  return $db->store_and_cache(0, @_);
}
497 =head2 no_blobs
499 Title : no_blobs
500 Usage : $db->no_blobs(1);
501 Function: decide if objects should be stored in the database as blobs.
502 Returns : boolean (default false)
503 Args : boolean (true to no longer store objects; when the corresponding
504 feature is retrieved it will instead be a minimal representation of
505 the object that was stored, as some simple Bio::SeqFeatureI object)
506 Status : dubious (new)
508 This method saves lots of space in the database, which may in turn lead to large
509 performance increases in extreme cases (over 7 million features in the db).
511 Currently only applies to the mysql implementation.
513 =cut
# Combined getter/setter for the no_blobs flag kept in $self->{no_blobs}.
# Any argument, even a false one, replaces the stored value; the return
# value is the stored flag, or 0 when it is unset or false.
sub no_blobs {
  my ($self, @new_value) = @_;
  $self->{no_blobs} = $new_value[0] if @new_value;
  return $self->{no_blobs} || 0;
}
521 =head2 new_feature
523 Title : new_feature
524 Usage : $feature = $db->new_feature(@args)
525 Function: create a new Bio::DB::SeqFeature object in the database
526 Returns : the new seqfeature
527 Args : see below
528 Status : public
530 This method creates and stores a new Bio::SeqFeatureI object using the
531 specialized Bio::DB::SeqFeature class. This class is able to store its
532 subfeatures in a normalized fashion, allowing subfeatures to be shared
533 among multiple parents (e.g. multiple exons shared among several
534 mRNAs).
536 The arguments are the same as for Bio::DB::SeqFeature-E<gt>new(), which in
537 turn are similar to Bio::SeqFeature::Generic-E<gt>new() and
538 Bio::Graphics::Feature-E<gt>new(). The most important difference is the
539 B<-index> option, which controls whether the feature will be indexed
540 for retrieval (default is true). Ordinarily, you would only want to
541 turn indexing off when creating subfeatures, because features stored
542 without indexes will only be reachable via their primary IDs or their
543 parents.
545 Arguments are as follows:
547 -seq_id the reference sequence
548 -start the start position of the feature
549 -end the stop position of the feature
550 -display_name the feature name (returned by seqname)
551 -primary_tag the feature type (returned by primary_tag)
552 -source the source tag
553 -score the feature score (for GFF compatibility)
554 -desc a description of the feature
555 -segments a list of subfeatures (see Bio::Graphics::Feature)
556 -subtype the type to use when creating subfeatures
557 -strand the strand of the feature (one of -1, 0 or +1)
558 -phase the phase of the feature (0..2)
559 -url a URL to link to when rendered with Bio::Graphics
560 -attributes a hashref of tag value attributes, in which the key is the tag
561 and the value is an array reference of values
562 -index index this feature if true
564 Aliases:
566 -id an alias for -display_name
567 -seqname an alias for -display_name
568 -display_id an alias for -display_name
569 -name an alias for -display_name
570 -stop an alias for end
571 -type an alias for primary_tag
573 You can change the seqfeature implementation generated by new() by
574 passing the name of the desired seqfeature class to
575 $db-E<gt>seqfeature_class().
577 =cut
# Build a feature through the configured seqfeature class, placing
# -store => $self ahead of the caller's own arguments.
sub new_feature {
  my $self          = shift;
  my $feature_class = $self->seqfeature_class;
  return $feature_class->new(-store => $self, @_);
}
584 =head2 delete
586 Title : delete
587 Usage : $success = $db->delete(@features)
588 Function: delete a list of features from the database
589 Returns : true if successful
590 Args : list of features
591 Status : public
593 This method looks up the primary IDs from a list of features and
594 deletes them from the database, returning true if all deletions are
595 successful.
597 WARNING: The current DBI::mysql implementation has some issues that
598 need to be resolved, namely (1) normalized subfeatures are NOT
599 recursively deleted; and (2) the deletions are not performed in a
600 transaction.
602 =cut
# Delete each feature by its primary_id via _deleteid().
# Once a deletion reports failure, no further _deleteid() calls are made and
# that false result is what gets returned. With no features the result is 1.
sub delete {
  my ($self, @features) = @_;
  my $ok = 1;
  foreach my $feature (@features) {
    my $id = $feature->primary_id;
    $ok = $self->_deleteid($id) if $ok;
  }
  return $ok;
}
614 =head2 get_feature_by_id
616 Title : get_feature_by_id
617 Usage : $feature = $db->get_feature_by_id($primary_id)
618 Function: fetch a feature from the database using its primary ID
619 Returns : a feature
620 Args : primary ID of desired feature
621 Status : public
623 This method returns a previously-stored feature from the database
624 using its primary ID. If the primary ID is invalid, it returns undef.
626 =cut
# Thin wrapper: forwards all arguments to fetch().
sub get_feature_by_id {
  my $db = shift;
  return $db->fetch(@_);
}
633 =head2 fetch
635 Title : fetch
636 Usage : $feature = $db->fetch($primary_id)
637 Function: fetch a feature from the database using its primary ID
638 Returns : a feature
639 Args : primary ID of desired feature
640 Status : public
642 This is an alias for get_feature_by_id().
644 =cut
647 # Fetch a Bio::SeqFeatureI from database using its primary_id
# Fetch one feature by primary ID, going through the object cache when
# $self->cache() returns one.
# Croaks with a usage message when called without an ID.
sub fetch {
  my $self = shift;
  croak "usage: fetch(\$primary_id)" unless @_;
  my $primary_id = shift;

  my $cache = $self->cache();
  return $self->_fetch($primary_id) unless $cache;

  if ($cache->exists($primary_id)) {
    return $cache->fetch($primary_id);
  }

  # Cache miss: load from the adaptor and remember the result.
  my $object = $self->_fetch($primary_id);
  $cache->store($primary_id, $object);
  return $object;
}
664 =head2 get_feature_by_primary_id
666 Title : get_feature_by_primary_id
667 Usage : $feature = $db->get_feature_by_primary_id($primary_id)
668 Function: fetch a feature from the database using its primary ID
669 Returns : a feature
670 Args : primary ID of desired feature
671 Status : public
673 This method returns a previously-stored feature from the database
674 using its primary ID. If the primary ID is invalid, it returns
675 undef. This method is identical to fetch().
677 =cut
# Same lookup as fetch(); all arguments are forwarded to it.
sub get_feature_by_primary_id {
  my $self = shift;
  return $self->fetch(@_);
}
683 =head2 fetch_many
685 Title : fetch_many
686 Usage : @features = $db->fetch_many($primary_id,$primary_id,$primary_id...)
687 Function: fetch many features from the database using their primary ID
688 Returns : list of features
689 Args : a list of primary IDs or an array ref of primary IDs
690 Status : public
692 Same as fetch() except that you can pass a list of primary IDs or a
693 ref to an array of IDs.
695 =cut
698 # Efficiently fetch a series of IDs from the database
699 # Can pass an array or an array ref
# Fetch several features at once. Each argument is either a primary ID or a
# reference to an array of IDs; array references are flattened one level.
# Croaks when called with no arguments; returns nothing when the flattened
# ID list turns out to be empty.
sub fetch_many {
  my $self = shift;
  croak 'usage: fetch_many($id1,$id2,$id3...)' unless @_;

  my @ids;
  for my $arg (@_) {
    push @ids, ref($arg) ? @$arg : $arg;
  }
  return unless @ids;

  return $self->_fetch_many(@ids);
}
708 =head2 get_seq_stream
710 Title : get_seq_stream
711 Usage : $iterator = $db->get_seq_stream(@args)
712 Function: return an iterator across all features in the database
713 Returns : a Bio::DB::SeqFeature::Store::Iterator object
714 Args : feature filters (optional)
715 Status : public
717 When called without any arguments this method will return an iterator
718 object that will traverse all indexed features in the database. Call
719 the iterator's next_seq() method to step through them (in no
720 particular order):
722 my $iterator = $db->get_seq_stream;
723 while (my $feature = $iterator->next_seq) {
724 print $feature->primary_tag,' ',$feature->display_name,"\n";
727 You can select a subset of features by passing a series of filter
728 arguments. The arguments are identical to those accepted by
729 $db-E<gt>features().
731 =cut
734 # Return an iterator across all features that are indexable
# Run a feature query with -iterator => 1 placed ahead of the caller's
# filter arguments.
sub get_seq_stream {
  my $db = shift;
  return $db->_features(-iterator => 1, @_);
}
741 =head2 get_features_by_name
743 Title : get_features_by_name
744 Usage : @features = $db->get_features_by_name($name)
745 Function: looks up features by their display_name
746 Returns : a list of matching features
747 Args : the desired name
748 Status : public
750 This method searches the display_name of all features for matches
751 against the provided name. GLOB style wildcards ("*", "?") are
752 accepted, but may be slow.
754 The method returns the list of matches, which may be zero, 1 or more
755 than one features. Be prepared to receive more than one result, as
756 display names are not guaranteed to be unique.
758 For backward compatibility with gbrowse, this method is also known as
759 get_feature_by_name().
761 =cut
764 # get_feature_by_name() return 0 or more features using a name lookup
765 # uses the Bio::DB::GFF API
# Look up features by name. A single argument is taken as the name itself;
# any other argument list is parsed by rearrange() for class, name, aliases
# flag and type(s).
sub get_features_by_name {
  my $self = shift;
  my ($class, $name, $allow_alias, $types);

  if (@_ == 1) {
    # get_features_by_name('name')
    $name = shift;
  }
  else {
    # get_features_by_name('class'=>'name'), get_features_by_name(-name=>'name')
    ($class, $name, $allow_alias, $types) =
      rearrange([ qw(CLASS NAME ALIASES), [qw(TYPE TYPES)] ], @_);
  }

  # hacky workaround for assumption in Bio::DB::GFF that unclassed reference
  # points were of type "Sequence"
  $class = undef if defined $class && $class eq 'Sequence';

  return $self->_features(
    -name    => $name,
    -class   => $class,
    -aliases => $allow_alias,
    -type    => $types,
  );
}
783 =head2 get_feature_by_name
785 Title : get_feature_by_name
786 Usage : @features = $db->get_feature_by_name($name)
787 Function: looks up features by their display_name
788 Returns : a list of matching features
789 Args : the desired name
790 Status : Use get_features_by_name instead.
792 This method is provided for backward compatibility with gbrowse.
794 =cut
# Backward-compatible name for get_features_by_name(); arguments are
# forwarded unchanged.
sub get_feature_by_name {
  my $self = shift;
  return $self->get_features_by_name(@_);
}
798 =head2 get_features_by_alias
800 Title : get_features_by_alias
801 Usage : @features = $db->get_features_by_alias($name)
802 Function: looks up features by their display_name or alias
803 Returns : a list of matching features
804 Args : the desired name
805 Status : public
807 This method is similar to get_features_by_name() except that it will
808 also search through the feature aliases. Aliases can be created by
809 storing features that contain one or more Alias tags. Wildcards are
810 accepted.
812 =cut
# Name lookup that also matches aliases: a lone argument becomes
# -name => $arg, then -aliases => 1 is appended and the query is delegated
# to get_features_by_name().
sub get_features_by_alias {
  my $self  = shift;
  my @query = @_ == 1 ? (-name => $_[0]) : @_;
  return $self->get_features_by_name(@query, -aliases => 1);
}
824 =head2 get_features_by_type
826 Title : get_features_by_type
827 Usage : @features = $db->get_features_by_type(@types)
828 Function: looks up features by their primary_tag
829 Returns : a list of matching features
830 Args : list of primary tags
831 Status : public
833 This method will return a list of features that have any of the
834 primary tags given in the argument list. For compatibility with
835 gbrowse and Bio::DB::GFF, types can be qualified using a colon:
837 primary_tag:source_tag
839 in which case only features that match both the primary_tag B<and> the
840 indicated source_tag will be returned. If the database was loaded from
841 a GFF3 file, this corresponds to the third and second columns of the
842 row, in that order.
844 For example, given the GFF3 lines:
846 ctg123 geneFinder exon 1300 1500 . + . ID=exon001
847 ctg123 fgenesH exon 1300 1520 . + . ID=exon002
849 exon001 and exon002 will be returned by searching for type "exon", but
850 only exon002 will be returned by searching for type "exon:fgenesH".
852 =cut
# Query by type: the argument list is copied into a fresh array reference
# and passed to _features() as -type.
sub get_features_by_type {
  my ($self, @types) = @_;
  return $self->_features(-type => \@types);
}
860 =head2 get_features_by_location
862 Title : get_features_by_location
863 Usage : @features = $db->get_features_by_location(@args)
864 Function: looks up features by their location
865 Returns : a list of matching features
866 Args : see below
867 Status : public
869 This method fetches features based on a location range lookup. You
870 call it using a positional list of arguments, or a list of
871 (-argument=E<gt>$value) pairs.
873 The positional form is as follows:
875 $db->get_features_by_location($seqid [,$start [,$end]])
877 The $seqid is the name of the sequence on which the feature resides,
878 and start and end are optional endpoints for the match. If the
879 endpoints are missing then any feature on the indicated seqid is
880 returned.
882 Examples:
884 get_features_by_location('chr1'); # all features on chromosome 1
885 get_features_by_location('chr1',5000); # features between 5000 and the end
886 get_features_by_location('chr1',5000,8000); # features between 5000 and 8000
888 Location lookups are overlapping. A feature will be returned if it
889 partially or completely overlaps the indicated range.
891 The named argument form gives you more control:
893 Argument Value
894 -------- -----
896 -seq_id The name of the sequence on which the feature resides
897 -start Start of the range
898 -end End of the range
899 -strand Strand of the feature
900 -range_type Type of range to search over
902 The B<-strand> argument, if present, can be one of "0" to find
903 features that are on both strands, "+1" to find only plus strand
904 features, and "-1" to find only minus strand features. Specifying a
905 strand of undef is the same as not specifying this argument at all,
906 and retrieves all features regardless of their strandedness.
908 The B<-range_type> argument, if present, can be one of "overlaps" (the
909 default), to find features whose positions overlap the indicated
910 range, "contains," to find features whose endpoints are completely
911 contained within the indicated range, and "contained_in" to find
912 features whose endpoints are both outside the indicated range.
914 =cut
# Query by location. Arguments (positional or -name=>$value) are parsed by
# rearrange(); start, end and strand are passed on as undef whenever they
# are false (0, '' or undef).
sub get_features_by_location {
  my $self = shift;
  my ($seqid, $start, $end, $strand, $rangetype) = rearrange(
    [ ['SEQ_ID', 'SEQID', 'REF'], 'START', ['STOP', 'END'], 'STRAND', 'RANGE_TYPE' ],
    @_,
  );

  my @query = (
    -seqid      => $seqid,
    -start      => ($start  ? $start  : undef),
    -end        => ($end    ? $end    : undef),
    -strand     => ($strand ? $strand : undef),
    -range_type => $rangetype,
  );
  return $self->_features(@query);
}
927 =head2 get_features_by_attribute
929 Title : get_features_by_attribute
930 Usage : @features = $db->get_features_by_attribute(@args)
931 Function: looks up features by their attributes/tags
932 Returns : a list of matching features
933 Args : see below
934 Status : public
936 This implements a simple tag filter. Pass a list of tag names and
937 their values. The module will return a list of features whose tag
938 names and values match. Tag names are case insensitive. If multiple
939 tag name/value pairs are present, they will be ANDed together. To
940 match any of a list of values, use an array reference for the value.
942 Examples:
944 # return all features whose "function" tag is "GO:0000123"
945 @features = $db->get_features_by_attribute(function => 'GO:0000123');
947 # return all features whose "function" tag is "GO:0000123" or "GO:0000555"
948 @features = $db->get_features_by_attribute(function => ['GO:0000123','GO:0000555']);
950 # return all features whose "function" tag is "GO:0000123" or "GO:0000555"
951 # and whose "confirmed" tag is 1
952 @features = $db->get_features_by_attribute(function => ['GO:0000123','GO:0000555'],
953 confirmed => 1);
955 =cut
# Query by attribute (tag) values. Accepts either a hash reference or a flat
# list of name => value pairs; the pairs are copied into a fresh hash and
# passed to _features() as -attributes.
# Throws a usage error (via $self->throw) when no attributes are given.
sub get_features_by_attribute {
  my $self = shift;
  my %attributes = ref($_[0]) ? %{$_[0]} : @_;
  # The usage text now names this method; it previously said
  # get_feature_by_attribute, which does not exist.
  %attributes or $self->throw("Usage: get_features_by_attribute(attribute_name=>\$attribute_value...)");
  return $self->_features(-attributes=>\%attributes);
}
964 # features() call -- main query interface
967 =head2 features
969 Title : features
970 Usage : @features = $db->features(@args)
971 Function: generalized query & retrieval interface
972 Returns : list of features
973 Args : see below
974 Status : Public
976 This is the workhorse for feature query and retrieval. It takes a
977 series of -name=E<gt>$value filter arguments. Features that
978 match all the filters are returned.
980 Argument Value
981 -------- -----
983 Location filters:
984 -seq_id Chromosome, contig or other DNA segment
985 -seqid Synonym for -seq_id
986 -ref Synonym for -seq_id
987 -start Start of range
988 -end End of range
989 -stop Synonym for -end
990 -strand Strand
991 -range_type Type of range match ('overlaps','contains','contained_in')
993 Name filters:
994 -name Name of feature (may be a glob expression)
995 -aliases If true, match aliases as well as display names
996 -class Archaic argument for backward compatibility.
997 (-class=>'Clone',-name=>'ABC123') is equivalent
998 to (-name=>'Clone:ABC123')
1000 Type filters:
1001 -types List of feature types (array reference) or one type (scalar)
1002 -type Synonym for the above
1003 -primary_tag Synonym for the above
1005 -attributes Hashref of attribute=>value pairs as per
1006 get_features_by_attribute(). Multiple alternative values
1007 can be matched by providing an array reference.
1008 -attribute synonym for -attributes
1010 You may also provide features() with a list of scalar values (the
1011 first element of which must B<not> begin with a dash), in which case
1012 it will treat the list as a feature type filter.
1014 Examples:
1016 All features on chromosome 1:
1018 @features = $db->features(-seqid=>'Chr1');
1020 All features on chromosome 1 between 5000 and 6000:
1022 @features = $db->features(-seqid=>'Chr1',-start=>5000,-end=>6000);
1024 All mRNAs on chromosome 1 between 5000 and 6000:
1026 @features = $db->features(-seqid=>'Chr1',-start=>5000,-end=>6000,-types=>'mRNA');
1028 All confirmed mRNAs and repeats on chromosome 1 that overlap the range 5000..6000:
1030 @features = $db->features(-seqid => 'Chr1',-start=>5000,-end=>6000,
1031 -types => ['mRNA','repeat'],
1032 -attributes=> {confirmed=>1}
1035 All confirmed mRNAs and repeats on chromosome 1 strictly contained within the range 5000..6000:
1037 @features = $db->features(-seqid => 'Chr1',-start=>5000,-end=>6000,
1038 -types => ['mRNA','repeat'],
1039 -attributes=> {confirmed=>1}
1040 -range_type => 'contained_in',
1045 All genes and repeats:
1047 @features = $db->features('gene','repeat_region');
1049 =cut
1051 # documentation of args
1052 # my ($seq_id,$start,$end,$strand,
1053 # $name,$class,$allow_aliases,
1054 # $types,
1055 # $attributes,
1056 # $range_type,
1057 # $iterator,
1058 # ) = rearrange([['SEQID','SEQ_ID','REF'],'START',['STOP','END'],'STRAND',
1059 # 'NAME','CLASS','ALIASES',
1060 # ['TYPES','TYPE','PRIMARY_TAG'],
1061 # ['ATTRIBUTES','ATTRIBUTE'],
1062 # 'RANGE_TYPE',
1063 # ],@_);
1064 # $range_type ||= 'overlaps';
# General query entry point.  With no arguments, or with -name => value
# pairs, the arguments go to _features() unchanged.  If the first
# argument does not start with a dash, the whole list is treated as a
# list of feature types.
sub features {
  my ($self,@filters) = @_;
  my $is_type_list = @filters && $filters[0] !~/^-/;
  my @args = $is_type_list ? (-type=>[@filters]) : @filters;
  return $self->_features(@args);
}
1080 =head2 seq_ids
1082 Title : seq_ids
1083 Usage : @ids = $db->seq_ids()
1084 Function: Return all sequence IDs contained in database
1085 Returns : list of sequence Ids
1086 Args : none
1087 Status : public
1089 =cut
# Public front end: delegates to the adaptor's _seq_ids().
sub seq_ids {
  my ($self) = @_;
  return $self->_seq_ids;
}
1096 =head2 search_attributes
1098 Title : search_attributes
1099 Usage : @result_list = $db->search_attributes("text search string",[$tag1,$tag2...],$limit)
1100 Function: Search attributes for keywords occurring in a text string
1101 Returns : array of results
1102 Args : full text search string, array ref of attribute names, and an optional feature limit
1103 Status : public
1105 Given a search string, this method performs a full-text search of the
1106 specified attributes and returns an array of results. You may pass a
1107 scalar attribute name to search the values of one attribute
1108 (e.g. "Note") or you may pass an array reference to search inside
1109 multiple attributes (['Note','Alias','Parent']). Each row of the
1110 returned array is an arrayref containing the following fields:
1112 column 1 The display name of the feature
1113 column 2 The text of the note
1114 column 3 A relevance score.
1115 column 4 The feature type
1116 column 5 The unique ID of the feature
1118 NOTE: This search will fail to find features that do not have a display name!
1120 You can use fetch() or fetch_many() with the returned IDs to get to
1121 the features themselves.
1123 =cut
# Full-text search over one or more attributes.  A single attribute
# name is wrapped into an array reference so that _search_attributes()
# always receives an array reference of tag names.
sub search_attributes {
  my ($self,$search_string,$attribute_names,$limit) = @_;
  my $attribute_array;
  if (ref($attribute_names) eq 'ARRAY') {
    $attribute_array = $attribute_names;
  }
  else {
    $attribute_array = [$attribute_names];
  }
  return $self->_search_attributes($search_string,$attribute_array,$limit);
}
1133 =head2 search_notes
1135 Title : search_notes
1136 Usage : @result_list = $db->search_notes("full text search string",$limit)
1137 Function: Search the notes for a text string
1138 Returns : array of results
1139 Args : full text search string, and an optional feature limit
1140 Status : public
1142 Given a search string, this method performs a full-text search of the
1143 "Note" attribute and returns an array of results. Each row of the
1144 returned array is an arrayref containing the following fields:
1146 column 1 The display_name of the feature, suitable for passing to get_feature_by_name()
1147 column 2 The text of the note
1148 column 3 A relevance score.
1149 column 4 The type
1151 NOTE: This is equivalent to $db-E<gt>search_attributes('full text search
1152 string','Note',$limit). This search will fail to find features that do
1153 not have a display name!
1155 =cut
1158 # search_notes()
# Full-text search restricted to the "Note" attribute; calls
# _search_attributes() directly with a one-element tag list.
sub search_notes {
  my ($self,$search_string,$limit) = @_;
  my @tags = ('Note');
  return $self->_search_attributes($search_string,\@tags,$limit);
}
1166 =head2 types
1168 Title : types
1169 Usage : @type_list = $db->types
1170 Function: Get all the types in the database
1171 Returns : array of Bio::DB::GFF::Typename objects
1172 Args : none
1173 Status : public
1175 =cut
# Not implemented in the base class.
sub types {
  my $self = shift;
  $self->throw_not_implemented;
}
1181 =head2 insert_sequence
1183 Title : insert_sequence
1184 Usage : $success = $db->insert_sequence($seqid,$sequence_string,$offset)
1185 Function: Inserts sequence data into the database at the indicated offset
1186 Returns : true if successful
1187 Args : see below
1188 Status : public
1190 This method inserts the DNA or protein sequence fragment
1191 $sequence_string, identified by the ID $seq_id, into the database at
1192 the indicated offset $offset. It is used internally by the GFF3Loader
1193 to load sequence data from the files.
1195 =cut
1198 # insert_sequence()
1200 # insert a bit of primary sequence into the database
# Store a fragment of primary sequence at the given offset.  A missing
# or false offset is treated as 0 before delegating to
# _insert_sequence().
sub insert_sequence {
  my ($self,$seqid,$seq,$offset) = @_;
  $offset = 0 unless $offset;
  return $self->_insert_sequence($seqid,$seq,$offset);
}
1210 =head2 fetch_sequence
1212 Title : fetch_sequence
1213 Usage : $sequence = $db->fetch_sequence(-seq_id=>$seqid,-start=>$start,-end=>$end)
1214 Function: Fetch the indicated subsequence from the database
1215 Returns : The sequence string (not a Bio::PrimarySeq object!)
1216 Args : see below
1217 Status : public
1219 This method retrieves a portion of the indicated sequence. The arguments are:
1221 Argument Value
1222 -------- -----
1223 -seq_id Chromosome, contig or other DNA segment
1224 -seqid Synonym for -seq_id
1225 -name Synonym for -seq_id
1226 -start Start of range
1227 -end End of range
1228 -class Obsolete argument used for Bio::DB::GFF compatibility. If
1229 specified will qualify the seq_id as "$class:$seq_id".
1230 -bioseq Boolean flag; if true, returns a Bio::PrimarySeq object instead
1231 of a sequence string.
1233 You can call fetch_sequence using the following shortcuts:
1235 $seq = $db->fetch_sequence('chr3'); # entire chromosome
1236 $seq = $db->fetch_sequence('chr3',1000); # position 1000 to end of chromosome
1237 $seq = $db->fetch_sequence('chr3',undef,5000); # position 1 to 5000
1238 $seq = $db->fetch_sequence('chr3',1000,5000); # positions 1000 to 5000
1240 =cut
1243 # fetch_sequence()
1245 # equivalent to old Bio::DB::GFF->dna() method
# Fetch a stretch of primary sequence.
#
# Arguments (named or positional, parsed by rearrange()): sequence ID,
# start, end, class, bioseq flag.  Returns whatever _fetch_sequence()
# returns, or a Bio::Seq object wrapping it when the bioseq flag is true.
sub fetch_sequence {
  my $self = shift;
  my ($seqid,$start,$end,$class,$bioseq) = rearrange([['NAME','SEQID','SEQ_ID'],
                                                      'START',['END','STOP'],'CLASS','BIOSEQ'],@_);
  # The documented qualification is "$class:$seq_id"; the previous code
  # built it the other way round ("$seqid:$class").
  $seqid = "$class:$seqid" if defined $class;
  my $seq = $self->_fetch_sequence($seqid,$start,$end);
  return $seq unless $bioseq;

  require Bio::Seq unless Bio::Seq->can('new');
  # $end is undef for the "start to end of sequence" form; do not
  # interpolate undef into the display ID.
  my $display_id = defined $start
    ? "$seqid:$start.." . (defined $end ? $end : '')
    : $seqid;
  return Bio::Seq->new(-display_id=>$display_id,-seq=>$seq);
}
1260 =head2 segment
1262 Title : segment
1263 Usage : $segment = $db->segment($seq_id [,$start] [,$end] [,$absolute])
1264 Function: restrict the database to a sequence range
1265 Returns : a Bio::DB::SeqFeature::Segment object
1266 Args : sequence id, start and end ranges (optional)
1267 Status : public
1269 This is a convenience method that can be used when you are interested
1270 in the contents of a particular sequence landmark, such as a
1271 contig. Specify the ID of a sequence or other landmark in the database
1272 and optionally a start and endpoint relative to that landmark. The
1273 method will look up the region and return a
1274 Bio::DB::SeqFeature::Segment object that spans it. You can then use
1275 this segment object to make location-restricted queries on the database.
1277 Example:
1279 $segment = $db->segment('contig23',1,1000); # first 1000 bp of contig23
1280 my @mRNAs = $segment->features('mRNA'); # all mRNAs that overlap segment
1282 Although you will usually want to fetch segments that correspond to
1283 physical sequences in the database, you can actually use any feature
1284 in the database as the sequence ID. The segment() method will perform
1285 a get_features_by_name() internally and then transform the feature
1286 into the appropriate coordinates.
1288 The named feature should exist once and only once in the database. If
1289 it exists multiple times in the database and you attempt to call
1290 segment() in a scalar context, you will get an exception. A workaround
1291 is to call the method in a list context, as in:
1293 my ($segment) = $db->segment('contig23',1,1000);
1297 my @segments = $db->segment('contig23',1,1000);
1299 However, having multiple same-named features in the database is often
1300 an indication of underlying data problems.
1302 If the optional $absolute argument is a true value, then the specified
1303 coordinates are relative to the reference (absolute) coordinates.
1305 =cut
1308 # Replacement for Bio::DB::GFF->segment() method
# Build Bio::DB::SeqFeature::Segment object(s) for a landmark.
#
# Called with a single blessed object, that object is used as the
# feature directly.  Otherwise the arguments are normalised by
# setup_segment_args() and the features are looked up by name.  In
# scalar context more than one match is an error.  Start/end are taken
# relative to each feature unless the ABSOLUTE argument is true.
sub segment {
  my $self = shift;
  my (@features,@args);

  if (@_ == 1 && blessed($_[0])) {
    @features = @_;
  }
  else {
    @args     = $self->setup_segment_args(@_);
    @features = $self->get_features_by_name(@args);
  }

  $self->throw(<<END) if !wantarray && @features > 1;
segment() called in a scalar context but multiple features match.
Either call in a list context or narrow your search using the -types or -class arguments
END

  my ($rel_start,$rel_end,$abs) = rearrange(['START',['STOP','END'],'ABSOLUTE'],@args);
  $rel_start = 1 unless defined $rel_start;

  my @segments;
  for my $feature (@features) {
    my $seqid  = $feature->seq_id;
    my $strand = $feature->strand;
    my ($start,$end);

    if ($abs) {
      # Coordinates are used as given; a missing end defaults to
      # start + feature length - 1.
      $start = $rel_start;
      $end   = defined $rel_end ? $rel_end : $start + $feature->length - 1;
    }
    else {
      # A missing relative end defaults to the feature's own length.
      my $span_end = defined $rel_end ? $rel_end : $feature->end - $feature->start + 1;

      # Offsets are applied from the feature's start on the forward
      # (or unstranded) strand and from its end on the reverse strand.
      ($start,$end) = $strand >= 0
        ? ($feature->start + $rel_start - 1, $feature->start + $span_end - 1)
        : ($feature->end - $span_end + 1,    $feature->end - $rel_start + 1);
    }
    push @segments,Bio::DB::SeqFeature::Segment->new($self,$seqid,$start,$end,$strand);
  }
  return wantarray ? @segments : $segments[0];
}
1357 =head2 seqfeature_class
1359 Title : seqfeature_class
1360 Usage : $classname = $db->seqfeature_class([$new_classname])
1361 Function: get or set the name of the Bio::SeqFeatureI class generated by new_feature()
1362 Returns : name of class
1363 Args : new classname (optional)
1364 Status : public
1366 =cut
# Get/set the feature class name stored in $self->{seqfeatureclass}.
# Returns the value held before any update.  A new class is loaded with
# require and must be a Bio::SeqFeatureI, otherwise an exception is
# thrown.
sub seqfeature_class {
  my ($self,@new) = @_;
  my $previous = $self->{seqfeatureclass};
  return $previous unless @new;

  my $class = $new[0];
  # A failed require is tolerated; the isa() check below is decisive.
  eval "require $class";
  unless ($class->isa('Bio::SeqFeatureI')) {
    $self->throw("$class does not implement the Bio::SeqFeatureI interface");
  }
  $self->{seqfeatureclass} = $class;
  return $previous;
}
1381 =head2 reindex
1383 Title : reindex
1384 Usage : $db->reindex
1385 Function: reindex the database
1386 Returns : nothing
1387 Args : nothing
1388 Status : public
1390 This method will force the secondary indexes (name, location,
1391 attributes, feature types) to be recalculated. It may be useful to
1392 rebuild a corrupted database.
1394 =cut
1397 # force reindexing
# Rebuild the secondary indexes.
#
# Streams every feature from get_seq_stream() through
# _update_indexes(), bracketed by _start_reindexing() and
# _end_reindexing().  A progress line is written to STDERR after every
# 1000 features.
sub reindex {
  my $self = shift;

  my $count     = 0;
  my $last_time = time();

  $self->_start_reindexing;

  my $iterator = $self->get_seq_stream;
  while (my $f = $iterator->next_seq) {
    if (++$count % 1000 == 0) {
      my $now     = time();
      my $elapsed = sprintf(" in %5.2fs",$now - $last_time);
      $last_time  = $now;
      # The progress line goes to STDERR, so test STDERR (not STDOUT)
      # when choosing between an in-place "\r" and a plain newline.
      my $eol = (-t STDERR && !$ENV{EMACS}) ? "\r" : "\n";
      print STDERR "$count features indexed$elapsed...",' 'x60,$eol;
    }
    $self->_update_indexes($f);
  }

  $self->_end_reindexing;
}
1423 =head2 attributes
1425 Title : attributes
1426 Usage : @a = $db->attributes
1427 Function: Returns list of all known attributes
1428 Returns : Returns list of all known attributes
1429 Args : nothing
1430 Status : public
1432 =cut
# Not implemented in the base class.
#
# The original shifted $self and then called shift->throw_not_implemented,
# which shifted a second time from an empty @_ and invoked the method
# on undef instead of on the object.
sub attributes {
  my $self = shift;
  $self->throw_not_implemented;
}
1440 =head2 start_bulk_update,finish_bulk_update
1442 Title : start_bulk_update,finish_bulk_update
1443 Usage : $db->start_bulk_update
1444 $db->finish_bulk_update
1445 Function: Activate optimizations for large number of insertions/updates
1446 Returns : nothing
1447 Args : nothing
1448 Status : public
1450 With some adaptors (currently only the DBI::mysql adaptor), these
1451 methods signal the adaptor that a large number of insertions or
1452 updates are to be performed, and activate certain optimizations. These
1453 methods are called automatically by the
1454 Bio::DB::SeqFeature::Store::GFF3Loader module.
1456 Example:
1458 $db->start_bulk_update;
1459 for my $f (@features) {
1460 $db->store($f);
1462 $db->finish_bulk_update;
1464 =cut
# Public wrappers: forward all arguments to the adaptor hooks.
sub start_bulk_update {
  my $self = shift;
  $self->_start_bulk_update(@_);
}

sub finish_bulk_update {
  my $self = shift;
  $self->_finish_bulk_update(@_);
}
1469 =head2 add_SeqFeature
1471 Title : add_SeqFeature
1472 Usage : $count = $db->add_SeqFeature($parent,@children)
1473 Function: store a parent/child relationship between $parent and @children
1474 Returns : number of children successfully stored
1475 Args : parent feature and one or more children
1476 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
1478 If can_store_parentage() returns true, then some store-aware features
1479 (e.g. Bio::DB::SeqFeature) will invoke this method to store
1480 feature/subfeature relationships in a normalized table.
1482 =cut
1484 # these two are called only if can_store_parentage() returns true
1485 # _add_SeqFeature ($parent,@children)
# Public wrapper: forwards ($parent,@children) to _add_SeqFeature().
sub add_SeqFeature {
  my $self = shift;
  $self->_add_SeqFeature(@_);
}
1488 =head2 fetch_SeqFeatures
1490 Title : fetch_SeqFeatures
1491 Usage : @children = $db->fetch_SeqFeatures($parent_feature)
1492 Function: return the immediate subfeatures of the indicated feature
1493 Returns : list of subfeatures
1494 Args : the parent feature
1495 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
1497 If can_store_parentage() returns true, then some store-aware features
1498 (e.g. Bio::DB::SeqFeature) will invoke this method to retrieve
1499 feature/subfeature relationships from the database.
1501 =cut
1503 # fetch_SeqFeatures($parent,@list_of_child_types)
# Return the subfeatures of a parent via _fetch_SeqFeatures().  A parent
# without a primary_id yields an empty return.
sub fetch_SeqFeatures {
  my ($self,$parent,@rest) = @_;
  return unless defined $parent->primary_id;
  return $self->_fetch_SeqFeatures($parent,@rest);
}
1513 =head1 Changing the Behavior of the Database
1515 These methods allow you to modify the behavior of the database.
1517 =head2 debug
1519 Title : debug
1520 Usage : $debug_flag = $db->debug([$new_flag])
1521 Function: set the debug flag
1522 Returns : current debug flag
1523 Args : new debug flag
1524 Status : public
1526 This method gets/sets a flag that turns on verbose progress
1527 messages. Currently this will not do very much.
1529 =cut
# Get/set the debug flag in $self->{debug}.  Returns the value the flag
# had before any update.
sub debug {
  my ($self,@new) = @_;
  my $previous = $self->{debug};
  $self->{debug} = $new[0] if @new;
  return $previous;
}
1538 =head2 serializer
1540 Title : serializer
1541 Usage : $serializer = $db->serializer([$new_serializer])
1542 Function: get/set the name of the serializer
1543 Returns : the name of the current serializer class
1544 Args : (optional) the name of a new serializer
1545 Status : public
1547 You can use this method to set the serializer, but do not attempt to
1548 change the serializer once the database is initialized and populated.
1550 =cut
1553 # serializer
# Get/set the serializer class name kept in the 'serializer' setting.
# Returns the value held before any update.  A new serializer module is
# loaded immediately; failure to load it is fatal.
sub serializer {
  my ($self,@new) = @_;
  my $previous = $self->setting('serializer');
  return $previous unless @new;

  my $serializer = $new[0];
  eval "require $serializer; 1" or croak $@;
  $self->setting(serializer=>$serializer);
  if ($serializer eq 'Storable') {
    # Let Storable handle code references.
    # NOTE(review): $Storable::Eval makes thaw() evaluate stored code;
    # only thaw data from trusted databases.
    $Storable::forgive_me =1;
    $Storable::Deparse    = 1;
    $Storable::Eval       = 1;
  }
  return $previous;
}
# Get/set the 'compress' setting.  Unlike the other accessors this one
# returns the value in force after any update.  When compression is on,
# Compress::Zlib is loaded on demand; failure to load it is fatal.
sub do_compress {
  my ($self,@new) = @_;
  $self->setting(compress => $new[0]) if @new;

  my $enabled = $self->setting('compress');
  if ($enabled && !Compress::Zlib->can('compress')) {
    eval "use Compress::Zlib; 1" or croak $@;
  }
  return $enabled;
}
1584 =head2 index_subfeatures
1586 Title : index_subfeatures
1587 Usage : $flag = $db->index_subfeatures([$new_value])
1588 Function: flag whether to index subfeatures
1589 Returns : current value of the flag
1590 Args : (optional) new value of the flag
1591 Status : public
1593 If true, the store() method will add a searchable index to both the
1594 top-level feature and all its subfeatures, allowing the search
1595 functions to return features at any level of the containment
1596 hierarchy. If false, only the top level feature will be indexed,
1597 meaning that you will only be able to get at subfeatures by fetching
1598 the top-level feature and then traversing downward using
1599 get_SeqFeatures().
1601 You are free to change this setting at any point during the creation
1602 and population of a database. One database can contain both indexed
1603 and unindexed subfeatures.
1605 =cut
1608 # whether to index subfeatures by default
# Get/set the 'index_subfeatures' setting.  Returns the value held
# before any update.
sub index_subfeatures {
  my ($self,@new) = @_;
  my $previous = $self->setting('index_subfeatures');
  $self->setting('index_subfeatures'=>$new[0]) if @new;
  return $previous;
}
1617 =head2 clone
1619 The clone() method should be used when you want to pass the
1620 Bio::DB::SeqFeature::Store object to a child process across a
1621 fork(). The child must call clone() before making any queries.
1623 The default behavior is to do nothing, but adaptors that use the DBI
1624 interface may need to implement this in order to avoid database handle
1625 errors. See the dbi adaptor for an example.
1627 =cut
# Deliberate no-op in the base class; adaptors may override it.
sub clone {
  return;
}
1631 ################################# TIE interface ####################
1633 =head1 TIE Interface
1635 This module implements a full TIEHASH interface. The keys are the
1636 primary IDs of the features in the database. Example:
1638 tie %h,'Bio::DB::SeqFeature::Store',-adaptor=>'DBI::mysql',-dsn=>'dbi:mysql:elegans';
1639 $h{123} = $feature1;
1640 $h{124} = $feature2;
1641 print $h{123}->display_name;
1643 =cut
# tie() constructor: the tied object is simply a new store, built from
# the arguments given to tie().
sub TIEHASH {
  my ($class,@args) = @_;
  return $class->new(@args);
}
# $hash{$key} = $feature.  The key must be a positive integer; it is
# set as the feature's primary_id before the feature is stored.
sub STORE {
  my ($self,$key,$feature) = @_;
  croak "keys must be positive integers" unless $key =~ /^\d+$/ && $key > 0;
  $self->load_class($feature);
  $feature->primary_id($key);
  return $self->store($feature);
}
# $hash{$key}: delegates to fetch().
sub FETCH {
  my ($self,@ids) = @_;
  return $self->fetch(@ids);
}

# First key for each()/keys(): delegates to _firstid().
sub FIRSTKEY {
  my ($self) = @_;
  return $self->_firstid;
}

# Next key after $lastkey: delegates to _nextid().
sub NEXTKEY {
  my ($self,$lastkey) = @_;
  return $self->_nextid($lastkey);
}
# exists $hash{$key}.
#
# Delegates to the adaptor hook _existsid(), which is the name the base
# class declares and documents; the original called existsid() (no
# leading underscore), a method this file does not define.
sub EXISTS {
  my $self = shift;
  my $key  = shift;
  $self->_existsid($key);
}
# delete $hash{$key}: delegates to _deleteid().
sub DELETE {
  my ($self,$key) = @_;
  return $self->_deleteid($key);
}

# %hash = (): delegates to _clearall().
sub CLEAR {
  my ($self) = @_;
  return $self->_clearall;
}

# scalar %hash: delegates to _featurecount().
sub SCALAR {
  my ($self) = @_;
  return $self->_featurecount;
}
1698 ###################### TO BE IMPLEMENTED BY ADAPTOR ##########
1700 =head2 _init_database
1702 Title : _init_database
1703 Usage : $success = $db->_init_database([$erase])
1704 Function: initialize an empty database
1705 Returns : true on success
1706 Args : optional boolean flag to erase contents of an existing database
1707 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
1709 This method is the back end for init_database(). It must be
1710 implemented by an adaptor that inherits from
1711 Bio::DB::SeqFeature::Store. It returns true on success.
1713 =cut
# Abstract: adaptors must override.
sub _init_database {
  my $self = shift;
  $self->throw_not_implemented;
}
1717 =head2 _store
1719 Title : _store
1720 Usage : $success = $db->_store($indexed,@objects)
1721 Function: store seqfeature objects into database
1722 Returns : true on success
1723 Args : a boolean flag indicating whether objects are to be indexed,
1724 and one or more objects
1725 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
1727 This method is the back end for store() and store_noindex(). It should
1728 write the seqfeature objects into the database. If indexing is
1729 requested, the features should be indexed for query and
1730 retrieval. Otherwise the features should be stored without indexing
1731 (it is not required that adaptors respect this).
1733 If the object has no primary_id (undef), then the object is written
1734 into the database and assigned a new primary_id. If the object already
1735 has a primary_id, then the system will perform an update, replacing
1736 whatever was there before.
1738 In practice, the implementation will serialize each object using the
1739 freeze() method and then store it in the database under the
1740 corresponding primary_id. The object is then updated with the
1741 primary_id.
1743 =cut
1745 # _store($indexed,@objs)
# Abstract: adaptors must override.  Receives the indexing flag
# followed by the objects to store.
sub _store {
  my ($self,$indexed,@objs) = @_;
  $self->throw_not_implemented;
}
1753 =head2 _fetch
1755 Title : _fetch
1756 Usage : $feature = $db->_fetch($primary_id)
1757 Function: fetch feature from database
1758 Returns : feature
1759 Args : primary id
1760 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
1762 This method is the back end for fetch(). It accepts a primary_id and
1763 returns a feature object. It must be implemented by the adaptor.
1765 In practice, the implementation will retrieve the serialized
1766 Bio::SeqFeatureI object from the database and pass it to the thaw()
1767 method to unserialize it and synchronize the primary_id.
1769 =cut
1771 # _fetch($id)
# Abstract: adaptors must override.
sub _fetch {
  my $self = shift;
  $self->throw_not_implemented;
}
1774 =head2 _fetch_many
1776 Title : _fetch_many
1777 Usage : $feature = $db->_fetch_many(@primary_ids)
1778 Function: fetch many features from database
1779 Returns : feature
1780 Args : primary id
1781 Status : private -- does not need to be implemented
1783 This method fetches many features specified by a list of IDs. The
1784 default implementation simply calls _fetch() once for each
1785 primary_id. Implementors can override it if needed for efficiency.
1787 =cut
1789 # _fetch_many(@ids)
1790 # this one will fall back to many calls on fetch() if you don't
1791 # override it
# Default bulk fetch: one _fetch() call per primary ID, results
# collected in order.
sub _fetch_many {
  my ($self,@ids) = @_;
  my @features;
  for my $id (@ids) {
    push @features,$self->_fetch($id);
  }
  return @features;
}
1797 =head2 _update_indexes
1799 Title : _update_indexes
1800 Usage : $success = $db->_update_indexes($feature)
1801 Function: update the indexes for a feature
1802 Returns : true on success
1803 Args : A seqfeature object
1804 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY AN ADAPTOR
1806 This method is called by reindex() to update the searchable indexes
1807 for a feature object that has changed.
1809 =cut
1811 # this is called to index a feature
# Abstract: adaptors must override.
sub _update_indexes {
  my $self = shift;
  $self->throw_not_implemented;
}
1814 =head2 _start_reindexing, _end_reindexing
1816 Title : _start_reindexing, _end_reindexing
1817 Usage : $db->_start_reindexing()
1818 $db->_end_reindexing
1819 Function: flag that a series of reindexing operations is beginning/ending
1820 Returns : true on success
1821 Args : none
1822 Status : MAY BE IMPLEMENTED BY AN ADAPTOR (optional)
1824 These methods are called by reindex() before and immediately after a
1825 series of reindexing operations. The default behavior is to do
1826 nothing, but these methods can be overridden by an adaptor in order to
1827 perform optimizations, turn off autocommits, etc.
1829 =cut
1831 # these do not necessarily have to be overridden
1832 # they are called at beginning and end of reindexing process
# Optional hooks around reindexing; no-ops by default.
sub _start_reindexing {
  return;
}

sub _end_reindexing {
  return;
}
1836 =head2 _features
1838 Title : _features
1839 Usage : @features = $db->_features(@args)
1840 Function: back end for all get_feature_by_*() queries
1841 Returns : list of features
1842 Args : see below
1843 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
1845 This is the backend for features(), get_features_by_name(),
1846 get_features_by_location(), etc. Arguments are as described for the
1847 features() method, except that only the named-argument form is
1848 recognized.
1850 =cut
1852 # bottleneck query generator
# Abstract: adaptors must override.
sub _features {
  my $self = shift;
  $self->throw_not_implemented;
}
1855 =head2 _search_attributes
1857 Title : _search_attributes
1858 Usage : @result_list = $db->_search_attributes("text search string",[$tag1,$tag2...],$limit)
1859 Function: back end for the search_attributes() method
1860 Returns : results list
1861 Args : as per search_attributes()
1862 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
1864 See search_attributes() for the format of the results list. The only
1865 difference between this and the public method is that the tag list is
1866 guaranteed to be an array reference.
1868 =cut
# Abstract: adaptors must override.
sub _search_attributes {
  my $self = shift;
  $self->throw_not_implemented;
}
1872 =head2 can_store_parentage
1874 Title : can_store_parentage
1875 Usage : $flag = $db->can_store_parentage
1876 Function: return true if this adaptor can store parent/child relationships
1877 Returns : boolean
1878 Args : none
1879 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
1881 Override this method and return true if this adaptor supports the
1882 _add_SeqFeature() and _fetch_SeqFeatures() methods, which are used for
1883 storing feature parent/child relationships in a normalized
1884 fashion. Default is false (parent/child relationships are stored in
1885 denormalized form in each feature).
1887 =cut
1889 # return true here if the storage engine is prepared to store parent/child
1890 # relationships using _add_SeqFeature and return them using _fetch_SeqFeatures
# False by default; adaptors that implement _add_SeqFeature() and
# _fetch_SeqFeatures() override this to return true.
sub can_store_parentage {
  my $self = shift;
  return;
}
1893 =head2 _add_SeqFeature
1895 Title : _add_SeqFeature
1896 Usage : $count = $db->_add_SeqFeature($parent,@children)
1897 Function: store a parent/child relationship between $parent and @children
1898 Returns : number of children successfully stored
1899 Args : parent feature and one or more children
1900 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
1902 If can_store_parentage() returns true, then some store-aware features
1903 (e.g. Bio::DB::SeqFeature) will invoke this method to store
1904 feature/subfeature relationships in a normalized table.
1906 =cut
# Optional adaptor hook; not implemented in the base class.
sub _add_SeqFeature {
  my $self = shift;
  $self->throw_not_implemented;
}
1910 =head2 _fetch_SeqFeatures
1912 Title : _fetch_SeqFeatures
1913 Usage : @children = $db->_fetch_SeqFeatures($parent_feature)
1914 Function: return the immediate subfeatures of the indicated feature
1915 Returns : list of subfeatures
1916 Args : the parent feature
1917 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTORS
1919 If can_store_parentage() returns true, then some store-aware features
1920 (e.g. Bio::DB::SeqFeature) will invoke this method to retrieve
1921 feature/subfeature relationships from the database.
1923 =cut
1925 # _fetch_SeqFeatures($parent,@list_of_child_types)
# Optional adaptor hook; not implemented in the base class.
sub _fetch_SeqFeatures {
  my $self = shift;
  $self->throw_not_implemented;
}
1928 =head2 _insert_sequence
1930 Title : _insert_sequence
1931 Usage : $success = $db->_insert_sequence($seqid,$sequence_string,$offset)
1932 Function: Inserts sequence data into the database at the indicated offset
1933 Returns : true if successful
1934 Args : see below
1935 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
1937 This is the back end for insert_sequence(). Adaptors must implement
1938 this method in order to store and retrieve nucleotide or protein
1939 sequence.
1941 =cut
# Abstract: adaptors must override.
sub _insert_sequence {
  my $self = shift;
  $self->throw_not_implemented;
}
1945 # _fetch_sequence() is similar to old dna() method
1947 =head2 _fetch_sequence
1949 Title : _fetch_sequence
1950 Usage : $sequence = $db->_fetch_sequence($seqid,$start,$end)
1951 Function: Fetch the indicated subsequence from the database
1952 Returns : The sequence string (not a Bio::PrimarySeq object!)
1953 Args : see below
1954 Status : ABSTRACT METHOD; MUST BE IMPLEMENTED BY ADAPTOR
1956 This is the back end for fetch_sequence(). Adaptors must implement
1957 this method in order to store and retrieve nucleotide or protein
1958 sequence.
1960 =cut
# Abstract: adaptors must override.
sub _fetch_sequence {
  my $self = shift;
  $self->throw_not_implemented;
}
1964 =head2 _seq_ids
1966 Title : _seq_ids
1967 Usage : @ids = $db->_seq_ids()
1968 Function: Return all sequence IDs contained in database
1969 Returns : list of sequence Ids
1970 Args : none
1971 Status : TO BE IMPLEMENTED BY ADAPTOR
1973 This method is invoked by seq_ids() to return all sequence IDs
1974 (coordinate systems) known to the database.
1976 =cut
# Abstract: adaptors must override.
sub _seq_ids {
  my $self = shift;
  $self->throw_not_implemented;
}
1980 =head2 _start_bulk_update,_finish_bulk_update
1982 Title : _start_bulk_update, _finish_bulk_update
1983 Usage : $db->_start_bulk_update
1984 $db->_finish_bulk_update
1985 Function: Activate optimizations for large number of insertions/updates
1986 Returns : nothing
1987 Args : nothing
1988 Status : OPTIONAL; MAY BE IMPLEMENTED BY ADAPTOR
1990 These are the backends for start_bulk_update() and
1991 finish_bulk_update(). The default behavior of both methods is to do
1992 nothing.
1994 =cut
1996 # Optional flags to change behavior to optimize bulk updating.
# Optional adaptor hooks for bulk loading; no-ops by default.
sub _start_bulk_update {
  return;
}

sub _finish_bulk_update {
  return;
}
2001 # for full TIE() interface - not necessary to implement in most cases
2003 =head2 Optional methods needed to implement full TIEHASH interface
2005 The core TIEHASH interface will work if just the _store() and _fetch()
2006 methods are implemented. To support the full TIEHASH interface,
2007 including support for keys(), each(), and exists(), the following
2008 methods should be implemented:
2010 =over 4
2012 =item $id = $db-E<gt>_firstid()
2014 Return the first primary ID in the database. Needed for the each()
2015 function.
2017 =item $next_id = $db-E<gt>_nextid($id)
2019 Given a primary ID, return the next primary ID in the series. Needed
2020 for the each() function.
2022 =item $boolean = $db-E<gt>_existsid($id)
2024 Returns true if the indicated primary ID is in the database. Needed
2025 for the exists() function.
2027 =item $db-E<gt>_deleteid($id)
2029 Delete the feature corresponding to the given primary ID. Needed for
2030 delete().
2032 =item $db-E<gt>_clearall()
2034 Empty the database. Needed for %tied_hash = ().
2036 =item $count = $db-E<gt>_featurecount()
2038 Return the number of features in the database. Needed for scalar
2039 %tied_hash.
2041 =back
2043 =cut
# Back ends for the full TIEHASH interface.  None is implemented in the
# base class; each throws until an adaptor overrides it.
sub _firstid {
  my $self = shift;
  $self->throw_not_implemented;
}

sub _nextid {
  my $self = shift;
  $self->throw_not_implemented;
}

sub _existsid {
  my $self = shift;
  $self->throw_not_implemented;
}

sub _deleteid {
  my $self = shift;
  $self->throw_not_implemented;
}

sub _clearall {
  my $self = shift;
  $self->throw_not_implemented;
}

sub _featurecount {
  my $self = shift;
  $self->throw_not_implemented;
}
2053 =head1 Internal Methods
2055 These methods are internal to Bio::DB::SeqFeature::Store and adaptors.
2057 =head2 new_instance
2059 Title : new_instance
2060 Usage : $db = $db->new_instance()
2061 Function: class constructor
2062 Returns : A descendent of Bio::DB::SeqFeature::Store
2063 Args : none
2064 Status : internal
2066 This method is called internally by new() to create a new
2067 uninitialized instance of Bio::DB::SeqFeature::Store. It is used
2068 internally and should not be called by application software.
2070 =cut
# Create an empty, uninitialized object. May be invoked on either a class
# name or an existing instance; in the latter case the instance's class is
# reused.
sub new_instance {
  my $proto   = shift;
  my $package = ref($proto) || $proto;
  return bless {}, $package;
}
2077 =head2 init
2079 Title : init
2080 Usage : $db->init(@args)
2081 Function: initialize object
2082 Returns : none
2083 Args : Arguments passed to new()
2084 Status : private
2086 This method is called internally by new() to initialize a
2087 newly-created object using the arguments passed to new(). It is to be
2088 overridden by Bio::DB::SeqFeature::Store adaptors.
2090 =cut
# Base-class initializer: ignores the constructor arguments and just
# applies the default settings. Returns whatever default_settings() returns.
sub init {
  my ($self) = @_;
  return $self->default_settings();
}
2097 =head2 default_settings
2099 Title : default_settings
2100 Usage : $db->default_settings()
2101 Function: set up default settings for the adaptor
2102 Returns : none
2103 Args : none
2104 Status : private
2106 This method may be overridden by adaptors. It is responsible for
2107 setting up object default settings.
2109 =cut
2112 # default settings -- set up whatever are the proper default settings
# Install the default serializer and switch subfeature indexing on.
# Returns the result of the index_subfeatures() call.
sub default_settings {
  my ($self) = @_;

  my @serializer = $self->default_serializer;
  $self->serializer(@serializer);

  return $self->index_subfeatures(1);
}
2120 =head2 default_serializer
2122 Title : default_serializer
2123 Usage : $serializer = $db->default_serializer
2124 Function: finds an available serializer
2125 Returns : the name of an available serializer
2126 Args : none
2127 Status : private
2129 This method returns the name of an available serializer module.
2131 =cut
2134 # choose a serializer
# Pick the first serializer module that can be loaded, preferring Storable
# over Data::Dumper. Croaks if neither is available.
sub default_serializer {
  my $self = shift;

  return 'Storable'     if eval { require Storable;     1 };
  return 'Data::Dumper' if eval { require Data::Dumper; 1 };

  croak "Unable to load either Storable or Data::Dumper. Please provide a serializer using -serializer";
}
2144 =head2 setting
2146 Title : setting
2147 Usage : $value = $db->setting('setting_name' [=> $new_value])
2148 Function: get/set the value of a setting
2149 Returns : the value of the current setting
2150 Args : the name of the setting and optionally a new value for the setting
2151 Status : private
2153 This is a low-level procedure for persistently storing database
2154 settings. It can be overridden by adaptors.
2156 =cut
2158 # persistent settings
2159 # by default we store in the object
# Get or set a named setting held in the object itself.
#
# Args    : the setting name, optionally followed by a new value
# Returns : the value the setting held on entry (i.e. the current value
#           for a plain get, the previous value when a new one is supplied)
#
# The value is captured before any assignment and returned explicitly;
# without the explicit return a plain get would yield the false result of
# the "if @_" modifier instead of the stored value.
sub setting {
  my $self          = shift;
  my $variable_name = shift;
  my $d             = $self->{setting}{$variable_name};
  $self->{setting}{$variable_name} = shift if @_;
  return $d;
}
2168 =head2 subfeatures_are_indexed
2170 Title : subfeatures_are_indexed
2171 Usage : $flag = $db->subfeatures_are_indexed([$new_value])
2172 Function: flag whether subfeatures are indexed
2173 Returns : a flag indicating that all subfeatures are indexed
2174 Args : (optional) new value of the flag
2175 Status : private
2177 This method is used internally by the Bio::DB::SeqFeature class to
2178 optimize some of its operations. It returns true if all of the
2179 subfeatures in the database are indexed; it returns false if at least
2180 one of the subfeatures is not indexed. Do not attempt to change the
2181 value of this setting unless you are writing an adaptor.
2183 =cut
2186 # whether subfeatures are all indexed
# Get or set the persistent 'subfeatures_are_indexed' flag via setting().
#
# Args    : (optional) new value of the flag
# Returns : the flag value read before any update
#
# The value read from setting() is returned explicitly; otherwise a plain
# get would yield the false result of the "if @_" modifier rather than the
# stored flag.
sub subfeatures_are_indexed {
  my $self = shift;
  my $d    = $self->setting('subfeatures_are_indexed');
  $self->setting(subfeatures_are_indexed => shift) if @_;
  return $d;
}
2195 =head2 subfeature_types_are_indexed
2197 Title : subfeature_types_are_indexed
2198 Usage : $flag = $db->subfeature_types_are_indexed
2199 Function: whether subfeatures are indexed by type
2200 Returns : a flag indicating that all subfeatures are indexed
2201 Args : none
2202 Status : private
2204 This method returns true if subfeature types are indexed. Default is
2205 to return the value of subfeatures_are_indexed().
2207 =cut
# By default, type indexing of subfeatures simply mirrors the general
# subfeatures_are_indexed() flag.
sub subfeature_types_are_indexed {
  my ($self) = @_;
  return $self->subfeatures_are_indexed;
}
2214 =head2 subfeature_locations_are_indexed
2216 Title : subfeature_locations_are_indexed
2217 Usage : $flag = $db->subfeature_locations_are_indexed
2218 Function: whether subfeatures are indexed by location
2219 Returns : a flag indicating that all subfeatures are indexed
2220 Args : none
2221 Status : private
2223 This method returns true if subfeature locations are indexed. Default is
2224 to return the value of subfeatures_are_indexed().
2226 =cut
# By default, location indexing of subfeatures simply mirrors the general
# subfeatures_are_indexed() flag.
sub subfeature_locations_are_indexed {
  my ($self) = @_;
  return $self->subfeatures_are_indexed;
}
2233 =head2 setup_segment_args
2235 Title : setup_segment_args
2236 Usage : @args = $db->setup_segment_args(@args)
2237 Function: munge the arguments to the segment() call
2238 Returns : munged arguments
2239 Args : see below
2240 Status : private
2242 This method is used internally by segment() to translate positional
2243 arguments into named argument=E<gt>value pairs.
2245 =cut
# Translate the positional forms accepted by segment() into named
# -argument => value pairs:
#   (name, start, end)  => -name, -start, -end
#   (class, name)       => -class, -name
#   (name)              => -name
# Arguments that already begin with a dash-prefixed key are passed through
# unchanged; any other arity yields an empty list.
sub setup_segment_args {
  my ($self, @args) = @_;

  return @args if defined $args[0] && $args[0] =~ /^-/;

  my $count = @args;
  if ($count == 3) {
    return (-name=>$args[0],-start=>$args[1],-end=>$args[2]);
  } elsif ($count == 2) {
    return (-class=>$args[0],-name=>$args[1]);
  } elsif ($count == 1) {
    return (-name=>$args[0]);
  }
  return;
}
2256 =head2 store_and_cache
2258 Title : store_and_cache
2259 Usage : $success = $db->store_and_cache($indexit,@features)
2260 Function: store features into database and update cache
2261 Returns : number of features stored
2262 Args : index flag, followed by a list of features
2263 Status : private
2265 This private method stores the list of Bio::SeqFeatureI objects into
2266 the database and caches them in memory for retrieval.
2268 =cut
# Hand the features to _store() together with the index flag, then, when an
# in-memory cache is configured, cache every feature that reports a defined
# primary_id. Returns whatever _store() returned.
sub store_and_cache {
  my $self    = shift;
  my $indexit = shift;

  my $stored = $self->_store($indexit,@_);

  my $cache = $self->cache;
  if ($cache) {
    foreach my $feature (@_) {
      # primary_id() is wrapped in eval: objects lacking it are skipped
      my $id = eval {$feature->primary_id};
      next unless defined $id;
      $cache->store($id,$feature);
    }
  }

  return $stored;
}
2283 =head2 init_cache
2285 Title : init_cache
2286 Usage : $db->init_cache($size)
2287 Function: initialize the in-memory feature cache
2288 Returns : the Tie::Cacher object
2289 Args : desired size of the cache
2290 Status : private
2292 This method is used internally by new() to create the Tie::Cacher
2293 instance used for the in-memory feature cache.
2295 =cut
# Create the Tie::Cacher instance used as the in-memory feature cache and
# remember it in the object. Throws if the cache cannot be created;
# otherwise returns the cache object.
sub init_cache {
  my ($self, $cache_size) = @_;

  # in case somebody treats it as a flag
  if ($cache_size == 1) {
    $cache_size = 5000;
  }

  my $cacher = $self->{cache} = Tie::Cacher->new($cache_size);
  $self->throw("Couldn't tie cache: $!") unless $cacher;
  return $cacher;
}
2304 =head2 cache
2306 Title : cache
2307 Usage : $cache = $db->cache
2308 Function: return the cache object
2309 Returns : the Tie::Cacher object
2310 Args : none
2311 Status : private
2313 This method returns the Tie::Cacher object used for the in-memory
2314 feature cache.
2316 =cut
# Accessor for the in-memory feature cache (undef when none was set up).
sub cache {
  my $self = shift;
  return $self->{cache};
}
2320 =head2 load_class
2322 Title : load_class
2323 Usage : $db->load_class($blessed_object)
2324 Function: loads the module corresponding to a blessed object
2325 Returns : empty
2326 Args : a blessed object
2327 Status : private
2329 This method is used by thaw() to load the code for a blessed
2330 object. This ensures that all the object's methods are available.
2332 =cut
# Make sure the module behind a blessed object has been loaded.
# Each class is examined only once per store object; a require is attempted
# only when the object does not already respond to primary_id(). Errors
# from the require are deliberately ignored.
sub load_class {
  my ($self, $obj) = @_;
  return unless defined $obj;

  my $class = ref $obj;
  return if $self->{class_loaded}{$class}++;

  unless ($obj && $obj->can('primary_id')) {
    eval "require $class";
  }
}
2346 #################################### Internal methods ####################
2348 =head2 freeze
2350 Title : freeze
2351 Usage : $serialized_object = $db->freeze($feature)
2352 Function: serialize a feature object into a string
2353 Returns : serialized feature object
2354 Args : a seqfeature object
2355 Status : private
2357 This method converts a Bio::SeqFeatureI object into a serialized form
2358 suitable for storage into a database. The feature's primary ID is set
2359 to undef before it is serialized. This avoids any potential mismatch
2360 between the primary ID used as the database key and the primary ID
2361 stored in the serialized object.
2363 =cut
# Serialize a feature object into a string suitable for database storage.
#
# Args    : a seqfeature object
# Returns : the serialized (and, if do_compress is set, compressed) data;
#           undef-based if the serializer is neither 'Data::Dumper' nor
#           'Storable'
# Throws  : rethrows any exception raised while serializing
#
# The primary ID and the object_store back-reference are removed from the
# object for the duration of the serialization and put back afterwards.
# Serialization runs inside an eval so that the object is restored to its
# original state even when the serializer dies; the error is then rethrown.
sub freeze {
  my $self = shift;
  my $obj  = shift;

  # Bio::SeqFeature::Generic contains cleanup methods, so we need to
  # localize the methods to undef temporarily so that we can serialize
  local $obj->{'_root_cleanup_methods'} if exists $obj->{'_root_cleanup_methods'};

  my ($id,$store);
  $id = $obj->primary_id();
  $obj->primary_id(undef);     # don't want primary ID to be stored in object
  eval {
    $store = $obj->object_store;
    $obj->object_store(undef); # don't want a copy of the store in the object
  };

  my $data;
  my $ok = eval {
    my $serializer = $self->serializer;
    if ($serializer eq 'Data::Dumper') {
      my $d = Data::Dumper->new([$obj]);
      $d->Terse(1);
      $d->Deepcopy(1);
      $data = $d->Dump;
    } elsif ($serializer eq 'Storable') {
      $data = Storable::nfreeze($obj);
    }
    1;
  };
  # capture the error now: the restoring eval below would clobber $@
  my $error = $@;

  $obj->primary_id($id);       # restore to original state
  eval {
    $obj->object_store($store);
  };

  die $error unless $ok;

  $data = compress($data) if $self->do_compress;
  return $data;
}
2400 =head2 thaw
2402 Title : thaw
2403 Usage : $feature = $db->thaw($serialized_object,$primary_id)
2404 Function: unserialize a string into a feature object
2405 Returns : Bio::SeqFeatureI object
2406 Args : serialized form of object from freeze() and primary_id of object
2407 Status : private
2409 This method is the reverse of the freeze(). The supplied primary_id
2410 becomes the primary_id() of the returned Bio::SeqFeatureI object. This
2411 implementation checks for a deserialized object in the cache before it
2412 calls thaw_object() to do the actual deserialization.
2414 =cut
# Turn a serialized feature back into an object, consulting the in-memory
# cache first when one is configured. Freshly thawed objects are added to
# the cache; a failed thaw returns nothing and caches nothing.
sub thaw {
  my ($self, $frozen, $primary_id) = @_;

  my $cache = $self->cache;
  return $self->thaw_object($frozen,$primary_id) unless $cache;

  return $cache->fetch($primary_id) if $cache->exists($primary_id);

  my $feature = $self->thaw_object($frozen,$primary_id) or return;
  $cache->store($primary_id,$feature);
  return $feature;
}
2431 =head2 thaw_object
2433 Title : thaw_object
2434 Usage : $feature = $db->thaw_object($serialized_object,$primary_id)
2435 Function: unserialize a string into a feature object
2436 Returns : Bio::SeqFeatureI object
2437 Args : serialized form of object from freeze() and primary_id of object
2438 Status : private
2440 After thaw() checks the cache and comes up empty, this method is
2441 invoked to thaw the object.
2443 =cut
# Deserialize a feature without consulting the cache. The data is
# uncompressed first when do_compress is set, then rebuilt according to the
# configured serializer ('Data::Dumper' or 'Storable'; anything else yields
# undef). Returns the rebuilt object.
sub thaw_object {
  my ($self, $frozen, $primary_id) = @_;

  my $kind = $self->serializer;
  $frozen  = uncompress($frozen) if $self->do_compress;

  # NOTE(review): the Data::Dumper branch string-evals the stored text, so
  # it will execute whatever Perl code the database contains.
  my $feature;
  if ($kind eq 'Data::Dumper') {
    $feature = eval $frozen;
  } elsif ($kind eq 'Storable') {
    $feature = Storable::thaw($frozen);
  }

  # remember the primary ID of this object as well as the
  # identity of the store, so that we can do lazy loading;
  # both of these are wrapped in an eval because not all
  # bioseqfeatures support them (or want to)
  $self->load_class($feature);
  eval {
    $feature->primary_id($primary_id);
    $feature->object_store($self);
  };

  return $feature;
}
2472 =head2 feature_names
2474 Title : feature_names
2475 Usage : ($names,$aliases) = $db->feature_names($feature)
2476 Function: get names and aliases for a feature
2477 Returns : a reference to an array of names and a reference to an array of aliases
2478 Args : a Bio::SeqFeatureI object
2479 Status : private
2481 This is an internal utility function which, given a Bio::SeqFeatureI
2482 object, returns two array refs. The first is a list of official names
2483 for the feature, and the second is a list of aliases. This is slightly
2484 skewed towards GFF3 usage, so the official names are the
2485 display_name(), plus all tag values named 'Name', plus all tag values
2486 named 'ID'. The aliases are all tag values named 'Alias'.
2488 =cut
# Collect the names and aliases of a feature.
#
# Args    : a feature object providing primary_id(), display_name(),
#           has_tag() and get_tag_values()
# Returns : two array references: the names (display_name plus any 'Name'
#           and 'ID' tag values, minus undefined entries and anything equal
#           to the primary ID) and the aliases (defined 'Alias' tag values)
sub feature_names {
  my $self = shift;
  my $obj  = shift;

  my $primary_id = $obj->primary_id;
  my @names = $obj->display_name;
  push @names,$obj->get_tag_values('Name') if $obj->has_tag('Name');
  push @names,$obj->get_tag_values('ID')   if $obj->has_tag('ID');
  @names = grep {defined $_ && $_ ne $primary_id} @names;

  # Declare first, then assign conditionally: "my @x = ... if COND" has
  # undefined behaviour and can leave the array shared between calls.
  my @aliases;
  @aliases = grep {defined} $obj->get_tag_values('Alias') if $obj->has_tag('Alias');

  return (\@names,\@aliases);
}
2508 __END__
2510 =head1 BUGS
2512 This is an early version, so there are certainly some bugs. Please
2513 use the BioPerl bug tracking system to report bugs.
2515 =head1 SEE ALSO
2517 L<Bio::DB::SeqFeature>,
2518 L<Bio::DB::SeqFeature::Store::GFF3Loader>,
2519 L<Bio::DB::SeqFeature::Segment>,
2520 L<Bio::DB::SeqFeature::Store::DBI::mysql>,
2521 L<Bio::DB::SeqFeature::Store::berkeleydb>,
2522 L<Bio::DB::SeqFeature::Store::memory>
2524 =head1 AUTHOR
2526 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
2528 Copyright (c) 2006 Cold Spring Harbor Laboratory.
2530 This library is free software; you can redistribute it and/or modify
2531 it under the same terms as Perl itself.
2533 =cut