3 # BioPerl module for Bio::DB::Taxonomy
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Jason Stajich <jason-at-bioperl.org>
9 # Copyright Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::DB::Taxonomy - Access to a taxonomy database
21 use Bio::DB::Taxonomy;
22 my $db = Bio::DB::Taxonomy->new(-source => 'entrez');
23 # use NCBI Entrez over HTTP
24 my $taxonid = $db->get_taxonid('Homo sapiens');
27 my $taxon = $db->get_taxon(-taxonid => $taxonid);
31 This is a front end module for access to a taxonomy database.
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 Please direct usage questions or support issues to the mailing list:
48 I<bioperl-l@bioperl.org>
50 rather than to the module maintainer directly. Many experienced and
51 reponsive experts will be able look at the problem and quickly
52 address it. Please include a thorough description of the problem
53 with code and data examples if at all possible.
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via
61 http://bugzilla.open-bio.org/
63 =head1 AUTHOR - Jason Stajich
65 Email jason-at-bioperl.org
69 Sendu Bala: bix@sendu.me.uk
73 The rest of the documentation details each of the object methods.
74 Internal methods are usually preceded with a _
78 # Let the code begin...
80 package Bio
::DB
::Taxonomy
;
81 use vars
qw($DefaultSource $TAXON_IIDS);
85 use base qw(Bio::Root::Root);
87 $DefaultSource = 'entrez';
93 Usage : my $obj = Bio::DB::Taxonomy->new(-source => 'entrez');
94 Function: Builds a new Bio::DB::Taxonomy object.
95 Returns : an instance of Bio::DB::Taxonomy
96 Args : -source => which database source 'entrez' or 'flatfile' or 'list'
101 my($class,@args) = @_;
103 if( $class =~ /Bio::DB::Taxonomy::(\S+)/ ) {
104 my ($self) = $class->SUPER::new
(@args);
105 $self->_initialize(@args);
109 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
110 my $source = $param{'-source'} || $DefaultSource;
112 $source = "\L$source"; # normalize capitalization to lower case
114 # normalize capitalization
115 return unless( $class->_load_tax_module($source) );
116 return "Bio::DB::Taxonomy::$source"->new(@args);
126 Usage : my $taxon = $db->get_taxon(-taxonid => $taxonid)
127 Function: Get a Bio::Taxon object from the database.
128 Returns : Bio::Taxon object
129 Args : just a single value which is the database id, OR named args:
130 -taxonid => taxonomy id (to query by taxonid)
132 -name => string (to query by a taxonomy name: common name,
133 scientific name, etc)
138 shift->throw_not_implemented();
141 *get_Taxonomy_Node
= \
&get_taxon
;
146 Usage : my @taxonids = $db->get_taxonids('Homo sapiens');
147 Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
148 string. Note that multiple taxonids can match to the same supplied
150 Returns : array of integer ids in list context, one of these in scalar context
151 Args : string representing taxon's name
156 shift->throw_not_implemented();
159 *get_taxonid
= \
&get_taxonids
;
160 *get_taxaid
= \
&get_taxonids
;
165 Usage : my $tree = $db->get_tree(@species_names)
166 Function: Generate a tree comprised of the full lineages of all the supplied
167 species names. The nodes for the requested species are given
168 name('supplied') values corresponding to the supplied name, such that
169 they can be identified if the real species name in the database
170 (stored under node_name()) is different.
171 Returns : Bio::Tree::Tree
172 Args : a list of species names (strings)
177 my ($self, @species_names) = @_;
179 # the full lineages of the species are merged into a single tree
181 foreach my $name (@species_names) {
182 my $ncbi_id = $self->get_taxonid($name);
184 my $node = $self->get_taxon(-taxonid
=> $ncbi_id);
185 $node->name('supplied', $name);
188 $tree->merge_lineage($node);
191 $tree = Bio
::Tree
::Tree
->new(-verbose
=> $self->verbose, -node
=> $node);
195 $self->throw("No taxonomy database node for species ".$name);
205 Usage : my $ancestor_taxon = $db->ancestor($taxon)
206 Function: Retrieve the full ancestor taxon of a supplied Taxon from the
209 Args : Bio::Taxon (that was retrieved from this database)
214 shift->throw_not_implemented();
217 =head2 each_Descendent
219 Title : each_Descendent
220 Usage : my @taxa = $db->each_Descendent($taxon);
221 Function: Get all the descendents of the supplied Taxon (but not their
222 descendents, ie. not a recursive fetchall).
223 Returns : Array of Bio::Taxon objects
224 Args : Bio::Taxon (that was retrieved from this database)
228 sub each_Descendent
{
229 shift->throw_not_implemented();
232 =head2 get_all_Descendents
234 Title : get_all_Descendents
235 Usage : my @taxa = $db->get_all_Descendents($taxon);
236 Function: Like each_Descendent(), but do a recursive fetchall
237 Returns : Array of Bio::Taxon objects
238 Args : Bio::Taxon (that was retrieved from this database)
242 sub get_all_Descendents
{
243 my ($self, $taxon) = @_;
245 foreach my $desc_taxon ($self->each_Descendent($taxon)) {
246 push @taxa, ($desc_taxon, $self->get_all_Descendents($desc_taxon));
251 =head2 _load_tax_module
253 Title : _load_tax_module
254 Usage : *INTERNAL Bio::DB::Taxonomy stuff*
255 Function: Loads up (like use) a module at run time on demand
259 sub _load_tax_module
{
260 my ($self, $source) = @_;
261 my $module = "Bio::DB::Taxonomy::" . $source;
264 eval { $ok = $self->_load_module($module) };
268 $self: $source cannot be found
270 For more information about the Bio::DB::Taxonomy system please see
271 the Bio::DB::Taxonomy docs. This includes ways of checking for
272 formats at compile time, not run time.
279 =head2 _handle_internal_id
281 Title : _handle_internal_id
282 Usage : *INTERNAL Bio::DB::Taxonomy stuff*
283 Function: Tries to ensure that when a taxon is requested from any database,
284 the Taxon object returned will have the same internal id regardless
286 Args : Bio::Taxon, and optionally true value to try and do the job using
287 scientific name & rank if your ids aren't comparable to other dbs.
291 sub _handle_internal_id
{
292 my ($self, $taxon, $try_name) = @_;
293 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
294 my $taxid = $taxon->id || return;
295 my $sci_name = $taxon->scientific_name || '';
296 my $rank = $taxon->rank || 'no rank';
298 if ($try_name && $sci_name && defined $TAXON_IIDS->{names
}->{$sci_name}) {
299 if (defined $TAXON_IIDS->{names
}->{$sci_name}->{$rank}) {
300 $TAXON_IIDS->{taxids
}->{$taxid} = $TAXON_IIDS->{names
}->{$sci_name}->{$rank};
302 elsif ($rank eq 'no rank') {
303 # pick the internal id of one named rank taxa at random
304 my ($iid) = values %{$TAXON_IIDS->{names
}->{$sci_name}};
305 $TAXON_IIDS->{taxids
}->{$taxid} = $iid;
309 if (defined $TAXON_IIDS->{taxids
}->{$taxid}) {
310 # a little dangerous to use this internal method of Bio::Tree::Node;
311 # but it is how internal_id() is set
312 $taxon->_creation_id($TAXON_IIDS->{taxids
}->{$taxid});
315 $TAXON_IIDS->{taxids
}->{$taxid} = $taxon->internal_id;
316 $TAXON_IIDS->{names
}->{$sci_name}->{$rank} = $taxon->internal_id if $sci_name;