3 # BioPerl module for Bio::Taxonomy
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Juguang Xiao
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Taxonomy - representing Taxonomy.
19 # NB: This module is deprecated. Use Bio::Taxon in combination with
20 # Bio::Tree::Tree methods instead.
24 # CREATION: You can either create an instance by assigning it,
25 # or fetch it through factory.
27 # Create the nodes first. See Bio::Taxonomy::Node for details.
28 my $node_species_sapiens = Bio::Taxonomy::Node->new(
29 -object_id => 9606, # or -ncbi_taxid. Requird tag
31 'scientific' => ['sapiens'],
32 'common_name' => ['human']
34 -rank => 'species' # Required tag
36 my $node_genus_Homo = Bio::Taxonomy::Node->new(
38 -names => { 'scientific' => ['Homo'] },
41 my $node_class_Mammalia = Bio::Taxonomy::Node->new(
44 'scientific' => ['Mammalia'],
45 'common' => ['mammals']
49 my $taxonomy = Bio::Taxonomy->new;
50 $taxonomy->add_node($node_class_Mammalia);
51 $taxonomy->add_node($node_species_sapiens);
52 $taxonomy->add_node($node_genus_Homo);
54 # OR you can fetch it through a factory implementing
55 # Bio::Taxonomy::FactoryI
58 my $taxonomy = $factory->fetch_by_ncbi_taxid(40674);
62 # In this case, binomial returns a defined value.
63 my $binomial = $taxonomy->binomial;
65 # 'common_names' refers to the lowest-rank node's common names, in
67 my @common_names = $taxonomy->common_names;
69 # 'get_node', will return undef if the rank is no defined in
70 # taxonomy object. It will throw error if the rank string is not
71 # defined, say 'species lah'.
72 my $node = $taxonomy->get_node('class');
73 my @nodes = $taxonomy->get_all_nodes;
75 # Also, you can search for parent and children nodes, if taxonomy
78 my $parent_taxonomy = $taxonomy->get_parent
82 Bio::Taxonomy object represents any rank-level in taxonomy system,
83 rather than Bio::Species which is able to represent only
86 There are two ways to create Taxonomy object, e.g.
87 1) instantiate an object and assign all nodes on your own code; and
88 2) fetch an object by factory.
90 =head2 Creation by instantiation
92 The abstraction of Taxonomy is actually a hash in data structure
93 term. The keys of the hash are the rank names, such as 'genus' and
94 'species', and the values are the instances of Bio::Taxonomy::Node.
96 =head2 Creation by Factory fetching
98 NCBI Taxonomy system is well accepted as the standard. The Taxonomy
99 Factories in bioperl access this system, through HTTP to NCBI Entrez,
100 dump file, and advanced biosql database.
102 Bio::Taxonomy::FactoryI defines all methods that all implementations
105 $factory-E<gt>fetch is a general method to fetch Taxonomy by either
106 NCBI taxid or any types of names.
108 $factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
109 one-step higher rank of the taxonomy specified as argument.
111 $factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
112 those are one-step lower rank of the taxonomy specified as the
115 =head2 Usage of Taxonomy object
123 User feedback is an integral part of the evolution of this and other
124 Bioperl modules. Send your comments and suggestions preferably to
125 the Bioperl mailing list. Your participation is much appreciated.
127 bioperl-l@bioperl.org - General discussion
128 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
132 Please direct usage questions or support issues to the mailing list:
134 I<bioperl-l@bioperl.org>
136 rather than to the module maintainer directly. Many experienced and
137 reponsive experts will be able look at the problem and quickly
138 address it. Please include a thorough description of the problem
139 with code and data examples if at all possible.
141 =head2 Reporting Bugs
143 Report bugs to the Bioperl bug tracking system to help us keep track
144 of the bugs and their resolution. Bug reports can be submitted via the
147 http://bugzilla.open-bio.org/
151 Juguang Xiao, juguang@tll.org.sg
155 The rest of the documentation details each of the object
156 methods. Internal methods are usually preceded with a _
164 package Bio
::Taxonomy
;
168 use base
qw(Bio::Root::Root);
174 Usage : my $obj = Bio::Taxonomy->new();
175 Function: Builds a new Bio::Taxonomy object
176 Returns : Bio::Taxonomy
177 Args : -method -> method used to decide classification
179 -ranks -> what ranks are there
185 my ($class,@args) = @_;
187 my $self = $class->SUPER::new
(@args);
188 $self->warn("Bio::Taxonomy is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
190 $self->{'_method'}='none';
191 $self->{'_ranks'}=[];
192 $self->{'_rank_hash'}={};
193 $self->{_hierarchy
} = {}; # used to store the nodes, with ranks as keys.
194 my ($method,$ranks,$order) = $self->_rearrange([qw(METHOD RANKS ORDER)], @args);
197 $self->method($method);
200 if (defined $ranks &&
201 (ref($ranks) eq "ARRAY") ) {
202 $self->ranks(@
$ranks);
205 # I think these are in the right order, but not sure:
206 # some parvorder|suborder and varietas|subspecies seem
207 # to be at the same level - any taxonomists?
208 # I don't expect that these will actually be used except as a way
209 # to find what ranks there are in taxonomic use
210 $self->ranks(('root',
211 'superkingdom', 'kingdom',
212 'superphylum', 'phylum', 'subphylum',
213 'superclass', 'class', 'subclass', 'infraclass',
214 'superorder', 'order', 'suborder', 'parvorder', 'infraorder',
215 'superfamily', 'family', 'subfamily',
218 'species group', 'species subgroup', 'species', 'subspecies',
219 'varietas', 'forma', 'no rank'));
229 Usage : $obj = taxonomy->method($method);
230 Function: set or return the method used to decide classification
238 my ($self,$value) = @_;
239 if (defined $value && $value=~/none|trust|lookup/) {
240 $self->{'_method'} = $value;
242 return $self->{'_method'};
249 Usage : @obj[][0-1] = taxonomy->classify($species);
250 Function: return a ranked classification
251 Returns : @obj of taxa and ranks as word pairs separated by "@"
252 Args : Bio::Species object
258 my ($self,$value) = @_;
261 if (! $value->isa('Bio::Species') ) {
262 $self->throw("Trying to classify $value which is not a Bio::Species object");
265 my @classes=reverse($value->classification);
267 if ($self->method eq 'none') {
268 for (my $i=0; $i < @classes-2; $i++) {
269 ($ranks[$i][0],$ranks[$i][1])=($classes[$i],'no rank');
271 push @ranks,[$classes[-2],'genus'];
272 push @ranks,[$value->binomial,'species'];
273 } elsif ($self->method eq 'trust') {
274 if (scalar(@classes)==scalar($self->ranks)) {
275 for (my $i=0; $i < @classes; $i++) {
276 if ($self->rank_of_number($i) eq 'species') {
277 push @ranks,[$value->binomial,$self->rank_of_number($i)];
279 push @ranks,[$classes[$i],$self->rank_of_number($i)];
283 $self->throw("Species object and taxonomy object cannot be reconciled");
285 } elsif ($self->method eq 'lookup') {
286 # this will lookup a DB for the rank of a taxon name
287 # I imagine that some kind of Bio::DB class will be need to
288 # be given to the taxonomy object to act as an DB interface
289 # (I'm not sure how useful this is though - if you have a DB of
290 # taxonomy - why would you be doing things this way?)
291 $self->throw_not_implemented();
300 Title : level_of_rank
301 Usage : $obj = taxonomy->level_of_rank($obj);
302 Function: returns the level of a rank name
310 my ($self,$value) = @_;
312 return $self->{'_rank_hash'}{$value};
316 =head2 rank_of_number
318 Title : rank_of_number
319 Usage : $obj = taxonomy->rank_of_number($obj);
320 Function: returns the rank name of a rank level
328 my ($self,$value) = @_;
330 return ${$self->{'_ranks'}}[$value];
337 Usage : @obj = taxonomy->ranks(@obj);
338 Function: set or return all ranks
346 my ($self,@value) = @_;
348 # currently this makes no uniqueness sanity check (this should be done)
349 # I am think that adding a way of converting multiple 'no rank' ranks
350 # to unique 'no rank #' ranks so that the level of a 'no rank' is
351 # abstracted way from the user - I'm not sure of the value of this
354 $self->{'_ranks'}=\
@value;
357 for (my $i=0; $i <= @
{$self->{'_ranks'}}-1; $i++) {
358 $self->{'_rank_hash'}{$self->{'_ranks'}[$i]}=$i unless $self->{'_ranks'}[$i] eq 'no rank';
361 return @
{$self->{'_ranks'}};
367 Usage: $obj->add_node($node[, $node2, ...]);
368 Function: add one or more Bio::Taxonomy::Node objects
370 Args: any number of Bio::Taxonomy::Node(s)
375 my ($self, @nodes) = @_;
377 $self->throw("A Bio::Taxonomy::Node object needed")
378 unless($_->isa('Bio::Taxonomy::Node'));
379 my ($node, $rank) = ($_, $_->rank);
380 if(exists $self->{_hierarchy
}->{$rank}){
381 # $self->throw("$rank has been defined");
382 # print STDERR "RANK:$rank\n";
385 $self->{_hierarchy
}->{$rank} = $node;
392 Usage : my $val = $obj->binomial;
393 Function: returns the binomial name if this taxonomy reachs species level
394 Returns : the binomial name
395 OR undef if taxonmy does not reach species level
396 Args : [No arguments]
402 return $self->get_node('species')->scientific_name;
403 my $genus = $self->get_node('genus');
404 my $species = $self->get_node('species');
405 return ($species && $genus) ?
"$species $genus" : undef;
411 Usage : $node = $taxonomy->get_node('species');
412 Function: get a Bio::Taxonomy::Node object according to rank name
413 Returns : a Bio::Taxonomy::Node object or undef if null
414 Args : a vaild rank name
419 my ($self, $rank) = @_;
420 unless(grep /$rank/, keys %{$self->{_hierarchy
}}){
421 $self->throw("'$rank' is not in the rank list");
423 return (exists $self->{_hierarchy
}->{$rank})?
424 $self->{_hierarchy
}->{$rank} : undef;
427 =head2 classification
429 Title : classification
430 Usage : @names = $taxonomy->classification;
431 Function: get the classification names of one taxonomy
432 Returns : array of names
433 Args : [No arguments]
439 my %rank_hash = %{$self->{_rank_hash
}};
440 my %hierarchy = %{$self->{_hierarchy
}};
441 my @ordered_nodes = sort {
442 ($rank_hash{$a} <=> $rank_hash{$b})
444 return map {$hierarchy{$_}->scientific_name} @ordered_nodes;