nexml.t: Added a missing XML::Twig requirement.
[bioperl-live.git] / Bio / Taxonomy.pm
blob90419a16a366759eef507e3e29293ef8c49c0b03
2 # BioPerl module for Bio::Taxonomy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Juguang Xiao
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::Taxonomy - representing Taxonomy.
16 =head1 SYNOPSIS
18 # NB: This module is deprecated. Use Bio::Taxon in combination with
19 # Bio::Tree::Tree methods instead.
21 use Bio::Taxonomy;
23 # CREATION: You can either create an instance by assigning it,
24 # or fetch it through factory.
26 # Create the nodes first. See Bio::Taxonomy::Node for details.
27 my $node_species_sapiens = Bio::Taxonomy::Node->new(
28 -object_id => 9606, # or -ncbi_taxid. Requird tag
29 -names => {
30 'scientific' => ['sapiens'],
31 'common_name' => ['human']
33 -rank => 'species' # Required tag
35 my $node_genus_Homo = Bio::Taxonomy::Node->new(
36 -object_id => 9605,
37 -names => { 'scientific' => ['Homo'] },
38 -rank => 'genus'
40 my $node_class_Mammalia = Bio::Taxonomy::Node->new(
41 -object_id => 40674,
42 -names => {
43 'scientific' => ['Mammalia'],
44 'common' => ['mammals']
46 -rank => 'class'
48 my $taxonomy = Bio::Taxonomy->new;
49 $taxonomy->add_node($node_class_Mammalia);
50 $taxonomy->add_node($node_species_sapiens);
51 $taxonomy->add_node($node_genus_Homo);
53 # OR you can fetch it through a factory implementing
54 # Bio::Taxonomy::FactoryI
55 my $factory;
57 my $taxonomy = $factory->fetch_by_ncbi_taxid(40674);
59 # USAGE
61 # In this case, binomial returns a defined value.
62 my $binomial = $taxonomy->binomial;
64 # 'common_names' refers to the lowest-rank node's common names, in
65 # array.
66 my @common_names = $taxonomy->common_names;
68 # 'get_node', will return undef if the rank is no defined in
69 # taxonomy object. It will throw error if the rank string is not
70 # defined, say 'species lah'.
71 my $node = $taxonomy->get_node('class');
72 my @nodes = $taxonomy->get_all_nodes;
74 # Also, you can search for parent and children nodes, if taxonomy
75 # comes with factory.
77 my $parent_taxonomy = $taxonomy->get_parent
79 =head1 DESCRIPTION
81 Bio::Taxonomy object represents any rank-level in taxonomy system,
82 rather than Bio::Species which is able to represent only
83 species-level.
85 There are two ways to create Taxonomy object, e.g.
86 1) instantiate an object and assign all nodes on your own code; and
87 2) fetch an object by factory.
89 =head2 Creation by instantiation
91 The abstraction of Taxonomy is actually a hash in data structure
92 term. The keys of the hash are the rank names, such as 'genus' and
93 'species', and the values are the instances of Bio::Taxonomy::Node.
95 =head2 Creation by Factory fetching
97 NCBI Taxonomy system is well accepted as the standard. The Taxonomy
98 Factories in bioperl access this system, through HTTP to NCBI Entrez,
99 dump file, and advanced biosql database.
101 Bio::Taxonomy::FactoryI defines all methods that all implementations
102 must obey.
104 $factory-E<gt>fetch is a general method to fetch Taxonomy by either
105 NCBI taxid or any types of names.
107 $factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
108 one-step higher rank of the taxonomy specified as argument.
110 $factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
111 those are one-step lower rank of the taxonomy specified as the
112 argument.
114 =head2 Usage of Taxonomy object
118 =head1 FEEDBACK
120 =head2 Mailing Lists
122 User feedback is an integral part of the evolution of this and other
123 Bioperl modules. Send your comments and suggestions preferably to
124 the Bioperl mailing list. Your participation is much appreciated.
126 bioperl-l@bioperl.org - General discussion
127 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
129 =head2 Support
131 Please direct usage questions or support issues to the mailing list:
133 I<bioperl-l@bioperl.org>
135 rather than to the module maintainer directly. Many experienced and
136 reponsive experts will be able look at the problem and quickly
137 address it. Please include a thorough description of the problem
138 with code and data examples if at all possible.
140 =head2 Reporting Bugs
142 Report bugs to the Bioperl bug tracking system to help us keep track
143 of the bugs and their resolution. Bug reports can be submitted via the
144 web:
146 https://github.com/bioperl/bioperl-live/issues
148 =head1 CONTACT
150 Juguang Xiao, juguang@tll.org.sg
152 =head1 APPENDIX
154 The rest of the documentation details each of the object
155 methods. Internal methods are usually preceded with a _
157 =cut
160 # code begins...
163 package Bio::Taxonomy;
164 use strict;
167 use base qw(Bio::Root::Root);
170 =head2 new
172 Title : new
173 Usage : my $obj = Bio::Taxonomy->new();
174 Function: Builds a new Bio::Taxonomy object
175 Returns : Bio::Taxonomy
176 Args : -method -> method used to decide classification
177 (none|trust|lookup)
178 -ranks -> what ranks are there
180 =cut
183 sub new {
184 my ($class,@args) = @_;
186 my $self = $class->SUPER::new(@args);
187 $self->warn("Bio::Taxonomy is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
189 $self->{'_method'}='none';
190 $self->{'_ranks'}=[];
191 $self->{'_rank_hash'}={};
192 $self->{_hierarchy} = {}; # used to store the nodes, with ranks as keys.
193 my ($method,$ranks,$order) = $self->_rearrange([qw(METHOD RANKS ORDER)], @args);
195 if ($method) {
196 $self->method($method);
199 if (defined $ranks &&
200 (ref($ranks) eq "ARRAY") ) {
201 $self->ranks(@$ranks);
202 } else {
203 # default ranks
204 # I think these are in the right order, but not sure:
205 # some parvorder|suborder and varietas|subspecies seem
206 # to be at the same level - any taxonomists?
207 # I don't expect that these will actually be used except as a way
208 # to find what ranks there are in taxonomic use
209 $self->ranks(('root',
210 'superkingdom', 'kingdom',
211 'superphylum', 'phylum', 'subphylum',
212 'superclass', 'class', 'subclass', 'infraclass',
213 'superorder', 'order', 'suborder', 'parvorder', 'infraorder',
214 'superfamily', 'family', 'subfamily',
215 'tribe', 'subtribe',
216 'genus', 'subgenus',
217 'species group', 'species subgroup', 'species', 'subspecies',
218 'varietas', 'forma', 'no rank'));
221 return $self;
225 =head2 method
227 Title : method
228 Usage : $obj = taxonomy->method($method);
229 Function: set or return the method used to decide classification
230 Returns : $obj
231 Args : $obj
233 =cut
236 sub method {
237 my ($self,$value) = @_;
238 if (defined $value && $value=~/none|trust|lookup/) {
239 $self->{'_method'} = $value;
241 return $self->{'_method'};
245 =head2 classify
247 Title : classify
248 Usage : @obj[][0-1] = taxonomy->classify($species);
249 Function: return a ranked classification
250 Returns : @obj of taxa and ranks as word pairs separated by "@"
251 Args : Bio::Species object
253 =cut
256 sub classify {
257 my ($self,$value) = @_;
258 my @ranks;
260 if (! $value->isa('Bio::Species') ) {
261 $self->throw("Trying to classify $value which is not a Bio::Species object");
264 my @classes=reverse($value->classification);
266 if ($self->method eq 'none') {
267 for (my $i=0; $i < @classes-2; $i++) {
268 ($ranks[$i][0],$ranks[$i][1])=($classes[$i],'no rank');
270 push @ranks,[$classes[-2],'genus'];
271 push @ranks,[$value->binomial,'species'];
272 } elsif ($self->method eq 'trust') {
273 if (scalar(@classes)==scalar($self->ranks)) {
274 for (my $i=0; $i < @classes; $i++) {
275 if ($self->rank_of_number($i) eq 'species') {
276 push @ranks,[$value->binomial,$self->rank_of_number($i)];
277 } else {
278 push @ranks,[$classes[$i],$self->rank_of_number($i)];
281 } else {
282 $self->throw("Species object and taxonomy object cannot be reconciled");
284 } elsif ($self->method eq 'lookup') {
285 # this will lookup a DB for the rank of a taxon name
286 # I imagine that some kind of Bio::DB class will be need to
287 # be given to the taxonomy object to act as an DB interface
288 # (I'm not sure how useful this is though - if you have a DB of
289 # taxonomy - why would you be doing things this way?)
290 $self->throw_not_implemented();
293 return @ranks;
297 =head2 level_of_rank
299 Title : level_of_rank
300 Usage : $obj = taxonomy->level_of_rank($obj);
301 Function: returns the level of a rank name
302 Returns : $obj
303 Args : $obj
305 =cut
308 sub level_of {
309 my ($self,$value) = @_;
311 return $self->{'_rank_hash'}{$value};
315 =head2 rank_of_number
317 Title : rank_of_number
318 Usage : $obj = taxonomy->rank_of_number($obj);
319 Function: returns the rank name of a rank level
320 Returns : $obj
321 Args : $obj
323 =cut
326 sub rank_of_number {
327 my ($self,$value) = @_;
329 return ${$self->{'_ranks'}}[$value];
333 =head2 ranks
335 Title : ranks
336 Usage : @obj = taxonomy->ranks(@obj);
337 Function: set or return all ranks
338 Returns : @obj
339 Args : @obj
341 =cut
344 sub ranks {
345 my ($self,@value) = @_;
347 # currently this makes no uniqueness sanity check (this should be done)
348 # I am think that adding a way of converting multiple 'no rank' ranks
349 # to unique 'no rank #' ranks so that the level of a 'no rank' is
350 # abstracted way from the user - I'm not sure of the value of this
352 if (@value) {
353 $self->{'_ranks'}=\@value;
356 for (my $i=0; $i <= @{$self->{'_ranks'}}-1; $i++) {
357 $self->{'_rank_hash'}{$self->{'_ranks'}[$i]}=$i unless $self->{'_ranks'}[$i] eq 'no rank';
360 return @{$self->{'_ranks'}};
363 =head2 add_node
365 Title: add_node
366 Usage: $obj->add_node($node[, $node2, ...]);
367 Function: add one or more Bio::Taxonomy::Node objects
368 Returns: None
369 Args: any number of Bio::Taxonomy::Node(s)
371 =cut
373 sub add_node {
374 my ($self, @nodes) = @_;
375 foreach(@nodes){
376 $self->throw("A Bio::Taxonomy::Node object needed")
377 unless($_->isa('Bio::Taxonomy::Node'));
378 my ($node, $rank) = ($_, $_->rank);
379 if(exists $self->{_hierarchy}->{$rank}){
380 # $self->throw("$rank has been defined");
381 # print STDERR "RANK:$rank\n";
382 # return;
384 $self->{_hierarchy}->{$rank} = $node;
388 =head2 binomial
390 Title : binomial
391 Usage : my $val = $obj->binomial;
392 Function: returns the binomial name if this taxonomy reachs species level
393 Returns : the binomial name
394 OR undef if taxonmy does not reach species level
395 Args : [No arguments]
397 =cut
399 sub binomial {
400 my $self = shift;
401 return $self->get_node('species')->scientific_name;
402 my $genus = $self->get_node('genus');
403 my $species = $self->get_node('species');
404 return ($species && $genus) ? "$species $genus" : undef;
407 =head2 get_node
409 Title : get_node
410 Usage : $node = $taxonomy->get_node('species');
411 Function: get a Bio::Taxonomy::Node object according to rank name
412 Returns : a Bio::Taxonomy::Node object or undef if null
413 Args : a vaild rank name
415 =cut
417 sub get_node {
418 my ($self, $rank) = @_;
419 unless(grep /$rank/, keys %{$self->{_hierarchy}}){
420 $self->throw("'$rank' is not in the rank list");
422 return (exists $self->{_hierarchy}->{$rank})?
423 $self->{_hierarchy}->{$rank} : undef;
426 =head2 classification
428 Title : classification
429 Usage : @names = $taxonomy->classification;
430 Function: get the classification names of one taxonomy
431 Returns : array of names
432 Args : [No arguments]
434 =cut
436 sub classification {
437 my $self = shift;
438 my %rank_hash = %{$self->{_rank_hash}};
439 my %hierarchy = %{$self->{_hierarchy}};
440 my @ordered_nodes = sort {
441 ($rank_hash{$a} <=> $rank_hash{$b})
442 } keys %hierarchy;
443 return map {$hierarchy{$_}->scientific_name} @ordered_nodes;