tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Taxonomy.pm
blobd816fedf1b85f02ea3b397db89c9924ccb8411b8
1 # $Id$
3 # BioPerl module for Bio::Taxonomy
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Juguang Xiao
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Taxonomy - representing Taxonomy.
17 =head1 SYNOPSIS
19 # NB: This module is deprecated. Use Bio::Taxon in combination with
20 # Bio::Tree::Tree methods instead.
22 use Bio::Taxonomy;
24 # CREATION: You can either create an instance by assigning it,
25 # or fetch it through factory.
27 # Create the nodes first. See Bio::Taxonomy::Node for details.
28 my $node_species_sapiens = Bio::Taxonomy::Node->new(
29 -object_id => 9606, # or -ncbi_taxid. Requird tag
30 -names => {
31 'scientific' => ['sapiens'],
32 'common_name' => ['human']
34 -rank => 'species' # Required tag
36 my $node_genus_Homo = Bio::Taxonomy::Node->new(
37 -object_id => 9605,
38 -names => { 'scientific' => ['Homo'] },
39 -rank => 'genus'
41 my $node_class_Mammalia = Bio::Taxonomy::Node->new(
42 -object_id => 40674,
43 -names => {
44 'scientific' => ['Mammalia'],
45 'common' => ['mammals']
47 -rank => 'class'
49 my $taxonomy = Bio::Taxonomy->new;
50 $taxonomy->add_node($node_class_Mammalia);
51 $taxonomy->add_node($node_species_sapiens);
52 $taxonomy->add_node($node_genus_Homo);
54 # OR you can fetch it through a factory implementing
55 # Bio::Taxonomy::FactoryI
56 my $factory;
58 my $taxonomy = $factory->fetch_by_ncbi_taxid(40674);
60 # USAGE
62 # In this case, binomial returns a defined value.
63 my $binomial = $taxonomy->binomial;
65 # 'common_names' refers to the lowest-rank node's common names, in
66 # array.
67 my @common_names = $taxonomy->common_names;
69 # 'get_node', will return undef if the rank is no defined in
70 # taxonomy object. It will throw error if the rank string is not
71 # defined, say 'species lah'.
72 my $node = $taxonomy->get_node('class');
73 my @nodes = $taxonomy->get_all_nodes;
75 # Also, you can search for parent and children nodes, if taxonomy
76 # comes with factory.
78 my $parent_taxonomy = $taxonomy->get_parent
80 =head1 DESCRIPTION
82 Bio::Taxonomy object represents any rank-level in taxonomy system,
83 rather than Bio::Species which is able to represent only
84 species-level.
86 There are two ways to create Taxonomy object, e.g.
87 1) instantiate an object and assign all nodes on your own code; and
88 2) fetch an object by factory.
90 =head2 Creation by instantiation
92 The abstraction of Taxonomy is actually a hash in data structure
93 term. The keys of the hash are the rank names, such as 'genus' and
94 'species', and the values are the instances of Bio::Taxonomy::Node.
96 =head2 Creation by Factory fetching
98 NCBI Taxonomy system is well accepted as the standard. The Taxonomy
99 Factories in bioperl access this system, through HTTP to NCBI Entrez,
100 dump file, and advanced biosql database.
102 Bio::Taxonomy::FactoryI defines all methods that all implementations
103 must obey.
105 $factory-E<gt>fetch is a general method to fetch Taxonomy by either
106 NCBI taxid or any types of names.
108 $factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
109 one-step higher rank of the taxonomy specified as argument.
111 $factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
112 those are one-step lower rank of the taxonomy specified as the
113 argument.
115 =head2 Usage of Taxonomy object
119 =head1 FEEDBACK
121 =head2 Mailing Lists
123 User feedback is an integral part of the evolution of this and other
124 Bioperl modules. Send your comments and suggestions preferably to
125 the Bioperl mailing list. Your participation is much appreciated.
127 bioperl-l@bioperl.org - General discussion
128 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
130 =head2 Support
132 Please direct usage questions or support issues to the mailing list:
134 I<bioperl-l@bioperl.org>
136 rather than to the module maintainer directly. Many experienced and
137 reponsive experts will be able look at the problem and quickly
138 address it. Please include a thorough description of the problem
139 with code and data examples if at all possible.
141 =head2 Reporting Bugs
143 Report bugs to the Bioperl bug tracking system to help us keep track
144 of the bugs and their resolution. Bug reports can be submitted via the
145 web:
147 http://bugzilla.open-bio.org/
149 =head1 CONTACT
151 Juguang Xiao, juguang@tll.org.sg
153 =head1 APPENDIX
155 The rest of the documentation details each of the object
156 methods. Internal methods are usually preceded with a _
158 =cut
161 # code begins...
164 package Bio::Taxonomy;
165 use strict;
168 use base qw(Bio::Root::Root);
171 =head2 new
173 Title : new
174 Usage : my $obj = Bio::Taxonomy->new();
175 Function: Builds a new Bio::Taxonomy object
176 Returns : Bio::Taxonomy
177 Args : -method -> method used to decide classification
178 (none|trust|lookup)
179 -ranks -> what ranks are there
181 =cut
184 sub new {
185 my ($class,@args) = @_;
187 my $self = $class->SUPER::new(@args);
188 $self->warn("Bio::Taxonomy is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
190 $self->{'_method'}='none';
191 $self->{'_ranks'}=[];
192 $self->{'_rank_hash'}={};
193 $self->{_hierarchy} = {}; # used to store the nodes, with ranks as keys.
194 my ($method,$ranks,$order) = $self->_rearrange([qw(METHOD RANKS ORDER)], @args);
196 if ($method) {
197 $self->method($method);
200 if (defined $ranks &&
201 (ref($ranks) eq "ARRAY") ) {
202 $self->ranks(@$ranks);
203 } else {
204 # default ranks
205 # I think these are in the right order, but not sure:
206 # some parvorder|suborder and varietas|subspecies seem
207 # to be at the same level - any taxonomists?
208 # I don't expect that these will actually be used except as a way
209 # to find what ranks there are in taxonomic use
210 $self->ranks(('root',
211 'superkingdom', 'kingdom',
212 'superphylum', 'phylum', 'subphylum',
213 'superclass', 'class', 'subclass', 'infraclass',
214 'superorder', 'order', 'suborder', 'parvorder', 'infraorder',
215 'superfamily', 'family', 'subfamily',
216 'tribe', 'subtribe',
217 'genus', 'subgenus',
218 'species group', 'species subgroup', 'species', 'subspecies',
219 'varietas', 'forma', 'no rank'));
222 return $self;
226 =head2 method
228 Title : method
229 Usage : $obj = taxonomy->method($method);
230 Function: set or return the method used to decide classification
231 Returns : $obj
232 Args : $obj
234 =cut
237 sub method {
238 my ($self,$value) = @_;
239 if (defined $value && $value=~/none|trust|lookup/) {
240 $self->{'_method'} = $value;
242 return $self->{'_method'};
246 =head2 classify
248 Title : classify
249 Usage : @obj[][0-1] = taxonomy->classify($species);
250 Function: return a ranked classification
251 Returns : @obj of taxa and ranks as word pairs separated by "@"
252 Args : Bio::Species object
254 =cut
257 sub classify {
258 my ($self,$value) = @_;
259 my @ranks;
261 if (! $value->isa('Bio::Species') ) {
262 $self->throw("Trying to classify $value which is not a Bio::Species object");
265 my @classes=reverse($value->classification);
267 if ($self->method eq 'none') {
268 for (my $i=0; $i < @classes-2; $i++) {
269 ($ranks[$i][0],$ranks[$i][1])=($classes[$i],'no rank');
271 push @ranks,[$classes[-2],'genus'];
272 push @ranks,[$value->binomial,'species'];
273 } elsif ($self->method eq 'trust') {
274 if (scalar(@classes)==scalar($self->ranks)) {
275 for (my $i=0; $i < @classes; $i++) {
276 if ($self->rank_of_number($i) eq 'species') {
277 push @ranks,[$value->binomial,$self->rank_of_number($i)];
278 } else {
279 push @ranks,[$classes[$i],$self->rank_of_number($i)];
282 } else {
283 $self->throw("Species object and taxonomy object cannot be reconciled");
285 } elsif ($self->method eq 'lookup') {
286 # this will lookup a DB for the rank of a taxon name
287 # I imagine that some kind of Bio::DB class will be need to
288 # be given to the taxonomy object to act as an DB interface
289 # (I'm not sure how useful this is though - if you have a DB of
290 # taxonomy - why would you be doing things this way?)
291 $self->throw_not_implemented();
294 return @ranks;
298 =head2 level_of_rank
300 Title : level_of_rank
301 Usage : $obj = taxonomy->level_of_rank($obj);
302 Function: returns the level of a rank name
303 Returns : $obj
304 Args : $obj
306 =cut
309 sub level_of {
310 my ($self,$value) = @_;
312 return $self->{'_rank_hash'}{$value};
316 =head2 rank_of_number
318 Title : rank_of_number
319 Usage : $obj = taxonomy->rank_of_number($obj);
320 Function: returns the rank name of a rank level
321 Returns : $obj
322 Args : $obj
324 =cut
327 sub rank_of_number {
328 my ($self,$value) = @_;
330 return ${$self->{'_ranks'}}[$value];
334 =head2 ranks
336 Title : ranks
337 Usage : @obj = taxonomy->ranks(@obj);
338 Function: set or return all ranks
339 Returns : @obj
340 Args : @obj
342 =cut
345 sub ranks {
346 my ($self,@value) = @_;
348 # currently this makes no uniqueness sanity check (this should be done)
349 # I am think that adding a way of converting multiple 'no rank' ranks
350 # to unique 'no rank #' ranks so that the level of a 'no rank' is
351 # abstracted way from the user - I'm not sure of the value of this
353 if (@value) {
354 $self->{'_ranks'}=\@value;
357 for (my $i=0; $i <= @{$self->{'_ranks'}}-1; $i++) {
358 $self->{'_rank_hash'}{$self->{'_ranks'}[$i]}=$i unless $self->{'_ranks'}[$i] eq 'no rank';
361 return @{$self->{'_ranks'}};
364 =head2 add_node
366 Title: add_node
367 Usage: $obj->add_node($node[, $node2, ...]);
368 Function: add one or more Bio::Taxonomy::Node objects
369 Returns: None
370 Args: any number of Bio::Taxonomy::Node(s)
372 =cut
374 sub add_node {
375 my ($self, @nodes) = @_;
376 foreach(@nodes){
377 $self->throw("A Bio::Taxonomy::Node object needed")
378 unless($_->isa('Bio::Taxonomy::Node'));
379 my ($node, $rank) = ($_, $_->rank);
380 if(exists $self->{_hierarchy}->{$rank}){
381 # $self->throw("$rank has been defined");
382 # print STDERR "RANK:$rank\n";
383 # return;
385 $self->{_hierarchy}->{$rank} = $node;
389 =head2 binomial
391 Title : binomial
392 Usage : my $val = $obj->binomial;
393 Function: returns the binomial name if this taxonomy reachs species level
394 Returns : the binomial name
395 OR undef if taxonmy does not reach species level
396 Args : [No arguments]
398 =cut
400 sub binomial {
401 my $self = shift;
402 return $self->get_node('species')->scientific_name;
403 my $genus = $self->get_node('genus');
404 my $species = $self->get_node('species');
405 return ($species && $genus) ? "$species $genus" : undef;
408 =head2 get_node
410 Title : get_node
411 Usage : $node = $taxonomy->get_node('species');
412 Function: get a Bio::Taxonomy::Node object according to rank name
413 Returns : a Bio::Taxonomy::Node object or undef if null
414 Args : a vaild rank name
416 =cut
418 sub get_node {
419 my ($self, $rank) = @_;
420 unless(grep /$rank/, keys %{$self->{_hierarchy}}){
421 $self->throw("'$rank' is not in the rank list");
423 return (exists $self->{_hierarchy}->{$rank})?
424 $self->{_hierarchy}->{$rank} : undef;
427 =head2 classification
429 Title : classification
430 Usage : @names = $taxonomy->classification;
431 Function: get the classification names of one taxonomy
432 Returns : array of names
433 Args : [No arguments]
435 =cut
437 sub classification {
438 my $self = shift;
439 my %rank_hash = %{$self->{_rank_hash}};
440 my %hierarchy = %{$self->{_hierarchy}};
441 my @ordered_nodes = sort {
442 ($rank_hash{$a} <=> $rank_hash{$b})
443 } keys %hierarchy;
444 return map {$hierarchy{$_}->scientific_name} @ordered_nodes;