sync w/ main trunk
[bioperl-live.git] / Bio / Taxonomy / Taxon.pm
blob0706c1c1ae8c84f8fa01655c09f2bb1b8efe6dc8
1 # $Id$
3 # BioPerl module for Bio::Taxonomy::Taxon
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Dan Kortschak but pilfered extensively from
8 # the Bio::Tree::Node code of Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Taxonomy::Taxon - Generic Taxonomic Entity object
18 =head1 SYNOPSIS
20 # NB: This module is deprecated. Use Bio::Taxon instead.
22 use Bio::Taxonomy::Taxon;
23 my $taxonA = Bio::Taxonomy::Taxon->new();
24 my $taxonL = Bio::Taxonomy::Taxon->new();
25 my $taxonR = Bio::Taxonomy::Taxon->new();
27 my $taxon = Bio::Taxonomy::Taxon->new();
28 $taxon->add_Descendents($taxonL);
29 $taxon->add_Descendents($taxonR);
31 my $species = $taxon->species;
33 =head1 DESCRIPTION
35 Makes a taxonomic unit suitable for use in a taxonomic tree
37 =head1 AUTHOR
39 Dan Kortschak email B<kortschak@rsbs.anu.edu.au>
41 =head1 CONTRIBUTORS
43 Sendu Bala: bix@sendu.me.uk
45 =head1 APPENDIX
47 The rest of the documentation details each of the object
48 methods. Internal methods are usually preceded with a _
50 =cut
52 # code begins...
54 package Bio::Taxonomy::Taxon;
55 use vars qw($CREATIONORDER);
56 use strict;
58 use Bio::Species;
60 use base qw(Bio::Root::Root Bio::Tree::NodeI);
62 BEGIN {
63 $CREATIONORDER = 0;
66 =head2 new
68 Title : new
69 Usage : my $obj = Bio::Taxonomy::Taxon->new();
70 Function: Builds a new Bio::Taxonomy::Taxon object
71 Returns : Bio::Taxonomy::Taxon
72 Args : -descendents => array pointer to descendents (optional)
73 -branch_length => branch length [integer] (optional)
74 -taxon => taxon
75 -id => unique taxon id for node (from NCBI's list preferably)
76 -rank => the taxonomic level of the node (also from NCBI)
78 =cut
80 #' for emacs
82 sub new {
83 my($class,@args) = @_;
85 my $self = $class->SUPER::new(@args);
86 $self->warn("Bio::Taxonomy::Taxon is deprecated. Use Bio::Taxon instead.");
88 my ($children,$branchlen,$id,$taxon,$rank,$desc) =
90 $self->_rearrange([qw(DESCENDENTS
91 BRANCH_LENGTH
93 TAXON
94 RANK
95 DESC)], @args);
97 $self->{'_desc'} = {};
98 defined $desc && $self->description($desc);
99 defined $taxon && $self->taxon($taxon);
100 defined $id && $self->id($id);
101 defined $branchlen && $self->branch_length($branchlen);
102 defined $rank && $self->rank($rank);
104 if( defined $children ) {
105 if( ref($children) !~ /ARRAY/i ) {
106 $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents");
108 foreach my $c ( @$children ) {
109 $self->add_Descendent($c);
112 $self->_creation_id($CREATIONORDER++);
113 return $self;
116 =head2 add_Descendent
118 Title : add_Descendent
119 Usage : $taxon->add_Descendent($taxon);
120 Function: Adds a descendent to a taxon
121 Returns : number of current descendents for this taxon
122 Args : Bio::Taxonomy::Taxon
123 boolean flag, true if you want to ignore the fact that you are
124 adding a second node with the same unique id (typically memory
125 location reference in this implementation). default is false and
126 will throw an error if you try and overwrite an existing node.
128 =cut
130 sub add_Descendent{
132 my ($self,$node,$ignoreoverwrite) = @_;
134 return -1 if( ! defined $node ) ;
135 if( ! $node->isa('Bio::Taxonomy::Taxon') ) {
136 $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon");
137 return -1;
139 # do we care about order?
140 $node->{'_ancestor'} = $self;
141 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
142 $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
145 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
147 $self->invalidate_height();
149 return scalar keys %{$self->{'_desc'}};
152 =head2 each_Descendent
154 Title : each_Descendent($sortby)
155 Usage : my @taxa = $taxon->each_Descendent;
156 Function: all the descendents for this taxon (but not their descendents
157 i.e. not a recursive fetchall)
158 Returns : Array of Bio::Taxonomy::Taxon objects
159 Args : $sortby [optional] "height", "creation" or coderef to be used
160 to sort the order of children taxa.
162 =cut
164 sub each_Descendent{
165 my ($self, $sortby) = @_;
167 # order can be based on branch length (and sub branchlength)
169 $sortby ||= 'height';
171 if (ref $sortby eq 'CODE') {
172 my @values = sort $sortby values %{$self->{'_desc'}};
173 return @values;
174 } else {
175 if ($sortby eq 'height') {
176 return map { $_->[0] }
177 sort { $a->[1] <=> $b->[1] ||
178 $a->[2] <=> $b->[2] }
179 map { [$_, $_->height, $_->internal_id ] }
180 values %{$self->{'_desc'}};
181 } else {
182 return map { $_->[0] }
183 sort { $a->[1] <=> $b->[1] }
184 map { [$_, $_->height ] }
185 values %{$self->{'_desc'}};
190 =head2 remove_Descendent
192 Title : remove_Descendent
193 Usage : $taxon->remove_Descedent($taxon_foo);
194 Function: Removes a specific taxon from being a Descendent of this taxon
195 Returns : nothing
196 Args : An array of Bio::taxonomy::Taxon objects which have be previously
197 passed to the add_Descendent call of this object.
199 =cut
201 sub remove_Descendent{
202 my ($self,@nodes) = @_;
203 foreach my $n ( @nodes ) {
204 if( $self->{'_desc'}->{$n->internal_id} ) {
205 $n->{'_ancestor'} = undef;
206 $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef;
207 delete $self->{'_desc'}->{$n->internal_id};
209 } else {
210 $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self));
211 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
217 =head2 remove_all_Descendents
219 Title : remove_all_Descendents
220 Usage : $taxon->remove_All_Descendents()
221 Function: Cleanup the taxon's reference to descendents and reset
222 their ancestor pointers to undef, if you don't have a reference
223 to these objects after this call they will be cleanedup - so
224 a get_nodes from the Tree object would be a safe thing to do first
225 Returns : nothing
226 Args : none
228 =cut
230 sub remove_all_Descendents{
231 my ($self) = @_;
232 # this won't cleanup the taxa themselves if you also have
233 # a copy/pointer of them (I think)...
234 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
235 $val->{'_ancestor'} = undef;
237 $self->{'_desc'} = {};
241 =head2 get_Descendents
243 Title : get_Descendents
244 Usage : my @taxa = $taxon->get_Descendents;
245 Function: Recursively fetch all the taxa and their descendents
246 *NOTE* This is different from each_Descendent
247 Returns : Array or Bio::Taxonomy::Taxon objects
248 Args : none
250 =cut
252 # implemented in the interface
254 =head2 ancestor
256 Title : ancestor
257 Usage : $taxon->ancestor($newval)
258 Function: Set the Ancestor
259 Returns : value of ancestor
260 Args : newvalue (optional)
262 =cut
264 sub ancestor {
265 my ($self, $value) = @_;
266 if (defined $value) {
267 $self->{'_ancestor'} = $value;
269 return $self->{'_ancestor'};
272 =head2 branch_length
274 Title : branch_length
275 Usage : $obj->branch_length($newval)
276 Function:
277 Example :
278 Returns : value of branch_length
279 Args : newvalue (optional)
281 =cut
283 sub branch_length {
284 my ($self,$value) = @_;
285 if( defined $value) {
286 $self->{'branch_length'} = $value;
288 return $self->{'branch_length'};
291 =head2 description
293 Title : description
294 Usage : $obj->description($newval)
295 Function:
296 Returns : value of description
297 Args : newvalue (optional)
299 =cut
301 sub description {
302 my ($self,$value) = @_;
303 if( defined $value ) {
304 $self->{'_description'} = $value;
306 return $self->{'_description'};
309 =head2 rank
311 Title : rank
312 Usage : $obj->rank($newval)
313 Function: Set the taxonomic rank
314 Returns : taxonomic rank of taxon
315 Args : newvalue (optional)
317 =cut
319 sub rank {
320 my ($self,$value) = @_;
321 if (defined $value) {
322 $self->{'_rank'} = $value;
324 return $self->{'_rank'};
327 =head2 taxon
329 Title : taxon
330 Usage : $obj->taxon($newtaxon)
331 Function: Set the name of the taxon
332 Example :
333 Returns : name of taxon
334 Args : newtaxon (optional)
336 =cut
338 # because internal taxa have names too...
339 sub taxon {
340 my ($self,$value) = @_;
341 if( defined $value ) {
342 $self->{'_taxon'} = $value;
344 return $self->{'_taxon'};
347 =head2 id
349 Title : id
350 Usage : $obj->id($newval)
351 Function:
352 Example :
353 Returns : value of id
354 Args : newvalue (optional)
356 =cut
358 sub id {
359 my ($self,$value) = @_;
360 if( defined $value ) {
361 $self->{'_id'} = $value;
363 return $self->{'_id'};
366 sub DESTROY {
367 my ($self) = @_;
368 # try to insure that everything is cleaned up
369 $self->SUPER::DESTROY();
370 if( defined $self->{'_desc'} &&
371 ref($self->{'_desc'}) =~ /ARRAY/i ) {
372 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
373 $node->{'_ancestor'} = undef; # ensure no circular references
374 $node->DESTROY();
375 $node = undef;
377 $self->{'_desc'} = {};
381 =head2 internal_id
383 Title : internal_id
384 Usage : my $internalid = $taxon->internal_id
385 Function: Returns the internal unique id for this taxon
386 (a monotonically increasing number for this in-memory implementation
387 but could be a database determined unique id in other
388 implementations)
389 Returns : unique id
390 Args : none
392 =cut
394 sub internal_id {
395 return $_[0]->_creation_id;
398 =head2 _creation_id
400 Title : _creation_id
401 Usage : $obj->_creation_id($newval)
402 Function: a private method signifying the internal creation order
403 Returns : value of _creation_id
404 Args : newvalue (optional)
407 =cut
409 sub _creation_id {
410 my ($self,$value) = @_;
411 if( defined $value) {
412 $self->{'_creation_id'} = $value;
414 return $self->{'_creation_id'} || 0;
417 # The following methods are implemented by NodeI decorated interface
419 =head2 is_Leaf
421 Title : is_Leaf
422 Usage : if( $node->is_Leaf )
423 Function: Get Leaf status
424 Returns : boolean
425 Args : none
427 =cut
429 sub is_Leaf {
430 my ($self) = @_;
431 my $rc = 0;
432 $rc = 1 if( ! defined $self->{'_desc'} ||
433 keys %{$self->{'_desc'}} == 0);
434 return $rc;
437 =head2 to_string
439 Title : to_string
440 Usage : my $str = $taxon->to_string()
441 Function: For debugging, provide a taxon as a string
442 Returns : string
443 Args : none
445 =cut
447 =head2 height
449 Title : height
450 Usage : my $len = $taxon->height
451 Function: Returns the height of the tree starting at this
452 taxon. Height is the maximum branchlength.
453 Returns : The longest length (weighting branches with branch_length) to a leaf
454 Args : none
456 =cut
458 sub height {
459 my ($self) = @_;
461 return $self->{'_height'} if( defined $self->{'_height'} );
463 if( $self->is_Leaf ) {
464 if( !defined $self->branch_length ) {
465 $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' ));
466 return 0;
468 return $self->branch_length;
470 my $max = 0;
471 foreach my $subnode ( $self->each_Descendent ) {
472 my $s = $subnode->height;
473 if( $s > $max ) { $max = $s; }
475 return ($self->{'_height'} = $max + ($self->branch_length || 1));
478 =head2 invalidate_height
480 Title : invalidate_height
481 Usage : private helper method
482 Function: Invalidate our cached value of the taxon's height in the tree
483 Returns : nothing
484 Args : none
486 =cut
488 sub invalidate_height {
489 my ($self) = @_;
491 $self->{'_height'} = undef;
492 if( $self->ancestor ) {
493 $self->ancestor->invalidate_height;
497 =head2 classify
499 Title : classify
500 Usage : @obj->classify()
501 Function: a method to return the classification of a species
502 Returns : name of taxon and ancestor's taxon recursively
503 Args : boolean to specify whether we want all taxa not just ranked
504 levels
506 =cut
508 sub classify {
509 my ($self,$allnodes) = @_;
511 my @classification=($self->taxon);
512 my $node=$self;
514 while (defined $node->ancestor) {
515 push @classification, $node->ancestor->taxon if $allnodes==1;
516 $node=$node->ancestor;
519 return (@classification);
522 =head2 has_rank
524 Title : has_rank
525 Usage : $obj->has_rank($rank)
526 Function: a method to query ancestors' rank
527 Returns : boolean
528 Args : $rank
530 =cut
532 sub has_rank {
533 my ($self,$rank) = @_;
535 return $self if $self->rank eq $rank;
537 while (defined $self->ancestor) {
538 return $self if $self->ancestor->rank eq $rank;
539 $self=$self->ancestor;
542 return;
545 =head2 has_taxon
547 Title : has_taxon
548 Usage : $obj->has_taxon($taxon)
549 Function: a method to query ancestors' taxa
550 Returns : boolean
551 Args : Bio::Taxonomy::Taxon object
553 =cut
555 sub has_taxon {
556 my ($self,$taxon) = @_;
558 return $self if
559 ((defined $self->id && $self->id == $taxon->id) ||
560 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank));
562 while (defined $self->ancestor) {
563 return $self if
564 ((defined $self->id && $self->id == $taxon->id) ||
565 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) &&
566 ($self->taxon ne 'no rank'));
567 $self=$self->ancestor;
570 return;
573 =head2 distance_to_root
575 Title : distance_to_root
576 Usage : $obj->distance_to_root
577 Function: a method to query ancestors' taxa
578 Returns : number of links to root
579 Args :
581 =cut
583 sub distance_to_root {
584 my ($self,$taxon) = @_;
586 my $count=0;
588 while (defined $self->ancestor) {
589 $count++;
590 $self=$self->ancestor;
593 return $count;
596 =head2 recent_common_ancestor
598 Title : recent_common_ancestor
599 Usage : $obj->recent_common_ancestor($taxon)
600 Function: a method to query find common ancestors
601 Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank
602 Args : Bio::Taxonomy::Taxon
604 =cut
606 sub recent_common_ancestor {
607 my ($self,$node) = @_;
609 while (defined $node->ancestor) {
610 my $common=$self->has_taxon($node);
611 return $common if defined $common;
612 $node=$node->ancestor;
615 return;
618 =head2 species
620 Title : species
621 Usage : $obj=$taxon->species;
622 Function: Returns a Bio::Species object reflecting the taxon's tree position
623 Returns : a Bio::Species object
624 Args : none
626 =cut
628 sub species {
629 my ($self) = @_;
630 my $species;
632 if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') {
633 $species = Bio::Species->new(-classification => $self->ancestor->classify);
634 $species->genus($self->ancestor->ancestor->taxon);
635 $species->species($self->ancestor->taxon);
636 $species->sub_species($self->taxon);
637 } elsif ($self->has_rank('species')) {
638 $species = Bio::Species->new(-classification => $self->classify);
639 $species->genus($self->ancestor->taxon);
640 $species->species($self->taxon);
641 } else {
642 $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n");
644 return $species;