Added 'Data::Stag' requirement to many tests files
[bioperl-live.git] / Bio / Taxonomy / Taxon.pm
blob53b75faa0766d985e129f0f532cccce9be9c6446
2 # BioPerl module for Bio::Taxonomy::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Dan Kortschak but pilfered extensively from
7 # the Bio::Tree::Node code of Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Taxonomy::Taxon - Generic Taxonomic Entity object
17 =head1 SYNOPSIS
19 # NB: This module is deprecated. Use Bio::Taxon instead.
21 use Bio::Taxonomy::Taxon;
22 my $taxonA = Bio::Taxonomy::Taxon->new();
23 my $taxonL = Bio::Taxonomy::Taxon->new();
24 my $taxonR = Bio::Taxonomy::Taxon->new();
26 my $taxon = Bio::Taxonomy::Taxon->new();
27 $taxon->add_Descendents($taxonL);
28 $taxon->add_Descendents($taxonR);
30 my $species = $taxon->species;
32 =head1 DESCRIPTION
34 Makes a taxonomic unit suitable for use in a taxonomic tree
36 =head1 AUTHOR
38 Dan Kortschak email B<kortschak@rsbs.anu.edu.au>
40 =head1 CONTRIBUTORS
42 Sendu Bala: bix@sendu.me.uk
44 =head1 APPENDIX
46 The rest of the documentation details each of the object
47 methods. Internal methods are usually preceded with a _
49 =cut
51 # code begins...
53 package Bio::Taxonomy::Taxon;
54 use vars qw($CREATIONORDER);
55 use strict;
57 use Bio::Species;
59 use base qw(Bio::Root::Root Bio::Tree::NodeI);
61 BEGIN {
62 $CREATIONORDER = 0;
65 =head2 new
67 Title : new
68 Usage : my $obj = Bio::Taxonomy::Taxon->new();
69 Function: Builds a new Bio::Taxonomy::Taxon object
70 Returns : Bio::Taxonomy::Taxon
71 Args : -descendents => array pointer to descendents (optional)
72 -branch_length => branch length [integer] (optional)
73 -taxon => taxon
74 -id => unique taxon id for node (from NCBI's list preferably)
75 -rank => the taxonomic level of the node (also from NCBI)
77 =cut
79 #' for emacs
81 sub new {
82 my($class,@args) = @_;
84 my $self = $class->SUPER::new(@args);
85 $self->warn("Bio::Taxonomy::Taxon is deprecated. Use Bio::Taxon instead.");
87 my ($children,$branchlen,$id,$taxon,$rank,$desc) =
89 $self->_rearrange([qw(DESCENDENTS
90 BRANCH_LENGTH
92 TAXON
93 RANK
94 DESC)], @args);
96 $self->{'_desc'} = {};
97 defined $desc && $self->description($desc);
98 defined $taxon && $self->taxon($taxon);
99 defined $id && $self->id($id);
100 defined $branchlen && $self->branch_length($branchlen);
101 defined $rank && $self->rank($rank);
103 if( defined $children ) {
104 if( ref($children) !~ /ARRAY/i ) {
105 $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents");
107 foreach my $c ( @$children ) {
108 $self->add_Descendent($c);
111 $self->_creation_id($CREATIONORDER++);
112 return $self;
115 =head2 add_Descendent
117 Title : add_Descendent
118 Usage : $taxon->add_Descendent($taxon);
119 Function: Adds a descendent to a taxon
120 Returns : number of current descendents for this taxon
121 Args : Bio::Taxonomy::Taxon
122 boolean flag, true if you want to ignore the fact that you are
123 adding a second node with the same unique id (typically memory
124 location reference in this implementation). default is false and
125 will throw an error if you try and overwrite an existing node.
127 =cut
129 sub add_Descendent{
131 my ($self,$node,$ignoreoverwrite) = @_;
133 return -1 if( ! defined $node ) ;
134 if( ! $node->isa('Bio::Taxonomy::Taxon') ) {
135 $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon");
136 return -1;
138 # do we care about order?
139 $node->{'_ancestor'} = $self;
140 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
141 $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
144 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
146 $self->invalidate_height();
148 return scalar keys %{$self->{'_desc'}};
151 =head2 each_Descendent
153 Title : each_Descendent($sortby)
154 Usage : my @taxa = $taxon->each_Descendent;
155 Function: all the descendents for this taxon (but not their descendents
156 i.e. not a recursive fetchall)
157 Returns : Array of Bio::Taxonomy::Taxon objects
158 Args : $sortby [optional] "height", "creation" or coderef to be used
159 to sort the order of children taxa.
161 =cut
163 sub each_Descendent{
164 my ($self, $sortby) = @_;
166 # order can be based on branch length (and sub branchlength)
168 $sortby ||= 'height';
170 if (ref $sortby eq 'CODE') {
171 my @values = sort $sortby values %{$self->{'_desc'}};
172 return @values;
173 } else {
174 if ($sortby eq 'height') {
175 return map { $_->[0] }
176 sort { $a->[1] <=> $b->[1] ||
177 $a->[2] <=> $b->[2] }
178 map { [$_, $_->height, $_->internal_id ] }
179 values %{$self->{'_desc'}};
180 } else {
181 return map { $_->[0] }
182 sort { $a->[1] <=> $b->[1] }
183 map { [$_, $_->height ] }
184 values %{$self->{'_desc'}};
189 =head2 remove_Descendent
191 Title : remove_Descendent
192 Usage : $taxon->remove_Descedent($taxon_foo);
193 Function: Removes a specific taxon from being a Descendent of this taxon
194 Returns : nothing
195 Args : An array of Bio::taxonomy::Taxon objects which have be previously
196 passed to the add_Descendent call of this object.
198 =cut
200 sub remove_Descendent{
201 my ($self,@nodes) = @_;
202 foreach my $n ( @nodes ) {
203 if( $self->{'_desc'}->{$n->internal_id} ) {
204 $n->{'_ancestor'} = undef;
205 $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef;
206 delete $self->{'_desc'}->{$n->internal_id};
208 } else {
209 $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self));
210 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
216 =head2 remove_all_Descendents
218 Title : remove_all_Descendents
219 Usage : $taxon->remove_All_Descendents()
220 Function: Cleanup the taxon's reference to descendents and reset
221 their ancestor pointers to undef, if you don't have a reference
222 to these objects after this call they will be cleanedup - so
223 a get_nodes from the Tree object would be a safe thing to do first
224 Returns : nothing
225 Args : none
227 =cut
229 sub remove_all_Descendents{
230 my ($self) = @_;
231 # this won't cleanup the taxa themselves if you also have
232 # a copy/pointer of them (I think)...
233 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
234 $val->{'_ancestor'} = undef;
236 $self->{'_desc'} = {};
240 =head2 get_Descendents
242 Title : get_Descendents
243 Usage : my @taxa = $taxon->get_Descendents;
244 Function: Recursively fetch all the taxa and their descendents
245 *NOTE* This is different from each_Descendent
246 Returns : Array or Bio::Taxonomy::Taxon objects
247 Args : none
249 =cut
251 # implemented in the interface
253 =head2 ancestor
255 Title : ancestor
256 Usage : $taxon->ancestor($newval)
257 Function: Set the Ancestor
258 Returns : value of ancestor
259 Args : newvalue (optional)
261 =cut
263 sub ancestor {
264 my ($self, $value) = @_;
265 if (defined $value) {
266 $self->{'_ancestor'} = $value;
268 return $self->{'_ancestor'};
271 =head2 branch_length
273 Title : branch_length
274 Usage : $obj->branch_length($newval)
275 Function:
276 Example :
277 Returns : value of branch_length
278 Args : newvalue (optional)
280 =cut
282 sub branch_length {
283 my ($self,$value) = @_;
284 if( defined $value) {
285 $self->{'branch_length'} = $value;
287 return $self->{'branch_length'};
290 =head2 description
292 Title : description
293 Usage : $obj->description($newval)
294 Function:
295 Returns : value of description
296 Args : newvalue (optional)
298 =cut
300 sub description {
301 my ($self,$value) = @_;
302 if( defined $value ) {
303 $self->{'_description'} = $value;
305 return $self->{'_description'};
308 =head2 rank
310 Title : rank
311 Usage : $obj->rank($newval)
312 Function: Set the taxonomic rank
313 Returns : taxonomic rank of taxon
314 Args : newvalue (optional)
316 =cut
318 sub rank {
319 my ($self,$value) = @_;
320 if (defined $value) {
321 $self->{'_rank'} = $value;
323 return $self->{'_rank'};
326 =head2 taxon
328 Title : taxon
329 Usage : $obj->taxon($newtaxon)
330 Function: Set the name of the taxon
331 Example :
332 Returns : name of taxon
333 Args : newtaxon (optional)
335 =cut
337 # because internal taxa have names too...
338 sub taxon {
339 my ($self,$value) = @_;
340 if( defined $value ) {
341 $self->{'_taxon'} = $value;
343 return $self->{'_taxon'};
346 =head2 id
348 Title : id
349 Usage : $obj->id($newval)
350 Function:
351 Example :
352 Returns : value of id
353 Args : newvalue (optional)
355 =cut
357 sub id {
358 my ($self,$value) = @_;
359 if( defined $value ) {
360 $self->{'_id'} = $value;
362 return $self->{'_id'};
365 sub DESTROY {
366 my ($self) = @_;
367 # try to insure that everything is cleaned up
368 $self->SUPER::DESTROY();
369 if( defined $self->{'_desc'} &&
370 ref($self->{'_desc'}) =~ /ARRAY/i ) {
371 while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
372 $node->{'_ancestor'} = undef; # ensure no circular references
373 $node->DESTROY();
374 $node = undef;
376 $self->{'_desc'} = {};
380 =head2 internal_id
382 Title : internal_id
383 Usage : my $internalid = $taxon->internal_id
384 Function: Returns the internal unique id for this taxon
385 (a monotonically increasing number for this in-memory implementation
386 but could be a database determined unique id in other
387 implementations)
388 Returns : unique id
389 Args : none
391 =cut
393 sub internal_id {
394 return $_[0]->_creation_id;
397 =head2 _creation_id
399 Title : _creation_id
400 Usage : $obj->_creation_id($newval)
401 Function: a private method signifying the internal creation order
402 Returns : value of _creation_id
403 Args : newvalue (optional)
406 =cut
408 sub _creation_id {
409 my ($self,$value) = @_;
410 if( defined $value) {
411 $self->{'_creation_id'} = $value;
413 return $self->{'_creation_id'} || 0;
416 # The following methods are implemented by NodeI decorated interface
418 =head2 is_Leaf
420 Title : is_Leaf
421 Usage : if( $node->is_Leaf )
422 Function: Get Leaf status
423 Returns : boolean
424 Args : none
426 =cut
428 sub is_Leaf {
429 my ($self) = @_;
430 my $rc = 0;
431 $rc = 1 if( ! defined $self->{'_desc'} ||
432 keys %{$self->{'_desc'}} == 0);
433 return $rc;
436 =head2 to_string
438 Title : to_string
439 Usage : my $str = $taxon->to_string()
440 Function: For debugging, provide a taxon as a string
441 Returns : string
442 Args : none
444 =cut
446 =head2 height
448 Title : height
449 Usage : my $len = $taxon->height
450 Function: Returns the height of the tree starting at this
451 taxon. Height is the maximum branchlength.
452 Returns : The longest length (weighting branches with branch_length) to a leaf
453 Args : none
455 =cut
457 sub height {
458 my ($self) = @_;
460 return $self->{'_height'} if( defined $self->{'_height'} );
462 if( $self->is_Leaf ) {
463 if( !defined $self->branch_length ) {
464 $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' ));
465 return 0;
467 return $self->branch_length;
469 my $max = 0;
470 foreach my $subnode ( $self->each_Descendent ) {
471 my $s = $subnode->height;
472 if( $s > $max ) { $max = $s; }
474 return ($self->{'_height'} = $max + ($self->branch_length || 1));
477 =head2 invalidate_height
479 Title : invalidate_height
480 Usage : private helper method
481 Function: Invalidate our cached value of the taxon's height in the tree
482 Returns : nothing
483 Args : none
485 =cut
487 sub invalidate_height {
488 my ($self) = @_;
490 $self->{'_height'} = undef;
491 if( $self->ancestor ) {
492 $self->ancestor->invalidate_height;
496 =head2 classify
498 Title : classify
499 Usage : @obj->classify()
500 Function: a method to return the classification of a species
501 Returns : name of taxon and ancestor's taxon recursively
502 Args : boolean to specify whether we want all taxa not just ranked
503 levels
505 =cut
507 sub classify {
508 my ($self,$allnodes) = @_;
510 my @classification=($self->taxon);
511 my $node=$self;
513 while (defined $node->ancestor) {
514 push @classification, $node->ancestor->taxon if $allnodes==1;
515 $node=$node->ancestor;
518 return (@classification);
521 =head2 has_rank
523 Title : has_rank
524 Usage : $obj->has_rank($rank)
525 Function: a method to query ancestors' rank
526 Returns : boolean
527 Args : $rank
529 =cut
531 sub has_rank {
532 my ($self,$rank) = @_;
534 return $self if $self->rank eq $rank;
536 while (defined $self->ancestor) {
537 return $self if $self->ancestor->rank eq $rank;
538 $self=$self->ancestor;
541 return;
544 =head2 has_taxon
546 Title : has_taxon
547 Usage : $obj->has_taxon($taxon)
548 Function: a method to query ancestors' taxa
549 Returns : boolean
550 Args : Bio::Taxonomy::Taxon object
552 =cut
554 sub has_taxon {
555 my ($self,$taxon) = @_;
557 return $self if
558 ((defined $self->id && $self->id == $taxon->id) ||
559 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank));
561 while (defined $self->ancestor) {
562 return $self if
563 ((defined $self->id && $self->id == $taxon->id) ||
564 ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) &&
565 ($self->taxon ne 'no rank'));
566 $self=$self->ancestor;
569 return;
572 =head2 distance_to_root
574 Title : distance_to_root
575 Usage : $obj->distance_to_root
576 Function: a method to query ancestors' taxa
577 Returns : number of links to root
578 Args :
580 =cut
582 sub distance_to_root {
583 my ($self,$taxon) = @_;
585 my $count=0;
587 while (defined $self->ancestor) {
588 $count++;
589 $self=$self->ancestor;
592 return $count;
595 =head2 recent_common_ancestor
597 Title : recent_common_ancestor
598 Usage : $obj->recent_common_ancestor($taxon)
599 Function: a method to query find common ancestors
600 Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank
601 Args : Bio::Taxonomy::Taxon
603 =cut
605 sub recent_common_ancestor {
606 my ($self,$node) = @_;
608 while (defined $node->ancestor) {
609 my $common=$self->has_taxon($node);
610 return $common if defined $common;
611 $node=$node->ancestor;
614 return;
617 =head2 species
619 Title : species
620 Usage : $obj=$taxon->species;
621 Function: Returns a Bio::Species object reflecting the taxon's tree position
622 Returns : a Bio::Species object
623 Args : none
625 =cut
627 sub species {
628 my ($self) = @_;
629 my $species;
631 if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') {
632 $species = Bio::Species->new(-classification => $self->ancestor->classify);
633 $species->genus($self->ancestor->ancestor->taxon);
634 $species->species($self->ancestor->taxon);
635 $species->sub_species($self->taxon);
636 } elsif ($self->has_rank('species')) {
637 $species = Bio::Species->new(-classification => $self->classify);
638 $species->genus($self->ancestor->taxon);
639 $species->species($self->taxon);
640 } else {
641 $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n");
643 return $species;