fix spelling errors, fixes #3228
[bioperl-live.git] / Bio / Taxon.pm
blobab4e55a74e33b2af2a5fb7eeb8cdd3a82715b414
2 # BioPerl module for Bio::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Taxon - A node in a represented taxonomy
18 =head1 SYNOPSIS
20 use Bio::Taxon;
22 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
23 # but here is how you initialize one
24 my $taxon = Bio::Taxon->new(-name => $name,
25 -id => $id,
26 -rank => $rank,
27 -division => $div);
29 # Get one from a database
30 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
31 -directory=> '/tmp',
32 -nodesfile=> '/path/to/nodes.dmp',
33 -namesfile=> '/path/to/names.dmp');
34 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
35 $human = $dbh->get_taxon(-taxonid => '9606');
37 print "id is ", $human->id, "\n"; # 9606
38 print "rank is ", $human->rank, "\n"; # species
39 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
40 print "division is ", $human->division, "\n"; # Primates
42 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
44 # You can quickly make your own lineages with the list database
45 my @ranks = qw(superkingdom class genus species);
46 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
47 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
48 -ranks => \@ranks);
49 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
50 my @names = $human->common_names; # @names is empty
51 $human->common_names('woman');
52 @names = $human->common_names; # @names contains woman
54 # You can switch to another database when you need more information
55 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
56 $human->db_handle($entrez_dbh);
57 @names = $human->common_names; # @names contains woman, human, man
59 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
60 # methods (and can manually create our own taxa and taxonomy without the use
61 # of any database)
62 my $homo = $human->ancestor;
64 # Though be careful with each_Descendent - unless you add_Descendent()
65 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
66 # does not ask the database for the answer. You can ask the database yourself
67 # using the same method:
68 ($human) = $homo->db_handle->each_Descendent($homo);
70 # We can also take advantage of Bio::Tree::Tree* methods:
71 # a) some methods are available with just an empty tree object
72 use Bio::Tree::Tree;
73 my $tree_functions = Bio::Tree::Tree->new();
74 my @lineage = $tree_functions->get_lineage_nodes($human);
75 my $lca = $tree_functions->get_lca($human, $mouse);
77 # b) for other methods, create a tree using your Taxon object
78 my $tree = Bio::Tree::Tree->new(-node => $human);
79 my @taxa = $tree->get_nodes;
80 $homo = $tree->find_node(-rank => 'genus');
82 # Normally you can't get the lca of a list-database derived Taxon and an
83 # entrez or flatfile-derived one because the two different databases might
84 # have different roots and different numbers of ranks between the root and the
85 # taxa of interest. To solve this, make a tree of the Taxon with the more
86 # detailed lineage and splice out all the taxa that won't be in the lineage of
87 # your other Taxon:
88 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
89 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
90 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
91 $mouse_tree->splice(-keep_rank => \@ranks);
92 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
94 =head1 DESCRIPTION
96 This is the next generation (for Bioperl) of representing Taxonomy
97 information. Previously all information was managed by a single
98 object called Bio::Species. This new implementation allows
99 representation of the intermediate nodes not just the species nodes
100 and can relate their connections.
102 =head1 FEEDBACK
104 =head2 Mailing Lists
106 User feedback is an integral part of the evolution of this and other
107 Bioperl modules. Send your comments and suggestions preferably to
108 the Bioperl mailing list. Your participation is much appreciated.
110 bioperl-l@bioperl.org - General discussion
111 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
113 =head2 Support
115 Please direct usage questions or support issues to the mailing list:
117 I<bioperl-l@bioperl.org>
119 rather than to the module maintainer directly. Many experienced and
120 reponsive experts will be able look at the problem and quickly
121 address it. Please include a thorough description of the problem
122 with code and data examples if at all possible.
124 =head2 Reporting Bugs
126 Report bugs to the Bioperl bug tracking system to help us keep track
127 of the bugs and their resolution. Bug reports can be submitted via
128 the web:
130 https://redmine.open-bio.org/projects/bioperl/
132 =head1 AUTHOR - Sendu Bala
134 Email bix@sendu.me.uk
136 =head1 CONTRIBUTORS
138 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
139 Juguang Xiao, juguang@tll.org.sg
140 Gabriel Valiente, valiente@lsi.upc.edu
142 =head1 APPENDIX
144 The rest of the documentation details each of the object methods.
145 Internal methods are usually preceded with a _
147 =cut
149 package Bio::Taxon;
150 use strict;
151 use Scalar::Util qw(blessed);
153 use Bio::DB::Taxonomy;
155 use base qw(Bio::Tree::Node Bio::IdentifiableI);
157 =head2 new
159 Title : new
160 Usage : my $obj = Bio::Taxonomy::Node->new();
161 Function: Builds a new Bio::Taxonomy::Node object
162 Returns : an instance of Bio::Taxonomy::Node
163 Args : -dbh => a reference to a Bio::DB::Taxonomy object
164 [no default]
165 -name => a string representing the taxon name
166 (scientific name)
167 -id => human readable id - typically NCBI taxid
168 -ncbi_taxid => same as -id, but explicitly say that it is an
169 NCBI taxid
170 -rank => node rank (one of 'species', 'genus', etc)
171 -common_names => array ref of all common names
172 -division => 'Primates', 'Rodents', etc
173 -genetic_code => genetic code table number
174 -mito_genetic_code => mitochondrial genetic code table number
175 -create_date => date created in database
176 -update_date => date last updated in database
177 -pub_date => date published in database
179 =cut
181 sub new {
182 my ($class, @args) = @_;
183 my $self = $class->SUPER::new(@args);
184 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
185 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
186 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
187 NCBI_TAXID COMMON_NAME COMMON_NAMES
188 GENETIC_CODE MITO_GENETIC_CODE
189 CREATE_DATE UPDATE_DATE PUB_DATE
190 PARENT_ID)], @args);
192 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
193 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
195 elsif(!defined $id) {
196 $id = $objid || $ncbitaxid;
198 defined $id && $self->id($id);
199 $self->{_ncbi_tax_id_provided} = 1 if $ncbitaxid;
201 defined $rank && $self->rank($rank);
202 defined $name && $self->node_name($name);
204 my @common_names;
205 if ($commonnames) {
206 $self->throw("-common_names takes only an array reference") unless $commonnames
207 && ref($commonnames) eq 'ARRAY';
208 @common_names = @{$commonnames};
210 if ($commonname) {
211 my %c_names = map { $_ => 1 } @common_names;
212 unless (exists $c_names{$commonname}) {
213 unshift(@common_names, $commonname);
216 @common_names > 0 && $self->common_names(@common_names);
218 defined $gcode && $self->genetic_code($gcode);
219 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
220 defined $createdate && $self->create_date($createdate);
221 defined $updatedate && $self->update_date($updatedate);
222 defined $pubdate && $self->pub_date($pubdate);
223 defined $div && $self->division($div);
224 defined $dbh && $self->db_handle($dbh);
226 # deprecated and will issue a warning when method called,
227 # eventually to be removed completely as option
228 defined $parent_id && $self->parent_id($parent_id);
230 # some things want to freeze/thaw Bio::Species objects, but
231 # _root_cleanup_methods contains a CODE ref, delete it.
232 delete $self->{_root_cleanup_methods};
234 return $self;
237 =head1 Bio::IdentifiableI interface
239 Also see L<Bio::IdentifiableI>
241 =head2 version
243 Title : version
244 Usage : $taxon->version($newval)
245 Returns : value of version (a scalar)
246 Args : on set, new value (a scalar or undef, optional)
248 =cut
250 sub version {
251 my $self = shift;
252 return $self->{'version'} = shift if @_;
253 return $self->{'version'};
256 =head2 authority
258 Title : authority
259 Usage : $taxon->authority($newval)
260 Returns : value of authority (a scalar)
261 Args : on set, new value (a scalar or undef, optional)
263 =cut
265 sub authority {
266 my $self = shift;
267 return $self->{'authority'} = shift if @_;
268 return $self->{'authority'};
271 =head2 namespace
273 Title : namespace
274 Usage : $taxon->namespace($newval)
275 Returns : value of namespace (a scalar)
276 Args : on set, new value (a scalar or undef, optional)
278 =cut
280 sub namespace {
281 my $self = shift;
282 return $self->{'namespace'} = shift if @_;
283 return $self->{'namespace'};
286 =head1 Bio::Taxonomy::Node implementation
288 =head2 db_handle
290 Title : db_handle
291 Usage : $taxon->db_handle($newval)
292 Function: Get/Set Bio::DB::Taxonomy Handle
293 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
294 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
296 Also see L<Bio::DB::Taxonomy>
298 =cut
300 sub db_handle {
301 my $self = shift;
302 if (@_) {
303 my $db = shift;
305 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
306 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
308 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
309 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
310 $self->_merge_taxa($new_self) if $new_self;
313 # NB: The Bio::DB::Taxonomy modules access this data member directly
314 # to avoid calling this method and going infinite
315 $self->{'db_handle'} = $db;
317 return $self->{'db_handle'};
320 =head2 rank
322 Title : rank
323 Usage : $taxon->rank($newval)
324 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
325 Returns : value of rank (a scalar)
326 Args : on set, new value (a scalar or undef, optional)
328 =cut
330 sub rank {
331 my $self = shift;
332 return $self->{'rank'} = shift if @_;
333 return $self->{'rank'};
336 =head2 id
338 Title : id
339 Usage : $taxon->id($newval)
340 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
341 ncbi_taxid() are synonyms of this method.
342 Returns : id (a scalar)
343 Args : none to get, OR scalar to set
345 =cut
347 sub id {
348 my $self = shift;
349 return $self->SUPER::id(@_);
352 *object_id = \&id;
354 =head2 ncbi_taxid
356 Title : ncbi_taxid
357 Usage : $taxon->ncbi_taxid($newval)
358 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
359 returns an id when ncbi_taxid has been explictely set with this
360 method.
361 Returns : id (a scalar)
362 Args : none to get, OR scalar to set
364 =cut
366 sub ncbi_taxid {
367 my ($self, $id) = @_;
369 if ($id) {
370 $self->{_ncbi_tax_id_provided} = 1;
371 return $self->SUPER::id($id);
374 if ($self->{_ncbi_tax_id_provided}) {
375 return $self->SUPER::id;
377 return;
380 =head2 parent_id
382 Title : parent_id
383 Usage : $taxon->parent_id()
384 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
385 parent_taxon_id() is a synonym of this method.
386 Returns : value of parent_id (a scalar)
387 Args : none
388 Status : deprecated
390 =cut
392 sub parent_id {
393 my $self = shift;
394 if (@_) {
395 $self->warn("You can no longer set the parent_id - use ancestor() instead");
397 my $ancestor = $self->ancestor() || return;
398 return $ancestor->id;
401 *parent_taxon_id = \&parent_id;
403 =head2 genetic_code
405 Title : genetic_code
406 Usage : $taxon->genetic_code($newval)
407 Function: Get/set genetic code table
408 Returns : value of genetic_code (a scalar)
409 Args : on set, new value (a scalar or undef, optional)
411 =cut
413 sub genetic_code {
414 my $self = shift;
415 return $self->{'genetic_code'} = shift if @_;
416 return $self->{'genetic_code'};
419 =head2 mitochondrial_genetic_code
421 Title : mitochondrial_genetic_code
422 Usage : $taxon->mitochondrial_genetic_code($newval)
423 Function: Get/set mitochondrial genetic code table
424 Returns : value of mitochondrial_genetic_code (a scalar)
425 Args : on set, new value (a scalar or undef, optional)
427 =cut
429 sub mitochondrial_genetic_code {
430 my $self = shift;
431 return $self->{'mitochondrial_genetic_code'} = shift if @_;
432 return $self->{'mitochondrial_genetic_code'};
435 =head2 create_date
437 Title : create_date
438 Usage : $taxon->create_date($newval)
439 Function: Get/Set Date this node was created (in the database)
440 Returns : value of create_date (a scalar)
441 Args : on set, new value (a scalar or undef, optional)
443 =cut
445 sub create_date {
446 my $self = shift;
447 return $self->{'create_date'} = shift if @_;
448 return $self->{'create_date'};
451 =head2 update_date
453 Title : update_date
454 Usage : $taxon->update_date($newval)
455 Function: Get/Set Date this node was updated (in the database)
456 Returns : value of update_date (a scalar)
457 Args : on set, new value (a scalar or undef, optional)
459 =cut
461 sub update_date {
462 my $self = shift;
463 return $self->{'update_date'} = shift if @_;
464 return $self->{'update_date'};
467 =head2 pub_date
469 Title : pub_date
470 Usage : $taxon->pub_date($newval)
471 Function: Get/Set Date this node was published (in the database)
472 Returns : value of pub_date (a scalar)
473 Args : on set, new value (a scalar or undef, optional)
475 =cut
477 sub pub_date {
478 my $self = shift;
479 return $self->{'pub_date'} = shift if @_;
480 return $self->{'pub_date'};
483 =head2 ancestor
485 Title : ancestor
486 Usage : my $ancestor_taxon = $taxon->ancestor()
487 Function: Retrieve the ancestor taxon. Normally the database is asked what the
488 ancestor is.
490 If you manually set the ancestor (or you make a Bio::Tree::Tree with
491 this object as an argument to new()), the database (if any) will not
492 be used for the purposes of this method.
494 To restore normal database behaviour, call ancestor(undef) (which
495 would remove this object from the tree), or request this taxon again
496 as a new Taxon object from the database.
498 Returns : Bio::Taxon
499 Args : none
501 =cut
503 sub ancestor {
504 my $self = shift;
505 my $ancestor = $self->SUPER::ancestor(@_);
506 if ($ancestor) {
507 return $ancestor;
509 my $dbh = $self->db_handle;
510 #*** could avoid the db lookup if we knew our current id was definitely
511 # information from the db...
512 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
513 return $dbh->ancestor($definitely_from_dbh);
516 =head2 get_Parent_Node
518 Title : get_Parent_Node
519 Function: Synonym of ancestor()
520 Status : deprecated
522 =cut
524 sub get_Parent_Node {
525 my $self = shift;
526 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
527 return $self->ancestor(@_);
530 =head2 each_Descendent
532 Title : each_Descendent
533 Usage : my @taxa = $taxon->each_Descendent();
534 Function: Get all the descendents for this Taxon (but not their descendents,
535 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
536 this method.
538 Note that this method never asks the database for the descendents;
539 it will only return objects you have manually set with
540 add_Descendent(), or where this was done for you by making a
541 Bio::Tree::Tree with this object as an argument to new().
543 To get the database descendents use
544 $taxon->db_handle->each_Descendent($taxon).
546 Returns : Array of Bio::Taxon objects
547 Args : optionally, when you have set your own descendents, the string
548 "height", "creation", "alpha", "revalpha", or coderef to be used to
549 sort the order of children nodes.
551 =cut
553 # implemented by Bio::Tree::Node
555 =head2 get_Children_Nodes
557 Title : get_Children_Nodes
558 Function: Synonym of each_Descendent()
559 Status : deprecated
561 =cut
563 sub get_Children_Nodes {
564 my $self = shift;
565 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
566 return $self->each_Descendent(@_);
569 =head2 name
571 Title: name
572 Usage: $taxon->name('scientific', 'Homo sapiens');
573 $taxon->name('common', 'human', 'man');
574 my @names = @{$taxon->name('common')};
575 Function: Get/set the names. node_name(), scientific_name() and common_names()
576 are shorthands to name('scientific'), name('scientific') and
577 name('common') respectively.
578 Returns: names (a array reference)
579 Args: Arg1 => the name_class. You can assign any text, but the words
580 'scientific' and 'common' have the special meaning, as
581 scientific name and common name, respectively. 'scientific' and
582 'division' are treated specially, allowing only the first value
583 in the Arg2 list to be set.
584 Arg2 .. => list of names
586 =cut
588 sub name {
589 my ($self, $name_class, @names) = @_;
590 $self->throw('No name class specified') unless defined $name_class;
592 if (@names) {
593 if ($name_class =~ /scientific|division/i) {
594 delete $self->{'_names_hash'}->{$name_class};
595 @names = (shift(@names));
597 push @{$self->{'_names_hash'}->{$name_class}}, @names;
599 return $self->{'_names_hash'}->{$name_class} || return;
602 =head2 node_name
604 Title : node_name
605 Usage : $taxon->node_name($newval)
606 Function: Get/set the name of this taxon (node), typically the scientific name
607 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
608 of this method.
609 Returns : value of node_name (a scalar)
610 Args : on set, new value (a scalar or undef, optional)
612 =cut
614 sub node_name {
615 my $self = shift;
616 my @v = @{$self->name('scientific', @_) || []};
617 return pop @v;
620 *scientific_name = \&node_name;
622 =head2 common_names
624 Title : common_names
625 Usage : $taxon->common_names($newval)
626 Function: Get/add the other names of this taxon, typically the genbank common
627 name and others, eg. 'Human' and 'man'. common_name() is a synonym
628 of this method.
629 Returns : array of names in list context, one of those names in scalar context
630 Args : on add, new list of names (scalars, optional)
632 =cut
634 sub common_names {
635 my $self = shift;
636 my @v = @{$self->name('common', @_) || []};
637 return ( wantarray ) ? @v : pop @v;
640 *common_name = \&common_names;
642 =head2 division
644 Title : division
645 Usage : $taxon->division($newval)
646 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
647 'Bacteria'.
648 Returns : value of division (a scalar)
649 Args : on set, new value (a scalar or undef, optional)
651 =cut
653 sub division {
654 my $self = shift;
655 my @v = @{$self->name('division',@_) || []};
656 return pop @v;
659 # get a node from the database that is like the supplied node
660 sub _get_similar_taxon_from_db {
661 #*** not really happy with this having to be called so much; there must be
662 # a better way...
663 my ($self, $taxon, $db) = @_;
664 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
665 ($self->id || $self->node_name) || return;
666 $db ||= $self->db_handle || return;
667 if (!blessed($db) || !$db->isa('Bio::DB::Taxonomy')) {
668 $self->throw("DB handle is not a Bio::DB::Taxonomy: got $db in node ".$self->node_name)
670 my $db_taxon = $db->get_taxon(-taxonid => $taxon->id) if $taxon->id;
671 unless ($db_taxon) {
672 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
674 my $own_rank = $taxon->rank || 'no rank';
675 foreach my $try_id (@try_ids) {
676 my $try = $db->get_taxon(-taxonid => $try_id);
677 my $try_rank = $try->rank || 'no rank';
678 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
679 $db_taxon = $try;
680 last;
685 return $db_taxon;
688 # merge data from supplied Taxon into self
689 sub _merge_taxa {
690 my ($self, $taxon) = @_;
691 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
692 return if ($taxon eq $self);
694 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
695 my $own = $self->$attrib();
696 my $his = $taxon->$attrib();
697 if (!$own && $his) {
698 $self->$attrib($his);
702 my $own = $self->rank || 'no rank';
703 my $his = $taxon->rank || 'no rank';
704 if ($own eq 'no rank' && $his ne 'no rank') {
705 $self->rank($his);
708 my %own_cnames = map { $_ => 1 } $self->common_names;
709 my %his_cnames = map { $_ => 1 } $taxon->common_names;
710 foreach (keys %his_cnames) {
711 unless (exists $own_cnames{$_}) {
712 $self->common_names($_);
716 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
719 =head2 remove_Descendent
721 Title : remove_Descendent
722 Usage : $node->remove_Descedent($node_foo);
723 Function: Removes a specific node from being a Descendent of this node
724 Returns : nothing
725 Args : An array of Bio::Node::NodeI objects which have been previously
726 passed to the add_Descendent call of this object.
728 =cut
730 sub remove_Descendent {
731 # need to override this method from Bio::Tree::Node since it casually
732 # throws away nodes if they don't branch
733 my ($self,@nodes) = @_;
734 my $c= 0;
735 foreach my $n ( @nodes ) {
736 if ($self->{'_desc'}->{$n->internal_id}) {
737 $self->{_removing_descendent} = 1;
738 $n->ancestor(undef);
739 $self->{_removing_descendent} = 0;
740 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
741 delete $self->{'_desc'}->{$n->internal_id};
742 $c++;
745 return $c;