2 # BioPerl module for Bio::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Taxon - A node in a represented taxonomy
22 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
23 # but here is how you initialize one
24 my $taxon = Bio::Taxon->new(-name => $name,
29 # Get one from a database
30 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
32 -nodesfile=> '/path/to/nodes.dmp',
33 -namesfile=> '/path/to/names.dmp');
34 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
35 $human = $dbh->get_taxon(-taxonid => '9606');
37 print "id is ", $human->id, "\n"; # 9606
38 print "rank is ", $human->rank, "\n"; # species
39 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
40 print "division is ", $human->division, "\n"; # Primates
42 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
44 # You can quickly make your own lineages with the list database
45 my @ranks = qw(superkingdom class genus species);
46 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
47 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
49 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
50 my @names = $human->common_names; # @names is empty
51 $human->common_names('woman');
52 @names = $human->common_names; # @names contains woman
54 # You can switch to another database when you need more information
55 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
56 $human->db_handle($entrez_dbh);
57 @names = $human->common_names; # @names contains woman, human, man
59 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
60 # methods (and can manually create our own taxa and taxonomy without the use
62 my $homo = $human->ancestor;
64 # Though be careful with each_Descendent - unless you add_Descendent()
65 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
66 # does not ask the database for the answer. You can ask the database yourself
67 # using the same method:
68 ($human) = $homo->db_handle->each_Descendent($homo);
70 # We can also take advantage of Bio::Tree::Tree* methods:
71 # a) some methods are available with just an empty tree object
73 my $tree_functions = Bio::Tree::Tree->new();
74 my @lineage = $tree_functions->get_lineage_nodes($human);
75 my $lineage = $tree_functions->get_lineage_string($human);
76 my $lca = $tree_functions->get_lca($human, $mouse);
78 # b) for other methods, create a tree using your Taxon object
79 my $tree = Bio::Tree::Tree->new(-node => $human);
80 my @taxa = $tree->get_nodes;
81 $homo = $tree->find_node(-rank => 'genus');
83 # Normally you can't get the lca of a list-database derived Taxon and an
84 # entrez or flatfile-derived one because the two different databases might
85 # have different roots and different numbers of ranks between the root and the
86 # taxa of interest. To solve this, make a tree of the Taxon with the more
87 # detailed lineage and splice out all the taxa that won't be in the lineage of
89 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
90 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
91 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
92 $mouse_tree->splice(-keep_rank => \@ranks);
93 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
97 This is the next generation (for Bioperl) of representing Taxonomy
98 information. Previously all information was managed by a single
99 object called Bio::Species. This new implementation allows
100 representation of the intermediate nodes not just the species nodes
101 and can relate their connections.
107 User feedback is an integral part of the evolution of this and other
108 Bioperl modules. Send your comments and suggestions preferably to
109 the Bioperl mailing list. Your participation is much appreciated.
111 bioperl-l@bioperl.org - General discussion
112 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
116 Please direct usage questions or support issues to the mailing list:
118 I<bioperl-l@bioperl.org>
120 rather than to the module maintainer directly. Many experienced and
121 reponsive experts will be able look at the problem and quickly
122 address it. Please include a thorough description of the problem
123 with code and data examples if at all possible.
125 =head2 Reporting Bugs
127 Report bugs to the Bioperl bug tracking system to help us keep track
128 of the bugs and their resolution. Bug reports can be submitted via
131 https://github.com/bioperl/bioperl-live/issues
133 =head1 AUTHOR - Sendu Bala
135 Email bix@sendu.me.uk
139 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
140 Juguang Xiao, juguang@tll.org.sg
141 Gabriel Valiente, valiente@lsi.upc.edu
145 The rest of the documentation details each of the object methods.
146 Internal methods are usually preceded with a _
153 use Scalar
::Util
qw(blessed);
155 use Bio
::DB
::Taxonomy
;
157 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
163 Usage : my $obj = Bio::Taxonomy::Node->new();
164 Function: Builds a new Bio::Taxonomy::Node object
165 Returns : an instance of Bio::Taxonomy::Node
166 Args : -dbh => a reference to a Bio::DB::Taxonomy object
168 -name => a string representing the taxon name
170 -id => human readable id - typically NCBI taxid
171 -ncbi_taxid => same as -id, but explicitly say that it is an
173 -rank => node rank (one of 'species', 'genus', etc)
174 -common_names => array ref of all common names
175 -division => 'Primates', 'Rodents', etc
176 -genetic_code => genetic code table number
177 -mito_genetic_code => mitochondrial genetic code table number
178 -create_date => date created in database
179 -update_date => date last updated in database
180 -pub_date => date published in database
185 my ($class, @args) = @_;
186 my $self = $class->SUPER::new
(@args);
187 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
188 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
189 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
190 NCBI_TAXID COMMON_NAME COMMON_NAMES
191 GENETIC_CODE MITO_GENETIC_CODE
192 CREATE_DATE UPDATE_DATE PUB_DATE
195 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
196 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
198 elsif(!defined $id) {
199 $id = $objid || $ncbitaxid;
201 defined $id && $self->id($id);
202 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
204 defined $rank && $self->rank($rank);
205 defined $name && $self->node_name($name);
209 $self->throw("-common_names takes only an array reference") unless $commonnames
210 && ref($commonnames) eq 'ARRAY';
211 @common_names = @
{$commonnames};
214 my %c_names = map { $_ => 1 } @common_names;
215 unless (exists $c_names{$commonname}) {
216 unshift(@common_names, $commonname);
219 @common_names > 0 && $self->common_names(@common_names);
221 defined $gcode && $self->genetic_code($gcode);
222 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
223 defined $createdate && $self->create_date($createdate);
224 defined $updatedate && $self->update_date($updatedate);
225 defined $pubdate && $self->pub_date($pubdate);
226 defined $div && $self->division($div);
227 defined $dbh && $self->db_handle($dbh);
229 # Making an administrative decision to override this behavior, particularly
230 # for optimization reasons (if it works to cache it up front, why not?
231 # Please trust your implementations to get it right)
234 # deprecated and will issue a warning when method called,
235 # eventually to be removed completely as option
236 defined $parent_id && $self->parent_id($parent_id);
238 # some things want to freeze/thaw Bio::Species objects, but
239 # _root_cleanup_methods contains a CODE ref, delete it.
240 delete $self->{_root_cleanup_methods
};
246 =head1 Bio::IdentifiableI interface
248 Also see L<Bio::IdentifiableI>
253 Usage : $taxon->version($newval)
254 Returns : value of version (a scalar)
255 Args : on set, new value (a scalar or undef, optional)
261 return $self->{'version'} = shift if @_;
262 return $self->{'version'};
269 Usage : $taxon->authority($newval)
270 Returns : value of authority (a scalar)
271 Args : on set, new value (a scalar or undef, optional)
277 return $self->{'authority'} = shift if @_;
278 return $self->{'authority'};
285 Usage : $taxon->namespace($newval)
286 Returns : value of namespace (a scalar)
287 Args : on set, new value (a scalar or undef, optional)
293 return $self->{'namespace'} = shift if @_;
294 return $self->{'namespace'};
298 =head1 Bio::Taxonomy::Node implementation
303 Usage : $taxon->db_handle($newval)
304 Function: Get/Set Bio::DB::Taxonomy Handle
305 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
306 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
308 Also see L<Bio::DB::Taxonomy>
317 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
318 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
320 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
321 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
322 $self->_merge_taxa($new_self) if $new_self;
325 # NB: The Bio::DB::Taxonomy modules access this data member directly
326 # to avoid calling this method and going infinite
327 $self->{'db_handle'} = $db;
329 return $self->{'db_handle'};
336 Usage : $taxon->rank($newval)
337 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
338 Returns : value of rank (a scalar)
339 Args : on set, new value (a scalar or undef, optional)
345 return $self->{'rank'} = shift if @_;
346 return $self->{'rank'};
353 Usage : $taxon->id($newval)
354 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
355 ncbi_taxid() are synonyms of this method.
356 Returns : id (a scalar)
357 Args : none to get, OR scalar to set
363 return $self->SUPER::id
(@_);
372 Usage : $taxon->ncbi_taxid($newval)
373 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
374 returns an id when ncbi_taxid has been explictely set with this
376 Returns : id (a scalar)
377 Args : none to get, OR scalar to set
382 my ($self, $id) = @_;
385 $self->{_ncbi_tax_id_provided
} = 1;
386 return $self->SUPER::id
($id);
389 if ($self->{_ncbi_tax_id_provided
}) {
390 return $self->SUPER::id
;
399 Usage : $taxon->parent_id()
400 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
401 parent_taxon_id() is a synonym of this method.
402 Returns : value of parent_id (a scalar)
410 $self->{parent_id
} = shift;
412 if (defined $self->{parent_id
}) {
413 return $self->{parent_id
}
415 my $ancestor = $self->ancestor() || return;
416 return $ancestor->id;
419 *parent_taxon_id
= \
&parent_id
;
421 =head2 trusted_parent_id
423 Title : trusted_parent_id
424 Usage : $taxon->trusted_parent_id()
425 Function: If the parent_id is explicitly set, trust it
426 Returns : simple boolean value (whether or not it has been set)
428 Notes : Previously, the parent_id method was to be deprecated in favor of
429 using ancestor(). However this removes one key optimization point,
430 namely when an implementation has direct access to the taxon's
431 parent ID when retrieving the information for the taxon ID. This
432 method is in place so implementations can choose to (1) check whether
433 the parent_id is set and (2) trust that the implementation (whether
434 it is self or another implementation) set the parent_id correctly.
438 sub trusted_parent_id
{
439 return defined $_[0]->{parent_id
};
445 Usage : $taxon->genetic_code($newval)
446 Function: Get/set genetic code table
447 Returns : value of genetic_code (a scalar)
448 Args : on set, new value (a scalar or undef, optional)
454 return $self->{'genetic_code'} = shift if @_;
455 return $self->{'genetic_code'};
459 =head2 mitochondrial_genetic_code
461 Title : mitochondrial_genetic_code
462 Usage : $taxon->mitochondrial_genetic_code($newval)
463 Function: Get/set mitochondrial genetic code table
464 Returns : value of mitochondrial_genetic_code (a scalar)
465 Args : on set, new value (a scalar or undef, optional)
469 sub mitochondrial_genetic_code
{
471 return $self->{'mitochondrial_genetic_code'} = shift if @_;
472 return $self->{'mitochondrial_genetic_code'};
479 Usage : $taxon->create_date($newval)
480 Function: Get/Set Date this node was created (in the database)
481 Returns : value of create_date (a scalar)
482 Args : on set, new value (a scalar or undef, optional)
488 return $self->{'create_date'} = shift if @_;
489 return $self->{'create_date'};
496 Usage : $taxon->update_date($newval)
497 Function: Get/Set Date this node was updated (in the database)
498 Returns : value of update_date (a scalar)
499 Args : on set, new value (a scalar or undef, optional)
505 return $self->{'update_date'} = shift if @_;
506 return $self->{'update_date'};
513 Usage : $taxon->pub_date($newval)
514 Function: Get/Set Date this node was published (in the database)
515 Returns : value of pub_date (a scalar)
516 Args : on set, new value (a scalar or undef, optional)
522 return $self->{'pub_date'} = shift if @_;
523 return $self->{'pub_date'};
530 Usage : my $ancestor_taxon = $taxon->ancestor()
531 Function: Retrieve the ancestor taxon. Normally the database is asked what the
534 If you manually set the ancestor (or you make a Bio::Tree::Tree with
535 this object as an argument to new()), the database (if any) will not
536 be used for the purposes of this method.
538 To restore normal database behaviour, call ancestor(undef) (which
539 would remove this object from the tree), or request this taxon again
540 as a new Taxon object from the database.
549 my $ancestor = $self->SUPER::ancestor
(@_);
553 my $dbh = $self->db_handle;
554 #*** could avoid the db lookup if we knew our current id was definitely
555 # information from the db...
557 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
558 return $dbh->ancestor($definitely_from_dbh);
562 =head2 get_Parent_Node
564 Title : get_Parent_Node
565 Function: Synonym of ancestor()
570 sub get_Parent_Node
{
572 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
573 return $self->ancestor(@_);
577 =head2 each_Descendent
579 Title : each_Descendent
580 Usage : my @taxa = $taxon->each_Descendent();
581 Function: Get all the descendents for this Taxon (but not their descendents,
582 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
585 Note that this method never asks the database for the descendents;
586 it will only return objects you have manually set with
587 add_Descendent(), or where this was done for you by making a
588 Bio::Tree::Tree with this object as an argument to new().
590 To get the database descendents use
591 $taxon->db_handle->each_Descendent($taxon).
593 Returns : Array of Bio::Taxon objects
594 Args : optionally, when you have set your own descendents, the string
595 "height", "creation", "alpha", "revalpha", or coderef to be used to
596 sort the order of children nodes.
601 # implemented by Bio::Tree::Node
603 =head2 get_Children_Nodes
605 Title : get_Children_Nodes
606 Function: Synonym of each_Descendent()
611 sub get_Children_Nodes
{
613 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
614 return $self->each_Descendent(@_);
621 Usage: $taxon->name('scientific', 'Homo sapiens');
622 $taxon->name('common', 'human', 'man');
623 my @names = @{$taxon->name('common')};
624 Function: Get/set the names. node_name(), scientific_name() and common_names()
625 are shorthands to name('scientific'), name('scientific') and
626 name('common') respectively.
627 Returns: names (a array reference)
628 Args: Arg1 => the name_class. You can assign any text, but the words
629 'scientific' and 'common' have the special meaning, as
630 scientific name and common name, respectively. 'scientific' and
631 'division' are treated specially, allowing only the first value
632 in the Arg2 list to be set.
633 Arg2 ... => list of names
638 my ($self, $name_class, @names) = @_;
639 $self->throw('No name class specified') unless defined $name_class;
642 if ($name_class =~ /scientific|division/i) {
643 delete $self->{'_names_hash'}->{$name_class};
644 @names = (shift(@names));
646 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
648 return $self->{'_names_hash'}->{$name_class} || return;
655 Usage : $taxon->node_name($newval)
656 Function: Get/set the name of this taxon (node), typically the scientific name
657 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
659 Returns : value of node_name (a scalar)
660 Args : on set, new value (a scalar or undef, optional)
666 my @v = @
{$self->name('scientific', @_) || []};
670 *scientific_name
= \
&node_name
;
676 Usage : $taxon->common_names($newval)
677 Function: Get/add the other names of this taxon, typically the genbank common
678 name and others, eg. 'Human' and 'man'. common_name() is a synonym
680 Returns : array of names in list context, one of those names in scalar context
681 Args : on add, new list of names (scalars, optional)
687 my @v = @
{$self->name('common', @_) || []};
688 return ( wantarray ) ?
@v : pop @v;
691 *common_name
= \
&common_names
;
697 Usage : $taxon->division($newval)
698 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
700 Returns : value of division (a scalar)
701 Args : on set, new value (a scalar or undef, optional)
707 my @v = @
{$self->name('division',@_) || []};
712 # get a node from the database that is like the supplied node
713 sub _get_similar_taxon_from_db
{
714 #*** not really happy with this having to be called so much; there must be
716 my ($self, $taxon, $db) = @_;
717 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
718 ($self->id || $self->node_name) || return;
719 $db ||= $self->db_handle || return;
720 if (!blessed
($db) || !$db->isa('Bio::DB::Taxonomy')) {
721 $self->throw("DB handle is not a Bio::DB::Taxonomy: got $db in node ".$self->node_name)
723 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
725 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
727 my $own_rank = $taxon->rank || 'no rank';
728 foreach my $try_id (@try_ids) {
729 my $try = $db->get_taxon(-taxonid
=> $try_id);
730 my $try_rank = $try->rank || 'no rank';
731 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
742 # merge data from supplied Taxon into self
744 my ($self, $taxon) = @_;
745 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
746 return if ($taxon eq $self);
748 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
749 my $own = $self->$attrib();
750 my $his = $taxon->$attrib();
752 $self->$attrib($his);
756 my $own = $self->rank || 'no rank';
757 my $his = $taxon->rank || 'no rank';
758 if ($own eq 'no rank' && $his ne 'no rank') {
762 my %own_cnames = map { $_ => 1 } $self->common_names;
763 my %his_cnames = map { $_ => 1 } $taxon->common_names;
764 foreach (keys %his_cnames) {
765 unless (exists $own_cnames{$_}) {
766 $self->common_names($_);
770 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
774 =head2 remove_Descendent
776 Title : remove_Descendent
777 Usage : $node->remove_Descedent($node_foo);
778 Function: Removes a specific node from being a Descendent of this node
780 Args : An array of Bio::Node::NodeI objects which have been previously
781 passed to the add_Descendent call of this object.
785 sub remove_Descendent
{
786 # need to override this method from Bio::Tree::Node since it casually
787 # throws away nodes if they don't branch
788 my ($self,@nodes) = @_;
790 foreach my $n ( @nodes ) {
791 if ($self->{'_desc'}->{$n->internal_id}) {
792 $self->{_removing_descendent
} = 1;
794 $self->{_removing_descendent
} = 0;
795 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
796 delete $self->{'_desc'}->{$n->internal_id};