2 # BioPerl module for Bio::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Taxon - A node in a represented taxonomy
22 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
23 # but here is how you initialize one
24 my $taxon = Bio::Taxon->new(-name => $name,
29 # Get one from a database
30 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
32 -nodesfile=> '/path/to/nodes.dmp',
33 -namesfile=> '/path/to/names.dmp');
34 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
35 $human = $dbh->get_taxon(-taxonid => '9606');
37 print "id is ", $human->id, "\n"; # 9606
38 print "rank is ", $human->rank, "\n"; # species
39 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
40 print "division is ", $human->division, "\n"; # Primates
42 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
44 # You can quickly make your own lineages with the list database
45 my @ranks = qw(superkingdom class genus species);
46 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
47 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
49 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
50 my @names = $human->common_names; # @names is empty
51 $human->common_names('woman');
52 @names = $human->common_names; # @names contains woman
54 # You can switch to another database when you need more information
55 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
56 $human->db_handle($entrez_dbh);
57 @names = $human->common_names; # @names contains woman, human, man
59 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
60 # methods (and can manually create our own taxa and taxonomy without the use
62 my $homo = $human->ancestor;
64 # Though be careful with each_Descendent - unless you add_Descendent()
65 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
66 # does not ask the database for the answer. You can ask the database yourself
67 # using the same method:
68 ($human) = $homo->db_handle->each_Descendent($homo);
70 # We can also take advantage of Bio::Tree::Tree* methods:
71 # a) some methods are available with just an empty tree object
73 my $tree_functions = Bio::Tree::Tree->new();
74 my @lineage = $tree_functions->get_lineage_nodes($human);
75 my $lineage = $tree_functions->get_lineage_string($human);
76 my $lca = $tree_functions->get_lca($human, $mouse);
78 # b) for other methods, create a tree using your Taxon object
79 my $tree = Bio::Tree::Tree->new(-node => $human);
80 my @taxa = $tree->get_nodes;
81 $homo = $tree->find_node(-rank => 'genus');
83 # Normally you can't get the lca of a list-database derived Taxon and an
84 # entrez or flatfile-derived one because the two different databases might
85 # have different roots and different numbers of ranks between the root and the
86 # taxa of interest. To solve this, make a tree of the Taxon with the more
87 # detailed lineage and splice out all the taxa that won't be in the lineage of
89 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
90 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
91 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
92 $mouse_tree->splice(-keep_rank => \@ranks);
93 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
97 This is the next generation (for Bioperl) of representing Taxonomy
98 information. Previously all information was managed by a single
99 object called Bio::Species. This new implementation allows
100 representation of the intermediate nodes not just the species nodes
101 and can relate their connections.
107 User feedback is an integral part of the evolution of this and other
108 Bioperl modules. Send your comments and suggestions preferably to
109 the Bioperl mailing list. Your participation is much appreciated.
111 bioperl-l@bioperl.org - General discussion
112 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
116 Please direct usage questions or support issues to the mailing list:
118 I<bioperl-l@bioperl.org>
120 rather than to the module maintainer directly. Many experienced and
121 responsive experts will be able to look at the problem and quickly
122 address it. Please include a thorough description of the problem
123 with code and data examples if at all possible.
125 =head2 Reporting Bugs
127 Report bugs to the Bioperl bug tracking system to help us keep track
128 of the bugs and their resolution. Bug reports can be submitted via
131 https://github.com/bioperl/bioperl-live/issues
133 =head1 AUTHOR - Sendu Bala
135 Email bix@sendu.me.uk
139 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
140 Juguang Xiao, juguang@tll.org.sg
141 Gabriel Valiente, valiente@lsi.upc.edu
145 The rest of the documentation details each of the object methods.
146 Internal methods are usually preceded with a _
153 use Scalar
::Util
qw(blessed);
155 use Bio
::DB
::Taxonomy
;
157 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
163 Usage : my $obj = Bio::Taxon->new();
164 Function: Builds a new Bio::Taxon object
165 Returns : an instance of Bio::Taxon
166 Args : -dbh => a reference to a Bio::DB::Taxonomy object
168 -name => a string representing the taxon name
170 -id => human readable id - typically NCBI taxid
171 -ncbi_taxid => same as -id, but explicitly say that it is an
173 -rank => node rank (one of 'species', 'genus', etc)
174 -common_names => array ref of all common names
175 -division => 'Primates', 'Rodents', etc
176 -genetic_code => genetic code table number
177 -mito_genetic_code => mitochondrial genetic code table number
178 -create_date => date created in database
179 -update_date => date last updated in database
180 -pub_date => date published in database
185 my ($class, @args) = @_;
186 my $self = $class->SUPER::new
(@args);
187 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
188 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
189 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
190 NCBI_TAXID COMMON_NAME COMMON_NAMES
191 GENETIC_CODE MITO_GENETIC_CODE
192 CREATE_DATE UPDATE_DATE PUB_DATE
195 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
196 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
198 elsif(!defined $id) {
199 $id = $objid || $ncbitaxid;
201 defined $id && $self->id($id);
202 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
204 defined $rank && $self->rank($rank);
205 defined $name && $self->node_name($name);
209 $self->throw("-common_names takes only an array reference") unless $commonnames
210 && ref($commonnames) eq 'ARRAY';
211 @common_names = @
{$commonnames};
214 my %c_names = map { $_ => 1 } @common_names;
215 unless (exists $c_names{$commonname}) {
216 unshift(@common_names, $commonname);
219 @common_names > 0 && $self->common_names(@common_names);
221 defined $gcode && $self->genetic_code($gcode);
222 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
223 defined $createdate && $self->create_date($createdate);
224 defined $updatedate && $self->update_date($updatedate);
225 defined $pubdate && $self->pub_date($pubdate);
226 defined $div && $self->division($div);
227 defined $dbh && $self->db_handle($dbh);
229 # Making an administrative decision to override this behavior, particularly
230 # for optimization reasons (if it works to cache it up front, why not?
231 # Please trust your implementations to get it right)
234 # deprecated and will issue a warning when method called,
235 # eventually to be removed completely as option
236 defined $parent_id && $self->parent_id($parent_id);
238 # some things want to freeze/thaw Bio::Species objects, but
239 # _root_cleanup_methods contains a CODE ref, delete it.
240 delete $self->{_root_cleanup_methods
};
246 =head1 Bio::IdentifiableI interface
248 Also see L<Bio::IdentifiableI>
253 Usage : $taxon->version($newval)
254 Returns : value of version (a scalar)
255 Args : on set, new value (a scalar or undef, optional)
261 return $self->{'version'} = shift if @_;
262 return $self->{'version'};
269 Usage : $taxon->authority($newval)
270 Returns : value of authority (a scalar)
271 Args : on set, new value (a scalar or undef, optional)
277 return $self->{'authority'} = shift if @_;
278 return $self->{'authority'};
285 Usage : $taxon->namespace($newval)
286 Returns : value of namespace (a scalar)
287 Args : on set, new value (a scalar or undef, optional)
293 return $self->{'namespace'} = shift if @_;
294 return $self->{'namespace'};
298 =head1 Bio::Taxonomy::Node implementation
303 Usage : $taxon->db_handle($newval)
304 Function: Get/Set Bio::DB::Taxonomy Handle
305 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
306 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
308 Also see L<Bio::DB::Taxonomy>
317 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
318 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
320 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
321 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
322 $self->_merge_taxa($new_self) if $new_self;
325 # NB: The Bio::DB::Taxonomy modules access this data member directly
326 # to avoid calling this method and going infinite
327 $self->{'db_handle'} = $db;
329 return $self->{'db_handle'};
336 Usage : $taxon->rank($newval)
337 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
338 Returns : value of rank (a scalar)
339 Args : on set, new value (a scalar or undef, optional)
345 return $self->{'rank'} = shift if @_;
346 return $self->{'rank'};
353 Usage : $taxon->id($newval)
354 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
355 ncbi_taxid() are synonyms of this method.
356 Returns : id (a scalar)
357 Args : none to get, OR scalar to set
363 return $self->SUPER::id
(@_);
372 Usage : $taxon->ncbi_taxid($newval)
373 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
374 returns an id when ncbi_taxid has been explicitly set with this
376 Returns : id (a scalar)
377 Args : none to get, OR scalar to set
382 my ($self, $id) = @_;
385 $self->{_ncbi_tax_id_provided
} = 1;
386 return $self->SUPER::id
($id);
389 if ($self->{_ncbi_tax_id_provided
}) {
390 return $self->SUPER::id
;
399 Usage : $taxon->parent_id()
400 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
401 parent_taxon_id() is a synonym of this method.
402 Returns : value of parent_id (a scalar)
410 $self->{parent_id
} = shift;
412 if (defined $self->{parent_id
}) {
413 return $self->{parent_id
}
415 my $ancestor = $self->ancestor() || return;
416 return $ancestor->id;
419 *parent_taxon_id
= \
&parent_id
;
421 =head2 trusted_parent_id
423 Title : trusted_parent_id
424 Usage : $taxon->trusted_parent_id()
425 Function: If the parent_id is explicitly set, trust it
426 Returns : simple boolean value (whether or not it has been set)
431 sub trusted_parent_id
{
432 return defined $_[0]->{parent_id
};
438 Usage : $taxon->genetic_code($newval)
439 Function: Get/set genetic code table
440 Returns : value of genetic_code (a scalar)
441 Args : on set, new value (a scalar or undef, optional)
447 return $self->{'genetic_code'} = shift if @_;
448 return $self->{'genetic_code'};
452 =head2 mitochondrial_genetic_code
454 Title : mitochondrial_genetic_code
455 Usage : $taxon->mitochondrial_genetic_code($newval)
456 Function: Get/set mitochondrial genetic code table
457 Returns : value of mitochondrial_genetic_code (a scalar)
458 Args : on set, new value (a scalar or undef, optional)
462 sub mitochondrial_genetic_code
{
464 return $self->{'mitochondrial_genetic_code'} = shift if @_;
465 return $self->{'mitochondrial_genetic_code'};
472 Usage : $taxon->create_date($newval)
473 Function: Get/Set Date this node was created (in the database)
474 Returns : value of create_date (a scalar)
475 Args : on set, new value (a scalar or undef, optional)
481 return $self->{'create_date'} = shift if @_;
482 return $self->{'create_date'};
489 Usage : $taxon->update_date($newval)
490 Function: Get/Set Date this node was updated (in the database)
491 Returns : value of update_date (a scalar)
492 Args : on set, new value (a scalar or undef, optional)
498 return $self->{'update_date'} = shift if @_;
499 return $self->{'update_date'};
506 Usage : $taxon->pub_date($newval)
507 Function: Get/Set Date this node was published (in the database)
508 Returns : value of pub_date (a scalar)
509 Args : on set, new value (a scalar or undef, optional)
515 return $self->{'pub_date'} = shift if @_;
516 return $self->{'pub_date'};
523 Usage : my $ancestor_taxon = $taxon->ancestor()
524 Function: Retrieve the ancestor taxon. Normally the database is asked what the
527 If you manually set the ancestor (or you make a Bio::Tree::Tree with
528 this object as an argument to new()), the database (if any) will not
529 be used for the purposes of this method.
531 To restore normal database behaviour, call ancestor(undef) (which
532 would remove this object from the tree), or request this taxon again
533 as a new Taxon object from the database.
542 my $ancestor = $self->SUPER::ancestor
(@_);
546 my $dbh = $self->db_handle;
547 #*** could avoid the db lookup if we knew our current id was definitely
548 # information from the db...
550 # TODO: you must trust your implementation to get it right.
551 # If there is a parent_id set, trust it. If not, fall back to calling this
553 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
554 return $dbh->ancestor($definitely_from_dbh);
558 =head2 get_Parent_Node
560 Title : get_Parent_Node
561 Function: Synonym of ancestor()
566 sub get_Parent_Node
{
568 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
569 return $self->ancestor(@_);
573 =head2 each_Descendent
575 Title : each_Descendent
576 Usage : my @taxa = $taxon->each_Descendent();
577 Function: Get all the descendents for this Taxon (but not their descendents,
578 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
581 Note that this method never asks the database for the descendents;
582 it will only return objects you have manually set with
583 add_Descendent(), or where this was done for you by making a
584 Bio::Tree::Tree with this object as an argument to new().
586 To get the database descendents use
587 $taxon->db_handle->each_Descendent($taxon).
589 Returns : Array of Bio::Taxon objects
590 Args : optionally, when you have set your own descendents, the string
591 "height", "creation", "alpha", "revalpha", or coderef to be used to
592 sort the order of children nodes.
597 # implemented by Bio::Tree::Node
599 =head2 get_Children_Nodes
601 Title : get_Children_Nodes
602 Function: Synonym of each_Descendent()
607 sub get_Children_Nodes
{
609 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
610 return $self->each_Descendent(@_);
617 Usage: $taxon->name('scientific', 'Homo sapiens');
618 $taxon->name('common', 'human', 'man');
619 my @names = @{$taxon->name('common')};
620 Function: Get/set the names. node_name(), scientific_name() and common_names()
621 are shorthands to name('scientific'), name('scientific') and
622 name('common') respectively.
623 Returns: names (an array reference)
624 Args: Arg1 => the name_class. You can assign any text, but the words
625 'scientific' and 'common' have the special meaning, as
626 scientific name and common name, respectively. 'scientific' and
627 'division' are treated specially, allowing only the first value
628 in the Arg2 list to be set.
629 Arg2 ... => list of names
634 my ($self, $name_class, @names) = @_;
635 $self->throw('No name class specified') unless defined $name_class;
638 if ($name_class =~ /scientific|division/i) {
639 delete $self->{'_names_hash'}->{$name_class};
640 @names = (shift(@names));
642 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
644 return $self->{'_names_hash'}->{$name_class} || return;
651 Usage : $taxon->node_name($newval)
652 Function: Get/set the name of this taxon (node), typically the scientific name
653 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
655 Returns : value of node_name (a scalar)
656 Args : on set, new value (a scalar or undef, optional)
662 my @v = @
{$self->name('scientific', @_) || []};
666 *scientific_name
= \
&node_name
;
672 Usage : $taxon->common_names($newval)
673 Function: Get/add the other names of this taxon, typically the genbank common
674 name and others, eg. 'Human' and 'man'. common_name() is a synonym
676 Returns : array of names in list context, one of those names in scalar context
677 Args : on add, new list of names (scalars, optional)
683 my @v = @
{$self->name('common', @_) || []};
684 return ( wantarray ) ?
@v : pop @v;
687 *common_name
= \
&common_names
;
693 Usage : $taxon->division($newval)
694 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
696 Returns : value of division (a scalar)
697 Args : on set, new value (a scalar or undef, optional)
703 my @v = @
{$self->name('division',@_) || []};
708 # get a node from the database that is like the supplied node
709 sub _get_similar_taxon_from_db
{
710 #*** not really happy with this having to be called so much; there must be
712 my ($self, $taxon, $db) = @_;
713 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
714 ($self->id || $self->node_name) || return;
715 $db ||= $self->db_handle || return;
716 if (!blessed
($db) || !$db->isa('Bio::DB::Taxonomy')) {
717 $self->throw("DB handle is not a Bio::DB::Taxonomy: got $db in node ".$self->node_name)
719 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
721 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
723 my $own_rank = $taxon->rank || 'no rank';
724 foreach my $try_id (@try_ids) {
725 my $try = $db->get_taxon(-taxonid
=> $try_id);
726 my $try_rank = $try->rank || 'no rank';
727 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
738 # merge data from supplied Taxon into self
740 my ($self, $taxon) = @_;
741 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
742 return if ($taxon eq $self);
744 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
745 my $own = $self->$attrib();
746 my $his = $taxon->$attrib();
748 $self->$attrib($his);
752 my $own = $self->rank || 'no rank';
753 my $his = $taxon->rank || 'no rank';
754 if ($own eq 'no rank' && $his ne 'no rank') {
758 my %own_cnames = map { $_ => 1 } $self->common_names;
759 my %his_cnames = map { $_ => 1 } $taxon->common_names;
760 foreach (keys %his_cnames) {
761 unless (exists $own_cnames{$_}) {
762 $self->common_names($_);
766 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
770 =head2 remove_Descendent
772 Title : remove_Descendent
773 Usage : $node->remove_Descendent($node_foo);
774 Function: Removes a specific node from being a Descendent of this node
776 Args : An array of Bio::Node::NodeI objects which have been previously
777 passed to the add_Descendent call of this object.
781 sub remove_Descendent
{
782 # need to override this method from Bio::Tree::Node since it casually
783 # throws away nodes if they don't branch
784 my ($self,@nodes) = @_;
786 foreach my $n ( @nodes ) {
787 if ($self->{'_desc'}->{$n->internal_id}) {
788 $self->{_removing_descendent
} = 1;
790 $self->{_removing_descendent
} = 0;
791 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
792 delete $self->{'_desc'}->{$n->internal_id};