2 # BioPerl module for Bio::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Taxon - A node in a represented taxonomy
22 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
23 # but here is how you initialize one
24 my $taxon = Bio::Taxon->new(-name => $name,
29 # Get one from a database
30 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
32 -nodesfile=> '/path/to/nodes.dmp',
33 -namesfile=> '/path/to/names.dmp');
34 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
35 $human = $dbh->get_taxon(-taxonid => '9606');
37 print "id is ", $human->id, "\n"; # 9606
38 print "rank is ", $human->rank, "\n"; # species
39 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
40 print "division is ", $human->division, "\n"; # Primates
42 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
44 # You can quickly make your own lineages with the list database
45 my @ranks = qw(superkingdom class genus species);
46 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
47 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
49 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
50 my @names = $human->common_names; # @names is empty
51 $human->common_names('woman');
52 @names = $human->common_names; # @names contains woman
54 # You can switch to another database when you need more information
55 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
56 $human->db_handle($entrez_dbh);
57 @names = $human->common_names; # @names contains woman, human, man
59 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
60 # methods (and can manually create our own taxa and taxonomy without the use
62 my $homo = $human->ancestor;
64 # Though be careful with each_Descendent - unless you add_Descendent()
65 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
66 # does not ask the database for the answer. You can ask the database yourself
67 # using the same method:
68 ($human) = $homo->db_handle->each_Descendent($homo);
70 # We can also take advantage of Bio::Tree::Tree* methods:
71 # a) some methods are available with just an empty tree object
73 my $tree_functions = Bio::Tree::Tree->new();
74 my @lineage = $tree_functions->get_lineage_nodes($human);
75 my $lineage = $tree_functions->get_lineage_string($human);
76 my $lca = $tree_functions->get_lca($human, $mouse);
78 # b) for other methods, create a tree using your Taxon object
79 my $tree = Bio::Tree::Tree->new(-node => $human);
80 my @taxa = $tree->get_nodes;
81 $homo = $tree->find_node(-rank => 'genus');
83 # Normally you can't get the lca of a list-database derived Taxon and an
84 # entrez or flatfile-derived one because the two different databases might
85 # have different roots and different numbers of ranks between the root and the
86 # taxa of interest. To solve this, make a tree of the Taxon with the more
87 # detailed lineage and splice out all the taxa that won't be in the lineage of
89 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
90 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
91 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
92 $mouse_tree->splice(-keep_rank => \@ranks);
93 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
97 This is the next generation (for Bioperl) of representing Taxonomy
98 information. Previously all information was managed by a single
99 object called Bio::Species. This new implementation allows
100 representation of the intermediate nodes not just the species nodes
101 and can relate their connections.
107 User feedback is an integral part of the evolution of this and other
108 Bioperl modules. Send your comments and suggestions preferably to
109 the Bioperl mailing list. Your participation is much appreciated.
111 bioperl-l@bioperl.org - General discussion
112 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
116 Please direct usage questions or support issues to the mailing list:
118 I<bioperl-l@bioperl.org>
120 rather than to the module maintainer directly. Many experienced and
121 reponsive experts will be able look at the problem and quickly
122 address it. Please include a thorough description of the problem
123 with code and data examples if at all possible.
125 =head2 Reporting Bugs
127 Report bugs to the Bioperl bug tracking system to help us keep track
128 of the bugs and their resolution. Bug reports can be submitted via
131 https://github.com/bioperl/bioperl-live/issues
133 =head1 AUTHOR - Sendu Bala
135 Email bix@sendu.me.uk
139 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
140 Juguang Xiao, juguang@tll.org.sg
141 Gabriel Valiente, valiente@lsi.upc.edu
145 The rest of the documentation details each of the object methods.
146 Internal methods are usually preceded with a _
153 use Scalar
::Util
qw(blessed);
155 use Bio
::DB
::Taxonomy
;
157 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
163 Usage : my $obj = Bio::Taxonomy::Node->new();
164 Function: Builds a new Bio::Taxonomy::Node object
165 Returns : an instance of Bio::Taxonomy::Node
166 Args : -dbh => a reference to a Bio::DB::Taxonomy object
168 -name => a string representing the taxon name
170 -id => human readable id - typically NCBI taxid
171 -ncbi_taxid => same as -id, but explicitly say that it is an
173 -rank => node rank (one of 'species', 'genus', etc)
174 -common_names => array ref of all common names
175 -division => 'Primates', 'Rodents', etc
176 -genetic_code => genetic code table number
177 -mito_genetic_code => mitochondrial genetic code table number
178 -create_date => date created in database
179 -update_date => date last updated in database
180 -pub_date => date published in database
185 my ($class, @args) = @_;
186 my $self = $class->SUPER::new
(@args);
187 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
188 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
189 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
190 NCBI_TAXID COMMON_NAME COMMON_NAMES
191 GENETIC_CODE MITO_GENETIC_CODE
192 CREATE_DATE UPDATE_DATE PUB_DATE
195 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
196 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
198 elsif(!defined $id) {
199 $id = $objid || $ncbitaxid;
201 defined $id && $self->id($id);
202 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
204 defined $rank && $self->rank($rank);
205 defined $name && $self->node_name($name);
209 $self->throw("-common_names takes only an array reference") unless $commonnames
210 && ref($commonnames) eq 'ARRAY';
211 @common_names = @
{$commonnames};
214 my %c_names = map { $_ => 1 } @common_names;
215 unless (exists $c_names{$commonname}) {
216 unshift(@common_names, $commonname);
219 @common_names > 0 && $self->common_names(@common_names);
221 defined $gcode && $self->genetic_code($gcode);
222 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
223 defined $createdate && $self->create_date($createdate);
224 defined $updatedate && $self->update_date($updatedate);
225 defined $pubdate && $self->pub_date($pubdate);
226 defined $div && $self->division($div);
227 defined $dbh && $self->db_handle($dbh);
229 # deprecated and will issue a warning when method called,
230 # eventually to be removed completely as option
231 defined $parent_id && $self->parent_id($parent_id);
233 # some things want to freeze/thaw Bio::Species objects, but
234 # _root_cleanup_methods contains a CODE ref, delete it.
235 delete $self->{_root_cleanup_methods
};
241 =head1 Bio::IdentifiableI interface
243 Also see L<Bio::IdentifiableI>
248 Usage : $taxon->version($newval)
249 Returns : value of version (a scalar)
250 Args : on set, new value (a scalar or undef, optional)
256 return $self->{'version'} = shift if @_;
257 return $self->{'version'};
264 Usage : $taxon->authority($newval)
265 Returns : value of authority (a scalar)
266 Args : on set, new value (a scalar or undef, optional)
272 return $self->{'authority'} = shift if @_;
273 return $self->{'authority'};
280 Usage : $taxon->namespace($newval)
281 Returns : value of namespace (a scalar)
282 Args : on set, new value (a scalar or undef, optional)
288 return $self->{'namespace'} = shift if @_;
289 return $self->{'namespace'};
293 =head1 Bio::Taxonomy::Node implementation
298 Usage : $taxon->db_handle($newval)
299 Function: Get/Set Bio::DB::Taxonomy Handle
300 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
301 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
303 Also see L<Bio::DB::Taxonomy>
312 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
313 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
315 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
316 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
317 $self->_merge_taxa($new_self) if $new_self;
320 # NB: The Bio::DB::Taxonomy modules access this data member directly
321 # to avoid calling this method and going infinite
322 $self->{'db_handle'} = $db;
324 return $self->{'db_handle'};
331 Usage : $taxon->rank($newval)
332 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
333 Returns : value of rank (a scalar)
334 Args : on set, new value (a scalar or undef, optional)
340 return $self->{'rank'} = shift if @_;
341 return $self->{'rank'};
348 Usage : $taxon->id($newval)
349 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
350 ncbi_taxid() are synonyms of this method.
351 Returns : id (a scalar)
352 Args : none to get, OR scalar to set
358 return $self->SUPER::id
(@_);
367 Usage : $taxon->ncbi_taxid($newval)
368 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
369 returns an id when ncbi_taxid has been explictely set with this
371 Returns : id (a scalar)
372 Args : none to get, OR scalar to set
377 my ($self, $id) = @_;
380 $self->{_ncbi_tax_id_provided
} = 1;
381 return $self->SUPER::id
($id);
384 if ($self->{_ncbi_tax_id_provided
}) {
385 return $self->SUPER::id
;
394 Usage : $taxon->parent_id()
395 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
396 parent_taxon_id() is a synonym of this method.
397 Returns : value of parent_id (a scalar)
406 $self->warn("You can no longer set the parent_id - use ancestor() instead");
408 my $ancestor = $self->ancestor() || return;
409 return $ancestor->id;
412 *parent_taxon_id
= \
&parent_id
;
418 Usage : $taxon->genetic_code($newval)
419 Function: Get/set genetic code table
420 Returns : value of genetic_code (a scalar)
421 Args : on set, new value (a scalar or undef, optional)
427 return $self->{'genetic_code'} = shift if @_;
428 return $self->{'genetic_code'};
432 =head2 mitochondrial_genetic_code
434 Title : mitochondrial_genetic_code
435 Usage : $taxon->mitochondrial_genetic_code($newval)
436 Function: Get/set mitochondrial genetic code table
437 Returns : value of mitochondrial_genetic_code (a scalar)
438 Args : on set, new value (a scalar or undef, optional)
442 sub mitochondrial_genetic_code
{
444 return $self->{'mitochondrial_genetic_code'} = shift if @_;
445 return $self->{'mitochondrial_genetic_code'};
452 Usage : $taxon->create_date($newval)
453 Function: Get/Set Date this node was created (in the database)
454 Returns : value of create_date (a scalar)
455 Args : on set, new value (a scalar or undef, optional)
461 return $self->{'create_date'} = shift if @_;
462 return $self->{'create_date'};
469 Usage : $taxon->update_date($newval)
470 Function: Get/Set Date this node was updated (in the database)
471 Returns : value of update_date (a scalar)
472 Args : on set, new value (a scalar or undef, optional)
478 return $self->{'update_date'} = shift if @_;
479 return $self->{'update_date'};
486 Usage : $taxon->pub_date($newval)
487 Function: Get/Set Date this node was published (in the database)
488 Returns : value of pub_date (a scalar)
489 Args : on set, new value (a scalar or undef, optional)
495 return $self->{'pub_date'} = shift if @_;
496 return $self->{'pub_date'};
503 Usage : my $ancestor_taxon = $taxon->ancestor()
504 Function: Retrieve the ancestor taxon. Normally the database is asked what the
507 If you manually set the ancestor (or you make a Bio::Tree::Tree with
508 this object as an argument to new()), the database (if any) will not
509 be used for the purposes of this method.
511 To restore normal database behaviour, call ancestor(undef) (which
512 would remove this object from the tree), or request this taxon again
513 as a new Taxon object from the database.
522 my $ancestor = $self->SUPER::ancestor
(@_);
526 my $dbh = $self->db_handle;
527 #*** could avoid the db lookup if we knew our current id was definitely
528 # information from the db...
529 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
530 return $dbh->ancestor($definitely_from_dbh);
534 =head2 get_Parent_Node
536 Title : get_Parent_Node
537 Function: Synonym of ancestor()
542 sub get_Parent_Node
{
544 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
545 return $self->ancestor(@_);
549 =head2 each_Descendent
551 Title : each_Descendent
552 Usage : my @taxa = $taxon->each_Descendent();
553 Function: Get all the descendents for this Taxon (but not their descendents,
554 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
557 Note that this method never asks the database for the descendents;
558 it will only return objects you have manually set with
559 add_Descendent(), or where this was done for you by making a
560 Bio::Tree::Tree with this object as an argument to new().
562 To get the database descendents use
563 $taxon->db_handle->each_Descendent($taxon).
565 Returns : Array of Bio::Taxon objects
566 Args : optionally, when you have set your own descendents, the string
567 "height", "creation", "alpha", "revalpha", or coderef to be used to
568 sort the order of children nodes.
573 # implemented by Bio::Tree::Node
575 =head2 get_Children_Nodes
577 Title : get_Children_Nodes
578 Function: Synonym of each_Descendent()
583 sub get_Children_Nodes
{
585 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
586 return $self->each_Descendent(@_);
593 Usage: $taxon->name('scientific', 'Homo sapiens');
594 $taxon->name('common', 'human', 'man');
595 my @names = @{$taxon->name('common')};
596 Function: Get/set the names. node_name(), scientific_name() and common_names()
597 are shorthands to name('scientific'), name('scientific') and
598 name('common') respectively.
599 Returns: names (a array reference)
600 Args: Arg1 => the name_class. You can assign any text, but the words
601 'scientific' and 'common' have the special meaning, as
602 scientific name and common name, respectively. 'scientific' and
603 'division' are treated specially, allowing only the first value
604 in the Arg2 list to be set.
605 Arg2 ... => list of names
610 my ($self, $name_class, @names) = @_;
611 $self->throw('No name class specified') unless defined $name_class;
614 if ($name_class =~ /scientific|division/i) {
615 delete $self->{'_names_hash'}->{$name_class};
616 @names = (shift(@names));
618 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
620 return $self->{'_names_hash'}->{$name_class} || return;
627 Usage : $taxon->node_name($newval)
628 Function: Get/set the name of this taxon (node), typically the scientific name
629 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
631 Returns : value of node_name (a scalar)
632 Args : on set, new value (a scalar or undef, optional)
638 my @v = @
{$self->name('scientific', @_) || []};
642 *scientific_name
= \
&node_name
;
648 Usage : $taxon->common_names($newval)
649 Function: Get/add the other names of this taxon, typically the genbank common
650 name and others, eg. 'Human' and 'man'. common_name() is a synonym
652 Returns : array of names in list context, one of those names in scalar context
653 Args : on add, new list of names (scalars, optional)
659 my @v = @
{$self->name('common', @_) || []};
660 return ( wantarray ) ?
@v : pop @v;
663 *common_name
= \
&common_names
;
669 Usage : $taxon->division($newval)
670 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
672 Returns : value of division (a scalar)
673 Args : on set, new value (a scalar or undef, optional)
679 my @v = @
{$self->name('division',@_) || []};
684 # get a node from the database that is like the supplied node
685 sub _get_similar_taxon_from_db
{
686 #*** not really happy with this having to be called so much; there must be
688 my ($self, $taxon, $db) = @_;
689 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
690 ($self->id || $self->node_name) || return;
691 $db ||= $self->db_handle || return;
692 if (!blessed
($db) || !$db->isa('Bio::DB::Taxonomy')) {
693 $self->throw("DB handle is not a Bio::DB::Taxonomy: got $db in node ".$self->node_name)
695 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
697 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
699 my $own_rank = $taxon->rank || 'no rank';
700 foreach my $try_id (@try_ids) {
701 my $try = $db->get_taxon(-taxonid
=> $try_id);
702 my $try_rank = $try->rank || 'no rank';
703 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
714 # merge data from supplied Taxon into self
716 my ($self, $taxon) = @_;
717 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
718 return if ($taxon eq $self);
720 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
721 my $own = $self->$attrib();
722 my $his = $taxon->$attrib();
724 $self->$attrib($his);
728 my $own = $self->rank || 'no rank';
729 my $his = $taxon->rank || 'no rank';
730 if ($own eq 'no rank' && $his ne 'no rank') {
734 my %own_cnames = map { $_ => 1 } $self->common_names;
735 my %his_cnames = map { $_ => 1 } $taxon->common_names;
736 foreach (keys %his_cnames) {
737 unless (exists $own_cnames{$_}) {
738 $self->common_names($_);
742 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
746 =head2 remove_Descendent
748 Title : remove_Descendent
749 Usage : $node->remove_Descedent($node_foo);
750 Function: Removes a specific node from being a Descendent of this node
752 Args : An array of Bio::Node::NodeI objects which have been previously
753 passed to the add_Descendent call of this object.
757 sub remove_Descendent
{
758 # need to override this method from Bio::Tree::Node since it casually
759 # throws away nodes if they don't branch
760 my ($self,@nodes) = @_;
762 foreach my $n ( @nodes ) {
763 if ($self->{'_desc'}->{$n->internal_id}) {
764 $self->{_removing_descendent
} = 1;
766 $self->{_removing_descendent
} = 0;
767 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
768 delete $self->{'_desc'}->{$n->internal_id};