3 # BioPerl module for Bio::Taxon
5 # Cared for by Sendu Bala <bix@sendu.me.uk>
7 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Taxon - A node in a represented taxonomy
21 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
22 # but here is how you initialize one
23 my $taxon = Bio::Taxon->new(-name => $name,
28 # Get one from a database
29 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
31 -nodesfile=> '/path/to/nodes.dmp',
32 -namesfile=> '/path/to/names.dmp');
33 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
34 $human = $dbh->get_taxon(-taxonid => '9606');
36 print "id is ", $human->id, "\n"; # 9606
37 print "rank is ", $human->rank, "\n"; # species
38 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
39 print "division is ", $human->division, "\n"; # Primates
41 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
43 # You can quickly make your own lineages with the list database
44 my @ranks = qw(superkingdom class genus species);
45 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
46 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
48 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
49 my @names = $human->common_names; # @names is empty
50 $human->common_names('woman');
51 @names = $human->common_names; # @names contains woman
53 # You can switch to another database when you need more information
54 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
55 $human->db_handle($entrez_dbh);
56 @names = $human->common_names; # @names contains woman, human, man
58 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
59 # methods (and can manually create our own taxa and taxonomy without the use
61 my $homo = $human->ancestor;
63 # Though be careful with each_Descendent - unless you add_Descendent()
64 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
65 # does not ask the database for the answer. You can ask the database yourself
66 # using the same method:
67 ($human) = $homo->db_handle->each_Descendent($homo);
69 # We can also take advantage of Bio::Tree::Tree* methods:
70 # a) some methods are available with just an empty tree object
72 my $tree_functions = Bio::Tree::Tree->new();
73 my @lineage = $tree_functions->get_lineage_nodes($human);
74 my $lca = $tree_functions->get_lca($human, $mouse);
76 # b) for other methods, create a tree using your Taxon object
77 my $tree = Bio::Tree::Tree->new(-node => $human);
78 my @taxa = $tree->get_nodes;
79 $homo = $tree->find_node(-rank => 'genus');
81 # Normally you can't get the lca of a list-database derived Taxon and an
82 # entrez or flatfile-derived one because the two different databases might
83 # have different roots and different numbers of ranks between the root and the
84 # taxa of interest. To solve this, make a tree of the Taxon with the more
85 # detailed lineage and splice out all the taxa that won't be in the lineage of
87 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
88 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
89 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
90 $mouse_tree->splice(-keep_rank => \@ranks);
91 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
95 This is the next generation (for Bioperl) of representing Taxonomy
96 information. Previously all information was managed by a single
97 object called Bio::Species. This new implementation allows
98 representation of the intermediate nodes not just the species nodes
99 and can relate their connections.
105 User feedback is an integral part of the evolution of this and other
106 Bioperl modules. Send your comments and suggestions preferably to
107 the Bioperl mailing list. Your participation is much appreciated.
109 bioperl-l@bioperl.org - General discussion
110 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
112 =head2 Reporting Bugs
114 Report bugs to the Bioperl bug tracking system to help us keep track
115 of the bugs and their resolution. Bug reports can be submitted via
118 http://bugzilla.open-bio.org/
120 =head1 AUTHOR - Sendu Bala
122 Email bix@sendu.me.uk
126 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
127 Juguang Xiao, juguang@tll.org.sg
128 Gabriel Valiente, valiente@lsi.upc.edu
132 The rest of the documentation details each of the object methods.
133 Internal methods are usually preceded with a _
140 use Bio
::DB
::Taxonomy
;
142 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
147 Usage : my $obj = Bio::Taxonomy::Node->new();
148 Function: Builds a new Bio::Taxonomy::Node object
149 Returns : an instance of Bio::Taxonomy::Node
150 Args : -dbh => a reference to a Bio::DB::Taxonomy object
152 -name => a string representing the taxon name
154 -id => human readable id - typically NCBI taxid
155 -ncbi_taxid => same as -id, but explicitely say that it is an
157 -rank => node rank (one of 'species', 'genus', etc)
158 -common_names => array ref of all common names
159 -division => 'Primates', 'Rodents', etc
160 -genetic_code => genetic code table number
161 -mito_genetic_code => mitochondrial genetic code table number
162 -create_date => date created in database
163 -update_date => date last updated in database
164 -pub_date => date published in database
169 my ($class, @args) = @_;
170 my $self = $class->SUPER::new
(@args);
171 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
172 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
173 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
174 NCBI_TAXID COMMON_NAME COMMON_NAMES
175 GENETIC_CODE MITO_GENETIC_CODE
176 CREATE_DATE UPDATE_DATE PUB_DATE
179 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
180 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
182 elsif(!defined $id) {
183 $id = $objid || $ncbitaxid;
185 defined $id && $self->id($id);
186 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
188 defined $rank && $self->rank($rank);
189 defined $name && $self->node_name($name);
193 $self->throw("-common_names takes only an array reference") unless $commonnames
194 && ref($commonnames) eq 'ARRAY';
195 @common_names = @
{$commonnames};
198 my %c_names = map { $_ => 1 } @common_names;
199 unless (exists $c_names{$commonname}) {
200 unshift(@common_names, $commonname);
203 @common_names > 0 && $self->common_names(@common_names);
205 defined $gcode && $self->genetic_code($gcode);
206 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
207 defined $createdate && $self->create_date($createdate);
208 defined $updatedate && $self->update_date($updatedate);
209 defined $pubdate && $self->pub_date($pubdate);
210 defined $div && $self->division($div);
211 defined $dbh && $self->db_handle($dbh);
213 # deprecated and will issue a warning when method called,
214 # eventually to be removed completely as option
215 defined $parent_id && $self->parent_id($parent_id);
217 # some things want to freeze/thaw Bio::Species objects, but
218 # _root_cleanup_methods contains a CODE ref, delete it.
219 delete $self->{_root_cleanup_methods
};
224 =head1 Bio::IdentifiableI interface
226 Also see L<Bio::IdentifiableI>
231 Usage : $taxon->version($newval)
232 Returns : value of version (a scalar)
233 Args : on set, new value (a scalar or undef, optional)
239 return $self->{'version'} = shift if @_;
240 return $self->{'version'};
246 Usage : $taxon->authority($newval)
247 Returns : value of authority (a scalar)
248 Args : on set, new value (a scalar or undef, optional)
254 return $self->{'authority'} = shift if @_;
255 return $self->{'authority'};
261 Usage : $taxon->namespace($newval)
262 Returns : value of namespace (a scalar)
263 Args : on set, new value (a scalar or undef, optional)
269 return $self->{'namespace'} = shift if @_;
270 return $self->{'namespace'};
273 =head1 Bio::Taxonomy::Node implementation
278 Usage : $taxon->db_handle($newval)
279 Function: Get/Set Bio::DB::Taxonomy Handle
280 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
281 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
283 Also see L<Bio::DB::Taxonomy>
292 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
293 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
295 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
296 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
297 $self->_merge_taxa($new_self) if $new_self;
300 # NB: The Bio::DB::Taxonomy modules access this data member directly
301 # to avoid calling this method and going infinite
302 $self->{'db_handle'} = $db;
304 return $self->{'db_handle'};
310 Usage : $taxon->rank($newval)
311 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
312 Returns : value of rank (a scalar)
313 Args : on set, new value (a scalar or undef, optional)
319 return $self->{'rank'} = shift if @_;
320 return $self->{'rank'};
326 Usage : $taxon->id($newval)
327 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
328 ncbi_taxid() are synonyms of this method.
329 Returns : id (a scalar)
330 Args : none to get, OR scalar to set
336 return $self->SUPER::id
(@_);
344 Usage : $taxon->ncbi_taxid($newval)
345 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
346 returns an id when ncbi_taxid has been explictely set with this
348 Returns : id (a scalar)
349 Args : none to get, OR scalar to set
354 my ($self, $id) = @_;
357 $self->{_ncbi_tax_id_provided
} = 1;
358 return $self->SUPER::id
($id);
361 if ($self->{_ncbi_tax_id_provided
}) {
362 return $self->SUPER::id
;
370 Usage : $taxon->parent_id()
371 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
372 parent_taxon_id() is a synonym of this method.
373 Returns : value of parent_id (a scalar)
382 $self->warn("You can no longer set the parent_id - use ancestor() instead");
384 my $ancestor = $self->ancestor() || return;
385 return $ancestor->id;
388 *parent_taxon_id
= \
&parent_id
;
393 Usage : $taxon->genetic_code($newval)
394 Function: Get/set genetic code table
395 Returns : value of genetic_code (a scalar)
396 Args : on set, new value (a scalar or undef, optional)
402 return $self->{'genetic_code'} = shift if @_;
403 return $self->{'genetic_code'};
406 =head2 mitochondrial_genetic_code
408 Title : mitochondrial_genetic_code
409 Usage : $taxon->mitochondrial_genetic_code($newval)
410 Function: Get/set mitochondrial genetic code table
411 Returns : value of mitochondrial_genetic_code (a scalar)
412 Args : on set, new value (a scalar or undef, optional)
416 sub mitochondrial_genetic_code
{
418 return $self->{'mitochondrial_genetic_code'} = shift if @_;
419 return $self->{'mitochondrial_genetic_code'};
425 Usage : $taxon->create_date($newval)
426 Function: Get/Set Date this node was created (in the database)
427 Returns : value of create_date (a scalar)
428 Args : on set, new value (a scalar or undef, optional)
434 return $self->{'create_date'} = shift if @_;
435 return $self->{'create_date'};
441 Usage : $taxon->update_date($newval)
442 Function: Get/Set Date this node was updated (in the database)
443 Returns : value of update_date (a scalar)
444 Args : on set, new value (a scalar or undef, optional)
450 return $self->{'update_date'} = shift if @_;
451 return $self->{'update_date'};
457 Usage : $taxon->pub_date($newval)
458 Function: Get/Set Date this node was published (in the database)
459 Returns : value of pub_date (a scalar)
460 Args : on set, new value (a scalar or undef, optional)
466 return $self->{'pub_date'} = shift if @_;
467 return $self->{'pub_date'};
473 Usage : my $ancestor_taxon = $taxon->ancestor()
474 Function: Retrieve the ancestor taxon. Normally the database is asked what the
477 If you manually set the ancestor (or you make a Bio::Tree::Tree with
478 this object as an argument to new()), the database (if any) will not
479 be used for the purposes of this method.
481 To restore normal database behaviour, call ancestor(undef) (which
482 would remove this object from the tree), or request this taxon again
483 as a new Taxon object from the database.
492 my $ancestor = $self->SUPER::ancestor
(@_);
493 my $dbh = $self->db_handle || return $ancestor;
499 #*** could avoid the db lookup if we knew our current id was definitely
500 # information from the db...
501 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
502 return $dbh->ancestor($definitely_from_dbh);
506 =head2 get_Parent_Node
508 Title : get_Parent_Node
509 Function: Synonym of ancestor()
514 sub get_Parent_Node
{
516 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
517 return $self->ancestor(@_);
520 =head2 each_Descendent
522 Title : each_Descendent
523 Usage : my @taxa = $taxon->each_Descendent();
524 Function: Get all the descendents for this Taxon (but not their descendents,
525 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
528 Note that this method never asks the database for the descendents;
529 it will only return objects you have manually set with
530 add_Descendent(), or where this was done for you by making a
531 Bio::Tree::Tree with this object as an argument to new().
533 To get the database descendents use
534 $taxon->db_handle->each_Descendent($taxon).
536 Returns : Array of Bio::Taxon objects
537 Args : optionally, when you have set your own descendents, the string
538 "height", "creation", "alpha", "revalpha", or coderef to be used to
539 sort the order of children nodes.
543 # implemented by Bio::Tree::Node
545 =head2 get_Children_Nodes
547 Title : get_Children_Nodes
548 Function: Synonym of each_Descendent()
553 sub get_Children_Nodes
{
555 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
556 return $self->each_Descendent(@_);
562 Usage: $taxon->name('scientific', 'Homo sapiens');
563 $taxon->name('common', 'human', 'man');
564 my @names = @{$taxon->name('common')};
565 Function: Get/set the names. node_name(), scientific_name() and common_names()
566 are shorthands to name('scientific'), name('scientific') and
567 name('common') respectively.
568 Returns: names (a array reference)
569 Args: Arg1 => the name_class. You can assign any text, but the words
570 'scientific' and 'common' have the special meaning, as
571 scientific name and common name, respectively. 'scientific' and
572 'division' are treated specially, allowing only the first value
573 in the Arg2 list to be set.
574 Arg2 .. => list of names
579 my ($self, $name_class, @names) = @_;
580 $self->throw('No name class specified') unless defined $name_class;
583 if ($name_class =~ /scientific|division/i) {
584 delete $self->{'_names_hash'}->{$name_class};
585 @names = (shift(@names));
587 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
589 return $self->{'_names_hash'}->{$name_class} || return;
595 Usage : $taxon->node_name($newval)
596 Function: Get/set the name of this taxon (node), typically the scientific name
597 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
599 Returns : value of node_name (a scalar)
600 Args : on set, new value (a scalar or undef, optional)
606 my @v = @
{$self->name('scientific', @_) || []};
610 *scientific_name
= \
&node_name
;
615 Usage : $taxon->common_names($newval)
616 Function: Get/add the other names of this taxon, typically the genbank common
617 name and others, eg. 'Human' and 'man'. common_name() is a synonym
619 Returns : array of names in list context, one of those names in scalar context
620 Args : on add, new list of names (scalars, optional)
626 my @v = @
{$self->name('common', @_) || []};
627 return ( wantarray ) ?
@v : pop @v;
630 *common_name
= \
&common_names
;
635 Usage : $taxon->division($newval)
636 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
638 Returns : value of division (a scalar)
639 Args : on set, new value (a scalar or undef, optional)
645 my @v = @
{$self->name('division',@_) || []};
649 # get a node from the database that is like the supplied node
650 sub _get_similar_taxon_from_db
{
651 #*** not really happy with this having to be called so much; there must be
653 my ($self, $taxon, $db) = @_;
654 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
655 ($self->id || $self->node_name) || return;
656 $db ||= $self->db_handle || return;
658 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
660 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
662 my $own_rank = $taxon->rank || 'no rank';
663 foreach my $try_id (@try_ids) {
664 my $try = $db->get_taxon(-taxonid
=> $try_id);
665 my $try_rank = $try->rank || 'no rank';
666 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
676 # merge data from supplied Taxon into self
678 my ($self, $taxon) = @_;
679 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
680 return if ($taxon eq $self);
682 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
683 my $own = $self->$attrib();
684 my $his = $taxon->$attrib();
686 $self->$attrib($his);
690 my $own = $self->rank || 'no rank';
691 my $his = $taxon->rank || 'no rank';
692 if ($own eq 'no rank' && $his ne 'no rank') {
696 my %own_cnames = map { $_ => 1 } $self->common_names;
697 my %his_cnames = map { $_ => 1 } $taxon->common_names;
698 foreach (keys %his_cnames) {
699 unless (exists $own_cnames{$_}) {
700 $self->common_names($_);
704 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
707 =head2 remove_Descendent
709 Title : remove_Descendent
710 Usage : $node->remove_Descedent($node_foo);
711 Function: Removes a specific node from being a Descendent of this node
713 Args : An array of Bio::Node::NodeI objects which have been previously
714 passed to the add_Descendent call of this object.
718 sub remove_Descendent
{
719 # need to override this method from Bio::Tree::Node since it casually
720 # throws away nodes if they don't branch
721 my ($self,@nodes) = @_;
723 foreach my $n ( @nodes ) {
724 if ($self->{'_desc'}->{$n->internal_id}) {
725 $self->{_removing_descendent
} = 1;
727 $self->{_removing_descendent
} = 0;
728 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
729 delete $self->{'_desc'}->{$n->internal_id};