3 # BioPerl module for Bio::Taxon
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
9 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Taxon - A node in a represented taxonomy
23 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
24 # but here is how you initialize one
25 my $taxon = Bio::Taxon->new(-name => $name,
30 # Get one from a database
31 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
33 -nodesfile=> '/path/to/nodes.dmp',
34 -namesfile=> '/path/to/names.dmp');
35 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
36 $human = $dbh->get_taxon(-taxonid => '9606');
38 print "id is ", $human->id, "\n"; # 9606
39 print "rank is ", $human->rank, "\n"; # species
40 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
41 print "division is ", $human->division, "\n"; # Primates
43 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
45 # You can quickly make your own lineages with the list database
46 my @ranks = qw(superkingdom class genus species);
47 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
48 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
50 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
51 my @names = $human->common_names; # @names is empty
52 $human->common_names('woman');
53 @names = $human->common_names; # @names contains woman
55 # You can switch to another database when you need more information
56 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
57 $human->db_handle($entrez_dbh);
58 @names = $human->common_names; # @names contains woman, human, man
60 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
61 # methods (and can manually create our own taxa and taxonomy without the use
63 my $homo = $human->ancestor;
65 # Though be careful with each_Descendent - unless you add_Descendent()
66 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
67 # does not ask the database for the answer. You can ask the database yourself
68 # using the same method:
69 ($human) = $homo->db_handle->each_Descendent($homo);
71 # We can also take advantage of Bio::Tree::Tree* methods:
72 # a) some methods are available with just an empty tree object
74 my $tree_functions = Bio::Tree::Tree->new();
75 my @lineage = $tree_functions->get_lineage_nodes($human);
76 my $lca = $tree_functions->get_lca($human, $mouse);
78 # b) for other methods, create a tree using your Taxon object
79 my $tree = Bio::Tree::Tree->new(-node => $human);
80 my @taxa = $tree->get_nodes;
81 $homo = $tree->find_node(-rank => 'genus');
83 # Normally you can't get the lca of a list-database derived Taxon and an
84 # entrez or flatfile-derived one because the two different databases might
85 # have different roots and different numbers of ranks between the root and the
86 # taxa of interest. To solve this, make a tree of the Taxon with the more
87 # detailed lineage and splice out all the taxa that won't be in the lineage of
89 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
90 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
91 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
92 $mouse_tree->splice(-keep_rank => \@ranks);
93 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
97 This is the next generation (for Bioperl) of representing Taxonomy
98 information. Previously all information was managed by a single
99 object called Bio::Species. This new implementation allows
100 representation of the intermediate nodes not just the species nodes
101 and can relate their connections.
107 User feedback is an integral part of the evolution of this and other
108 Bioperl modules. Send your comments and suggestions preferably to
109 the Bioperl mailing list. Your participation is much appreciated.
111 bioperl-l@bioperl.org - General discussion
112 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
116 Please direct usage questions or support issues to the mailing list:
118 I<bioperl-l@bioperl.org>
120 rather than to the module maintainer directly. Many experienced and
121 reponsive experts will be able look at the problem and quickly
122 address it. Please include a thorough description of the problem
123 with code and data examples if at all possible.
125 =head2 Reporting Bugs
127 Report bugs to the Bioperl bug tracking system to help us keep track
128 of the bugs and their resolution. Bug reports can be submitted via
131 http://bugzilla.open-bio.org/
133 =head1 AUTHOR - Sendu Bala
135 Email bix@sendu.me.uk
139 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
140 Juguang Xiao, juguang@tll.org.sg
141 Gabriel Valiente, valiente@lsi.upc.edu
145 The rest of the documentation details each of the object methods.
146 Internal methods are usually preceded with a _
153 use Bio
::DB
::Taxonomy
;
155 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
160 Usage : my $obj = Bio::Taxonomy::Node->new();
161 Function: Builds a new Bio::Taxonomy::Node object
162 Returns : an instance of Bio::Taxonomy::Node
163 Args : -dbh => a reference to a Bio::DB::Taxonomy object
165 -name => a string representing the taxon name
167 -id => human readable id - typically NCBI taxid
168 -ncbi_taxid => same as -id, but explicitely say that it is an
170 -rank => node rank (one of 'species', 'genus', etc)
171 -common_names => array ref of all common names
172 -division => 'Primates', 'Rodents', etc
173 -genetic_code => genetic code table number
174 -mito_genetic_code => mitochondrial genetic code table number
175 -create_date => date created in database
176 -update_date => date last updated in database
177 -pub_date => date published in database
182 my ($class, @args) = @_;
183 my $self = $class->SUPER::new
(@args);
184 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
185 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
186 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
187 NCBI_TAXID COMMON_NAME COMMON_NAMES
188 GENETIC_CODE MITO_GENETIC_CODE
189 CREATE_DATE UPDATE_DATE PUB_DATE
192 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
193 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
195 elsif(!defined $id) {
196 $id = $objid || $ncbitaxid;
198 defined $id && $self->id($id);
199 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
201 defined $rank && $self->rank($rank);
202 defined $name && $self->node_name($name);
206 $self->throw("-common_names takes only an array reference") unless $commonnames
207 && ref($commonnames) eq 'ARRAY';
208 @common_names = @
{$commonnames};
211 my %c_names = map { $_ => 1 } @common_names;
212 unless (exists $c_names{$commonname}) {
213 unshift(@common_names, $commonname);
216 @common_names > 0 && $self->common_names(@common_names);
218 defined $gcode && $self->genetic_code($gcode);
219 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
220 defined $createdate && $self->create_date($createdate);
221 defined $updatedate && $self->update_date($updatedate);
222 defined $pubdate && $self->pub_date($pubdate);
223 defined $div && $self->division($div);
224 defined $dbh && $self->db_handle($dbh);
226 # deprecated and will issue a warning when method called,
227 # eventually to be removed completely as option
228 defined $parent_id && $self->parent_id($parent_id);
230 # some things want to freeze/thaw Bio::Species objects, but
231 # _root_cleanup_methods contains a CODE ref, delete it.
232 delete $self->{_root_cleanup_methods
};
237 =head1 Bio::IdentifiableI interface
239 Also see L<Bio::IdentifiableI>
244 Usage : $taxon->version($newval)
245 Returns : value of version (a scalar)
246 Args : on set, new value (a scalar or undef, optional)
252 return $self->{'version'} = shift if @_;
253 return $self->{'version'};
259 Usage : $taxon->authority($newval)
260 Returns : value of authority (a scalar)
261 Args : on set, new value (a scalar or undef, optional)
267 return $self->{'authority'} = shift if @_;
268 return $self->{'authority'};
274 Usage : $taxon->namespace($newval)
275 Returns : value of namespace (a scalar)
276 Args : on set, new value (a scalar or undef, optional)
282 return $self->{'namespace'} = shift if @_;
283 return $self->{'namespace'};
286 =head1 Bio::Taxonomy::Node implementation
291 Usage : $taxon->db_handle($newval)
292 Function: Get/Set Bio::DB::Taxonomy Handle
293 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
294 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
296 Also see L<Bio::DB::Taxonomy>
305 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
306 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
308 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
309 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
310 $self->_merge_taxa($new_self) if $new_self;
313 # NB: The Bio::DB::Taxonomy modules access this data member directly
314 # to avoid calling this method and going infinite
315 $self->{'db_handle'} = $db;
317 return $self->{'db_handle'};
323 Usage : $taxon->rank($newval)
324 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
325 Returns : value of rank (a scalar)
326 Args : on set, new value (a scalar or undef, optional)
332 return $self->{'rank'} = shift if @_;
333 return $self->{'rank'};
339 Usage : $taxon->id($newval)
340 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
341 ncbi_taxid() are synonyms of this method.
342 Returns : id (a scalar)
343 Args : none to get, OR scalar to set
349 return $self->SUPER::id
(@_);
357 Usage : $taxon->ncbi_taxid($newval)
358 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
359 returns an id when ncbi_taxid has been explictely set with this
361 Returns : id (a scalar)
362 Args : none to get, OR scalar to set
367 my ($self, $id) = @_;
370 $self->{_ncbi_tax_id_provided
} = 1;
371 return $self->SUPER::id
($id);
374 if ($self->{_ncbi_tax_id_provided
}) {
375 return $self->SUPER::id
;
383 Usage : $taxon->parent_id()
384 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
385 parent_taxon_id() is a synonym of this method.
386 Returns : value of parent_id (a scalar)
395 $self->warn("You can no longer set the parent_id - use ancestor() instead");
397 my $ancestor = $self->ancestor() || return;
398 return $ancestor->id;
401 *parent_taxon_id
= \
&parent_id
;
406 Usage : $taxon->genetic_code($newval)
407 Function: Get/set genetic code table
408 Returns : value of genetic_code (a scalar)
409 Args : on set, new value (a scalar or undef, optional)
415 return $self->{'genetic_code'} = shift if @_;
416 return $self->{'genetic_code'};
419 =head2 mitochondrial_genetic_code
421 Title : mitochondrial_genetic_code
422 Usage : $taxon->mitochondrial_genetic_code($newval)
423 Function: Get/set mitochondrial genetic code table
424 Returns : value of mitochondrial_genetic_code (a scalar)
425 Args : on set, new value (a scalar or undef, optional)
429 sub mitochondrial_genetic_code
{
431 return $self->{'mitochondrial_genetic_code'} = shift if @_;
432 return $self->{'mitochondrial_genetic_code'};
438 Usage : $taxon->create_date($newval)
439 Function: Get/Set Date this node was created (in the database)
440 Returns : value of create_date (a scalar)
441 Args : on set, new value (a scalar or undef, optional)
447 return $self->{'create_date'} = shift if @_;
448 return $self->{'create_date'};
454 Usage : $taxon->update_date($newval)
455 Function: Get/Set Date this node was updated (in the database)
456 Returns : value of update_date (a scalar)
457 Args : on set, new value (a scalar or undef, optional)
463 return $self->{'update_date'} = shift if @_;
464 return $self->{'update_date'};
470 Usage : $taxon->pub_date($newval)
471 Function: Get/Set Date this node was published (in the database)
472 Returns : value of pub_date (a scalar)
473 Args : on set, new value (a scalar or undef, optional)
479 return $self->{'pub_date'} = shift if @_;
480 return $self->{'pub_date'};
486 Usage : my $ancestor_taxon = $taxon->ancestor()
487 Function: Retrieve the ancestor taxon. Normally the database is asked what the
490 If you manually set the ancestor (or you make a Bio::Tree::Tree with
491 this object as an argument to new()), the database (if any) will not
492 be used for the purposes of this method.
494 To restore normal database behaviour, call ancestor(undef) (which
495 would remove this object from the tree), or request this taxon again
496 as a new Taxon object from the database.
505 my $ancestor = $self->SUPER::ancestor
(@_);
506 my $dbh = $self->db_handle || return $ancestor;
512 #*** could avoid the db lookup if we knew our current id was definitely
513 # information from the db...
514 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
515 return $dbh->ancestor($definitely_from_dbh);
519 =head2 get_Parent_Node
521 Title : get_Parent_Node
522 Function: Synonym of ancestor()
527 sub get_Parent_Node
{
529 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
530 return $self->ancestor(@_);
533 =head2 each_Descendent
535 Title : each_Descendent
536 Usage : my @taxa = $taxon->each_Descendent();
537 Function: Get all the descendents for this Taxon (but not their descendents,
538 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
541 Note that this method never asks the database for the descendents;
542 it will only return objects you have manually set with
543 add_Descendent(), or where this was done for you by making a
544 Bio::Tree::Tree with this object as an argument to new().
546 To get the database descendents use
547 $taxon->db_handle->each_Descendent($taxon).
549 Returns : Array of Bio::Taxon objects
550 Args : optionally, when you have set your own descendents, the string
551 "height", "creation", "alpha", "revalpha", or coderef to be used to
552 sort the order of children nodes.
556 # implemented by Bio::Tree::Node
558 =head2 get_Children_Nodes
560 Title : get_Children_Nodes
561 Function: Synonym of each_Descendent()
566 sub get_Children_Nodes
{
568 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
569 return $self->each_Descendent(@_);
575 Usage: $taxon->name('scientific', 'Homo sapiens');
576 $taxon->name('common', 'human', 'man');
577 my @names = @{$taxon->name('common')};
578 Function: Get/set the names. node_name(), scientific_name() and common_names()
579 are shorthands to name('scientific'), name('scientific') and
580 name('common') respectively.
581 Returns: names (a array reference)
582 Args: Arg1 => the name_class. You can assign any text, but the words
583 'scientific' and 'common' have the special meaning, as
584 scientific name and common name, respectively. 'scientific' and
585 'division' are treated specially, allowing only the first value
586 in the Arg2 list to be set.
587 Arg2 .. => list of names
592 my ($self, $name_class, @names) = @_;
593 $self->throw('No name class specified') unless defined $name_class;
596 if ($name_class =~ /scientific|division/i) {
597 delete $self->{'_names_hash'}->{$name_class};
598 @names = (shift(@names));
600 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
602 return $self->{'_names_hash'}->{$name_class} || return;
608 Usage : $taxon->node_name($newval)
609 Function: Get/set the name of this taxon (node), typically the scientific name
610 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
612 Returns : value of node_name (a scalar)
613 Args : on set, new value (a scalar or undef, optional)
619 my @v = @
{$self->name('scientific', @_) || []};
623 *scientific_name
= \
&node_name
;
628 Usage : $taxon->common_names($newval)
629 Function: Get/add the other names of this taxon, typically the genbank common
630 name and others, eg. 'Human' and 'man'. common_name() is a synonym
632 Returns : array of names in list context, one of those names in scalar context
633 Args : on add, new list of names (scalars, optional)
639 my @v = @
{$self->name('common', @_) || []};
640 return ( wantarray ) ?
@v : pop @v;
643 *common_name
= \
&common_names
;
648 Usage : $taxon->division($newval)
649 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
651 Returns : value of division (a scalar)
652 Args : on set, new value (a scalar or undef, optional)
658 my @v = @
{$self->name('division',@_) || []};
662 # get a node from the database that is like the supplied node
663 sub _get_similar_taxon_from_db
{
664 #*** not really happy with this having to be called so much; there must be
666 my ($self, $taxon, $db) = @_;
667 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
668 ($self->id || $self->node_name) || return;
669 $db ||= $self->db_handle || return;
671 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
673 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
675 my $own_rank = $taxon->rank || 'no rank';
676 foreach my $try_id (@try_ids) {
677 my $try = $db->get_taxon(-taxonid
=> $try_id);
678 my $try_rank = $try->rank || 'no rank';
679 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
689 # merge data from supplied Taxon into self
691 my ($self, $taxon) = @_;
692 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
693 return if ($taxon eq $self);
695 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
696 my $own = $self->$attrib();
697 my $his = $taxon->$attrib();
699 $self->$attrib($his);
703 my $own = $self->rank || 'no rank';
704 my $his = $taxon->rank || 'no rank';
705 if ($own eq 'no rank' && $his ne 'no rank') {
709 my %own_cnames = map { $_ => 1 } $self->common_names;
710 my %his_cnames = map { $_ => 1 } $taxon->common_names;
711 foreach (keys %his_cnames) {
712 unless (exists $own_cnames{$_}) {
713 $self->common_names($_);
717 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
720 =head2 remove_Descendent
722 Title : remove_Descendent
723 Usage : $node->remove_Descedent($node_foo);
724 Function: Removes a specific node from being a Descendent of this node
726 Args : An array of Bio::Node::NodeI objects which have been previously
727 passed to the add_Descendent call of this object.
731 sub remove_Descendent
{
732 # need to override this method from Bio::Tree::Node since it casually
733 # throws away nodes if they don't branch
734 my ($self,@nodes) = @_;
736 foreach my $n ( @nodes ) {
737 if ($self->{'_desc'}->{$n->internal_id}) {
738 $self->{_removing_descendent
} = 1;
740 $self->{_removing_descendent
} = 0;
741 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
742 delete $self->{'_desc'}->{$n->internal_id};