2 # BioPerl module for Bio::Species
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
7 # Reimplemented by Sendu Bala <bix@sendu.me.uk>
8 # Re-reimplemented by Chris Fields <cjfields - at - bioperl dot org>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Species - Generic species object.
20 $species = Bio::Species->new(-classification => [@classification]);
21 # Can also pass classification
22 # array to new as below
24 $species->classification(qw( sapiens Homo Hominidae
25 Catarrhini Primates Eutheria
26 Mammalia Vertebrata Chordata
29 $genus = $species->genus();
31 $bi = $species->binomial(); # $bi is now "Homo sapiens"
33 # For storing common name
34 $species->common_name("human");
36 # For storing subspecies
37 $species->sub_species("accountant");
41 B<NOTE: This class is planned for deprecation in favor of the simpler Bio::Taxon.
42 Please use that class instead.>
44 Provides a very simple object for storing phylogenetic information. The
45 classification is stored in an array, which is a list of nodes in a phylogenetic
46 tree. Access to getting and setting species and genus is provided, but not to
47 any of the other node types (eg: "phylum", "class", "order", "family"). There's
48 plenty of scope for making the model more sophisticated, if this is ever needed.
50 A methods are also provided for storing common names, and subspecies.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to
58 the Bioperl mailing list. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 of the bugs and their resolution. Bug reports can be submitted via the
80 https://github.com/bioperl/bioperl-live/issues
84 James Gilbert email B<jgrg@sanger.ac.uk>
88 Sendu Bala, bix@sendu.me.uk
89 Chris Fields, cjfields at bioperl dot org
93 The rest of the documentation details each of the object
94 methods. Internal methods are usually preceded with a _
98 #' Let the code begin...
100 package Bio
::Species
;
104 use Bio
::DB
::Taxonomy
;
107 use base
qw(Bio::Root::Root Bio::Tree::NodeI);
112 Usage : my $obj = Bio::Species->new(-classification => \@class)
113 Function: Build a new Species object
114 Returns : Bio::Species object
115 Args : -ncbi_taxid => NCBI taxonomic ID (optional)
116 -classification => arrayref of classification
121 my($class, @args) = @_;
123 my $self = $class->SUPER::new
(@args);
125 # Bio::Species is now just a proxy object that just observes the NodeI
126 # interface methods but delegates them to the proper classes (Bio::Taxon and
127 # Bio::Tree::Tree). This will be surplanted by the much simpler
128 # Bio::Taxon/Bio::DB::Taxonomy modules in the future.
130 # Using a proxy allows proper GC w/o using weaken(). This just wraps the
131 # older instances, which have no reciprocal refs (thus no circular refs).
132 # This can then run proper cleanup
134 $self->taxon(Bio
::Taxon
->new(@args));
136 my ($org, $sp, $var, $classification) =
137 $self->_rearrange([qw(ORGANELLE
140 CLASSIFICATION)], @args);
142 if (defined $classification && ref($classification) eq "ARRAY" && @
{$classification}) {
143 $self->classification(@
$classification);
146 $self->tree(Bio
::Tree
::Tree
->new());
149 defined $org && $self->organelle($org);
150 defined $sp && $self->sub_species($sp);
151 defined $var && $self->variant($var);
156 =head2 classification
158 Title : classification
159 Usage : $self->classification(@class_array);
160 @classification = $self->classification();
161 Function: Get/set the lineage of this species. The array provided must be in
162 the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc.
163 Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae
164 Catarrhini Primates Eutheria Mammalia Vertebrata
165 Chordata Metazoa Eukaryota));
166 Returns : Classification array
167 Args : Classification array
169 A reference to the classification array. In the latter case
170 if there is a second argument and it evaluates to true,
171 names will not be validated. NB: in any case, names are never
177 my ($self, @vals) = @_;
179 my $taxon = $self->taxon;
182 if (ref($vals[0]) eq 'ARRAY') {
187 # make sure the lineage contains us as first or second element
188 # (lineage may have subspecies, species, genus ...)
189 my $name = $taxon->node_name;
190 my ($genus, $species) = (quotemeta($vals[1]), quotemeta($vals[0]));
192 ($name !~ m{$species}i && $name !~ m{$genus}i) &&
193 $name !~ m{$genus $species}i) {
194 if ($name =~ /^$genus $species\s*(.+)/) {
195 # just assume the problem is someone tried to make a Bio::Species starting at subspecies
196 #*** no idea if this is appropriate! just a possible fix related to bug 2092
197 $self->sub_species($1);
198 $name = $taxon->node_name("$vals[1] $vals[0]");
201 $self->warn("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')");
205 # create a lineage for ourselves
206 my $db = Bio
::DB
::Taxonomy
->new(-source
=> 'list', -names
=> [reverse @vals]);
207 unless ($taxon->scientific_name) {
208 # assume we're supposed to be the leaf of the supplied lineage
209 $self->taxon->scientific_name($vals[0]);
211 unless ($taxon->rank) {
212 # and that we are rank species
213 $taxon->rank('species');
216 $taxon->db_handle($db);
218 $self->tree(Bio
::Tree
::Tree
->new(-node
=> $taxon));
222 foreach my $node ($self->tree->get_lineage_nodes($taxon), $taxon) {
223 unshift(@vals, $node->scientific_name || next);
231 Usage : $obj->ncbi_taxid($newval)
232 Function: Get/set the NCBI Taxon ID
233 Returns : the NCBI Taxon ID as a string
234 Args : newvalue to set or undef to unset (optional)
241 Usage : $self->common_name( $common_name );
242 $common_name = $self->common_name();
243 Function: Get or set the common name of the species
244 Example : $self->common_name('human')
245 Returns : The common name in a string
246 Args : String, which is the common name (optional)
253 Usage : $obj->division($newval)
254 Function: Genbank Division for a species
255 Returns : value of division (a scalar)
256 Args : value of division (a scalar)
263 Usage : $self->species( $species );
264 $species = $self->species();
265 Function: Get or set the species name.
266 Note that this is NOT genus and species
267 -- use $self->binomial() for that.
268 Example : $self->species('sapiens');
269 Returns : species name as string (NOT genus and species)
270 Args : species name as string (NOT genus and species)
275 my ($self, $species) = @_;
278 $self->{_species
} = $species;
281 unless (defined $self->{_species
}) {
282 # work it out from our nodes
283 my $species_taxon = $self->tree->find_node(-rank
=> 'species');
284 unless ($species_taxon) {
285 # just assume we are rank species
286 $species_taxon = $self->taxon;
289 $species = $species_taxon->scientific_name;
292 # munge it like the Bio::SeqIO modules used to do
293 # (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments
294 # referring to 'ORGANISM' etc.)
297 my $root = $self->tree->get_root_node;
299 $self->tree(Bio
::Tree
::Tree
->new(-node
=> $species_taxon));
300 $root = $self->tree->get_root_node;
303 my @spflds = split(' ', $species);
304 if (@spflds > 1 && $root->node_name ne 'Viruses') {
307 # does the next term start with uppercase?
308 # yes: valid genus; no then unconventional
309 # e.g. leaf litter basidiomycete sp. Collb2-39
311 if ($spflds[0] =~ m/^[A-Z]/) {
312 $genus = shift(@spflds);
320 while (my $fld = shift @spflds) {
322 # does it have subspecies or varieties?
323 last if ($fld =~ m/(sp\.|var\.)/);
325 chop $species; # last space
326 $sub_species = join ' ',@spflds if(@spflds);
332 # does ORGANISM start with any words which make its genus undefined?
333 # these are in @unkn_genus
334 # this in case species starts with uppercase so isn't caught above.
335 # alter common name if required
336 my $unconv = 0; # is it unconventional species name?
337 my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
338 foreach (@unkn_genus) {
339 if ($genus && $genus =~ m/$_/i) {
340 $species = $genus . " " . $species;
345 elsif ($species =~ m/$_/i) {
350 if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/) {
351 # need to extract subspecies from conventional ORGANISM format.
352 # Will the 'word' in a two element species name
353 # e.g. $species = 'thummi thummi' => $species='thummi' &
354 # $sub_species='thummi'
358 $self->genus($genus) if $genus;
359 $self->sub_species($sub_species) if $sub_species;
362 $self->{_species
} = $species;
364 return $self->{_species
};
370 Usage : $self->genus( $genus );
371 $genus = $self->genus();
372 Function: Get or set the scientific genus name.
373 Example : $self->genus('Homo');
374 Returns : Scientific genus name as string
375 Args : Scientific genus name as string
380 my ($self, $genus) = @_;
382 # TODO: instead of caching the raw name, cache the actual node instance.
384 $self->{_genus
} = $genus;
386 unless (defined $self->{_genus
}) {
387 my $genus_taxon = $self->tree->find_node(-rank
=> 'genus');
388 unless ($genus_taxon) {
389 # just assume our ancestor is rank genus
390 $genus_taxon = $self->taxon->ancestor;
393 $self->{_genus
} = $genus_taxon->scientific_name if $genus_taxon;
396 return $self->{_genus
};
402 Usage : $obj->sub_species($newval)
403 Function: Get or set the scientific subspecies name.
404 Returns : value of sub_species
405 Args : newvalue (optional)
410 my ($self, $sub) = @_;
412 # TODO: instead of caching the raw name, cache the actual node instance.
413 if (!defined $self->{'_sub_species'}) {
414 my $ss_taxon = $self->tree->find_node(-rank
=> 'subspecies');
417 $ss_taxon->scientific_name($sub);
419 # *** weakening ref to our root node in species() to solve a
420 # memory leak means that we have a subspecies taxon to set
421 # during the first call to species(), but it has vanished by
422 # the time a user subsequently calls sub_species() to get the
423 # value. So we 'cheat' and just store the subspecies name in
424 # our self hash, instead of the tree. Is this a problem for
425 # a Species object? Can't decide --sendu
427 # This can now be changed to deal with this information on the
428 # fly. For now, the caching remains, but maybe we should just
429 # let these things deal with mutable data as needed? -- cjfields
431 $self->{'_sub_species'} = $sub;
433 return $ss_taxon->scientific_name;
436 # should we create a node here to be added to the tree?
440 # fall back to direct storage on self
441 $self->{'_sub_species'} = $sub if $sub;
442 return $self->{'_sub_species'};
448 Usage : $obj->variant($newval)
449 Function: Get/set variant information for this species object (strain,
452 Returns : value of variant (a scalar)
453 Args : new value (a scalar or undef, optional)
458 my ($self, $var) = @_;
460 # TODO: instead of caching the raw name, cache the actual node instance.
461 if (!defined $self->{'_variant'}) {
462 my $var_taxon = $self->tree->find_node(-rank
=> 'variant');
465 $var_taxon->scientific_name($var);
467 return $var_taxon->scientific_name;
470 # should we create a node here to be added to the tree?
474 # fall back to direct storage on self
475 $self->{'_variant'} = $var if $var;
476 return $self->{'_variant'};
482 Usage : $binomial = $self->binomial();
483 $binomial = $self->binomial('FULL');
484 Function: Returns a string "Genus species", or "Genus species subspecies",
485 if the first argument is 'FULL' (and the species has a subspecies).
486 Args : Optionally the string 'FULL' to get the full name including
488 Note : This is just munged from the taxon() name
493 my ($self, $full) = @_;
494 my $rank = $self->taxon->rank || 'no rank';
496 my ($species, $genus) = ($self->species, $self->genus);
497 unless (defined $species) {
499 $self->warn("requested binomial but classification was not set");
501 $genus = '' unless( defined $genus);
503 $species =~ s/$genus\s+//;
505 my $bi = "$genus $species";
506 if (defined($full) && $full =~ /full/i) {
507 my $ssp = $self->sub_species;
510 $ssp =~ s/$species\s+//;
517 =head2 validate_species_name
519 Title : validate_species_name
520 Usage : $result = $self->validate_species_name($string);
521 Function: Validate the species portion of the binomial
523 Notes : The string following the "genus name" in the NCBI binomial is so
524 variable that it's not clear that this is a useful function. Consider
525 the binomials "Simian 11 rotavirus (serotype 3 / strain
526 SA11-Patton)", or "St. Thomas 3 rotavirus", straight from GenBank.
527 This is particularly problematic in microbes and viruses. As such,
528 this isn't actually used automatically by any Bio::Species method.
532 sub validate_species_name
{
533 my( $self, $string ) = @_;
535 return 1 if $string eq "sp.";
536 return 1 if $string =~ /strain/;
537 return 1 if $string =~ /^[a-z][\w\s-]+$/i;
538 $self->throw("Invalid species name '$string'");
548 Usage : $self->organelle( $organelle );
549 $organelle = $self->organelle();
550 Function: Get or set the organelle name
551 Example : $self->organelle('Chloroplast')
552 Returns : The organelle name in a string
553 Args : String, which is the organelle name
554 Note : TODO: We currently do not know where the organelle definition will
555 eventually go. This is stored in the source seqfeature, though,
556 so the information isn't lost.
562 return $self->{'_organelle'} = shift if @_;
563 return $self->{'_organelle'};
568 The following methods delegate to the internal Bio::Taxon instance. This is
569 mainly to allow code continue using older methods, with the mind to migrate to
570 using Bio::Taxon and related methods when this class is deprecated.
574 sub node_name
{shift->taxon->node_name(@_)}
575 sub scientific_name
{shift->taxon->node_name(@_)}
577 sub id
{shift->taxon->id(@_)}
578 sub object_id
{shift->taxon->id(@_)}
579 sub ncbi_taxid
{shift->taxon->ncbi_taxid(@_)}
580 sub rank
{shift->taxon->rank(@_)}
581 sub division
{shift->taxon->division(@_)}
583 sub common_names
{shift->taxon->common_names(@_)}
584 sub common_name
{shift->taxon->common_names(@_)}
586 sub genetic_code
{shift->taxon->genetic_code(@_)}
587 sub mitochondrial_genetic_code
{shift->taxon->mitochondrial_genetic_code(@_)}
589 sub create_date
{ shift->taxon->create_date(@_)}
590 sub pub_date
{ shift->taxon->pub_date(@_)}
591 sub update_date
{ shift->taxon->update_date(@_)}
593 sub db_handle
{ shift->taxon->db_handle(@_)}
595 sub parent_id
{ shift->taxon->parent_id(@_)}
596 sub parent_taxon_id
{ shift->taxon->parent_id(@_)}
598 sub version
{ shift->taxon->version(@_)}
599 sub authority
{ shift->taxon->authority(@_)}
600 sub namespace
{ shift->taxon->namespace(@_)}
602 sub ancestor
{ shift->taxon->ancestor(@_)}
603 sub get_Parent_Node
{ shift->taxon->get_Parent_Node(@_)}
604 sub each_Descendent
{ shift->taxon->each_Descendent(@_)}
605 sub get_Children_Nodes
{ shift->taxon->get_Children_Nodes(@_)}
606 sub remove_Descendant
{ shift->taxon->remove_Descendant(@_)}
608 sub name
{ shift->taxon->name(@_)}
614 Function : retrieve the internal Bio::Taxon instance
615 Returns : A Bio::Taxon. If one is not previously set,
616 an instance is created lazily
617 Args : Bio::Taxon (optional)
622 my ($self, $taxon) = @_;
623 if (!$self->{taxon
} || $taxon) {
624 $taxon ||= Bio
::Taxon
->new();
625 $self->{taxon
} = $taxon;
634 Function : Returns a Bio::Tree::Tree object
635 Returns : A Bio::Tree::Tree. If one is not previously set,
636 an instance is created lazily
637 Args : Bio::Tree::Tree (optional)
642 my ($self, $tree) = @_;
643 if (!$self->{tree
} || $tree) {
644 $tree ||= Bio
::Tree
::Tree
->new();
645 delete $tree->{_root_cleanup_methods
};
646 $self->{tree
} = $tree;
653 $self->tree->cleanup_tree;
654 delete $self->{tree
};
655 $self->taxon->node_cleanup;