3 # BioPerl module for Bio::Species
5 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
6 # Reimplemented by Sendu Bala <bix@sendu.me.uk>
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Species - Generic species object
18 $species = Bio::Species->new(-classification => [@classification]);
19 # Can also pass classification
20 # array to new as below
22 $species->classification(qw( sapiens Homo Hominidae
23 Catarrhini Primates Eutheria
24 Mammalia Vertebrata Chordata
27 $genus = $species->genus();
29 $bi = $species->binomial(); # $bi is now "Homo sapiens"
31 # For storing common name
32 $species->common_name("human");
34 # For storing subspecies
35 $species->sub_species("accountant");
39 Provides a very simple object for storing phylogenetic
40 information. The classification is stored in an array,
41 which is a list of nodes in a phylogenetic tree. Access to
42 getting and setting species and genus is provided, but not
43 to any of the other node types (eg: "phylum", "class",
44 "order", "family"). There's plenty of scope for making the
45 model more sophisticated, if this is ever needed.
47 A methods are also provided for storing common
48 names, and subspecies.
54 User feedback is an integral part of the evolution of this and other
55 Bioperl modules. Send your comments and suggestions preferably to
56 the Bioperl mailing list. Your participation is much appreciated.
58 bioperl-l@bioperl.org - General discussion
59 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
67 http://bugzilla.open-bio.org/
71 James Gilbert email B<jgrg@sanger.ac.uk>
75 Sendu Bala, bix@sendu.me.uk
79 The rest of the documentation details each of the object
80 methods. Internal methods are usually preceded with a _
84 #' Let the code begin...
89 use Bio
::DB
::Taxonomy
;
91 use Scalar
::Util
qw(weaken isweak);
92 use base
qw(Bio::Taxon);
97 Usage : my $obj = Bio::Species->new(-classification => \@class)
98 Function: Build a new Species object
99 Returns : Bio::Species object
100 Args : -ncbi_taxid => NCBI taxonomic ID (optional)
101 -classification => arrayref of classification
106 my($class, @args) = @_;
108 my $self = $class->SUPER::new
(@args);
110 my ($org, $sp, $var, $classification) =
111 $self->_rearrange([qw(ORGANELLE
114 CLASSIFICATION)], @args);
116 if (defined $classification && ref($classification) eq "ARRAY" && @
{$classification}) {
117 $self->classification(@
$classification);
120 # store a tree on ourselves so we can use Tree methods
121 $self->{tree
} = Bio
::Tree
::Tree
->new();
123 # some things want to freeze/thaw Bio::Species objects, but
124 # _root_cleanup_methods contains a CODE ref, delete it.
125 # delete $self->{tree}->{_root_cleanup_methods};
128 defined $org && $self->organelle($org);
129 defined $sp && $self->sub_species($sp);
130 defined $var && $self->variant($var);
135 =head2 classification
137 Title : classification
138 Usage : $self->classification(@class_array);
139 @classification = $self->classification();
140 Function: Get/set the lineage of this species. The array provided must be in
141 the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc.
142 Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae
143 Catarrhini Primates Eutheria Mammalia Vertebrata
144 Chordata Metazoa Eukaryota));
145 Returns : Classification array
146 Args : Classification array
148 A reference to the classification array. In the latter case
149 if there is a second argument and it evaluates to true,
150 names will not be validated. NB: in any case, names are never
156 my ($self, @vals) = @_;
159 if (ref($vals[0]) eq 'ARRAY') {
163 # make sure the lineage contains us as first or second element
164 # (lineage may have subspecies, species, genus ...)
165 my $name = $self->node_name;
166 my ($genus, $species) = (quotemeta($vals[1]), quotemeta($vals[0]));
167 if ($name && ($name !~ m{$species}i && $name !~ m{$genus}i) && $name !~ m{$vals[1] $vals[0]}i) {
168 if ($name =~ /^$vals[1] $vals[0]\s*(.+)/) {
169 # just assume the problem is someone tried to make a Bio::Species starting at subspecies
170 #*** no idea if this is appropriate! just a possible fix related to bug 2092
171 $self->sub_species($1);
172 $name = $self->node_name("$vals[1] $vals[0]");
175 $self->warn("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')");
179 # create a lineage for ourselves
180 my $db = Bio
::DB
::Taxonomy
->new(-source
=> 'list', -names
=> [reverse @vals]);
181 unless ($self->scientific_name) {
182 # assume we're supposed to be the leaf of the supplied lineage
183 $self->scientific_name($vals[0]);
185 unless ($self->rank) {
186 # and that we are rank species
187 $self->rank('species');
190 $self->db_handle($db);
192 $self->{tree
} = Bio
::Tree
::Tree
->new(-node
=> $self);
193 # some things want to freeze/thaw Bio::Species objects, but tree's
194 # _root_cleanup_methods contains a CODE ref, delete it.
195 #*** even if we don't delete the cleanup methods, we still get memory
196 # leak-like symtoms, and the actual cleanup causes a mass of
197 # warnings... needs investigation!
198 delete $self->{tree
}->{_root_cleanup_methods
};
202 foreach my $node ($self->{tree
}->get_lineage_nodes($self), $self) {
203 unshift(@vals, $node->scientific_name || next);
205 weaken
($self->{tree
}->{'_rootnode'}) unless isweak
($self->{tree
}->{'_rootnode'});
212 Usage : $obj->ncbi_taxid($newval)
213 Function: Get/set the NCBI Taxon ID
214 Returns : the NCBI Taxon ID as a string
215 Args : newvalue to set or undef to unset (optional)
222 Usage : $self->common_name( $common_name );
223 $common_name = $self->common_name();
224 Function: Get or set the common name of the species
225 Example : $self->common_name('human')
226 Returns : The common name in a string
227 Args : String, which is the common name (optional)
234 Usage : $obj->division($newval)
235 Function: Genbank Division for a species
236 Returns : value of division (a scalar)
237 Args : value of division (a scalar)
244 Usage : $self->species( $species );
245 $species = $self->species();
246 Function: Get or set the scientific species name.
247 Example : $self->species('Homo sapiens');
248 Returns : Scientific species name as string
249 Args : Scientific species name as string
254 my ($self, $species) = @_;
257 $self->{_species
} = $species;
260 unless (defined $self->{_species
}) {
261 # work it out from our nodes
262 my $species_taxon = $self->{tree
}->find_node(-rank
=> 'species');
263 unless ($species_taxon) {
264 # just assume we are rank species
265 $species_taxon = $self;
268 $species = $species_taxon->scientific_name;
271 # munge it like the Bio::SeqIO modules used to do
272 # (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments
273 # referring to 'ORGANISM' etc.)
276 my $root = $self->{tree
}->get_root_node;
278 $self->{tree
} = Bio
::Tree
::Tree
->new(-node
=> $species_taxon);
279 delete $self->{tree
}->{_root_cleanup_methods
};
280 $root = $self->{tree
}->get_root_node;
283 my @spflds = split(' ', $species);
284 if (@spflds > 1 && $root->node_name ne 'Viruses') {
287 # does the next term start with uppercase?
288 # yes: valid genus; no then unconventional
289 # e.g. leaf litter basidiomycete sp. Collb2-39
291 if ($spflds[0] =~ m/^[A-Z]/) {
292 $genus = shift(@spflds);
300 while (my $fld = shift @spflds) {
302 # does it have subspecies or varieties?
303 last if ($fld =~ m/(sp\.|var\.)/);
305 chop $species; # last space
306 $sub_species = join ' ',@spflds if(@spflds);
312 # does ORGANISM start with any words which make its genus undefined?
313 # these are in @unkn_genus
314 # this in case species starts with uppercase so isn't caught above.
315 # alter common name if required
316 my $unconv = 0; # is it unconventional species name?
317 my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
318 foreach (@unkn_genus) {
319 if ($genus && $genus =~ m/$_/i) {
320 $species = $genus . " " . $species;
325 elsif ($species =~ m/$_/i) {
330 if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/) {
331 # need to extract subspecies from conventional ORGANISM format.
332 # Will the 'word' in a two element species name
333 # e.g. $species = 'thummi thummi' => $species='thummi' &
334 # $sub_species='thummi'
338 $self->genus($genus) if $genus;
339 $self->sub_species($sub_species) if $sub_species;
342 $self->{_species
} = $species;
344 return $self->{_species
};
350 Usage : $self->genus( $genus );
351 $genus = $self->genus();
352 Function: Get or set the scientific genus name.
353 Example : $self->genus('Homo');
354 Returns : Scientific genus name as string
355 Args : Scientific genus name as string
360 my ($self, $genus) = @_;
363 $self->{_genus
} = $genus;
366 unless (defined $self->{_genus
}) {
367 my $genus_taxon = $self->{tree
}->find_node(-rank
=> 'genus');
368 unless ($genus_taxon) {
369 # just assume our ancestor is rank genus
370 $genus_taxon = $self->ancestor;
373 $self->{_genus
} = $genus_taxon->scientific_name if $genus_taxon;
376 return $self->{_genus
};
382 Usage : $obj->sub_species($newval)
383 Function: Get or set the scientific subspecies name.
384 Returns : value of sub_species
385 Args : newvalue (optional)
390 my ($self, $sub) = @_;
392 unless (defined $self->{'_sub_species'}) {
393 my $ss_taxon = $self->{tree
}->find_node(-rank
=> 'subspecies');
396 $ss_taxon->scientific_name($sub);
398 return $ss_taxon->scientific_name;
402 # fall back to direct storage on self
403 $self->{'_sub_species'} = $sub if $sub;
404 return $self->{'_sub_species'};
410 Usage : $obj->variant($newval)
411 Function: Get/set variant information for this species object (strain,
414 Returns : value of variant (a scalar)
415 Args : new value (a scalar or undef, optional)
420 my ($self, $var) = @_;
422 unless (defined $self->{'_variant'}) {
423 my $var_taxon = $self->{tree
}->find_node(-rank
=> 'variant');
426 $var_taxon->scientific_name($var);
428 return $var_taxon->scientific_name;
432 # fall back to direct storage on self
433 $self->{'_variant'} = $var if $var;
434 return $self->{'_variant'};
440 Usage : $binomial = $self->binomial();
441 $binomial = $self->binomial('FULL');
442 Function: Returns a string "Genus species", or "Genus species subspecies",
443 if the first argument is 'FULL' (and the species has a subspecies).
444 Args : Optionally the string 'FULL' to get the full name including
450 my ($self, $full) = @_;
451 my $rank = $self->rank || 'no rank';
453 my ($species, $genus) = ($self->species, $self->genus);
454 unless (defined $species) {
456 $self->warn("requested binomial but classification was not set");
458 $genus = '' unless( defined $genus);
460 $species =~ s/$genus\s+//;
462 my $bi = "$genus $species";
463 if (defined($full) && $full =~ /full/i) {
464 my $ssp = $self->sub_species;
467 $ssp =~ s/$species\s+//;
474 =head2 validate_species_name
476 Title : validate_species_name
477 Usage : $result = $self->validate_species_name($string);
478 Function: Validate the species portion of the binomial
480 Notes : The string following the "genus name" in the NCBI binomial
481 is so variable that it's not clear that this is a useful
482 function. Consider the binomials
483 "Simian 11 rotavirus (serotype 3 / strain SA11-Patton)",
484 or "St. Thomas 3 rotavirus", straight from GenBank.
485 This is particularly problematic in microbes and viruses.
486 As such, this isn't actually used automatically by any Bio::Species
490 sub validate_species_name
{
491 my( $self, $string ) = @_;
493 return 1 if $string eq "sp.";
494 return 1 if $string =~ /strain/;
495 return 1 if $string =~ /^[a-z][\w\s-]+$/i;
496 $self->throw("Invalid species name '$string'");
506 Usage : $self->organelle( $organelle );
507 $organelle = $self->organelle();
508 Function: Get or set the organelle name
509 Example : $self->organelle('Chloroplast')
510 Returns : The organelle name in a string
511 Args : String, which is the organelle name
517 return $self->{'_organelle'} = shift if @_;
518 return $self->{'_organelle'};
523 $self->{tree
}->cleanup_tree if $self->{tree
};
524 delete $self->{tree
};