1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 103,
11 -requires_module => 'XML::Twig');
13 use_ok('Bio::DB::Taxonomy');
14 use_ok('Bio::Tree::Tree');
17 my $temp_dir = test_output_dir();
19 # we're actually testing Bio::Taxon and Bio::DB::Taxonomy::* here, not
22 ok my $db_entrez = Bio::DB::Taxonomy->new(-source => 'entrez');
24 ok my $db_flatfile = Bio::DB::Taxonomy->new(-source => 'flatfile',
25 -directory => $temp_dir,
26 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
27 -namesfile => test_input_file('taxdump','names.dmp'),
31 foreach my $db ($db_entrez, $db_flatfile) {
33 test_skip(-tests => 38, -requires_networking => 1) if $db eq $db_entrez;
35 eval { $id = $db->get_taxonid('Homo sapiens');};
36 skip "Unable to connect to entrez database; no network or server busy?", 38 if $@;
40 # easy test on human, try out the main Taxon methods
41 ok $n = $db->get_taxon(9606);
43 is $n->object_id, $n->id;
44 is $n->ncbi_taxid, $n->id;
45 is $n->parent_id, 9605;
46 is $n->rank, 'species';
48 is $n->node_name, 'Homo sapiens';
49 is $n->scientific_name, $n->node_name;
50 is ${$n->name('scientific')}[0], $n->node_name;
52 my %common_names = map { $_ => 1 } $n->common_names;
53 is keys %common_names, 2;
54 ok exists $common_names{human};
55 ok exists $common_names{man};
57 is $n->division, 'Primates';
58 is $n->genetic_code, 1;
59 is $n->mitochondrial_genetic_code, 2;
60 # these are entrez-only, data not available in dmp files
61 if ($db eq $db_entrez) {
62 ok defined $n->pub_date;
63 ok defined $n->create_date;
64 ok defined $n->update_date;
67 # briefly test some Bio::Tree::NodeI methods
68 ok my $ancestor = $n->ancestor;
69 is $ancestor->scientific_name, 'Homo';
70 # unless set explicitly, Bio::Taxon doesn't return anything for
71 # each_Descendent; must ask the database directly
72 ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
75 sleep(3) if $db eq $db_entrez;
77 # do some trickier things...
78 ok my $n2 = $db->get_Taxonomy_Node('89593');
79 is $n2->scientific_name, 'Craniata';
81 # briefly check we can use some Tree methods
82 my $tree = Bio::Tree::Tree->new();
83 is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
85 # can we actually form a Tree and use other Tree methods?
86 ok $tree = Bio::Tree::Tree->new(-node => $n);
87 is $tree->number_nodes, 30;
88 is $tree->get_nodes, 30;
89 is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
91 # check that getting the ancestor still works now we have explitly set the
92 # ancestor by making a Tree
93 is $n->ancestor->scientific_name, 'Homo';
95 sleep(3) if $db eq $db_entrez;
97 ok $n = $db->get_Taxonomy_Node('1760');
98 is $n->scientific_name, 'Actinobacteria';
100 sleep(3) if $db eq $db_entrez;
102 # entrez isn't as good at searching as flatfile, so we have to special-case
103 my @ids = $db->get_taxonids('Chloroflexi');
104 $db eq $db_entrez ? (is @ids, 1) : (is @ids, 2);
105 $id = $db->get_taxonids('Chloroflexi (class)');
108 @ids = $db->get_taxonids('Rhodotorula');
110 @ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
116 # Test the list database
117 my @ranks = qw(superkingdom class genus species);
118 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
119 my $db_list = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
123 ok my $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
124 ok my $h_flat = $db_flatfile->get_taxon(-name => 'Homo sapiens');
126 is $h_list->ancestor->scientific_name, 'Homo';
128 my @names = $h_list->common_names;
130 $h_list->common_names('woman');
131 @names = $h_list->common_names;
133 @names = $h_flat->common_names;
136 # you can switch to another database when you need more information, which also
137 # merges information in the node from the two different dbs
138 $h_list->db_handle($db_flatfile);
139 @names = $h_list->common_names;
142 # form a tree with the list lineage first, preventing a subsequent database
143 # change from giving us all those extra ranks
144 $h_list->db_handle($db_list);
145 my $ancestors_ancestor = $h_list->ancestor->ancestor;
146 is $ancestors_ancestor->scientific_name, 'Mammalia';
148 my $tree = Bio::Tree::Tree->new(-node => $h_list);
149 $h_list->db_handle($db_flatfile);
150 $ancestors_ancestor = $h_list->ancestor->ancestor;
151 is $ancestors_ancestor->scientific_name, 'Mammalia';
153 # or we can get the flatfile database's idea of the ancestors by removing
154 # ourselves from the tree
155 is $h_flat->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
156 $h_list->ancestor(undef);
157 is $h_list->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
159 # get_lca should work on nodes from different databases
161 test_skip(-tests => 5, -requires_networking => 1);
162 $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
164 eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
165 skip "Unable to connect to entrez database; no network or server busy?", 5 if $@;
167 ok my $tree_functions = Bio::Tree::Tree->new();
168 is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo';
170 # even though the species taxa for Homo sapiens from list and flat databases
171 # have the same internal id, get_lca won't work because they have different
172 # roots and descendents
173 $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
174 is $h_list->ancestor->internal_id, $h_flat->internal_id;
175 ok ! $tree_functions->get_lca($h_flat, $h_list);
177 # but we can form a tree with the flat node then remove all the ranks we're
178 # not interested in and try again
179 $tree = Bio::Tree::Tree->new(-node => $h_flat);
180 $tree->splice(-keep_rank => \@ranks);
181 is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
184 # ideas from taxonomy2tree.PLS that let us make nice tree, using
185 # Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
186 # because our test flatfile database only has the full lineage of one species
188 for my $name ('Human', 'Hominidae') {
189 my $ncbi_id = $db_flatfile->get_taxonid($name);
191 my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
194 $tree->merge_lineage($node);
197 ok $tree = Bio::Tree::Tree->new(-node => $node);
201 is $tree->get_nodes, 30;
202 $tree->contract_linear_paths;
203 my $ids = join(",", map { $_->id } $tree->get_nodes);
204 is $ids, '131567,9606';
206 # we can recursively fetch all descendents of a taxon
208 test_skip(-tests => 1, -requires_networking => 1);
209 eval {$db_entrez->get_taxon(10090);};
210 skip "Unable to connect to entrez database; no network or server busy?", 1 if $@;
212 my $lca = $db_entrez->get_taxon(314146);
213 my @descs = $db_entrez->get_all_Descendents($lca);
218 $db_list = Bio::DB::Taxonomy->new(-source => 'list',
220 (split(/,\s+/, "cellular organisms, Eukaryota, Fungi/Metazoa group,
221 Metazoa, Eumetazoa, Bilateria, Coelomata, Protostomia, Panarthropoda,
222 Arthropoda, Mandibulata, Pancrustacea, Hexapoda, Insecta, Dicondylia,
223 Pterygota, Neoptera, Endopterygota, Diptera, Nematocera, Culicimorpha,
224 Culicoidea, Culicidae, Anophelinae, Anopheles, Anopheles, Angusticorn,
225 Anopheles, maculipennis group, maculipennis species complex, Anopheles daciae"))]);
227 my @taxonids = $db_list->get_taxonids('Anopheles');
230 # but we should still be able to merge in an incomplete lineage of a sister
231 # species and have the 'tree' remain consistent:
233 # missing 'no rank' Anopheles
234 $db_list->add_lineage(-names => [
235 (split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Angusticorn,
236 maculipennis group, maculipennis species complex, Anopheles labranchiae"))]);
237 my $node = $db_list->get_taxon(-name => 'Anopheles labranchiae');
238 is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
240 # missing 'subgenus' Anopheles
241 $db_list->add_lineage(-names => [
242 (split(/,\s+/, "Anophelinae, Anopheles, Angusticorn, Anopheles,
243 maculipennis group, maculipennis species complex, Anopheles maculipennis"))]);
244 $node = $db_list->get_taxon(-name => 'Anopheles maculipennis');
245 is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
247 # missing 'no rank' Angusticorn
248 $db_list->add_lineage(-names => [
249 (split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Anopheles,
250 maculipennis group, maculipennis species complex, Anopheles melanoon"))]);
251 $node = $db_list->get_taxon(-name => 'Anopheles melanoon');
252 is $node->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Angusticorn';
254 @taxonids = $db_list->get_taxonids('Anopheles');