1 # -*-Perl-*- Test Harness script for Bioperl
5 use File::Temp qw(tempdir);
11 test_begin(-tests => 103,
12 -requires_module => 'XML::Twig');
14 use_ok('Bio::DB::Taxonomy');
15 use_ok('Bio::Tree::Tree');
18 my $temp_dir = tempdir( CLEANUP => 1 );
20 # we're actually testing Bio::Taxon and Bio::DB::Taxonomy::* here, not
23 ok my $db_entrez = Bio::DB::Taxonomy->new(-source => 'entrez');
25 ok my $db_flatfile = Bio::DB::Taxonomy->new(-source => 'flatfile',
26 -directory => $temp_dir,
27 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
28 -namesfile => test_input_file('taxdump','names.dmp'),
32 foreach my $db ($db_entrez, $db_flatfile) {
34 test_skip(-tests => 38, -requires_networking => 1) if $db eq $db_entrez;
36 eval { $id = $db->get_taxonid('Homo sapiens');};
37 skip "Unable to connect to entrez database; no network or server busy?", 38 if $@;
41 # easy test on human, try out the main Taxon methods
42 ok $n = $db->get_taxon(9606);
44 is $n->object_id, $n->id;
45 is $n->ncbi_taxid, $n->id;
46 is $n->parent_id, 9605;
47 is $n->rank, 'species';
49 is $n->node_name, 'Homo sapiens';
50 is $n->scientific_name, $n->node_name;
51 is ${$n->name('scientific')}[0], $n->node_name;
53 my %common_names = map { $_ => 1 } $n->common_names;
54 is keys %common_names, 2;
55 ok exists $common_names{human};
56 ok exists $common_names{man};
58 is $n->division, 'Primates';
59 is $n->genetic_code, 1;
60 is $n->mitochondrial_genetic_code, 2;
61 # these are entrez-only, data not available in dmp files
62 if ($db eq $db_entrez) {
63 ok defined $n->pub_date;
64 ok defined $n->create_date;
65 ok defined $n->update_date;
68 # briefly test some Bio::Tree::NodeI methods
69 ok my $ancestor = $n->ancestor;
70 is $ancestor->scientific_name, 'Homo';
71 # unless set explicitly, Bio::Taxon doesn't return anything for
72 # each_Descendent; must ask the database directly
73 ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
76 sleep(3) if $db eq $db_entrez;
78 # do some trickier things...
79 ok my $n2 = $db->get_Taxonomy_Node('89593');
80 is $n2->scientific_name, 'Craniata';
82 # briefly check we can use some Tree methods
83 my $tree = Bio::Tree::Tree->new();
84 is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
86 # can we actually form a Tree and use other Tree methods?
87 ok $tree = Bio::Tree::Tree->new(-node => $n);
88 is $tree->number_nodes, 30;
89 is $tree->get_nodes, 30;
90 is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
92 # check that getting the ancestor still works now we have explitly set the
93 # ancestor by making a Tree
94 is $n->ancestor->scientific_name, 'Homo';
96 sleep(3) if $db eq $db_entrez;
98 ok $n = $db->get_Taxonomy_Node('1760');
99 is $n->scientific_name, 'Actinobacteria';
101 sleep(3) if $db eq $db_entrez;
103 # entrez isn't as good at searching as flatfile, so we have to special-case
104 my @ids = $db->get_taxonids('Chloroflexi');
105 $db eq $db_entrez ? (is @ids, 1) : (is @ids, 2);
106 $id = $db->get_taxonids('Chloroflexi (class)');
109 @ids = $db->get_taxonids('Rhodotorula');
111 @ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
117 # Test the list database
118 my @ranks = qw(superkingdom class genus species);
119 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
120 my $db_list = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
124 ok my $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
125 ok my $h_flat = $db_flatfile->get_taxon(-name => 'Homo sapiens');
127 is $h_list->ancestor->scientific_name, 'Homo';
129 my @names = $h_list->common_names;
131 $h_list->common_names('woman');
132 @names = $h_list->common_names;
134 @names = $h_flat->common_names;
137 # you can switch to another database when you need more information, which also
138 # merges information in the node from the two different dbs
139 $h_list->db_handle($db_flatfile);
140 @names = $h_list->common_names;
143 # form a tree with the list lineage first, preventing a subsequent database
144 # change from giving us all those extra ranks
145 $h_list->db_handle($db_list);
146 my $ancestors_ancestor = $h_list->ancestor->ancestor;
147 is $ancestors_ancestor->scientific_name, 'Mammalia';
149 my $tree = Bio::Tree::Tree->new(-node => $h_list);
150 $h_list->db_handle($db_flatfile);
151 $ancestors_ancestor = $h_list->ancestor->ancestor;
152 is $ancestors_ancestor->scientific_name, 'Mammalia';
154 # or we can get the flatfile database's idea of the ancestors by removing
155 # ourselves from the tree
156 is $h_flat->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
157 $h_list->ancestor(undef);
158 is $h_list->ancestor->ancestor->scientific_name, 'Homo/Pan/Gorilla group';
160 # get_lca should work on nodes from different databases
162 test_skip(-tests => 5, -requires_networking => 1);
163 $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
165 eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
166 skip "Unable to connect to entrez database; no network or server busy?", 5 if $@;
168 ok my $tree_functions = Bio::Tree::Tree->new();
169 is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo';
171 # even though the species taxa for Homo sapiens from list and flat databases
172 # have the same internal id, get_lca won't work because they have different
173 # roots and descendents
174 $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
175 is $h_list->ancestor->internal_id, $h_flat->internal_id;
176 ok ! $tree_functions->get_lca($h_flat, $h_list);
178 # but we can form a tree with the flat node then remove all the ranks we're
179 # not interested in and try again
180 $tree = Bio::Tree::Tree->new(-node => $h_flat);
181 $tree->splice(-keep_rank => \@ranks);
182 is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
185 # ideas from taxonomy2tree.PLS that let us make nice tree, using
186 # Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
187 # because our test flatfile database only has the full lineage of one species
189 for my $name ('Human', 'Hominidae') {
190 my $ncbi_id = $db_flatfile->get_taxonid($name);
192 my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
195 $tree->merge_lineage($node);
198 ok $tree = Bio::Tree::Tree->new(-node => $node);
202 is $tree->get_nodes, 30;
203 $tree->contract_linear_paths;
204 my $ids = join(",", map { $_->id } $tree->get_nodes);
205 is $ids, '131567,9606';
207 # we can recursively fetch all descendents of a taxon
209 test_skip(-tests => 1, -requires_networking => 1);
210 eval {$db_entrez->get_taxon(10090);};
211 skip "Unable to connect to entrez database; no network or server busy?", 1 if $@;
213 my $lca = $db_entrez->get_taxon(314146);
214 my @descs = $db_entrez->get_all_Descendents($lca);
219 $db_list = Bio::DB::Taxonomy->new(-source => 'list',
221 (split(/,\s+/, "cellular organisms, Eukaryota, Fungi/Metazoa group,
222 Metazoa, Eumetazoa, Bilateria, Coelomata, Protostomia, Panarthropoda,
223 Arthropoda, Mandibulata, Pancrustacea, Hexapoda, Insecta, Dicondylia,
224 Pterygota, Neoptera, Endopterygota, Diptera, Nematocera, Culicimorpha,
225 Culicoidea, Culicidae, Anophelinae, Anopheles, Anopheles, Angusticorn,
226 Anopheles, maculipennis group, maculipennis species complex, Anopheles daciae"))]);
228 my @taxonids = $db_list->get_taxonids('Anopheles');
231 # but we should still be able to merge in an incomplete lineage of a sister
232 # species and have the 'tree' remain consistent:
234 # missing 'no rank' Anopheles
235 $db_list->add_lineage(-names => [
236 (split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Angusticorn,
237 maculipennis group, maculipennis species complex, Anopheles labranchiae"))]);
238 my $node = $db_list->get_taxon(-name => 'Anopheles labranchiae');
239 is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
241 # missing 'subgenus' Anopheles
242 $db_list->add_lineage(-names => [
243 (split(/,\s+/, "Anophelinae, Anopheles, Angusticorn, Anopheles,
244 maculipennis group, maculipennis species complex, Anopheles maculipennis"))]);
245 $node = $db_list->get_taxon(-name => 'Anopheles maculipennis');
246 is $node->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Anophelinae';
248 # missing 'no rank' Angusticorn
249 $db_list->add_lineage(-names => [
250 (split(/,\s+/, "Anophelinae, Anopheles, Anopheles, Anopheles,
251 maculipennis group, maculipennis species complex, Anopheles melanoon"))]);
252 $node = $db_list->get_taxon(-name => 'Anopheles melanoon');
253 is $node->ancestor->ancestor->ancestor->ancestor->scientific_name, 'Angusticorn';
255 @taxonids = $db_list->get_taxonids('Anopheles');