1 # -*-Perl-*- Test Harness script for Bioperl
12 # -requires_modules => [qw(DBI DBD::SQLite )]
15 use_ok('Bio::DB::Taxonomy');
16 use_ok('Bio::Tree::Tree');
19 my $temp_dir = test_output_dir();
21 # TODO: run basic tests making sure that a database is not regenerated if
22 # present or unless forced
24 ok my $db_flatfile = Bio::DB::Taxonomy->new(
26 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
27 -namesfile => test_input_file('taxdump', 'names.dmp'),
29 isa_ok $db_flatfile, 'Bio::DB::Taxonomy::sqlite';
30 isa_ok $db_flatfile, 'Bio::DB::Taxonomy';
32 # By not specifying a '-directory' argument, index files go to a temporary
33 # folder ($Bio::Root::IO::TEMPDIR, such as 'C:\Users\USER\AppData\Local\Temp'),
34 # and are implied to be temporary. So test the ability of flatfile->DESTROY to
35 # remove the temporary index files at object destruction (this also affects files
36 # in "test_output_dir()", since the folder is created inside the temporary folder)
37 #no warnings qw(once); # silence 'Name "$Bio::Root::IO::TEMPDIR" used only once'
39 #is $db_flatfile->{index_directory}, $Bio::Root::IO::TEMPDIR, 'removal of temporary index files: no -directory';
41 #$db_flatfile->DESTROY;
42 #ok not -e ($db_flatfile->{index_directory} . '/id2names');
43 #ok not -e ($db_flatfile->{index_directory} . '/names2id');
44 #ok not -e ($db_flatfile->{index_directory} . '/nodes');
45 #ok not -e ($db_flatfile->{index_directory} . '/parents');
47 ## Test removal of temporary index files from test_output_dir folder
48 ## (since test_output_dir() =~ m/^$Bio::Root::IO::TEMPDIR/)
49 #ok $db_flatfile = Bio::DB::Taxonomy->new(
50 # -source => 'flatfile',
51 # -directory => $temp_dir,
52 # -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
53 # -namesfile => test_input_file('taxdump', 'names.dmp'),
56 #is $db_flatfile->{index_directory}, $temp_dir, 'removal of temporary index files: test_output_dir()';
57 #$db_flatfile->DESTROY;
58 #ok not -e ($db_flatfile->{index_directory} . '/id2names');
59 #ok not -e ($db_flatfile->{index_directory} . '/names2id');
60 #ok not -e ($db_flatfile->{index_directory} . '/nodes');
61 #ok not -e ($db_flatfile->{index_directory} . '/parents');
63 # Generate the object (and the files) again for the remaining tests
65 ok my $db = Bio::DB::Taxonomy->new(
67 -directory => $temp_dir,
68 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
69 -namesfile => test_input_file('taxdump', 'names.dmp'),
75 # taxid data in the nodes.dmp file should be unique, we ignore repeated values
78 is $db->get_num_taxa, 188;
80 lives_ok {$id = $db->get_taxonid('Homo sapiens')};
84 ## easy test on human, try out the main Taxon methods
86 ok $n = $db->get_taxon(9606);
88 is $n->object_id, $n->id;
89 is $n->ncbi_taxid, $n->id;
90 is $n->parent_id, 9605;
91 is $n->rank, 'species';
93 is $n->node_name, 'Homo sapiens';
94 is $n->scientific_name, $n->node_name;
95 is ${$n->name('scientific')}[0], $n->node_name;
97 my %common_names = map { $_ => 1 } $n->common_names;
98 is keys %common_names, 3, ref($db).": common names";
99 ok exists $common_names{human};
100 ok exists $common_names{man};
102 is $n->division, 'Primates';
103 is $n->genetic_code, 1;
104 is $n->mitochondrial_genetic_code, 2;
105 # these are entrez-only, data not available in dmp files
106 #if ($db eq $db_entrez) {
107 # ok defined $n->pub_date;
108 # ok defined $n->create_date;
109 # ok defined $n->update_date;
112 # briefly test some Bio::Tree::NodeI methods
113 ok my $ancestor = $n->ancestor;
114 is $ancestor->scientific_name, 'Homo';
115 # unless set explicitly, Bio::Taxon doesn't return anything for
116 # each_Descendent; must ask the database directly
117 ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
118 cmp_ok @children, '>', 0;
120 #sleep(3) if $db eq $db_entrez;
122 ## do some trickier things...
123 ok my $n2 = $db->get_Taxonomy_Node('89593');
124 is $n2->scientific_name, 'Craniata';
126 # briefly check we can use some Tree methods
127 my $tree = Bio::Tree::Tree->new();
128 #is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
131 #my @nodes = $tree->get_nodes;
132 #is scalar(@nodes), 0;
134 #@lineage_nodes = $tree->get_lineage_nodes($n->id); # read ID, only works if nodes have been added to tree
135 #is scalar @lineage_nodes, 0;
136 #@lineage_nodes = $tree->get_lineage_nodes($n); # node object always works
137 #cmp_ok(scalar @lineage_nodes, '>', 20);
139 ## get lineage string
140 #like($tree->get_lineage_string($n), qr/cellular organisms;Eukaryota/);
141 #like($tree->get_lineage_string($n,'-'), qr/cellular organisms-Eukaryota/);
142 #like($tree->get_lineage_string($n2), qr/cellular organisms;Eukaryota/);
144 ## can we actually form a Tree and use other Tree methods?
145 #ok $tree = Bio::Tree::Tree->new(-node => $n);
146 #cmp_ok($tree->number_nodes, '>', 20);
147 #cmp_ok(scalar($tree->get_nodes), '>', 20);
148 #is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
150 ## check that getting the ancestor still works now we have explitly set the
151 ## ancestor by making a Tree
152 #is $n->ancestor->scientific_name, 'Homo';
154 #sleep(3) if $db eq $db_entrez;
156 #ok $n = $db->get_Taxonomy_Node('1760');
157 #is $n->scientific_name, 'Actinobacteria';
159 #sleep(3) if $db eq $db_entrez;
161 ## entrez isn't as good at searching as flatfile, so we have to special-case
162 #my @ids = sort $db->get_taxonids('Chloroflexi');
164 #is_deeply \@ids, [200795, 32061];
166 #$id = $db->get_taxonids('Chloroflexi (class)');
167 #$db eq $db_entrez ? is($id, undef) : is($id, 32061);
169 #@ids = $db->get_taxonids('Rhodotorula');
170 #cmp_ok @ids, '>=' , 8;
171 #@ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
176 # get_lca should work on nodes from different databases
178 # test_skip(-tests => 9, -requires_networking => 1);
180 # # check that the result is the same as if we are retrieving from the same DB
182 # $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
183 # my $h_flat2 = $db_flatfile->get_taxon(-name => 'Homo sapiens');
184 # ok my $tree_functions = Bio::Tree::Tree->new();
185 # is $tree_functions->get_lca($h_flat, $h_flat2)->scientific_name, 'Homo', 'get_lca() within flatfile db';
189 # eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
190 # skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
192 # eval { $h_entrez2 = $db_entrez->get_taxon(-name => 'Homo');};
193 # skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
194 # ok $tree_functions = Bio::Tree::Tree->new();
195 # is $tree_functions->get_lca($h_entrez, $h_entrez2)->scientific_name, 'Homo', 'get_lca() within entrez db';
197 # ok $tree_functions = Bio::Tree::Tree->new();
198 # # mixing entrez and flatfile
200 # local $TODO = 'Mixing databases for get_lca() not working, see bug #3416';
201 # is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo', 'get_lca() mixing flatfile and remote db';
203 # # even though the species taxa for Homo sapiens from list and flat databases
204 # # have the same internal id, get_lca won't work because they have different
205 # # roots and descendents
206 # $h_list = $db_list->get_taxon(-name => 'Homo sapiens');
207 # is $h_list->ancestor->internal_id, $h_flat->internal_id;
208 # ok ! $tree_functions->get_lca($h_flat, $h_list);
210 # # but we can form a tree with the flat node then remove all the ranks we're
211 # # not interested in and try again
212 # $tree = Bio::Tree::Tree->new(-node => $h_flat);
213 # $tree->splice(-keep_rank => \@ranks);
214 # is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
217 # Some tests carried over from flatfile and others that would be nice to pass
219 ## ideas from taxonomy2tree.PLS that let us make nice tree, using
220 ## Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
221 ## because our test flatfile database only has the full lineage of one species
223 #for my $name ('Human', 'Hominidae') {
224 # my $ncbi_id = $db_flatfile->get_taxonid($name);
226 # my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
229 # ok $tree->merge_lineage($node);
232 # ok $tree = Bio::Tree::Tree->new(-node => $node);
236 #is $tree->get_nodes, 30;
237 #$tree->contract_linear_paths;
238 #my $ids = join(",", map { $_->id } $tree->get_nodes);
239 #is $ids, '131567,9606';
242 # test_skip(-tests => 1, -requires_networking => 1);
243 # eval {$db_entrez->get_taxon(10090);};
244 # skip "Unable to connect to entrez database; no network or server busy?", 1 if $@;
246 # my $lca = $db_entrez->get_taxon(314146);
247 # my @descs = $db_entrez->get_all_Descendents($lca);
248 # cmp_ok @descs, '>=', 17;
252 #unlink 'taxonomy.sqlite' if (-e 'taxonomy.sqlite');