From c3bbdb7c72dfc95f13670ecd6bfaa0df4127cc6b Mon Sep 17 00:00:00 2001
From: Chris Fields
Date: Mon, 12 Sep 2016 22:43:46 -0500
Subject: [PATCH] logic fix for #182

Bio::DB::Taxonomy::entrez::get_taxon called ->children on the result of
$taxon->first_child('LineageEx') unconditionally.  Wrap the ancestor and
children caching in "if (defined $lineage_ex)" so that a record without
a LineageEx child no longer triggers a method call on undef; the
requested node itself is still cached and returned.

Also:
  * drop the "id is ..." debug call issued before every fetch;
  * relax the common-names test from "== 3" to ">= 3";
  * add 12 network-only tests that look up 'viruses', 'Deltavirus',
    'unclassified' and 'plasmid' and fetch a taxon for one of the
    returned ids.
---
 Bio/DB/Taxonomy/entrez.pm | 59 ++++++++++++++++++++++++-----------------------
 t/RemoteDB/Taxonomy.t     | 30 ++++++++++++++++++++++--
 2 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/Bio/DB/Taxonomy/entrez.pm b/Bio/DB/Taxonomy/entrez.pm
index ffc0dac2f..67d255f8f 100644
--- a/Bio/DB/Taxonomy/entrez.pm
+++ b/Bio/DB/Taxonomy/entrez.pm
@@ -321,7 +321,6 @@ sub get_taxon {
         $taxonid = join(',', @uncached);
 
         $p{'id'} = $taxonid;
-        $self->debug("id is $taxonid\n");
 
         my $twig = $self->_run_query($self->_build_url($EntrezFetch, \%p));
         my $root = $twig->root;
@@ -357,35 +356,37 @@ sub get_taxon {
             # requested node, we may as well cache data for the ancestors to
             # reduce the number of accesses to website in future
             my $lineage_ex = $taxon->first_child('LineageEx');
-            my ($ancestor, $lineage_data, @taxa);
-            foreach my $lineage_taxon ($lineage_ex->children) {
-                my $lineage_taxid = $lineage_taxon->first_child_text('TaxId');
-
-                if (exists $DATA_CACHE->{minimal_info}->{$lineage_taxid} || exists $DATA_CACHE->{full_info}->{$lineage_taxid}) {
-                    $lineage_data = $DATA_CACHE->{minimal_info}->{$lineage_taxid} || $DATA_CACHE->{full_info}->{$lineage_taxid};
-                    next;
-                }
-                else {
-                    $lineage_data = {};
-                }
-
-                $lineage_data->{id} = $lineage_taxid;
-                $lineage_data->{scientific_name} = $lineage_taxon->first_child_text('ScientificName');
-                $lineage_data->{rank} = $lineage_taxon->first_child_text('Rank');
-
-                $RELATIONS->{ancestors}->{$lineage_taxid} = $ancestor->{id} if $ancestor;
-
-                $DATA_CACHE->{minimal_info}->{$lineage_taxid} = $lineage_data;
-            } continue { $ancestor = $lineage_data; unshift(@taxa, $lineage_data); }
-
-            $RELATIONS->{ancestors}->{$taxid} = $ancestor->{id} if $ancestor;
-
-            # go through the lineage in reverse so we can remember the children
-            my $child = $data;
-            foreach my $lineage_data (@taxa) {
-                $RELATIONS->{children}->{$lineage_data->{id}}->{$child->{id}} = 1;
-            } continue { $child = $lineage_data; }
+            if (defined $lineage_ex) {
+                my ($ancestor, $lineage_data, @taxa);
+                foreach my $lineage_taxon ($lineage_ex->children) {
+                    my $lineage_taxid = $lineage_taxon->first_child_text('TaxId');
+
+                    if (exists $DATA_CACHE->{minimal_info}->{$lineage_taxid} || exists $DATA_CACHE->{full_info}->{$lineage_taxid}) {
+                        $lineage_data = $DATA_CACHE->{minimal_info}->{$lineage_taxid} || $DATA_CACHE->{full_info}->{$lineage_taxid};
+                        next;
+                    }
+                    else {
+                        $lineage_data = {};
+                    }
+
+                    $lineage_data->{id} = $lineage_taxid;
+                    $lineage_data->{scientific_name} = $lineage_taxon->first_child_text('ScientificName');
+                    $lineage_data->{rank} = $lineage_taxon->first_child_text('Rank');
+
+                    $RELATIONS->{ancestors}->{$lineage_taxid} = $ancestor->{id} if $ancestor;
+
+                    $DATA_CACHE->{minimal_info}->{$lineage_taxid} = $lineage_data;
+                } continue { $ancestor = $lineage_data; unshift(@taxa, $lineage_data); }
 
+                $RELATIONS->{ancestors}->{$taxid} = $ancestor->{id} if $ancestor;
+
+                # go through the lineage in reverse so we can remember the children
+                my $child = $data;
+                foreach my $lineage_data (@taxa) {
+                    $RELATIONS->{children}->{$lineage_data->{id}}->{$child->{id}} = 1;
+                } continue { $child = $lineage_data; }
+            }
+
             delete $DATA_CACHE->{minimal_info}->{$taxid};
             $DATA_CACHE->{full_info}->{$taxid} = $data;
             push(@results, $self->_make_taxon($data));
diff --git a/t/RemoteDB/Taxonomy.t b/t/RemoteDB/Taxonomy.t
index f7b5951fa..3a7ddcf65 100644
--- a/t/RemoteDB/Taxonomy.t
+++ b/t/RemoteDB/Taxonomy.t
@@ -8,7 +8,7 @@ BEGIN {
     use Bio::Root::Test;
 
     test_begin(
-        -tests => 202,
+        -tests => 214,
         -requires_modules => [qw(DB_File
                                  LWP::UserAgent
                                  XML::Twig )]
@@ -103,7 +103,7 @@ for my $db ($db_entrez, $db_flatfile) {
     is ${$n->name('scientific')}[0], $n->node_name;
 
     my %common_names = map { $_ => 1 } $n->common_names;
-    is keys %common_names, 3, ref($db).": common names";
+    cmp_ok keys %common_names, '>=', 3, ref($db).": common names";
     ok exists $common_names{human};
     ok exists $common_names{man};
 
@@ -446,3 +446,29 @@ ok $node2 = $db_list->get_taxon( -names => [ 'o__Chroococcales', 'g__Microcoleus
 is $node2->scientific_name, $node1->scientific_name;
 is $node2->id, $node1->id;
 is $node2->internal_id, $node1->internal_id;
+
+# tests for #182
+SKIP: {
+    test_skip(-tests => 12, -requires_networking => 1);
+
+    my $db=Bio::DB::Taxonomy->new(-source=>"entrez");
+
+    my @taxa = qw(viruses Deltavirus unclassified plasmid);
+
+    for my $taxon (@taxa) {
+        test_taxid($db, $taxon);
+    }
+
+    sub test_taxid {
+        my ($db, $taxa) = @_;
+        my @taxonids = $db->get_taxonids($taxa);
+        cmp_ok(scalar(@taxonids), '>', 0, "Got IDs returned for $taxa:".join(',', @taxonids));
+        my $taxon;
+        lives_ok { $taxon = $db->get_taxon(-taxonid => pop @taxonids) } "IDs generates a Bio::Taxonomy::Node";
+        if (defined $taxon) {
+            like( $taxon->scientific_name, qr/$taxa/i, "Name returned matches $taxa");
+        } else {
+            ok(0, "No taxon object returned for $taxa");
+        }
+    }
+}
-- 
2.11.4.GIT