t/11locuslink.t

   1 # -*-Perl-*- mode (to keep my emacs happy)
   2 # $Id$
   3
   4 BEGIN {
   5     use lib 't';
   6     use Bio::Root::Test;
   7     test_begin(-tests => 113);
   8
   9         use_ok('DBTestHarness');
  10         use_ok('Bio::SeqIO');
  11         use_ok('Bio::Seq::SeqFactory');
  12 }
  13
  14 $biosql = DBTestHarness->new("biosql");
  15 $db = $biosql->get_DBAdaptor();
  16 ok $db;
  17
  18 my $seqin = Bio::SeqIO->new(-file => test_input_file("LL-sample.seq"),
  19                             -format => 'locuslink');
  20 ok $seqin;
  21
  22 my $seq = $seqin->next_seq();
  23 ok $seq;
  24
  25 my $pseq = $db->create_persistent($seq);
  26 ok $pseq;
  27
  28 $pseq->namespace("mytestnamespace");
  29 $pseq->accession_number("999999999"); # don't clash with something existing
  30 ok $pseq->create();
  31 ok $pseq->primary_key();
  32
  33 my $adp = $db->get_object_adaptor($seq);
  34 ok $adp;
  35 my $seqfact = Bio::Seq::SeqFactory->new(-type => "Bio::Seq::RichSeq");
  36 ok $seqfact;
  37
  38 # try/finally
  39 eval {
  40     $dbseq = $adp->find_by_primary_key($pseq->primary_key(), $seqfact);
  41     ok $dbseq;
  42
  43     is ($dbseq->desc, $seq->desc);
  44     is ($dbseq->accession_number, $seq->accession_number);
  45     is ($dbseq->display_id, $seq->display_id);
  46     is ($dbseq->species->binomial, "Homo sapiens");
  47
  48
  49     my @dblinks = $dbseq->annotation->get_Annotations('dblink');
  50     my %dbcounts = map { ($_->database(),0) } @dblinks;
  51     foreach (@dblinks) { $dbcounts{$_->database()}++; }
  52
  53     # We need to remove duplicated dblinks as neither in bioperl nor in biosql
  54     # we have context for dblinks. The problem is that the locuslink
  55     # parser currently adds the value for the ASSEMBLY tag as a dblink, and
  56     # that one might occur later, too (as a real dbxref).
  57     my @links = $seq->annotation->remove_Annotations('dblink');
  58     my %xrefs = map { ($_->database .":". $_->primary_id, $_); } @links;
  59     @links = values %xrefs;
  60     foreach (@links) { $seq->annotation->add_Annotation($_); }
  61     # now count ...
  62     my %counts = map { ($_->database(),0) } @links;
  63     foreach (@links) { $counts{$_->database()}++; }
  64
  65     foreach my $k (keys %counts) {
  66         is ($dbcounts{$k}, $counts{$k}, "equal counts for $k");
  67     }
  68     is (scalar(@dblinks), scalar(@links));
  69
  70     my $dbac = $dbseq->annotation;
  71     my $ac = $seq->annotation;
  72
  73     # LocusLink annotates GO terms with their GO sub-division as the category,
  74     # which isn't really the correct thing to do. If we run this script on a
  75     # database into which GO was loaded already, we'll find the terms by their
  76     # IDs - and then the category (tagname) will change to Gene Ontology. We
  77     # try to fix this possible discrepancy here.
  78     my ($t1) = grep {
  79         $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0008152";
  80     } $dbac->get_Annotations();
  81     my ($t2) = grep {
  82         $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0005777";
  83     } $dbac->get_Annotations();
  84     my ($t3) = grep {
  85         $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0008131";
  86     } $dbac->get_Annotations();
  87     my %tagnames = ("biological process" => $t1->ontology->name,
  88                     "cellular component" => $t2->ontology->name,
  89                     "molecular function" => $t3->ontology->name);
  90     foreach my $tag (keys %tagnames) {
  91         if($tag ne $tagnames{$tag}) {
  92             # we need to fix this one before we can compare
  93             map { $_->tagname($tagnames{$tag}); $ac->add_Annotation($_);
  94               } $ac->remove_Annotations($tag);
  95         }
  96     }
  97     my %uniquenames = map { ($_, undef); } values %tagnames;
  98     # we also need to make up for tests that we won't conduct
  99     # (namely the count comparison per annotation key)
 100     for (1..(3-scalar(values %uniquenames))) {
 101         skip("GO sub-division tag became other GO name", $_);
 102     }
 103
 104     my @keys = $ac->get_all_annotation_keys();
 105     is (scalar($dbac->get_all_annotation_keys()), scalar(@keys));
 106
 107     foreach my $k (@keys) {
 108         my @dbanns =
 109             sort { $a->as_text() cmp $b->as_text } $dbac->get_Annotations($k);
 110         my @anns =
 111             sort { $a->as_text() cmp $b->as_text } $ac->get_Annotations($k);
 112         is (scalar(@dbanns), scalar(@anns), "equal counts for $k");
 113         for(my $i = 0; $i < @anns; $i++) {
 114             is ($dbanns[$i]->as_text, $anns[$i]->as_text);
 115         }
 116     }
 117
 118     my ($dbcmt) = $dbac->get_Annotations('comment');
 119     my ($cmt) = $ac->get_Annotations('comment');
 120     is ($dbcmt->text, $cmt->text);
 121 };
 122
 123 print STDERR $@ if $@;
 124
 125 # delete seq
 126 is ($pseq->remove(), 1);
 127 my $ns = Bio::DB::Persistent::BioNamespace->new(-identifiable => $pseq);
 128 ok $ns = $db->get_object_adaptor($ns)->find_by_unique_key($ns);
 129 ok $ns->primary_key();
 130 is ($ns->remove(), 1);
 131