1 # -*-Perl-*- mode (to keep my emacs happy)
7 test_begin(-tests => 113);
9 use_ok('DBTestHarness');
11 use_ok('Bio::Seq::SeqFactory');
14 $biosql = DBTestHarness->new("biosql");
15 $db = $biosql->get_DBAdaptor();
18 my $seqin = Bio::SeqIO->new(-file => test_input_file("LL-sample.seq"),
19 -format => 'locuslink');
22 my $seq = $seqin->next_seq();
25 my $pseq = $db->create_persistent($seq);
28 $pseq->namespace("mytestnamespace");
29 $pseq->accession_number("999999999"); # don't clash with something existing
31 ok $pseq->primary_key();
33 my $adp = $db->get_object_adaptor($seq);
35 my $seqfact = Bio::Seq::SeqFactory->new(-type => "Bio::Seq::RichSeq");
40 $dbseq = $adp->find_by_primary_key($pseq->primary_key(), $seqfact);
43 is ($dbseq->desc, $seq->desc);
44 is ($dbseq->accession_number, $seq->accession_number);
45 is ($dbseq->display_id, $seq->display_id);
46 is ($dbseq->species->binomial, "Homo sapiens");
49 my @dblinks = $dbseq->annotation->get_Annotations('dblink');
50 my %dbcounts = map { ($_->database(),0) } @dblinks;
51 foreach (@dblinks) { $dbcounts{$_->database()}++; }
53 # We need to remove duplicated dblinks as neither in bioperl nor in biosql
54 # we have context for dblinks. The problem is that the locuslink
55 # parser currently adds the value for the ASSEMBLY tag as a dblink, and
56 # that one might occur later, too (as a real dbxref).
57 my @links = $seq->annotation->remove_Annotations('dblink');
58 my %xrefs = map { ($_->database .":". $_->primary_id, $_); } @links;
59 @links = values %xrefs;
60 foreach (@links) { $seq->annotation->add_Annotation($_); }
62 my %counts = map { ($_->database(),0) } @links;
63 foreach (@links) { $counts{$_->database()}++; }
65 foreach my $k (keys %counts) {
66 is ($dbcounts{$k}, $counts{$k}, "equal counts for $k");
68 is (scalar(@dblinks), scalar(@links));
70 my $dbac = $dbseq->annotation;
71 my $ac = $seq->annotation;
73 # LocusLink annotates GO terms with their GO sub-division as the category,
74 # which isn't really the correct thing to do. If we run this script on a
75 # database into which GO was loaded already, we'll find the terms by their
76 # IDs - and then the category (tagname) will change to Gene Ontology. We
77 # try to fix this possible discrepancy here.
79 $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0008152";
80 } $dbac->get_Annotations();
82 $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0005777";
83 } $dbac->get_Annotations();
85 $_->isa("Bio::Ontology::TermI") && $_->identifier eq "GO:0008131";
86 } $dbac->get_Annotations();
87 my %tagnames = ("biological process" => $t1->ontology->name,
88 "cellular component" => $t2->ontology->name,
89 "molecular function" => $t3->ontology->name);
90 foreach my $tag (keys %tagnames) {
91 if($tag ne $tagnames{$tag}) {
92 # we need to fix this one before we can compare
93 map { $_->tagname($tagnames{$tag}); $ac->add_Annotation($_);
94 } $ac->remove_Annotations($tag);
97 my %uniquenames = map { ($_, undef); } values %tagnames;
98 # we also need to make up for tests that we won't conduct
99 # (namely the count comparison per annotation key)
100 for (1..(3-scalar(values %uniquenames))) {
101 skip("GO sub-division tag became other GO name", $_);
104 my @keys = $ac->get_all_annotation_keys();
105 is (scalar($dbac->get_all_annotation_keys()), scalar(@keys));
107 foreach my $k (@keys) {
109 sort { $a->as_text() cmp $b->as_text } $dbac->get_Annotations($k);
111 sort { $a->as_text() cmp $b->as_text } $ac->get_Annotations($k);
112 is (scalar(@dbanns), scalar(@anns), "equal counts for $k");
113 for(my $i = 0; $i < @anns; $i++) {
114 is ($dbanns[$i]->as_text, $anns[$i]->as_text);
118 my ($dbcmt) = $dbac->get_Annotations('comment');
119 my ($cmt) = $ac->get_Annotations('comment');
120 is ($dbcmt->text, $cmt->text);
123 print STDERR $@ if $@;
126 is ($pseq->remove(), 1);
127 my $ns = Bio::DB::Persistent::BioNamespace->new(-identifiable => $pseq);
128 ok $ns = $db->get_object_adaptor($ns)->find_by_unique_key($ns);
129 ok $ns->primary_key();
130 is ($ns->remove(), 1);