2 ## Bioperl Test Harness Script for Modules
4 # Before `make install' is performed this script should be runnable with
5 # `make test'. After `make install' it should work as `perl test.t'
10 use constant TEST_COUNT => 52;
11 use constant GFF_FILE => Bio::Root::IO->catfile('t','data',
12 'seqfeaturedb','test.gff3');
15 # to handle systems with no installed Test module
16 # we include the t dir (where a copy of Test.pm is located)
18 eval { require Test; };
23 plan test => TEST_COUNT;
24 $ENV{ORACLE_HOME} ||= '/home/oracle/Home';
27 use lib '.','..','./blib/lib';
28 use Bio::DB::SeqFeature::Store;
29 use Bio::DB::SeqFeature::Store::GFF3Loader;
33 my $explanation = shift;
48 my (@f,$f,@s,$s,$seq1,$seq2);
51 @args = (-adaptor => 'memory') unless @args;
53 my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
56 fail(TEST_COUNT - 1) unless $db;
58 my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
61 fail(TEST_COUNT - 2) unless $loader;
64 ok($loader->load(GFF_FILE));
66 # there should be one gene named 'abc-1'
67 @f = $db->get_features_by_name('abc-1');
71 # there should be three subfeatures of type "exon" and three of type "CDS"
72 ok($f->get_SeqFeatures('exon')==3);
73 ok($f->get_SeqFeatures('CDS')==3);
75 # the sequence of feature abc-1 should match the sequence of the first exon at the beginning
77 $seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
78 ok(substr($seq1,0,length $seq2) eq $seq2);
80 # sequence lengths should match
81 ok(length $seq1 == $f->length);
83 # if we pull out abc-1 again we should get the same object
84 ($s) = $db->get_features_by_name('abc-1');
87 # we should get two objects when we ask for abc-1 using get_features_by_alias
88 # this also depends on selective subfeature indexing
89 @f = $db->get_features_by_alias('abc-1');
92 # the two features should be different
95 # test that targets are working
96 ($f) = $db->get_features_by_name('match1');
100 ok($s->seq_id eq 'CEESC13F');
101 $seq1 = $s->seq->seq;
102 ok(substr($seq1,0,10) eq 'ttgcgttcgg');
104 # can we fetch subfeatures?
105 # gene3.a has the Index=1 attribute, so we should fetch it
106 ($f) = $db->get_features_by_name('gene3.a');
109 # gene 3.b doesn't have an index, so we shouldn't get it
110 ($f) = $db->get_features_by_name('gene3.b');
113 # test three-tiered genes
114 ($f) = $db->get_features_by_name('gene3');
116 my @transcripts = $f->get_SeqFeatures;
117 ok(@transcripts == 2);
118 ok($transcripts[0]->method eq 'mRNA');
119 ok($transcripts[0]->source eq 'confirmed');
121 # test that exon #2 is shared between the two transcripts
122 my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
124 my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
125 my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
126 my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
127 ok($shared1 && $shared2);
128 ok($shared1 eq $shared2);
129 ok($shared1->primary_id eq $shared2->primary_id);
132 ok($shared1->phase == 0);
133 ok($shared1->strand eq +1);
134 ok(($f->attributes('expressed'))[0] eq 'yes');
137 my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
138 my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
141 ok($gene3a->Is_expressed);
142 ok(!$gene3b->Is_expressed);
144 # the representation of the 3'-UTR in the two transcripts a and b is
145 # different (not recommended but supported by the GFF3 spec). In the
146 # first case, there are two 3'UTRs existing as independent
147 # features. In the second, there is one UTR with a split location.
148 ok($gene3a->Three_prime_UTR == 2);
149 ok($gene3b->Three_prime_UTR == 1);
150 my ($utr) = $gene3b->Three_prime_UTR;
151 ok($utr->segments == 2);
152 my $location = $utr->location;
153 ok($location->isa('Bio::Location::Split'));
154 ok($location->sub_Location == 2);
156 # ok, test that queries are working properly.
157 # find all features with the attribute "expressed"
158 @f = $db->get_features_by_attribute({expressed=>'yes'});
161 # find all top-level features on Contig3 -- there should be two
162 @f = $db->get_features_by_location(-seq_id=>'Contig3');
165 # find all top-level features on Contig3 of type 'assembly_component'
166 @f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
171 my $feature_count = @f;
172 ok($feature_count > 0);
174 my $i = $db->get_seq_stream;
178 while ($i->next_seq) { $count++ }
179 ok($feature_count == $count);
181 # regression test on bug in which get_SeqFeatures('type') did not filter inline segments
182 @f = $db->get_features_by_name('agt830.3');
183 ok(@f && !$f[0]->get_SeqFeatures('exon'));
184 ok(@f && $f[0]->get_SeqFeatures('EST_match'));
186 # regression test on bug in which the load_id disappeared
187 ok(@f && $f[0]->load_id eq 'Match2');
189 # regress on proper handling of multiple ID features
190 my ($alignment) = $db->get_features_by_name('agt830.5');
192 ok($alignment->target->start == 1 && $alignment->target->end == 654);
193 ok($alignment->get_SeqFeatures == 2);
194 my $gff3 = $alignment->gff3_string(1);
195 my @lines = split "\n",$gff3;
197 ok ("@lines" !~ /Parent=/s);
198 ok ("@lines" =~ /ID=/s);