1 # -*-Perl-*- Test Harness script for Bioperl
12 test_begin(-tests => 275,
13 -excludes_os => 'mswin');
15 use_ok('Bio::DB::GFF');
18 my $fasta_files = test_input_file('dbfa');
19 my $gff_file1 = test_input_file('biodbgff', 'test.gff');
20 my $gff_file2 = test_input_file('biodbgff', 'test.gff3');
22 my $build = Module::Build->current;
23 my $test_dsn = $build->notes('test_dsn');
25 my $adaptor = $test_dsn ? $test_dsn : 'memory';
26 $adaptor = shift if @ARGV;
28 if ($adaptor =~ /sqlite/i) {
33 if ($adaptor =~ /^dbi/) {
35 $cfg->{dbd_driver} = $build->notes('dbd_driver');
36 $cfg->{test_db} = $build->notes('test_db');
37 $cfg->{test_host} = $build->notes('test_host');
38 $cfg->{test_user} = $build->notes('test_user');
39 $cfg->{test_pass} = $build->notes('test_pass');
40 $cfg->{test_dsn} = $build->notes('test_dsn');
42 $adaptor = "dbi::$cfg->{dbd_driver}" if $cfg->{dbd_driver};
43 @args = ( '-adaptor' => $adaptor,
44 '-dsn' => $cfg->{test_dsn},
46 push @args,('-user' => $cfg->{test_user}) if $cfg->{test_user};
47 push @args,('-pass' => $cfg->{test_pass}) if $cfg->{test_pass};
49 @args = ('-adaptor' => $adaptor,
53 push @args,('-aggregators' => ['transcript','processed_transcript']);
56 for my $FILE ($gff_file1,$gff_file2) {
58 my $db = eval { Bio::DB::GFF->new(@args) };
59 skip "DB load failed? Skipping all! $@", 278 if $@;
63 $db->gff3_name_munging(1);
65 # set the preferred groups
66 $db->preferred_groups( [ 'transcript', 'gene', 'mRNA' ] );
67 my @pg = $db->preferred_groups;
72 ok($db->initialize(1));
73 ok($db->load_gff($FILE));
74 ok($db->load_fasta($fasta_files));
77 my @types = sort $db->types;
79 is($types[0],'CDS:confirmed');
80 is($types[-1],'transposon:tc1');
81 my %types = $db->types('-enumerate'=>1);
82 is($types{'transposon:tc1'},2);
85 my $segment1 = $db->segment('Contig1');
88 is($segment1->length,37450);
89 is($segment1->start,1);
90 is($segment1->end,37450);
91 is($segment1->strand,1);
93 my $segment2 = $db->segment('Contig1',1=>1000);
94 is($segment2->length,1000);
95 is($segment2->start,1);
96 is($segment2->end,1000);
97 is($segment2->strand,1);
99 my $segment3 = $db->segment('Contig1',10=>1);
100 is($segment3->start,10);
101 is($segment3->end,1);
102 is($segment3->strand,-1);
104 # exercise attribute fetching
105 my @t = $db->fetch_feature_by_name(Transcript => 'trans-1');
106 my ($t) = grep {$_->type eq 'transcript:confirmed'} @t;
107 is($t->attributes('Note'),'function unknown');
108 is(join(' ',sort $t->attributes('Gene')),'abc-1 xyz-2');
109 my $att = $t->attributes;
110 is(scalar @{$att->{Gene}},2);
111 @t = sort {$a->display_name cmp $b->display_name} $db->fetch_feature_by_attribute('Gene'=>'abc-1');
114 my $seg = $db->segment('Contig1');
115 @t = $seg->features(-attributes=>{'Gene'=>'abc-1'});
117 is($seg->feature_count, 17);
118 @t = $seg->features(-attributes=>{'Gene'=>'xyz-2',Note=>'Terribly interesting'});
121 # exercise dna() a bit
122 my $dna = $segment2->dna;
123 is(length $dna,1000);
124 is(substr($dna,0,10),'gcctaagcct');
125 is($segment3->dna,'aggcttaggc');
126 is($segment1->dna, $db->dna($segment1->ref));
129 my $segment4 = $db->segment('-name'=>'c128.1','-class'=>'Transposon');
130 is($segment4->length,1000);
131 is($segment4->start,1);
132 is($segment4->end,1000);
133 is($segment4->ref,'c128.1');
134 is($segment4->strand,1);
135 ok(!$segment4->absolute);
137 $segment4->absolute(1);
138 ok($segment4->absolute);
139 is($segment4->ref,'Contig1');
140 is($segment4->start,5001);
141 $segment4->absolute(0);
142 my $tmp = $db->segment('Contig1',5001=>6000);
143 is($segment4->dna,$tmp->dna);
145 $segment4->ref('Contig1');
146 is($segment4->ref,'Contig1');
147 is($segment4->start,5001);
148 is($segment4->end,6000);
150 my $segment5 = $db->segment('-name'=>'c128.2','-class'=>'Transposon');
151 is($segment5->length,1000);
152 is($segment5->start,1);
153 is($segment5->end,1000);
154 is($segment5->ref,'c128.2');
155 is($segment5->strand,1);
157 $tmp = $db->segment('Contig1',9000,8001);
158 is($segment5->dna,$tmp->dna);
159 $segment5->absolute(1);
160 is($segment5->strand,-1);
163 # first two positive strand features
164 $segment4 = $db->segment('-name'=>'c128.1','-class'=>'Transposon');
165 my $start4 = $segment4->abs_start;
166 $segment5 = $db->segment('Transcript' => 'trans-1');
167 my $start5 = $segment5->abs_start;
168 $segment4->ref($segment5);
169 is($segment4->strand,1);
170 is($segment4->start,$start4-$start5+1);
171 is($segment4->stop,$start4-$start5+$segment4->length);
173 $segment4->ref('Transposon' => 'c128.1');
174 $segment5->ref('Transcript' => 'trans-1');
175 $segment5->ref($segment4);
176 is($segment5->start,$start5-$start4+1);
178 # now a positive on a negative strand feature
179 my $segment6 = $db->segment('Transcript'=>'trans-2');
180 my $start6 = $segment6->abs_start;
181 is($segment6->strand,1);
182 is($segment6->abs_strand,-1);
183 $segment6->ref($segment4);
184 is($segment6->start,$start6-$start4+1);
185 is($segment6->strand,-1);
187 $segment4->ref($segment6);
188 is($segment4->start,$start6-$start4+1);
189 is($segment4->strand,-1);
190 is($segment4->ref,$segment6);
192 # the reference sequence shouldn't affect the dna
193 $segment6 = $db->segment('Transcript'=>'trans-2');
194 $dna = $segment6->dna;
195 $segment6->ref($segment4);
196 is($segment6->dna,$dna);
198 # segments should refuse to accept a reference sequence on a foreign segment
200 my $result = eval { $segment6->ref('Contig2') };
202 like($@, qr/are on different sequence segments/);
204 # types across a segment
205 $segment1 = $db->segment('Contig1');
206 @types = sort $segment1->types;
208 is($types[0],'CDS:confirmed');
209 is($types[-1],'transposon:tc1');
210 %types = $segment1->types('-enumerate'=>1);
211 is($types{'similarity:est'},3);
213 # features across a segment
214 my @features = $segment1->features('-automerge'=>0);
215 is(scalar @features,17);
217 foreach (@features) {
218 $types_seen{$_->type}++;
220 my $inconsistency = 0;
221 foreach (keys %types,keys %types_seen) {
222 $inconsistency++ unless $types_seen{$_} == $types{$_};
226 @features = sort {$a->start<=>$b->start} @features;
228 # make sure that we can use features to get at dna
229 is($features[0]->dna,$db->segment('Contig1',$features[0]->start,$features[0]->end)->dna);
231 # check three forward features and three reverse features
232 # (This depends on the test.gff data)
234 $segment2 = $db->segment($features[$_],50,100);
235 if ($features[$_]->strand >= 0) {
236 is($segment2->dna,$db->segment('Contig1',
237 $features[$_]->start+50-1,
238 $features[$_]->start+100-1)->dna)
240 is($segment2->dna,$db->segment('Contig1',
241 $features[$_]->start-50+1,
242 $features[$_]->start-100+1)->dna)
246 # exercise the aggregator
247 my $aggregator = Bio::DB::GFF::Aggregator->new('-method' => 'aggregated_transcript',
248 '-main_method' => 'transcript',
249 '-sub_parts' => ['exon','CDS']);
250 $db->add_aggregator($aggregator);
251 $segment1 = $db->segment('Contig1');
252 @features = sort $segment1->features('aggregated_transcript'); # sort so that trans-1 comes first
253 is(scalar @features,2);
254 cmp_ok($features[0]->Exon, '>', 0);
255 cmp_ok($features[0]->Cds,'>', 0);
257 # Test that sorting is correct. The way that test.gff is set up, the lower one is
258 # on the + strand and the higher is on the -.
259 @features = sort {$a->start <=> $b->start} @features;
260 is($features[0]->strand,1);
261 is($features[1]->strand,-1);
265 foreach ($features[0]->Exon) {
266 $inconsistency++ if $_->start > $_->end;
267 $inconsistency++ if $last && $_->start < $last;
272 $inconsistency = $last = 0;
273 foreach ($features[1]->Exon) {
274 $inconsistency++ if $_->start < $_->end;
275 $inconsistency++ if $last && $_->start > $last;
280 # relative addressing in aggregated features
281 my $transcript1 = $db->segment($features[0]);
282 $transcript1->ref($features[0]);
283 my @overlap = sort {$a->start <=> $b->start } $transcript1->features;
284 is(scalar(@overlap),5);
285 is($overlap[0]->start,-999);
287 $transcript1 = $db->segment('Transcript' => 'trans-1');
288 @overlap = sort {$a->start <=> $b->start } $transcript1->features;
289 is($overlap[0]->start,-999);
291 # test strandedness of features
292 $segment1 = $db->segment('-class' => 'Transcript',
293 '-name' => 'trans-3',
296 is($segment1->strand,1);
297 @overlap = sort {$a->start <=> $b->start} $segment1->features('transcript');
298 is(scalar(@overlap),2);
299 is($overlap[0]->name,'trans-3');
300 is($overlap[1]->name,'trans-4');
301 is($overlap[0]->strand,1);
302 is($overlap[1]->strand,-1);
304 # testing feature id and group_id
305 my $tf = $overlap[0];
307 my $t1 = $db->fetch_feature_by_id($tf->id);
311 if (defined $tf->group_id) {
312 my $t2 = $db->fetch_feature_by_gid($tf->group_id);
313 is($t2->group_id,$tf->group_id);
314 is($t2->group_id,$t1->group_id);
316 skip("fetch_feature_by_gid() not implemented by this adaptor",2);
320 $segment1 = $db->segment('-class' => 'Transcript',
321 '-name' => 'trans-4',
324 is($segment1->strand,1);
325 @overlap = sort {$a->start <=> $b->start} $segment1->features('transcript');
326 is($overlap[0]->name,'trans-4');
327 is($overlap[1]->name,'trans-3');
328 is($overlap[0]->strand,1);
329 is($overlap[1]->strand,-1);
331 @overlap = sort {$a->start <=> $b->start} $segment1->features('Component');
332 is($overlap[0]->strand,0);
335 # test preferred group assignments
336 if ($FILE =~ /\.gff$/) {
337 my @gene = $db->get_feature_by_name( gene => 'gene-9' );
338 my @mrna = $db->get_feature_by_name( mRNA => 'trans-9' );
339 is($gene[0]->ref, 'Contig4');
340 is(scalar(@gene), 2);
341 is(scalar(@mrna), 1);
343 skip('preferred groups are not supported by gff3',3);
347 # test iterator across a segment
348 $segment1 = $db->segment('Contig1');
349 my $i = $segment1->features('-automerge'=>0,'-iterator'=>1);
351 while (my $s = $i->next_feature) {
352 $strand{$s->strand}++;
356 # test iterator across entire database
357 $i = $db->features('-automerge'=>0,'-iterator'=>1);
359 while (my $s = $i->next_feature) {
360 $strand{$s->strand}++;
364 # test iterator across a segment, limited by an attribute
365 $i = $seg->get_feature_stream(-attributes=>{'Gene'=>'abc-1',Note=>'function unknown'});
367 while ($i->next_seq) {
372 # test that aliases work
373 my $st1 = $db->segment(Transcript => 'trans-3');
375 my $st2 = $db->segment(Transcript => 'trans-18'); # this is an alias!
378 my @transcripts = $st1->features('transcript');
379 is(($transcripts[0]->aliases)[0],'trans-18');
382 $db->strict_bounds_checking(1);
383 my $tseg = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>500);
384 ok(!$tseg->truncated);
385 $tseg = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>50000);
386 ok($tseg->truncated);
387 $db->strict_bounds_checking(0);
388 $tseg = $db->segment(-name=>'trans-1',-class=>'Transcript',-start=>1,-stop=>50000);
389 ok(!$tseg->truncated);
391 # test the processed_transcript aggregator
392 $db->clear_aggregators;
393 $db->add_aggregator('processed_transcript');
394 my @f = $db->fetch_feature_by_name(mRNA => 'trans-8');
396 is($f[0]->length,35000-32000+1);
397 is(scalar $f[0]->CDS,3);
398 is(scalar $f[0]->UTR,2);
401 # segment delete() method
402 my $clone = $db->segment(Clone=>'M7.3');
403 my $overlapping_feature_count = $clone->features(-range_type =>'overlaps');
404 my $contained_feature_count = $clone->features(-range_type =>'contains');
405 is(scalar $clone->delete(-range_type=>'contains'),$contained_feature_count);
406 is(scalar $clone->features,$overlapping_feature_count - $contained_feature_count);
408 # database delete() method
409 is($db->delete(-type=>['mRNA:confirmed','transposon:tc1']),4);
410 is($db->delete(-type=>'UTR',-ref=>'Contig29'),undef);
411 is($db->delete(-type=>'CDS',-ref=>'AL12345.2',-class=>'Clone'),3);
412 is($db->delete_features(1,2,3),3);
416 is($db->delete_groups(1,2,3,4,5),5);
417 my @features = $db->get_feature_by_name(Sequence => 'Contig2');
418 is($db->delete_groups(@features),1);
421 if (!$result && $@ =~ /not implemented/i) {
422 skip("delete_groups() not implemented by this adaptor",2);
427 test_skip(-tests => 1, -excludes_os => 'mswin');
429 # test ability to pass adaptors across a fork
430 if (my $child = open(F,"-|")) { # parent reads from child
436 my @f = $db->features();
442 ok(!defined eval{$db->delete()});
443 ok($db->delete(-force=>1));
444 is(scalar $db->features,0);
445 ok(!$db->segment('Contig1'));
454 unlink $fasta_files."/directory.index";