1 # -*-Perl-*- Test Harness script for Bioperl
5 use constant TEST_COUNT => 116;
8 use lib '.','..','./t/lib';
11 test_begin(-tests => TEST_COUNT);
13 $ENV{ORACLE_HOME} ||= '/home/oracle/Home';
14 use_ok('Bio::SeqFeature::Generic');
15 use_ok('Bio::DB::SeqFeature::Store');
16 use_ok('Bio::DB::SeqFeature::Store::GFF3Loader');
17 use_ok('Bio::Root::IO');
18 use_ok('Bio::DB::Fasta');
22 my $DEBUG = test_debug();
24 my $gff_file = test_input_file('seqfeaturedb','test.gff3');
26 my (@f,$f,$f2,$sf1,$sf2,$sf3,@s,$s,$seq1,$seq2,$count,$new_features);
29 @args = (-adaptor => 'memory') unless @args;
32 my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
33 skip "DB load failed? Skipping all! $@", (TEST_COUNT - 6) if $@;
36 is( $db->isa('Bio::SeqFeature::CollectionI'), 1 );
38 my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
39 skip "GFF3 loader failed? Skipping all! $@", (TEST_COUNT - 6) if $@;
44 # skip("skipping memory adaptor-specific tests",27)
45 # unless $db->isa('Bio::DB::SeqFeature::Store::memory');
49 my $n = Bio::SeqFeature::Generic->new(
50 # -primary_id => '_some_id', # you're not allowed to do this!!
51 -primary => 'repeat_123',
55 -display_name => 'My favorite feature'
57 ok( my $id = $db->add_features([$n]), 'adding a feature' );
58 ok( @f = $db->fetch($n->primary_id));
61 is( $f->primary_id, $n->primary_id);
63 $f2 = Bio::SeqFeature::Generic->new(
67 -primary => 'repeat_123', # -primary_tag is a synonym
68 -source_tag => 'repeatmasker',
69 -display_name => 'alu family',
73 sillytag => 'this is silly!' }
75 ok( $db->store($f2) , 'adding a feature with no primary_id' );
76 ok( $f2->primary_id );
78 # test fetching features
79 is( $db->fetch('-1'), undef, 'searching for a feature that shouldnt exist');
80 is( $db->get_features_by_type('repeat_123:repeatmasker'), 1, 'simple type' );
81 is( $db->get_features_by_type('repeat_123:'), 2, 'base type with colon' );
82 is( $db->get_features_by_type('repeat_123'), 2, 'base type alone' );
83 is( $db->get_features_by_type('rep.*'), 0, 'queried types are not interpolated' );
84 ok( @f = $db->types );
86 isa_ok($f[0], 'Bio::DB::GFF::Typename');
88 # test removing features
89 ok( $db->delete( $f ), 'feature deletion' );
90 is( $db->fetch( $f->primary_id ), undef );
93 ok( $db->store($f, $f2) );
95 # test adding seqfeatures
96 $sf1 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat1', -start=>23, -end=>512 );
97 $sf2 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat2', -start=>23, -end=>512 );
98 $sf3 = Bio::SeqFeature::Generic->new( -primary=>'seqfeat1', -start=>23, -end=>512, source_tag => 'dna' );
99 ok $db->add_features([$sf1, $sf2, $sf3]), 'adding subfeatures';
100 is $db->add_SeqFeature($f, $sf1), 1;
101 is $db->add_SeqFeature($f, $sf2, $sf3), 2;
102 is $db->add_SeqFeature($f, $sf1, $sf2, $sf3), 3;
104 # test fetching seqfeatures
105 is $db->fetch_SeqFeatures($f), 3;
106 is $db->fetch_SeqFeatures($f, 'seqfeat2'), 1;
107 is $db->fetch_SeqFeatures($f, 'seqfeat1:dna'), 1;
108 is $db->fetch_SeqFeatures($f, 'seqfeat1'), 2;
109 is $db->fetch_SeqFeatures($f, 'seqfeat1', 'seqfeat2'), 3;
110 is $db->fetch_SeqFeatures($f, 'seqfeat4'), 0;
112 $new_features = scalar $db->features;
115 # exercise the loader
116 ok($loader->load($gff_file));
118 # there should be one gene named 'abc-1'
119 @f = $db->get_features_by_name('abc-1');
123 # there should be three subfeatures of type "exon" and three of type "CDS"
124 is($f->get_SeqFeatures('exon'),3);
125 is($f->get_SeqFeatures('CDS'),3);
127 # the sequence of feature abc-1 should match the sequence of the first exon at the beginning
128 $seq1 = $f->seq->seq;
129 $seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
130 is(substr($seq1,0,length $seq2),$seq2);
132 # sequence lengths should match
133 is(length $seq1, $f->length);
135 # if we pull out abc-1 again we should get the same object
136 ($s) = $db->get_features_by_name('abc-1');
139 # test case-sensitivity
140 ($s) = $db->get_features_by_name('Abc-1');
141 is($s, $f, 'feature names should be case insensitive');
143 # we should get two objects when we ask for abc-1 using get_features_by_alias
144 # this also depends on selective subfeature indexing
145 @f = $db->get_features_by_alias('abc-1');
148 # the two features should be different
151 # test that targets are working
152 ($f) = $db->get_features_by_name('match1');
156 ok($s->seq_id eq 'CEESC13F');
157 $seq1 = $s->seq->seq;
158 is(substr($seq1,0,10), 'ttgcgttcgg');
160 # can we fetch subfeatures?
161 # gene3.a has the Index=1 attribute, so we should fetch it
162 ($f) = $db->get_features_by_name('gene3.a');
165 # gene 3.b doesn't have an index, so we shouldn't get it
166 ($f) = $db->get_features_by_name('gene3.b');
169 # test three-tiered genes
170 ($f) = $db->get_features_by_name('gene3');
172 my @transcripts = $f->get_SeqFeatures;
174 is($transcripts[0]->method,'mRNA');
175 is($transcripts[0]->source,'confirmed');
177 # test that exon #2 is shared between the two transcripts
178 my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
180 my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
181 my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
182 my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
183 ok($shared1 && $shared2);
184 is($shared1, $shared2);
185 is($shared1->primary_id, $shared2->primary_id);
188 is($shared1->phase, 0);
189 is($shared1->strand, +1);
190 is(($f->attributes('expressed'))[0], 'yes');
193 is (scalar $db->get_features_by_type('transcript'), 4, 'base type');
194 is (scalar $db->get_features_by_type('transcript:confirmed'), 2, 'base:source type');
197 my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
198 my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
201 ok($gene3a->Is_expressed);
202 ok(!$gene3b->Is_expressed);
204 # the representation of the 3'-UTR in the two transcripts a and b is
205 # different (not recommended but supported by the GFF3 spec). In the
206 # first case, there are two 3'UTRs existing as independent
207 # features. In the second, there is one UTR with a split location.
208 is($gene3a->Three_prime_UTR, 2);
209 is($gene3b->Three_prime_UTR, 1);
210 my ($utr) = $gene3b->Three_prime_UTR;
211 is($utr->segments, 2);
212 my $location = $utr->location;
213 isa_ok($location, 'Bio::Location::Split');
214 is($location->sub_Location,2);
216 # ok, test that queries are working properly.
217 # find all features with the attribute "expressed"
218 @f = $db->get_features_by_attribute({expressed=>'yes'});
221 # find all top-level features on Contig3 -- there should be two
222 @f = $db->get_features_by_location(-seq_id=>'Contig3');
225 # find all top-level features on Contig3 that overlap a range -- only one
226 @f = $db->get_features_by_location(-seq_id=>'Contig3',-start=>40000,-end=>50000);
229 # find all top-level features on Contig3 of type 'assembly_component'
230 @f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
235 is(scalar @f, 27+$new_features);
237 my $i = $db->get_seq_stream;
240 my $feature_count = @f;
241 while ($i->next_seq) { $count++ }
242 is($feature_count,$count);
244 # regression test on bug in which get_SeqFeatures('type') did not filter inline segments
245 @f = $db->get_features_by_name('agt830.3');
246 ok(@f && !$f[0]->get_SeqFeatures('exon'));
247 ok(@f && $f[0]->get_SeqFeatures('EST_match'));
249 # regression test on bug in which the load_id disappeared
250 is(@f && $f[0]->load_id, 'Match2');
252 # regress on proper handling of multiple ID features
253 my ($alignment) = $db->get_features_by_name('agt830.5');
255 is($alignment->target->start,1);
256 is($alignment->target->end, 654);
257 is($alignment->get_SeqFeatures, 2);
258 my $gff3 = $alignment->gff3_string(1);
259 my @lines = split "\n",$gff3;
261 ok("@lines" !~ /Parent=/s);
262 ok("@lines" =~ /ID=/s);
264 # regress on multiple parentage
265 my ($gp) = $db->get_features_by_name('gparent1');
266 my ($p1,$p2) = $gp->get_SeqFeatures;
267 my @c = sort {$a->start<=>$b->start} $p1->get_SeqFeatures;
271 @c = sort {$a->start<=>$b->start} $p2->get_SeqFeatures;
277 test_skip(-tests => 2, -excludes_os => 'mswin');
279 if (my $child = open(F,"-|")) { # parent reads from child
280 cmp_ok(scalar <F>,'>',0);
282 # The challenge is to make sure that the handle
283 # still works in the parent!
284 my @f = $db->features();
285 cmp_ok(scalar @f,'>',0);
289 my @f = $db->features();
290 my $feature_count = @f;
291 print $feature_count;
297 # test the -ignore_seqregion flag
299 # the original should have a single feature named 'Contig1'
300 my @f = $db->get_features_by_name('Contig1');
303 $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
304 $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db,
305 -ignore_seqregion=>1)
307 $loader->load($gff_file);
308 @f = $db->get_features_by_name('Contig1');
311 # test keyword search
312 my @results = $db->search_notes('interesting');
313 is(scalar @results,2,'keyword search; 1 term');
315 @results = $db->search_notes('terribly interesting');
316 is(scalar @results,2,'keyword search; 2 terms');
318 # test our ability to substitute a FASTA file for the database
319 my $fasta_dir = make_fasta_testdir();
320 my $dbfa = Bio::DB::Fasta->new($fasta_dir, -reindex => 1);
322 ok(my $contig1=$dbfa->seq('Contig1'));
324 $db = Bio::DB::SeqFeature::Store->new(@args,-fasta=>$dbfa);
325 $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db);
326 ok($loader->load($gff_file));
328 ok($db->dna_accessor);
329 my $f = $db->segment('Contig1');
330 ok($f->dna eq $contig1);
332 ok(my $contig2 = $dbfa->seq('Contig2'));
333 ($f) = $db->get_feature_by_name('match4');
334 my $length = $f->length;
335 ok(substr($contig2,0,$length) eq $f->dna);
337 # testing namespaces for mysql and Pg adaptor
342 for (my $i=0; $i < @args; $i++) {
343 if ($args[$i] eq '-adaptor') {
344 $adaptor = $args[$i+1];
349 skip "Namespaces only supported for DBI::mysql and DBI::Pg adaptors", 6, if ($adaptor ne 'DBI::mysql' && $adaptor ne 'DBI::Pg');
351 push(@args, ('-namespace', 'bioperl_seqfeature_t_test_schema'));
352 $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
355 $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
358 $loader->load($gff_file);
360 # there should be one gene named 'abc-1'
361 ok( @f = $db->get_features_by_name('abc-1') );
365 # there should be three subfeatures of type "exon" and three of type "CDS"
366 is($f->get_SeqFeatures('exon'),3);
367 is($f->get_SeqFeatures('CDS'),3);
369 $db->remove_namespace();
373 sub make_fasta_testdir {
374 # this obfuscation is to deal with lockfiles by GDBM_File which can
375 # only be created on local filesystems apparently so will cause test
376 # to block and then fail when the testdir is on an NFS mounted system
377 my $io = Bio::Root::IO->new(-verbose => $DEBUG);
378 my $tempdir = test_output_dir();
379 my $test_dbdir = $io->catfile($tempdir, 'dbfa');
380 mkdir($test_dbdir); # make the directory
381 my $indir = test_input_file('dbfa');
382 opendir(INDIR,$indir) || die("cannot open dir $indir");
383 # effectively do a cp -r but only copy the files that are in there, no subdirs
384 for my $file ( map { $io->catfile($indir,$_) } readdir(INDIR) ) {
385 next unless (-f $file );
386 copy($file, $test_dbdir);