1 # -*-Perl-*- Test Harness script for Bioperl
5 use constant TEST_COUNT => 59;
11 test_begin(-tests => TEST_COUNT);
13 $ENV{ORACLE_HOME} ||= '/home/oracle/Home';
15 use_ok('Bio::DB::SeqFeature::Store');
16 use_ok('Bio::DB::SeqFeature::Store::GFF3Loader');
19 my $gff_file = test_input_file('seqfeaturedb','test.gff3');
21 my (@f,$f,@s,$s,$seq1,$seq2);
24 @args = (-adaptor => 'memory') unless @args;
27 my $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
28 skip "DB load failed? Skipping all! $@", (TEST_COUNT - 2) if $@;
31 my $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db) };
32 skip "GFF3 loader failed? Skipping all! $@", (TEST_COUNT - 3) if $@;
36 ok($loader->load($gff_file));
38 # there should be one gene named 'abc-1'
39 @f = $db->get_features_by_name('abc-1');
43 # there should be three subfeatures of type "exon" and three of type "CDS"
44 is($f->get_SeqFeatures('exon'),3);
45 is($f->get_SeqFeatures('CDS'),3);
47 # the sequence of feature abc-1 should match the sequence of the first exon at the beginning
49 $seq2 = (sort {$a->start<=>$b->start} $f->get_SeqFeatures('exon'))[0]->seq->seq;
50 is(substr($seq1,0,length $seq2),$seq2);
52 # sequence lengths should match
53 is(length $seq1, $f->length);
55 # if we pull out abc-1 again we should get the same object
56 ($s) = $db->get_features_by_name('abc-1');
59 # we should get two objects when we ask for abc-1 using get_features_by_alias
60 # this also depends on selective subfeature indexing
61 @f = $db->get_features_by_alias('abc-1');
64 # the two features should be different
67 # test that targets are working
68 ($f) = $db->get_features_by_name('match1');
72 ok($s->seq_id eq 'CEESC13F');
74 is(substr($seq1,0,10), 'ttgcgttcgg');
76 # can we fetch subfeatures?
77 # gene3.a has the Index=1 attribute, so we should fetch it
78 ($f) = $db->get_features_by_name('gene3.a');
81 # gene 3.b doesn't have an index, so we shouldn't get it
82 ($f) = $db->get_features_by_name('gene3.b');
85 # test three-tiered genes
86 ($f) = $db->get_features_by_name('gene3');
88 my @transcripts = $f->get_SeqFeatures;
90 is($transcripts[0]->method,'mRNA');
91 is($transcripts[0]->source,'confirmed');
93 # test that exon #2 is shared between the two transcripts
94 my @exons1 = $transcripts[0]->get_SeqFeatures('CDS');
96 my @exons2 = $transcripts[1]->get_SeqFeatures('CDS');
97 my ($shared1) = grep {$_->display_name||'' eq 'shared_exon'} @exons1;
98 my ($shared2) = grep {$_->display_name||'' eq 'shared_exon'} @exons2;
99 ok($shared1 && $shared2);
100 is($shared1, $shared2);
101 is($shared1->primary_id, $shared2->primary_id);
104 is($shared1->phase, 0);
105 is($shared1->strand, +1);
106 is(($f->attributes('expressed'))[0], 'yes');
109 my ($gene3a) = grep { $_->display_name eq 'gene3.a'} @transcripts;
110 my ($gene3b) = grep { $_->display_name eq 'gene3.b'} @transcripts;
113 ok($gene3a->Is_expressed);
114 ok(!$gene3b->Is_expressed);
116 # the representation of the 3'-UTR in the two transcripts a and b is
117 # different (not recommended but supported by the GFF3 spec). In the
118 # first case, there are two 3'UTRs existing as independent
119 # features. In the second, there is one UTR with a split location.
120 is($gene3a->Three_prime_UTR, 2);
121 is($gene3b->Three_prime_UTR, 1);
122 my ($utr) = $gene3b->Three_prime_UTR;
123 is($utr->segments, 2);
124 my $location = $utr->location;
125 isa_ok($location, 'Bio::Location::Split');
126 is($location->sub_Location,2);
128 # ok, test that queries are working properly.
129 # find all features with the attribute "expressed"
130 @f = $db->get_features_by_attribute({expressed=>'yes'});
133 # find all top-level features on Contig3 -- there should be two
134 @f = $db->get_features_by_location(-seq_id=>'Contig3');
137 # find all top-level features on Contig3 of type 'assembly_component'
138 @f = $db->features(-seq_id=>'Contig3',-type=>'assembly_component');
143 my $feature_count = @f;
144 cmp_ok($feature_count, '>', 0);
146 my $i = $db->get_seq_stream;
150 while ($i->next_seq) { $count++ }
151 is($feature_count,$count);
153 # regression test on bug in which get_SeqFeatures('type') did not filter inline segments
154 @f = $db->get_features_by_name('agt830.3');
155 ok(@f && !$f[0]->get_SeqFeatures('exon'));
156 ok(@f && $f[0]->get_SeqFeatures('EST_match'));
158 # regression test on bug in which the load_id disappeared
159 is(@f && $f[0]->load_id, 'Match2');
161 # regress on proper handling of multiple ID features
162 my ($alignment) = $db->get_features_by_name('agt830.5');
164 is($alignment->target->start,1);
165 is($alignment->target->end, 654);
166 is($alignment->get_SeqFeatures, 2);
167 my $gff3 = $alignment->gff3_string(1);
168 my @lines = split "\n",$gff3;
170 ok("@lines" !~ /Parent=/s);
171 ok("@lines" =~ /ID=/s);
173 if (my $child = open(F,"-|")) { # parent reads from child
174 cmp_ok(scalar <F>,'>',0);
176 # The challenge is to make sure that the handle
177 # still works in the parent!
178 my @f = $db->features();
179 cmp_ok(scalar @f,'>',0);
183 my @f = $db->features();
184 my $feature_count = @f;
185 print $feature_count;
189 # test the -ignore_seqregion flag
191 # the original should have a single feature named 'Contig1'
192 my @f = $db->get_features_by_name('Contig1');
195 $db = eval { Bio::DB::SeqFeature::Store->new(@args) };
196 $loader = eval { Bio::DB::SeqFeature::Store::GFF3Loader->new(-store=>$db,
197 -ignore_seqregion=>1)
199 $loader->load($gff_file);
200 @f = $db->get_features_by_name('Contig1');