1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 237,
11 -requires_module => 'URI::Escape');
15 use_ok('Bio::SeqFeature::Generic');
16 use_ok('Bio::SeqFeature::Annotated');
17 use_ok('Bio::SeqFeature::FeaturePair');
18 use_ok('Bio::SeqFeature::Computation');
19 use_ok('Bio::SeqFeature::Gene::Transcript');
20 use_ok('Bio::SeqFeature::Gene::UTR');
21 use_ok('Bio::SeqFeature::Gene::Exon');
22 use_ok('Bio::SeqFeature::Gene::Poly_A_site');
23 use_ok('Bio::SeqFeature::Gene::GeneStructure');
24 use_ok('Bio::Location::Fuzzy');
27 # predeclare variables for strict
28 my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft);
30 my $DEBUG = test_debug();
32 $feat = Bio::SeqFeature::Generic->new( -start => 40,
36 -source => 'internal',
43 is $feat->start, 40, 'start of feature location';
44 is $feat->end, 80, 'end of feature location';
45 is $feat->primary_tag, 'exon', 'primary tag';
46 is $feat->source_tag, 'internal', 'source tag';
48 $str = $feat->gff_string() || ""; # placate -w
50 $pair = Bio::SeqFeature::FeaturePair->new();
53 $feat2 = Bio::SeqFeature::Generic->new( -start => 400,
57 -source => 'program_a',
65 $pair->feature1($feat);
66 $pair->feature2($feat2);
68 is $pair->feature1, $feat, 'feature1 of pair stored';
69 is $pair->feature2, $feat2, 'feature2 of pair stored';
70 is $pair->start, 40, 'feature start';
71 is $pair->end, 80, 'feature end';
72 is $pair->primary_tag, 'exon', 'primary tag';
73 is $pair->source_tag, 'internal', 'source tag';
74 is $pair->hstart, 400, 'hstart';
75 is $pair->hend, 440, 'hend';
76 is $pair->hprimary_tag, 'other', 'hprimary tag';
77 is $pair->hsource_tag, 'program_a', 'hsource tag';
80 is $pair->end, 440, 'inverted end';
82 # Test attaching a SeqFeature::Generic to a Bio::Seq
84 # Make the parent sequence object
85 my $seq = Bio::Seq->new(
86 -seq => 'aaaaggggtttt',
87 -display_id => 'test',
92 my $sf1 = Bio::SeqFeature::Generic->new(
98 # Add the SeqFeature to the parent
99 ok ($seq->add_SeqFeature($sf1));
101 # Test that it gives the correct sequence
102 my $sf_seq1 = $sf1->seq->seq;
103 is $sf_seq1, 'aggggt', 'seq string';
104 is $sf1->end,9, 'sf1 end';
105 is $sf1->start,4, 'sf1 start';
107 # Make a second seqfeature on the opposite strand
108 my $sf2 = Bio::SeqFeature::Generic->new(
114 # This time add the PrimarySeq to the seqfeature
115 # before adding it to the parent
116 ok ($sf2->attach_seq($seq->primary_seq));
117 $seq->add_SeqFeature($sf2);
119 # Test again that we have the correct sequence
120 my $sf_seq2 = $sf2->seq->seq;
121 is $sf_seq2, 'acccct', 'sf2';
124 #Do some tests for computation.pm
126 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
128 is($comp_obj1->computation_id(332),332, 'computation id');
129 ok( $comp_obj1->add_score_value('P', 33), 'score value');
131 $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
133 ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
134 ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
135 is($sft[0], 'exon', 'sft[0] is exon');
138 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new
140 -start => 10, -end => 100,
141 -strand => -1, -primary => 'repeat',
142 -program_name => 'GeneMark',
143 -program_date => '12-5-2000',
144 -program_version => 'x.y',
145 -database_name => 'Arabidopsis',
146 -database_date => '12-dec-2000',
147 -computation_id => 2231,
148 -score => { no_score => 334 } )
151 is ( $comp_obj1->computation_id, 2231, 'computation id' );
152 ok ( $comp_obj1->add_score_value('P', 33) );
153 is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
155 # some tests for bug #947
157 my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
159 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
164 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
169 is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
170 is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
172 # some tests to see if we can set a feature to start at 0
173 $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
175 ok(defined $sfeat->start);
176 is($sfeat->start,0, 'can create feature starting and ending at 0');
177 ok(defined $sfeat->end);
178 is($sfeat->end,0,'can create feature starting and ending at 0');
181 # tests for Bio::SeqFeature::Gene::* objects
182 # using information from acc: AB077698 as a guide
184 ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
185 -file => test_input_file('AB077698.gb'));
186 ok my $geneseq = $seqio->next_seq();
188 ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
193 ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
198 'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
200 'protein_id' => 'BAB85648.1',
203 ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
204 (-primary => 'polyA_site',
208 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
211 ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
212 (-primary => 'polyA_site',
216 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
219 ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
220 ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
222 ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
226 # Did a quick est2genome against genomic DNA (this is on Chr X) to
227 # get the gene structure by hand since it is not in the file
230 ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
233 ok $geneseq->add_SeqFeature($exon1);
235 ok $geneseq->add_SeqFeature($fiveprimeUTR);
236 ok $geneseq->add_SeqFeature($threeprimeUTR);
237 ok $geneseq->add_SeqFeature($poly_A_site1);
238 ok $geneseq->add_SeqFeature($poly_A_site2);
240 ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
241 ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
243 ok $transcript->add_exon($exon1);
245 # API only supports a single poly-A site per transcript at this point
246 $transcript->poly_A_site($poly_A_site2);
247 $geneseq->add_SeqFeature($transcript);
248 $gene->add_transcript($transcript);
249 $geneseq->add_SeqFeature($gene);
251 my ($t) = $gene->transcripts(); # get 1st transcript
253 is($t->mrna->length, 1693, 'mRNA spliced length');
254 is($gene->utrs, 2, 'has 2 UTRs');
258 # Test for bug when Locations are not created explicitly
260 my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
264 $feat2 = Bio::SeqFeature::Generic->new(-start => 10,
268 my $overlap = $feat1->location->union($feat2->location);
269 is($overlap->start, 1);
270 is($overlap->end, 25);
272 my $intersect = $feat1->location->intersection($feat2->location);
273 is($intersect->start, 10);
274 is($intersect->end, 15);
277 # now let's test spliced_seq
279 isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
280 -format => 'genbank'), "Bio::SeqIO");
282 isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
283 my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
287 test_skip(-tests => 5,
288 -requires_modules => [qw(IO::String
290 HTTP::Request::Common)],
291 -requires_networking => 1);
293 use_ok('Bio::DB::GenBank');
295 $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
297 my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
299 is($cdsseq->subseq(1,76),
300 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
301 is($cdsseq->translate->subseq(1,100),
302 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
303 # test what happens without
304 $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
305 is($cdsseq->subseq(1,76),
306 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
307 is($cdsseq->translate->subseq(1,100),
308 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
311 isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
312 -format => 'genbank'), 'Bio::SeqIO');
313 isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
314 ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
316 test_skip(-tests => 2,
317 -requires_modules => [qw(IO::String
319 HTTP::Request::Common)],
320 -requires_networking => 1);
322 my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
323 is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
324 is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
330 isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
331 -file => test_input_file('NC_001284.gbk')),
333 my $genome = $seqio->next_seq;
335 foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
336 my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
337 chop($spliced); # remove stop codon
338 is($spliced,($cds->get_tag_values('translation'))[0],
339 'spliced seq translation matches expected');
343 my $seqin = Bio::SeqIO->new(-format => 'fasta',
344 -file => test_input_file('sbay_c127.fas'));
346 my $seq = $seqin->next_seq;
348 my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
352 -primary => 'splicedgene');
354 $sf->attach_seq($seq);
357 'TTCAATGACT' => 'FNDFYSMGKS',
358 'TCAATGACTT' => 'SMTSIPWVNQ',
359 'GTTCAATGAC' => 'VQ*LLFHG*I',
362 for my $phase (-1..3) {
363 my $sfseq = $sf->spliced_seq(-phase => $phase);
364 ok exists $phase_check{$sfseq->subseq(1,10)};
365 is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');
370 my $sfa = Bio::SeqFeature::Annotated->new(-start => 1,
376 -display_name => 'test.annot',
377 -seq_id => 'test.displayname' );
380 my $loc = $sfa->location;
381 ok $loc->isa("Bio::Location::Simple");
382 ok $sfa->display_name eq 'test.annot';
384 #test bsfa::from_feature
385 my $sfg = Bio::SeqFeature::Generic->new ( -start => 400,
388 -primary => 'nucleotide_motif',
389 -source => 'program_a',
396 $sfa2 = Bio::SeqFeature::Annotated->new(-feature => $sfg);
398 is $sfa2->type->name,'nucleotide_motif';
399 is $sfa2->primary_tag, 'nucleotide_motif';
400 is $sfa2->source,'program_a';
404 is $sfa2->get_Annotations('silly')->value,20;
405 is $sfa2->get_Annotations('new')->value,1;
407 my $sfa3 = Bio::SeqFeature::Annotated->new( -start => 1,
413 -display_name => 'test.annot',
414 -seq_id => 'test.displayname' );
415 $sfa3->from_feature($sfg);
418 is $sfa3->type->name,'nucleotide_motif', 'type->name';
419 is $sfa3->primary_tag, 'nucleotide_motif', 'primary_tag';
420 is $sfa3->source,'program_a';
424 is $sfa3->get_Annotations('silly')->value,20;
425 is $sfa3->get_Annotations('new')->value,1;
426 is $sfa3->score(), 12;
428 is $sfa3->score(), 11;
430 is $sfa3->score(), 0; # test that setting to 0 no longer is overriddent to set score to '.' (fixed in Bio::SeqFeature::Annotated version 1.3.7)