1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 214);
14 use_ok('Bio::SeqFeature::Generic');
15 use_ok('Bio::SeqFeature::FeaturePair');
16 use_ok('Bio::SeqFeature::Computation');
17 use_ok('Bio::SeqFeature::Gene::Transcript');
18 use_ok('Bio::SeqFeature::Gene::UTR');
19 use_ok('Bio::SeqFeature::Gene::Exon');
20 use_ok('Bio::SeqFeature::Gene::Poly_A_site');
21 use_ok('Bio::SeqFeature::Gene::GeneStructure');
22 use_ok('Bio::Location::Fuzzy');
25 # predeclare variables for strict
26 my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft);
28 my $DEBUG = test_debug();
30 $feat = Bio::SeqFeature::Generic->new( -start => 40,
34 -source => 'internal',
41 is $feat->start, 40, 'start of feature location';
42 is $feat->end, 80, 'end of feature location';
43 is $feat->primary_tag, 'exon', 'primary tag';
44 is $feat->source_tag, 'internal', 'source tag';
46 $str = $feat->gff_string() || ""; # placate -w
48 $pair = Bio::SeqFeature::FeaturePair->new();
51 $feat2 = Bio::SeqFeature::Generic->new( -start => 400,
55 -source => 'program_a',
63 $pair->feature1($feat);
64 $pair->feature2($feat2);
66 is $pair->feature1, $feat, 'feature1 of pair stored';
67 is $pair->feature2, $feat2, 'feature2 of pair stored';
68 is $pair->start, 40, 'feature start';
69 is $pair->end, 80, 'feature end';
70 is $pair->primary_tag, 'exon', 'primary tag';
71 is $pair->source_tag, 'internal', 'source tag';
72 is $pair->hstart, 400, 'hstart';
73 is $pair->hend, 440, 'hend';
74 is $pair->hprimary_tag, 'other', 'hprimary tag';
75 is $pair->hsource_tag, 'program_a', 'hsource tag';
78 is $pair->end, 440, 'inverted end';
80 # Test attaching a SeqFeature::Generic to a Bio::Seq
82 # Make the parent sequence object
83 my $seq = Bio::Seq->new(
84 -seq => 'aaaaggggtttt',
85 -display_id => 'test',
90 my $sf1 = Bio::SeqFeature::Generic->new(
96 # Add the SeqFeature to the parent
97 ok ($seq->add_SeqFeature($sf1));
99 # Test that it gives the correct sequence
100 my $sf_seq1 = $sf1->seq->seq;
101 is $sf_seq1, 'aggggt', 'seq string';
102 is $sf1->end,9, 'sf1 end';
103 is $sf1->start,4, 'sf1 start';
105 # Make a second seqfeature on the opposite strand
106 my $sf2 = Bio::SeqFeature::Generic->new(
112 # This time add the PrimarySeq to the seqfeature
113 # before adding it to the parent
114 ok ($sf2->attach_seq($seq->primary_seq));
115 $seq->add_SeqFeature($sf2);
117 # Test again that we have the correct sequence
118 my $sf_seq2 = $sf2->seq->seq;
119 is $sf_seq2, 'acccct', 'sf2';
122 #Do some tests for computation.pm
124 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
126 is($comp_obj1->computation_id(332),332, 'computation id');
127 ok( $comp_obj1->add_score_value('P', 33), 'score value');
129 $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
131 ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
132 ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
133 is($sft[0], 'exon', 'sft[0] is exon');
136 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new
138 -start => 10, -end => 100,
139 -strand => -1, -primary => 'repeat',
140 -program_name => 'GeneMark',
141 -program_date => '12-5-2000',
142 -program_version => 'x.y',
143 -database_name => 'Arabidopsis',
144 -database_date => '12-dec-2000',
145 -computation_id => 2231,
146 -score => { no_score => 334 } )
149 is ( $comp_obj1->computation_id, 2231, 'computation id' );
150 ok ( $comp_obj1->add_score_value('P', 33) );
151 is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
153 # some tests for bug #947
155 my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
157 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
162 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
167 is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
168 is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
170 # some tests to see if we can set a feature to start at 0
171 $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
173 ok(defined $sfeat->start);
174 is($sfeat->start,0, 'can create feature starting and ending at 0');
175 ok(defined $sfeat->end);
176 is($sfeat->end,0,'can create feature starting and ending at 0');
179 # tests for Bio::SeqFeature::Gene::* objects
180 # using information from acc: AB077698 as a guide
182 ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
183 -file => test_input_file('AB077698.gb'));
184 ok my $geneseq = $seqio->next_seq();
186 ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
191 ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
196 'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
198 'protein_id' => 'BAB85648.1',
201 ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
202 (-primary => 'polyA_site',
206 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
209 ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
210 (-primary => 'polyA_site',
214 'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
217 ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
218 ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
220 ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
224 # Did a quick est2genome against genomic DNA (this is on Chr X) to
225 # get the gene structure by hand since it is not in the file
228 ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
231 ok $geneseq->add_SeqFeature($exon1);
233 ok $geneseq->add_SeqFeature($fiveprimeUTR);
234 ok $geneseq->add_SeqFeature($threeprimeUTR);
235 ok $geneseq->add_SeqFeature($poly_A_site1);
236 ok $geneseq->add_SeqFeature($poly_A_site2);
238 ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
239 ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
241 ok $transcript->add_exon($exon1);
243 # API only supports a single poly-A site per transcript at this point
244 $transcript->poly_A_site($poly_A_site2);
245 $geneseq->add_SeqFeature($transcript);
246 $gene->add_transcript($transcript);
247 $geneseq->add_SeqFeature($gene);
249 my ($t) = $gene->transcripts(); # get 1st transcript
251 is($t->mrna->length, 1693, 'mRNA spliced length');
252 is($gene->utrs, 2, 'has 2 UTRs');
256 # Test for bug when Locations are not created explicitly
258 my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
262 $feat2 = Bio::SeqFeature::Generic->new(-start => 10,
266 my $overlap = $feat1->location->union($feat2->location);
267 is($overlap->start, 1);
268 is($overlap->end, 25);
270 my $intersect = $feat1->location->intersection($feat2->location);
271 is($intersect->start, 10);
272 is($intersect->end, 15);
275 # now let's test spliced_seq
277 isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
278 -format => 'genbank'), "Bio::SeqIO");
280 isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
281 my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
285 test_skip(-tests => 5,
286 -requires_modules => [qw(IO::String
288 HTTP::Request::Common)],
289 -requires_networking => 1);
291 use_ok('Bio::DB::GenBank');
293 $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
295 my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
297 is($cdsseq->subseq(1,76),
298 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
299 is($cdsseq->translate->subseq(1,100),
300 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
301 # test what happens without
302 $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
303 is($cdsseq->subseq(1,76),
304 'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
305 is($cdsseq->translate->subseq(1,100),
306 'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
309 isa_ok( $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
310 -format => 'genbank'), 'Bio::SeqIO');
311 isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
312 ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
314 test_skip(-tests => 2,
315 -requires_modules => [qw(IO::String
317 HTTP::Request::Common)],
318 -requires_networking => 1);
320 my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
321 is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
322 is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
328 isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
329 -file => test_input_file('NC_001284.gbk')),
331 my $genome = $seqio->next_seq;
333 foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
334 my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
335 chop($spliced); # remove stop codon
336 is($spliced,($cds->get_tag_values('translation'))[0],
337 'spliced seq translation matches expected');
341 my $seqin = Bio::SeqIO->new(-format => 'fasta',
342 -file => test_input_file('sbay_c127.fas'));
344 my $seq = $seqin->next_seq;
346 my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
350 -primary => 'splicedgene');
352 $sf->attach_seq($seq);
355 'TTCAATGACT' => 'FNDFYSMGKS',
356 'TCAATGACTT' => 'SMTSIPWVNQ',
357 'GTTCAATGAC' => 'VQ*LLFHG*I',
360 for my $phase (-1..3) {
361 my $sfseq = $sf->spliced_seq(-phase => $phase);
362 ok exists $phase_check{$sfseq->subseq(1,10)};
363 is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');