Catch failing tie'd DB_File handle
[bioperl-live.git] / t / SeqFeature.t
blob0f18d3414987e9802d75edfcd8340753d0f46b08
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id$
4 use strict;
6 BEGIN { 
7     use lib 't/lib';
8     use BioperlTest;
9     
10     test_begin(-tests => 237,
11                            -requires_module => 'URI::Escape');
12         
13         use_ok('Bio::Seq');
14         use_ok('Bio::SeqIO');
15         use_ok('Bio::SeqFeature::Generic');
16         use_ok('Bio::SeqFeature::Annotated');
17         use_ok('Bio::SeqFeature::FeaturePair');
18         use_ok('Bio::SeqFeature::Computation');
19         use_ok('Bio::SeqFeature::Gene::Transcript');
20         use_ok('Bio::SeqFeature::Gene::UTR');
21         use_ok('Bio::SeqFeature::Gene::Exon');
22         use_ok('Bio::SeqFeature::Gene::Poly_A_site');
23         use_ok('Bio::SeqFeature::Gene::GeneStructure');
24         use_ok('Bio::Location::Fuzzy');
27 # predeclare variables for strict
28 my ($feat,$str,$feat2,$pair,$comp_obj1,$comp_obj2,@sft); 
30 my $DEBUG = test_debug();
32 $feat = Bio::SeqFeature::Generic->new( -start => 40,
33                                        -end => 80,
34                                        -strand => 1,
35                                        -primary => 'exon',
36                                        -source  => 'internal',
37                                        -tag => {
38                                            silly => 20,
39                                            new => 1
40                                            }
41                                        );
43 is $feat->start, 40, 'start of feature location';
44 is $feat->end, 80, 'end of feature location';
45 is $feat->primary_tag, 'exon', 'primary tag';
46 is $feat->source_tag, 'internal', 'source tag';
48 $str = $feat->gff_string() || ""; # placate -w
50 $pair = Bio::SeqFeature::FeaturePair->new();
51 ok defined $pair;
53 $feat2 = Bio::SeqFeature::Generic->new( -start => 400,
54                                        -end => 440,
55                                        -strand => 1,
56                                        -primary => 'other',
57                                        -source  => 'program_a',
58                                        -tag => {
59                                            silly => 20,
60                                            new => 1
61                                            }
62                                        );
64 ok defined $feat2;
65 $pair->feature1($feat);
66 $pair->feature2($feat2);
68 is $pair->feature1, $feat, 'feature1 of pair stored';
69 is $pair->feature2, $feat2, 'feature2 of pair stored';
70 is $pair->start, 40, 'feature start';
71 is $pair->end, 80, 'feature end';
72 is $pair->primary_tag, 'exon', 'primary tag';
73 is $pair->source_tag, 'internal', 'source tag';
74 is $pair->hstart, 400, 'hstart';
75 is $pair->hend, 440, 'hend';
76 is $pair->hprimary_tag, 'other', 'hprimary tag';
77 is $pair->hsource_tag, 'program_a', 'hsource tag';
79 $pair->invert;
80 is $pair->end, 440, 'inverted end';
82 # Test attaching a SeqFeature::Generic to a Bio::Seq
84     # Make the parent sequence object
85     my $seq = Bio::Seq->new(
86                             -seq          => 'aaaaggggtttt',
87                             -display_id   => 'test',
88                             -alphabet     => 'dna',
89                             );
90     
91     # Make a SeqFeature
92     my $sf1 = Bio::SeqFeature::Generic->new(
93                                             -start    => 4,
94                                             -end      => 9,
95                                             -strand   => 1,
96                                             );
97     
98     # Add the SeqFeature to the parent
99     ok ($seq->add_SeqFeature($sf1));
100     
101     # Test that it gives the correct sequence
102     my $sf_seq1 = $sf1->seq->seq;
103     is $sf_seq1, 'aggggt', 'seq string';
104     is $sf1->end,9, 'sf1 end';
105     is $sf1->start,4, 'sf1 start';
107     # Make a second seqfeature on the opposite strand
108     my $sf2 = Bio::SeqFeature::Generic->new(
109                                             -start    => 4,
110                                             -end      => 9,
111                                             -strand   => -1,
112                                             );
113     
114     # This time add the PrimarySeq to the seqfeature
115     # before adding it to the parent
116     ok ($sf2->attach_seq($seq->primary_seq));
117     $seq->add_SeqFeature($sf2);
118     
119     # Test again that we have the correct sequence
120     my $sf_seq2 = $sf2->seq->seq;
121     is $sf_seq2, 'acccct', 'sf2';
124 #Do some tests for computation.pm
126 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new('-start' => 1,
127                                                             '-end'   => 10) );
128 is($comp_obj1->computation_id(332),332, 'computation id');
129 ok( $comp_obj1->add_score_value('P', 33), 'score value');
131     $comp_obj2 = Bio::SeqFeature::Computation->new('-start' => 2,
132                                                    '-end'   => 10);
133     ok ($comp_obj1->add_sub_SeqFeature($comp_obj2, 'exon') );
134     ok (@sft = $comp_obj1->all_sub_SeqFeature_types() );
135     is($sft[0], 'exon', 'sft[0] is exon');
138 ok defined ( $comp_obj1 = Bio::SeqFeature::Computation->new 
139              (
140               -start => 10, -end => 100,
141               -strand => -1, -primary => 'repeat',
142               -program_name => 'GeneMark',
143               -program_date => '12-5-2000',
144               -program_version => 'x.y',
145               -database_name => 'Arabidopsis',
146               -database_date => '12-dec-2000',
147               -computation_id => 2231,
148               -score    => { no_score => 334 } )
149              );
151 is ( $comp_obj1->computation_id, 2231, 'computation id' );
152 ok ( $comp_obj1->add_score_value('P', 33) );
153 is ( ($comp_obj1->each_score_value('no_score'))[0], '334', 'score value');
155 # some tests for bug #947
157 my $sfeat = Bio::SeqFeature::Generic->new(-primary => 'test');
159 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 2,
160                                                         -end   => 4,
161                                                         -primary => 'sub1'),
162                            'EXPAND');
164 $sfeat->add_sub_SeqFeature(Bio::SeqFeature::Generic->new(-start => 6,
165                                                         -end   => 8,
166                                                         -primary => 'sub2'),
167                            'EXPAND');
169 is $sfeat->start, 2, 'sfeat start for EXPAND-ED feature (bug #947)';
170 is $sfeat->end, 8, 'sfeat end for EXPAND-ED feature (bug #947)';
172 # some tests to see if we can set a feature to start at 0
173 $sfeat = Bio::SeqFeature::Generic->new(-start => 0, -end => 0 );
175 ok(defined $sfeat->start);
176 is($sfeat->start,0, 'can create feature starting and ending at 0');
177 ok(defined $sfeat->end);
178 is($sfeat->end,0,'can create feature starting and ending at 0');
181 # tests for Bio::SeqFeature::Gene::* objects
182 # using information from acc: AB077698 as a guide
184 ok my $seqio = Bio::SeqIO->new(-format => 'genbank',
185                          -file   => test_input_file('AB077698.gb'));
186 ok my $geneseq = $seqio->next_seq();
188 ok my $gene = Bio::SeqFeature::Gene::GeneStructure->new(-primary => 'gene',
189                                                     -start   => 1,
190                                                        -end     => 2701,
191                                                        -strand  => 1);
193 ok my $transcript = Bio::SeqFeature::Gene::Transcript->new(-primary => 'CDS',
194                                                           -start   => 80,
195                                                           -end     => 1144,
196                                                           -tag     => { 
197                                                               'gene' => "CHCR",
198                                                               'note' => "Cys3His CCG1-Required Encoded on BAC clone RP5-842K24 (AL050310) The human CHCR (Cys3His CCG1-Required) protein is highly related to EXP/MBNL (Y13829, NM_021038, AF401998) and MBLL (NM_005757,AF061261), which together comprise the human Muscleblind family",
199                                                               'codon_start' => 1,
200                                                               'protein_id'  => 'BAB85648.1',
201                                                           });
203 ok my $poly_A_site1 = Bio::SeqFeature::Gene::Poly_A_site->new
204     (-primary => 'polyA_site',
205      -start => 2660,
206      -end   => 2660,
207      -tag   => { 
208          'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#2 used by CHCR EST clone DKFZp434G2222 (AL133625)"
209          });
211 ok my $poly_A_site2 = Bio::SeqFeature::Gene::Poly_A_site->new
212     (-primary => 'polyA_site',
213      -start => 1606,
214      -end   => 1606,
215      -tag   => { 
216          'note' => "Encoded on BAC clone RP5-842K24 (AL050310); PolyA_site#1 used by CHCR EST clone PLACE1010202 (AK002178)",
217      });
219 ok my $fiveprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr5prime");
220 ok $fiveprimeUTR->location(Bio::Location::Fuzzy->new(-start => "<1",
221                                                     -end   => 79));
222 ok my $threeprimeUTR = Bio::SeqFeature::Gene::UTR->new(-primary => "utr3prime",
223                                                       -start   => 1145,
224                                                       -end     => 2659);
226 # Did a quick est2genome against genomic DNA (this is on Chr X) to
227 # get the gene structure by hand since it is not in the file
228 # --Jason
230 ok my $exon1 = Bio::SeqFeature::Gene::Exon->new(-primary => 'exon',
231                                                -start => 80,
232                                                -end   => 177);
233 ok $geneseq->add_SeqFeature($exon1);
235 ok $geneseq->add_SeqFeature($fiveprimeUTR);
236 ok $geneseq->add_SeqFeature($threeprimeUTR);
237 ok $geneseq->add_SeqFeature($poly_A_site1);
238 ok $geneseq->add_SeqFeature($poly_A_site2);
240 ok $transcript->add_utr($fiveprimeUTR, 'utr5prime');
241 ok $transcript->add_utr($threeprimeUTR, 'utr3prime');
243 ok $transcript->add_exon($exon1);
245 # API only supports a single poly-A site per transcript at this point 
246 $transcript->poly_A_site($poly_A_site2);
247 $geneseq->add_SeqFeature($transcript);
248 $gene->add_transcript($transcript);
249 $geneseq->add_SeqFeature($gene);
251 my ($t) = $gene->transcripts(); # get 1st transcript
252 ok(defined $t); 
253 is($t->mrna->length, 1693, 'mRNA spliced length');
254 is($gene->utrs, 2, 'has 2 UTRs');
258 # Test for bug when Locations are not created explicitly
260 my $feat1 = Bio::SeqFeature::Generic->new(-start => 1,
261                                          -end   => 15,
262                                          -strand=> 1);
264 $feat2 = Bio::SeqFeature::Generic->new(-start => 10,
265                                          -end   => 25,
266                                          -strand=> 1);
268 my $overlap = $feat1->location->union($feat2->location);
269 is($overlap->start, 1);
270 is($overlap->end,   25);
272 my $intersect = $feat1->location->intersection($feat2->location);
273 is($intersect->start, 10);
274 is($intersect->end,   15);
277 # now let's test spliced_seq
279 isa_ok(  $seqio = Bio::SeqIO->new(-file => test_input_file('AY095303S1.gbk'),
280                                  -format  => 'genbank'), "Bio::SeqIO");
282 isa_ok( $geneseq = $seqio->next_seq(), 'Bio::Seq');
283 my ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
284 my $db;
286 SKIP: {
287         test_skip(-tests => 5,
288                           -requires_modules => [qw(IO::String
289                                                                            LWP::UserAgent
290                                                                            HTTP::Request::Common)],
291                           -requires_networking => 1);
292         
293         use_ok('Bio::DB::GenBank');
294     
295     $db = Bio::DB::GenBank->new(-verbose=> $DEBUG);
296     $CDS->verbose(-1);
297     my $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);
298     
299     is($cdsseq->subseq(1,76),
300        'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
301     is($cdsseq->translate->subseq(1,100), 
302        'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
303     # test what happens without 
304     $cdsseq = $CDS->spliced_seq(-db => $db,-nosort => 1);    
305     is($cdsseq->subseq(1,76), 
306        'ATGCAGCCATACGCTTCCGTGAGCGGGCGATGTCTATCTAGACCAGATGCATTGCATGTGATACCGTTTGGGCGAC');
307     is($cdsseq->translate->subseq(1,100), 
308        'MQPYASVSGRCLSRPDALHVIPFGRPLQAIAGRRFVRCFAKGGQPGDKKKLNVTDKLRLGNTPPTLDVLKAPRPTDAPSAIDDAPSTSGLGLGGGVASPR');
311 isa_ok(  $seqio = Bio::SeqIO->new(-file => test_input_file('AF032047.gbk'),
312                                 -format  => 'genbank'), 'Bio::SeqIO');
313 isa_ok $geneseq = $seqio->next_seq(), 'Bio::Seq';
314 ($CDS) = grep { $_->primary_tag eq 'CDS' } $geneseq->get_SeqFeatures;
315 SKIP: { 
316     test_skip(-tests => 2,
317                           -requires_modules => [qw(IO::String
318                                                                            LWP::UserAgent
319                                                                            HTTP::Request::Common)],
320                           -requires_networking => 1);
321         
322     my $cdsseq = $CDS->spliced_seq( -db => $db, -nosort => 1);
323     is($cdsseq->subseq(1,70), 'ATGGCTCGCTTCGTGGTGGTAGCCCTGCTCGCGCTACTCTCTCTGTCTGGCCTGGAGGCTATCCAGCATG');
324     is($cdsseq->translate->seq, 'MARFVVVALLALLSLSGLEAIQHAPKIQVYSRHPAENGKPNFLNCYVSGFHPSDIEVDLLKNGKKIEKVEHSDLSFSKDWSFYLLYYTEFTPNEKDEYACRVSHVTFPTPKTVKWDRTM*');
328 # trans-spliced 
330 isa_ok( $seqio = Bio::SeqIO->new(-format => 'genbank',
331                                  -file   => test_input_file('NC_001284.gbk')), 
332         'Bio::SeqIO');
333 my $genome = $seqio->next_seq;
335 foreach my $cds (grep { $_->primary_tag eq 'CDS' } $genome->get_SeqFeatures) {
336    my $spliced = $cds->spliced_seq(-nosort => 1)->translate->seq;
337    chop($spliced); # remove stop codon
338    is($spliced,($cds->get_tag_values('translation'))[0],
339       'spliced seq translation matches expected');
342 # spliced_seq phase 
343 my $seqin = Bio::SeqIO->new(-format => 'fasta',
344                             -file   => test_input_file('sbay_c127.fas'));
346 my $seq = $seqin->next_seq;
348 my $sf = Bio::SeqFeature::Generic->new(-verbose => -1,
349                                        -start => 263,
350                                        -end => 721,
351                                        -strand => 1,
352                                        -primary => 'splicedgene');
354 $sf->attach_seq($seq);
356 my %phase_check = (
357     'TTCAATGACT' => 'FNDFYSMGKS',
358     'TCAATGACTT' => 'SMTSIPWVNQ',
359     'GTTCAATGAC' => 'VQ*LLFHG*I',
362 for my $phase (-1..3) {
363     my $sfseq = $sf->spliced_seq(-phase => $phase);
364     ok exists $phase_check{$sfseq->subseq(1,10)};
365     is ($sfseq->translate->subseq(1,10), $phase_check{$sfseq->subseq(1,10)}, 'phase check');
369 SKIP: {
370     my $sfa = Bio::SeqFeature::Annotated->new(-start => 1,
371                                               -end => 5,
372                                               -strand => "+",
373                                               -frame => 2,
374                                               -phase => 2,
375                                               -score => 12,
376                                               -display_name => 'test.annot',
377                                               -seq_id => 'test.displayname' );
378     
379     ok (defined $sfa);
380     my $loc = $sfa->location;
381     ok $loc->isa("Bio::Location::Simple");    
382     ok $sfa->display_name eq 'test.annot';
384         #test bsfa::from_feature
385     my $sfg = Bio::SeqFeature::Generic->new ( -start => 400,
386                                               -end => 440,
387                                               -strand => 1,
388                                               -primary => 'nucleotide_motif',
389                                               -source  => 'program_a',
390                                               -tag => {
391                                                   silly => 20,
392                                                   new => 1
393                                                   }
394                                               );
395     my $sfa2;
396     $sfa2 = Bio::SeqFeature::Annotated->new(-feature => $sfg);
397     
398     is $sfa2->type->name,'nucleotide_motif';
399     is $sfa2->primary_tag, 'nucleotide_motif';
400     is $sfa2->source,'program_a';
401     is $sfa2->strand,1;
402     is $sfa2->start,400;
403     is $sfa2->end,440;
404     is $sfa2->get_Annotations('silly')->value,20;
405     is $sfa2->get_Annotations('new')->value,1;
406     
407     my $sfa3 = Bio::SeqFeature::Annotated->new( -start => 1,
408                                                 -end => 5,
409                                                 -strand => "+",
410                                                 -frame => 2,
411                                                 -phase => 2,
412                                                 -score => 12,
413                                                 -display_name => 'test.annot',
414                                                 -seq_id => 'test.displayname' );
415     $sfa3->from_feature($sfg);
416     
417     
418     is $sfa3->type->name,'nucleotide_motif', 'type->name';
419     is $sfa3->primary_tag, 'nucleotide_motif', 'primary_tag';
420     is $sfa3->source,'program_a';
421     is $sfa3->strand,1;
422     is $sfa3->start,400;
423     is $sfa3->end,440;
424     is $sfa3->get_Annotations('silly')->value,20;
425     is $sfa3->get_Annotations('new')->value,1;
426     is $sfa3->score(), 12;
427     $sfa3->score(11);
428     is $sfa3->score(), 11;
429     $sfa3->score(0);            
430     is $sfa3->score(), 0;       # test that setting to 0 no longer is overriddent to set score to '.' (fixed in Bio::SeqFeature::Annotated version 1.3.7)