1 # -*-Perl-*- Test Harness script for Bioperl
10 test_begin(-tests => 21);
13 use_ok('Bio::SeqFeature::Tools::Unflattener');
16 my $verbosity = test_debug();
19 my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
20 $unflattener->verbose($verbosity);
24 my @path = ("NC_000007-ribosomal-slippage.gb");
27 my @topsfs = $seq->get_SeqFeatures;
28 if( $verbosity > 0 ) {
29 warn sprintf "TOP:%d\n", scalar(@topsfs);
34 @sfs = $unflattener->unflatten_seq(-seq => $seq,
36 if( $verbosity > 0 ) {
37 warn "\n\nPOST PROCESSING:\n";
39 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
46 my @path = ("ribosome-slippage.gb");
49 my @topsfs = $seq->get_SeqFeatures;
50 if( $verbosity > 0 ) {
51 warn sprintf "TOP:%d\n", scalar(@topsfs);
56 @sfs = $unflattener->unflatten_seq(-seq => $seq,
58 if( $verbosity > 0 ) {
59 warn "\n\nPOST PROCESSING:\n";
61 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
68 my @path = ("AE003644_Adh-genomic.gb");
71 is ($seq->accession_number, 'AE003644');
72 my @topsfs = $seq->get_SeqFeatures;
73 if( $verbosity > 0 ) {
74 warn sprintf "TOP:%d\n", scalar(@topsfs);
79 @sfs = $unflattener->unflatten_seq(-seq => $seq,
80 -group_tag => 'locus_tag');
81 if( $verbosity > 0 ) {
82 warn "\n\nPOST PROCESSING:\n";
84 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
89 # now try again, using a custom subroutine to link together features
90 $seq = getseq("AE003644_Adh-genomic.gb");
91 @sfs = $unflattener->unflatten_seq
93 -group_tag => 'locus_tag',
97 my ($sf, @candidate_container_sfs) = @_;
99 if ($sf->has_tag('note')) {
100 my @notes = $sf->get_tag_values('note');
101 my @trnames = map {/from transcript\s+(.*)/; $1;}
103 @trnames = grep {$_} @trnames;
107 $self->throw("UNRESOLVABLE");
109 elsif (@trnames == 1) {
110 $trname = $trnames[0];
113 $self->throw("AMBIGUOUS: @trnames");
119 $_->has_tag('product') ? $_->get_tag_values('product')
122 } @candidate_container_sfs;
124 if (@container_sfs == 0) {
125 $self->throw("UNRESOLVABLE");
127 elsif (@container_sfs == 1) {
129 return ($container_sfs[0]=>0);
132 $self->throw("AMBIGUOUS");
136 $unflattener->feature_from_splitloc(-seq => $seq);
137 if( $verbosity > 0 ) {
138 warn "\n\nPOST PROCESSING:\n";
140 warn sprintf "PROCESSED2:%d\n", scalar(@sfs);
144 # try again; different sequence
145 # this is an E-Coli seq with no mRNA features;
146 # we just want to link all features directly with gene
148 $seq = getseq("D10483.gbk");
151 @sfs = $unflattener->unflatten_seq(-seq => $seq,
152 -partonomy => {'*'=>'gene'});
153 if( $verbosity > 0 ) {
154 warn "\n\nPOST PROCESSING:\n";
156 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
160 # this sequence has no locus_tag or or gene tags
161 $seq = getseq("AY763288.gb");
164 @sfs = $unflattener->unflatten_seq(-seq => $seq,
166 if( $verbosity > 0 ) {
167 warn "\n\nPOST PROCESSING:\n";
169 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
173 # try again; different sequence - dicistronic gene, mRNA record
175 $seq = getseq("X98338_Adh-mRNA.gb");
178 @sfs = $unflattener->unflatten_seq(-seq => $seq,
179 -partonomy => {'*'=>'gene'});
180 if( $verbosity > 0 ) {
181 warn "\n\nPOST PROCESSING:\n";
183 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
187 # try again; this sequence has no CDSs but rRNA present
189 $seq = getseq("no_cds_example.gb");
192 @sfs = $unflattener->unflatten_seq(-seq => $seq,
194 if( $verbosity > 0 ) {
195 warn "\n\nPOST PROCESSING:\n";
197 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
200 my @all_sfs = $seq->get_all_SeqFeatures;
202 my @exons = grep { $_-> primary_tag eq 'exon' } @all_sfs ;
208 # this is an arabidopsise gbk record. it has no mRNA features.
209 # it has explicit exon/intron records
210 my @path = ("ATF14F8.gbk");
211 $seq = getseq(@path);
213 is ($seq->accession_number, 'AL391144');
214 my @topsfs = $seq->get_SeqFeatures;
215 my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
216 my $n = scalar(@topsfs);
217 if( $verbosity > 0 ) {
218 warn sprintf "TOP:%d\n", scalar(@topsfs);
223 @sfs = $unflattener->unflatten_seq(-seq => $seq,
225 @sfs = $seq->get_SeqFeatures;
226 if( $verbosity > 0 ) {
227 warn "\n\nPOST PROCESSING:\n";
229 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
233 my @allsfs = $seq->get_all_SeqFeatures;
236 my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
237 if( $verbosity > 0 ) {
238 warn sprintf "ALL:%d\n", scalar(@allsfs);
239 warn sprintf "mRNAs:%d\n", scalar(@mrnas);
241 # relationship between mRNA and CDS should be one-one
247 # this is a record from FlyBase
248 # it has mRNA features, and explicit exon/intron records
249 my @path = ("AnnIX-v003.gbk");
250 $seq = getseq(@path);
252 my @topsfs = $seq->get_SeqFeatures;
253 if( $verbosity > 0 ) {
254 warn sprintf "TOP:%d\n", scalar(@topsfs);
259 @sfs = $unflattener->unflatten_seq(-seq => $seq,
261 @sfs = $seq->get_SeqFeatures;
262 if( $verbosity > 0 ) {
263 warn "\n\nPOST PROCESSING:\n";
265 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
269 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
270 is scalar(@exons), 6; # total number of exons per splice
272 my %numberh = map {$_->get_tag_values("number") => 1} @exons;
273 my @numbers = keys %numberh;
274 if( $verbosity > 0 ) {
275 warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
277 is scalar(@numbers), 6; # distinct exons
282 # example of a BAD genbank entry
283 my @path = ("dmel_2Lchunk.gb");
284 $seq = getseq(@path);
286 my @topsfs = $seq->get_SeqFeatures;
287 if( $verbosity > 0 ) {
288 warn sprintf "TOP:%d\n", scalar(@topsfs);
294 # we EXPECT problems with this erroneous record
295 $unflattener->error_threshold(2);
296 @sfs = $unflattener->unflatten_seq(-seq => $seq,
298 @sfs = $seq->get_SeqFeatures;
299 if( $verbosity > 0 ) {
300 warn "\n\nPOST PROCESSING:\n";
302 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
306 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
307 is scalar(@exons), 2; # total number of exons per splice
309 my @probs = $unflattener->get_problems;
310 $unflattener->report_problems(\*STDERR) if $verbosity > 0;
311 $unflattener->clear_problems;
312 if( $verbosity > 0 ) {
313 warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
315 is scalar(@probs), 6;
321 _write_hier(0, @sfs);
327 foreach my $sf (@sfs) {
329 if ($sf->has_tag('gene')) {
330 ($label) = $sf->get_tag_values('gene');
332 elsif ($sf->has_tag('product')) {
333 ($label) = $sf->get_tag_values('product');
335 elsif ($sf->has_tag('number')) {
336 $label = join("; ", $sf->get_tag_values('number'));
338 warn sprintf "%s%s $label\n", ' ' x $indent, $sf->primary_tag;
339 my @sub_sfs = $sf->sub_SeqFeature;
340 _write_hier($indent+1, @sub_sfs);
346 my $seqio = Bio::SeqIO->new('-file' => test_input_file(@path),
347 '-format' => 'GenBank');
348 $seqio->verbose($verbosity);
350 my $seq = $seqio->next_seq();