1 # -*-Perl-*- mode (to keep my emacs happy)
5 use vars qw($DEBUG $TESTCOUNT);
7 eval { require Test; };
13 plan tests => $TESTCOUNT;
19 use Bio::SeqFeature::Tools::Unflattener;
23 my $verbosity = -1; # Set to -1 for release version, so warnings aren't printed
27 my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
28 $unflattener->verbose($verbosity);
32 # this is an arabidopsise gbk record. it has no mRNA features.
33 # it has explicit exon/intron records
35 my @path = ("t","data","ATF14F8.gbk");
38 ok ($seq->accession_number, 'AL391144');
39 my @topsfs = $seq->get_SeqFeatures;
40 my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
41 my $n = scalar(@topsfs);
42 if( $verbosity > 0 ) {
43 warn sprintf "TOP:%d\n", scalar(@topsfs);
47 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
50 @sfs = $seq->get_SeqFeatures;
51 if( $verbosity > 0 ) {
52 warn "\n\nPOST PROCESSING:\n";
54 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
57 my @allsfs = $seq->get_all_SeqFeatures;
59 my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
60 if( $verbosity > 0 ) {
61 warn sprintf "ALL:%d\n", scalar(@allsfs);
62 warn sprintf "mRNAs:%d\n", scalar(@mrnas);
65 # relationship between mRNA and CDS should be one-one
71 # this is a record from FlyBase
72 # it has mRNA features, and explicit exon/intron records
74 my @path = ("t","data","AnnIX-v003.gbk");
77 my @topsfs = $seq->get_SeqFeatures;
78 if( $verbosity > 0 ) {
79 warn sprintf "TOP:%d\n", scalar(@topsfs);
83 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
86 @sfs = $seq->get_SeqFeatures;
87 if( $verbosity > 0 ) {
88 warn "\n\nPOST PROCESSING:\n";
90 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
93 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
94 ok scalar(@exons), 6; # total number of exons per splice
95 my %numberh = map {$_->get_tag_values("number") => 1} @exons;
96 my @numbers = keys %numberh;
97 if( $verbosity > 0 ) {
98 warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
100 ok scalar(@numbers), 6; # distinct exons
105 # example of a BAD genbank entry
107 my @path = ("t","data","dmel_2Lchunk.gb");
108 $seq = getseq(@path);
110 my @topsfs = $seq->get_SeqFeatures;
111 if( $verbosity > 0 ) {
112 warn sprintf "TOP:%d\n", scalar(@topsfs);
117 # we EXPECT problems with this erroneous record
118 $unflattener->error_threshold(2);
119 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
122 my @probs = $unflattener->get_problems;
123 $unflattener->report_problems(\*STDERR) if $verbosity > 0;
124 $unflattener->clear_problems;
125 @sfs = $seq->get_SeqFeatures;
126 if( $verbosity > 0 ) {
127 warn "\n\nPOST PROCESSING:\n";
129 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
132 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
133 ok scalar(@exons), 2; # total number of exons per splice
134 if( $verbosity > 0 ) {
135 warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
137 ok scalar(@probs), 6;
143 _write_hier(0, @sfs);
149 foreach my $sf (@sfs) {
151 if ($sf->has_tag('gene')) {
152 ($label) = $sf->get_tag_values('gene');
154 if ($sf->has_tag('product')) {
155 ($label) = $sf->get_tag_values('product');
157 if ($sf->has_tag('number')) {
158 $label = join("; ", $sf->get_tag_values('number'));
160 printf "%s%s $label\n", ' ' x $indent, $sf->primary_tag;
161 my @sub_sfs = $sf->sub_SeqFeature;
162 _write_hier($indent+1, @sub_sfs);
169 Bio::SeqIO->new('-file'=> Bio::Root::IO->catfile(
172 '-format' => 'GenBank');
173 $seqio->verbose($verbosity);
175 my $seq = $seqio->next_seq();