Massive check of file open lines. Changed bareword filehandles
[bioperl-live.git] / t / SeqFeature / Unflattener.t
blob5a97433a0dc8d694fd98fb3b47576326e0b28dca
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id$
4 use strict;
6 BEGIN { 
7     use lib '.';
8     use Bio::Root::Test;
9     
10     test_begin(-tests => 11);
11         
12         use_ok('Bio::SeqIO');
13         use_ok('Bio::SeqFeature::Tools::Unflattener');
16 my $verbosity = test_debug();
18 my ($seq, @sfs);
19 my $unflattener = Bio::SeqFeature::Tools::Unflattener->new();
21 if (1) {
22     $unflattener->verbose(10);
23     my @path = ("NC_000007-ribosomal-slippage.gb");
24     $seq = getseq(@path);
25     
26     my @topsfs = $seq->get_SeqFeatures;
27     if( $verbosity > 0 ) {
28         warn sprintf "TOP:%d\n", scalar(@topsfs);
29         write_hier(@topsfs);
30     }
31     
32     # UNFLATTEN
33     $unflattener->verbose($verbosity);
34     @sfs = $unflattener->unflatten_seq(-seq=>$seq,
35                                        -use_magic=>1);
36     if( $verbosity > 0 ) {
37         warn "\n\nPOST PROCESSING:\n";
38         write_hier(@sfs);
39         warn sprintf "PROCESSED:%d\n", scalar(@sfs);
40     }
41     is(@sfs, 1995);
44 if (1) {
45     my @path = ("ribosome-slippage.gb");
46     $seq = getseq(@path);
47     
48     my @topsfs = $seq->get_SeqFeatures;
49     if( $verbosity > 0 ) {
50         warn sprintf "TOP:%d\n", scalar(@topsfs);
51         write_hier(@topsfs);
52     }
53     
54     # UNFLATTEN
55     $unflattener->verbose($verbosity);
56     @sfs = $unflattener->unflatten_seq(-seq=>$seq,
57                                        -use_magic=>1);
58     if( $verbosity > 0 ) {
59         warn "\n\nPOST PROCESSING:\n";
60         write_hier(@sfs);
61         warn sprintf "PROCESSED:%d\n", scalar(@sfs);
62     }
63     is(@sfs, 3);
66 if (1) {
67     my @path = ("AE003644_Adh-genomic.gb");
68     $seq = getseq(@path);
69     
70     is ($seq->accession_number, 'AE003644');
71     my @topsfs = $seq->get_SeqFeatures;
72     if( $verbosity > 0 ) {
73         warn sprintf "TOP:%d\n", scalar(@topsfs);
74         write_hier(@topsfs);
75     }
76     
77     # UNFLATTEN
78     $unflattener->verbose($verbosity);
79     @sfs = $unflattener->unflatten_seq(-seq=>$seq,
80                                        -group_tag=>'locus_tag');
81     if( $verbosity > 0 ) {
82         warn "\n\nPOST PROCESSING:\n";
83         write_hier(@sfs);
84         warn sprintf "PROCESSED:%d\n", scalar(@sfs);
85     }
86     is(@sfs, 21);
89 # now try again, using a custom subroutine to link together features
90 $seq = getseq("AE003644_Adh-genomic.gb");
91 @sfs = $unflattener->unflatten_seq
92     (-seq=>$seq,
93      -group_tag=>'locus_tag',
94      -resolver_method => 
95      sub {
96          my $self = shift;
97          my ($sf, @candidate_container_sfs) = @_;
98          if ($sf->has_tag('note')) {
99              my @notes = $sf->get_tag_values('note');
100              my @trnames = map {/from transcript\s+(.*)/;
101                                 $1} @notes;
102              @trnames = grep {$_} @trnames;
103              my $trname;
104              if (@trnames == 0) {
105                  $self->throw("UNRESOLVABLE");
106              }
107              elsif (@trnames == 1) {
108                  $trname = $trnames[0];
109              }
110              else {
111                  $self->throw("AMBIGUOUS: @trnames");
112              }
113              my @container_sfs =
114                  grep {
115                      my ($product) =
116                          $_->has_tag('product') ?
117                          $_->get_tag_values('product') :
118                          ('');
119                      $product eq $trname;
120                  } @candidate_container_sfs;
121              if (@container_sfs == 0) {
122                  $self->throw("UNRESOLVABLE");
123              }
124              elsif (@container_sfs == 1) {
125                  # we got it!
126                  return ($container_sfs[0]=>0);
127              }
128              else {
129                  $self->throw("AMBIGUOUS");
130              }
131          }
132      });
133 $unflattener->feature_from_splitloc(-seq=>$seq);
134 if( $verbosity > 0 ) {
135     warn "\n\nPOST PROCESSING:\n";
136     write_hier(@sfs);
137     warn sprintf "PROCESSED2:%d\n", scalar(@sfs);
139 is(@sfs, 21);
141 # try again; different sequence
142 # this is an E-Coli seq with no mRNA features;
143 # we just want to link all features directly with gene
145 $seq = getseq("D10483.gbk");
147 # UNFLATTEN
148 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
149                                    -partonomy=>{'*'=>'gene'},
150                                 );
151 if( $verbosity > 0 ) {
152     warn "\n\nPOST PROCESSING:\n";
153     write_hier(@sfs);
154     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
156 is(@sfs, 98);
158 # this sequence has no locus_tag or or gene tags
159 $seq = getseq("AY763288.gb");
161 # UNFLATTEN
162 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
163                                    -use_magic=>1
164                                   );
165 if( $verbosity > 0 ) {
166     warn "\n\nPOST PROCESSING:\n";
167     write_hier(@sfs);
168     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
170 is(@sfs, 3);
173 # try again; different sequence - dicistronic gene, mRNA record
175 $seq = getseq("X98338_Adh-mRNA.gb");
177 # UNFLATTEN
178 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
179                                  -partonomy=>{'*'=>'gene'},
180                                 );
181 if( $verbosity > 0 ) {                                 
182     warn "\n\nPOST PROCESSING:\n";
183     write_hier(@sfs);
184     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
186 is(@sfs, 7);
188 # try again; this sequence has no CDSs but rRNA present
190 $seq = getseq("no_cds_example.gb");
192 # UNFLATTEN
193 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
194                                  use_magic=>1
195                                 );
196 if( $verbosity > 0 ) {
197     warn "\n\nPOST PROCESSING:\n";
198     write_hier(@sfs);
199     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
202 my @all_sfs = $seq->get_all_SeqFeatures;
204 my @exons = grep { $_-> primary_tag eq 'exon' }  @all_sfs ; 
206 is(@exons, 2);
210 sub write_hier {
211     my @sfs = @_;
212     _write_hier(0, @sfs);
215 sub _write_hier {
216     my $indent = shift;
217     my @sfs = @_;
218     foreach my $sf (@sfs) {
219         my $label = '?';
220         if ($sf->has_tag('product')) {
221             ($label) = $sf->get_tag_values('product');
222         }
223         warn sprintf "%s%s $label\n", '  ' x $indent, $sf->primary_tag;
224         my @sub_sfs = $sf->sub_SeqFeature;
225         _write_hier($indent+1, @sub_sfs);
226     }
229 sub getseq {
230     my @path = @_;
231     my $seqio =
232       Bio::SeqIO->new('-file'=> test_input_file(@path), 
233                       '-format' => 'GenBank');
234     $seqio->verbose($verbosity);
236     my $seq = $seqio->next_seq();
237     return $seq;