[BUG] bug 2598
[bioperl-live.git] / t / Unflattener.t
blobc69441b360a81e6cc2bbd356df8c227ef032cb46
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id$
4 use strict;
6 BEGIN { 
7     use lib 't/lib';
8     use BioperlTest;
9     
10     test_begin(-tests => 9);
11         
12         use_ok('Bio::SeqIO');
13         use_ok('Bio::SeqFeature::Tools::Unflattener');
16 my $verbosity = test_debug();
18 my ($seq, @sfs);
19 my $unflattener = Bio::SeqFeature::Tools::Unflattener->new();
21 if (1) {
22     my @path = ("AE003644_Adh-genomic.gb");
23     $seq = getseq(@path);
24     
25     is ($seq->accession_number, 'AE003644');
26     my @topsfs = $seq->get_SeqFeatures;
27     if( $verbosity > 0 ) {
28         warn sprintf "TOP:%d\n", scalar(@topsfs);
29         write_hier(@topsfs);
30     }
31     
32     # UNFLATTEN
33     $unflattener->verbose($verbosity);
34     @sfs = $unflattener->unflatten_seq(-seq=>$seq,
35                                        -group_tag=>'locus_tag');
36     if( $verbosity > 0 ) {
37         warn "\n\nPOST PROCESSING:\n";
38         write_hier(@sfs);
39         warn sprintf "PROCESSED:%d\n", scalar(@sfs);
40     }
41     is(@sfs, 21);
44 # now try again, using a custom subroutine to link together features
45 $seq = getseq("AE003644_Adh-genomic.gb");
46 @sfs = $unflattener->unflatten_seq
47     (-seq=>$seq,
48      -group_tag=>'locus_tag',
49      -resolver_method => 
50      sub {
51          my $self = shift;
52          my ($sf, @candidate_container_sfs) = @_;
53          if ($sf->has_tag('note')) {
54              my @notes = $sf->get_tag_values('note');
55              my @trnames = map {/from transcript\s+(.*)/;
56                                 $1} @notes;
57              @trnames = grep {$_} @trnames;
58              my $trname;
59              if (@trnames == 0) {
60                  $self->throw("UNRESOLVABLE");
61              }
62              elsif (@trnames == 1) {
63                  $trname = $trnames[0];
64              }
65              else {
66                  $self->throw("AMBIGUOUS: @trnames");
67              }
68              my @container_sfs =
69                  grep {
70                      my ($product) =
71                          $_->has_tag('product') ?
72                          $_->get_tag_values('product') :
73                          ('');
74                      $product eq $trname;
75                  } @candidate_container_sfs;
76              if (@container_sfs == 0) {
77                  $self->throw("UNRESOLVABLE");
78              }
79              elsif (@container_sfs == 1) {
80                  # we got it!
81                  return ($container_sfs[0]=>0);
82              }
83              else {
84                  $self->throw("AMBIGUOUS");
85              }
86          }
87      });
88 $unflattener->feature_from_splitloc(-seq=>$seq);
89 if( $verbosity > 0 ) {
90     warn "\n\nPOST PROCESSING:\n";
91     write_hier(@sfs);
92     warn sprintf "PROCESSED2:%d\n", scalar(@sfs);
94 is(@sfs, 21);
96 # try again; different sequence
97 # this is an E-Coli seq with no mRNA features;
98 # we just want to link all features directly with gene
100 $seq = getseq("D10483.gbk");
102 # UNFLATTEN
103 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
104                                    -partonomy=>{'*'=>'gene'},
105                                 );
106 if( $verbosity > 0 ) {
107     warn "\n\nPOST PROCESSING:\n";
108     write_hier(@sfs);
109     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
111 is(@sfs, 98);
113 # this sequence has no locus_tag or or gene tags
114 $seq = getseq("AY763288.gb");
116 # UNFLATTEN
117 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
118                                    -use_magic=>1
119                                   );
120 if( $verbosity > 0 ) {
121     warn "\n\nPOST PROCESSING:\n";
122     write_hier(@sfs);
123     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
125 is(@sfs, 3);
128 # try again; different sequence - dicistronic gene, mRNA record
130 $seq = getseq("X98338_Adh-mRNA.gb");
132 # UNFLATTEN
133 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
134                                  -partonomy=>{'*'=>'gene'},
135                                 );
136 if( $verbosity > 0 ) {                                 
137     warn "\n\nPOST PROCESSING:\n";
138     write_hier(@sfs);
139     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
141 is(@sfs, 7);
143 # try again; this sequence has no CDSs but rRNA present
145 $seq = getseq("no_cds_example.gb");
147 # UNFLATTEN
148 @sfs = $unflattener->unflatten_seq(-seq=>$seq,
149                                  use_magic=>1
150                                 );
151 if( $verbosity > 0 ) {
152     warn "\n\nPOST PROCESSING:\n";
153     write_hier(@sfs);
154     warn sprintf "PROCESSED:%d\n", scalar(@sfs);
157 my @all_sfs = $seq->get_all_SeqFeatures;
159 my @exons = grep { $_-> primary_tag eq 'exon' }  @all_sfs ; 
161 is(@exons, 2);
165 sub write_hier {
166     my @sfs = @_;
167     _write_hier(0, @sfs);
170 sub _write_hier {
171     my $indent = shift;
172     my @sfs = @_;
173     foreach my $sf (@sfs) {
174         my $label = '?';
175         if ($sf->has_tag('product')) {
176             ($label) = $sf->get_tag_values('product');
177         }
178         warn sprintf "%s%s $label\n", '  ' x $indent, $sf->primary_tag;
179         my @sub_sfs = $sf->sub_SeqFeature;
180         _write_hier($indent+1, @sub_sfs);
181     }
184 sub getseq {
185     my @path = @_;
186     my $seqio =
187       Bio::SeqIO->new('-file'=> test_input_file(@path), 
188                       '-format' => 'GenBank');
189     $seqio->verbose($verbosity);
191     my $seq = $seqio->next_seq();
192     return $seq;