Fix bug 253 testing for defined
[bioperl-live.git] / t / SeqIO / table.t
blob4b250b02408eba3dea390081383165af8b451899
# -*-Perl-*- Test Harness script for Bioperl
#
# Reads tabular sequence input through Bio::SeqIO ('table' format, plus
# 'excel' when Spreadsheet::ParseExcel is available) and checks the parsed
# ids, accessions, species, annotations and sequences.

use strict;
use warnings;

BEGIN {
    use lib '.';
    use Bio::Root::Test;

    # The plan is a fixed count: adding or removing an assertion anywhere in
    # this script requires updating -tests here.
    test_begin(-tests           => 463,
               -requires_module => 'IO::Scalar');

    use_ok('Bio::Tools::CodonTable');
    use_ok('Bio::SeqIO::table');
}
# Expected display ids, in the order next_seq() is expected to return them.
my @names = qw(
    A6
    A6r
    A6ps1
    A6ps2
    CaMK2d
    CaMKK2
    AMPKa1
    AMPKa2
    MARK3
    MARK2
);

# Expected accession numbers, parallel to @names.
my @accs = qw(
    SK001
    SK512
    SK752
    SK766
    SK703
    SK482
    SK032
    SK033
    SK096
    SK120
);

# Per-record expectations, parallel to @names.
# @num_anns: total number of annotations (used for the -annotation => 1 runs).
my @num_anns = (5, 5, 5, 5, 6, 7, 7, 7, 7, 7);
# @psg: number of "Pseudogene?" annotations whose value is "Y".
my @psg = (0, 0, 1, 1, 0, 0, 0, 0, 0, 0);
# @rs: number of "Novelty" annotations whose value is "Known - Refseq".
my @rs = (0, 0, 0, 0, 1, 1, 1, 1, 1, 1);
# Pass 1: test.tsv read with -annotation => 1; the per-record annotation
# totals are expected to match @num_anns.
ok( my $seqin = Bio::SeqIO->new(-file             => test_input_file("test.tsv"),
                                -format           => 'table',
                                -species          => "Homo sapiens",
                                -delim            => "\t",
                                -header           => 1,
                                -display_id       => 1,
                                -accession_number => 2,
                                -seq              => 7,
                                -annotation       => 1,
                                -trim             => 1),
    'table reader created with -annotation => 1' );
run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);

$seqin->close();

# Pass 2: same file, annotation columns selected by header name via
# -colnames; one annotation fewer per record is expected than in pass 1.
ok( $seqin = Bio::SeqIO->new(-file             => test_input_file("test.tsv"),
                             -format           => 'table',
                             -species          => "Homo sapiens",
                             -delim            => "\t",
                             -header           => 1,
                             -display_id       => 1,
                             -accession_number => 2,
                             -seq              => 7,
                             -colnames         => "[Family,Subfamily,Pseudogene?,Protein,Novelty]",
                             -trim             => 1),
    'table reader created with -colnames' );
run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);

$seqin->close();

# Pass 3: same file, annotation columns selected by an explicit column list;
# the expected counts are the same as in pass 2.
ok( $seqin = Bio::SeqIO->new(-file             => test_input_file("test.tsv"),
                             -format           => 'table',
                             -species          => "Homo sapiens",
                             -delim            => "\t",
                             -header           => 1,
                             -display_id       => 1,
                             -accession_number => 2,
                             -seq              => 7,
                             -annotation       => "[4,5,6,8,10]",
                             -trim             => 1),
    'table reader created with an -annotation column list' );
run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);

# Close this reader too, as the first two passes do, before $seqin is reused.
$seqin->close();
# Tests to check that 'description' is read from 'table' format
ok( $seqin = Bio::SeqIO->new(
        -file             => test_input_file("test-1.tab"),
        -format           => 'table',
        -header           => 1,
        -display_id       => 1,
        -accession_number => 1,
        -seq              => 3,
        -desc             => 2,
    ),
    'table reader created for test-1.tab' );
# Kept although redundant with the assertion above: the plan counts it.
ok($seqin, 'reader for test-1.tab is true');

# First record.
my $seq = $seqin->next_seq;
ok($seq, 'first record read');
is($seq->desc,       'd1',   'first record: desc');
is($seq->display_id, 'n1',   'first record: display_id');
is($seq->seq,        'aaaa', 'first record: seq');

# Second record.
$seq = $seqin->next_seq;
ok($seq, 'second record read');
is($seq->desc,       'd2',   'second record: desc');
is($seq->display_id, 'n2',   'second record: display_id');
is($seq->seq,        'tttt', 'second record: seq');

$seqin->close();
# Tests to check that we can _not_ write to 'table' format.
# NOTE: the write_seq() check itself is still disabled below; only the setup
# (reading one genbank record and opening a 'table' writer) is exercised.
#
# The enclosing block gives $seq its own scope, so declaring it here does not
# mask a file-level 'my $seq' (perl warns about same-scope redeclaration).
{
    ok( $seqin = Bio::SeqIO->new(
            -file   => test_input_file("test-1.tab.gb"),
            -format => 'genbank',
        ),
        'genbank reader created for test-1.tab.gb' );
    # Kept although redundant with the assertion above: the plan counts it.
    ok($seqin, 'reader for test-1.tab.gb is true');

    my $seq = $seqin->next_seq;
    ok($seq, 'genbank record read');

    my $tmpfile = test_output_file();
    my $seqout  = Bio::SeqIO->new( -format => 'table', -file => ">$tmpfile" );
    # dies_ok not available
    # dies_ok { $seqout->write_seq($seq) } "write_seq() not implemented";

    # Release the reader before $seqin is reused, as the other sections do.
    $seqin->close();
}
# Need Spreadsheet::ParseExcel installed for testing Excel format
SKIP: {
    # Skips the 112 assertions of this block when the module is missing.
    test_skip(-tests => 112, -requires_module => 'Spreadsheet::ParseExcel');

    # Same expectations as the first test.tsv pass, read from test.xls.
    ok( $seqin = Bio::SeqIO->new(-file             => test_input_file("test.xls"),
                                 -format           => 'excel',
                                 -species          => "Homo sapiens",
                                 -header           => 1,
                                 -display_id       => 1,
                                 -accession_number => 2,
                                 -seq              => 7,
                                 -annotation       => 1,
                                 -trim             => 1),
        'excel reader created for test.xls' );
    run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);

    $seqin->close();
}
# run_tests(\@names, \@accs, \@num_anns, \@psg, \@rs)
#
# Reads every record from the file-level $seqin reader and checks it against
# the expectations, which are array references consumed in record order:
#   \@names     expected display_id
#   \@accs      expected accession_number
#   \@num_anns  expected total number of annotations
#   \@psg       expected number of "Pseudogene?" annotations equal to "Y"
#   \@rs        expected number of "Novelty" annotations equal to "Known - Refseq"
# Finally asserts that exactly 10 records were read.
#
# The number of assertions per record is unchanged from the original so the
# fixed test plan stays valid.
sub run_tests {
    my ($names_, $accs_, $num_anns_, $psg_, $rs_) = @_;

    # Private copies: the expectations are consumed with shift() below and the
    # caller's arrays must not be emptied.
    my @names    = @$names_;
    my @accs     = @$accs_;
    my @num_anns = @$num_anns_;
    my @psg      = @$psg_;
    my @rs       = @$rs_;

    my $n = 0;
    # Codon table id 1 (presumably the standard genetic code - confirm against
    # Bio::Tools::CodonTable).
    my $translator = Bio::Tools::CodonTable->new(-id => 1);
    while (my $seq = $seqin->next_seq()) {
        $n++;
        is($seq->display_id,       shift(@names), "record $n: display_id");
        is($seq->accession_number, shift(@accs),  "record $n: accession_number");
        ok($seq->species, "record $n: species is set");
        is($seq->species->binomial, "Homo sapiens", "record $n: species binomial");

        my @anns = $seq->annotation->get_Annotations();
        is(scalar(@anns), shift(@num_anns), "record $n: total annotation count");

        @anns = grep { $_->value eq "Y" }
                $seq->annotation->get_Annotations("Pseudogene?");
        is(scalar(@anns), shift(@psg), "record $n: pseudogene flag count");

        # check sequences and that they translate to what we expect
        if (($n >= 5) && ($seq->display_id ne "MARK3")) {
            my $dna     = $seq->seq;
            my $protein = "";
            for my $frame (0 .. 2) {
                # translate this reading frame to protein
                my $protseq = $translator->translate(substr($dna, $frame));
                # keep the longest Met-to-stop stretch seen in any frame
                while ($protseq =~ /(M[^\*]+)/g) {
                    $protein = $1 if length($1) > length($protein);
                }
            }
            # retrieve expected result from annotation and compare
            my ($protann) = $seq->annotation->get_Annotations("Protein");
            ok(defined $protann, "record $n: has a Protein annotation");
            # Compare against undef when the annotation is missing, so the
            # test fails cleanly instead of dying on an undefined object.
            is($protein, defined $protann ? $protann->value : undef,
               "record $n: longest ORF matches the Protein annotation");
        }

        @anns = grep { $_->value eq "Known - Refseq" }
                $seq->annotation->get_Annotations("Novelty");
        is(scalar(@anns), shift(@rs), "record $n: RefSeq novelty count");

        @anns = $seq->annotation->get_Annotations("Subfamily");
        is(scalar(@anns), ($n <= 5) ? 0 : 1, "record $n: Subfamily annotation count");

        @anns = $seq->annotation->get_Annotations("Family");
        is(scalar(@anns), 1, "record $n: Family annotation count");
        # An absent Family annotation is already reported by the count check
        # above; use an empty string here so the prefix check fails, not dies.
        my $family = @anns ? $anns[0]->value : "";
        is(substr($family, 0, 4), ($n <= 4) ? "A6" : "CAMK",
           "record $n: Family prefix");
    }

    is($n, 10, "read 10 records");
}