[bug 2262]
[bioperl-live.git] / t / table.t
blob004f9900fb2568e205a0ecf8a7e8d76e50ffd1cb
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id$
#
# Exercises the Bio::SeqIO 'table' and 'excel' readers against the
# kinases.tsv / kinases.xls input files opened further down in this script.
#
# NOTE(review): the bare integer at the start of every line in this copy
# (1, 2, 4, 6, ...) is not Perl; it looks like line numbering carried over
# from a web source viewer. The line that closes the BEGIN block is also not
# present in this copy. Confirm against the repository before running.
4 use strict;
6 BEGIN {     
7     use lib 't/lib';
8     use BioperlTest;
9     
      # Plan of 450 = the 2 use_ok calls below + 4 reader passes of 112
      # assertions each (1 constructor check + 111 inside run_tests when all
      # 10 expected records are read).
      # -requires_module is interpreted by BioperlTest; presumably the whole
      # script is skipped when IO::Scalar is not installed -- TODO confirm.
10     test_begin(-tests => 450,
11                            -requires_module => 'IO::Scalar');
12         
13         use_ok('Bio::Tools::CodonTable');
14         use_ok('Bio::SeqIO');
# Expected values for the ten records, in the order the reader is expected to
# return them. run_tests consumes one element of each list per record.

# Expected display_id of each record.
17 my @names = qw(A6
18                A6r
19                A6ps1
20                A6ps2
21                CaMK2d
22                CaMKK2
23                AMPKa1
24                AMPKa2
25                MARK3
26                MARK2);
# Expected accession_number of each record.
27 my @accs = qw(SK001
28               SK512
29               SK752
30               SK766
31               SK703
32               SK482
33               SK032
34               SK033
35               SK096
36               SK120);
# Expected total number of annotations per record when the reader is created
# with -annotation => 1 (the first tab-delimited pass and the Excel pass).
37 my @num_anns = (5, 5, 5, 5, 6, 7, 7, 7, 7, 7);
# Expected number of "Pseudogene?" annotations whose value is "Y" (0 or 1).
38 my @psg = (0, 0, 1, 1, 0, 0, 0, 0, 0, 0);
# Expected number of "Novelty" annotations whose value is "Known - Refseq".
39 my @rs = (0, 0, 0, 0, 1, 1, 1, 1, 1, 1);
# Pass 1 over kinases.tsv: -annotation => 1, checked against @num_anns.
# presumably a true scalar here turns every column not otherwise mapped into
# an annotation -- confirm in the Bio::SeqIO::table documentation.
# $seqin is declared here at file scope; run_tests reads records from it
# directly rather than taking the reader as an argument.
41 ok my $seqin = Bio::SeqIO->new(-file => test_input_file("kinases.tsv"),
42                             -format  => 'table',
43                             -species => "Homo sapiens",
44                             -delim   => "\t",
45                             -header  => 1,
46                             -display_id => 1,
47                             -accession_number => 2,
48                             -seq => 7,
49                             -annotation => 1,
50                             -trim => 1);
51 run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);
53 $seqin->close();
# Pass 2 over the same file: annotation columns are selected by name through
# -colnames (no -annotation option). One fewer annotation per record is
# expected than in pass 1 for records 1-4, two fewer for records 5-10.
55 ok $seqin = Bio::SeqIO->new(-file => test_input_file("kinases.tsv"),
56                          -format  => 'table',
57                          -species => "Homo sapiens",
58                          -delim   => "\t",
59                          -header  => 1,
60                          -display_id => 1,
61                          -accession_number => 2,
62                          -seq => 7,
63                          -colnames => "[Family,Subfamily,Pseudogene?,Protein,Novelty]",
64                          -trim => 1);
65 run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);
67 $seqin->close();
# Pass 3 over the same file: annotation columns are selected with a list of
# numbers passed to -annotation (presumably column positions -- TODO confirm).
# The expected annotation counts are the same as in pass 2.
69 ok $seqin = Bio::SeqIO->new(-file => test_input_file("kinases.tsv"),
70                          -format  => 'table',
71                          -species => "Homo sapiens",
72                          -delim   => "\t",
73                          -header  => 1,
74                          -display_id => 1,
75                          -accession_number => 2,
76                          -seq => 7,
77                          -annotation => "[4,5,6,8,10]",
78                          -trim => 1);
79 run_tests([@names],[@accs],[4,4,4,4,4,5,5,5,5,5],[@psg],[@rs]);
81 $seqin->close();
83 # need Spreadsheet::ParseExcel installed for testing Excel format
# Same checks as the first tab-delimited pass, but reading kinases.xls through
# the 'excel' format (no -delim option is given here).
# NOTE(review): the line closing this SKIP block is not present in this copy.
84 SKIP: {
        # 112 = the constructor check below + the 111 assertions run_tests
        # makes when it reads all 10 expected records.
85         test_skip(-tests => 112, -requires_module => 'Spreadsheet::ParseExcel');
87         ok $seqin = Bio::SeqIO->new(-file => test_input_file("kinases.xls"),
88                                                          -format  => 'excel',
89                                                          -species => "Homo sapiens",
90                                                          -header  => 1,
91                                                          -display_id => 1,
92                                                          -accession_number => 2,
93                                                          -seq => 7,
94                                                          -annotation => 1,
95                                                          -trim => 1);
96         run_tests([@names],[@accs],[@num_anns],[@psg],[@rs]);
97         
98         $seqin->close();
# run_tests(\@names, \@accs, \@num_anns, \@psg, \@rs)
#
# Reads every record from the file-scoped $seqin reader and compares it with
# the expected values, which are consumed front to back, one per record:
#   names    - expected display_id
#   accs     - expected accession_number
#   num_anns - expected total number of annotations
#   psg      - expected number of "Pseudogene?" annotations equal to "Y"
#   rs       - expected number of "Novelty" annotations equal to "Known - Refseq"
# Finally asserts that exactly 10 records were read. With 10 records in the
# order given by the callers, this makes 111 assertions per call.
#
# NOTE(review): the closing brace of this sub is not present in this copy.
101 sub run_tests {
102     my ($names_,$accs_,$num_anns_,$psg_,$rs_) = @_;
    # Work on local copies; the shift calls below empty these arrays.
104     my @names = @$names_;
105     my @accs = @$accs_;
106     my @num_anns = @$num_anns_;
107     my @psg = @$psg_;
108     my @rs = @$rs_;
    # $n is the 1-based position of the current record.
110     my $n = 0;
111     my $translator = Bio::Tools::CodonTable->new(-id => 1);
112     while (my $seq = $seqin->next_seq()) {
113         $n++;
114         is ($seq->display_id, shift(@names));
115         is ($seq->accession_number, shift(@accs));
116         ok ($seq->species);
117         is ($seq->species->binomial, "Homo sapiens");
118         my @anns = $seq->annotation->get_Annotations();
119         is (scalar(@anns), shift(@num_anns));
120         @anns = grep { $_->value eq "Y"; 
121                      } $seq->annotation->get_Annotations("Pseudogene?");
122         is (scalar(@anns), shift(@psg));
123         
124         # check sequences and that they translate to what we expect
        # Only done for records 5-10, and not for MARK3.
        # NOTE(review): the reason for these exclusions is not visible here.
125         if (($n >= 5) && ($seq->display_id ne "MARK3")) {
126             my $dna = $seq->seq;
127             my $protein = "";
128             my $frame = 0;
            # Try the three forward frames by dropping 0, 1 or 2 leading
            # characters, keeping the longest candidate found in any frame.
129             while ($frame <= 2) {
130                 my $inframe = substr($dna,$frame);
131                 # translate to protein
132                 my $protseq = $translator->translate($inframe);
133                 # chop off everything after the stop and before the first Met
                # Each match is a run starting at an "M" and containing no
                # "*"; a longer run replaces the current best.
134                 while ($protseq =~ /(M[^\*]+)/g) {
135                     $protein = $1 if length($1) > length($protein);
136                 }
137                 $frame++;
138             }
139             # retrieve expected result from annotation and compare
140             my ($protann) = $seq->annotation->get_Annotations("Protein");
141             ok (defined $protann);
142             is ($protein, $protann->value);
143         }
144         
145         @anns = grep { $_->value eq "Known - Refseq"; 
146                      } $seq->annotation->get_Annotations("Novelty");
147         is (scalar(@anns), shift(@rs));
        # Records 1-5 are expected to have no "Subfamily" annotation, the
        # remaining records exactly one.
148         @anns = $seq->annotation->get_Annotations("Subfamily");
149         is (scalar(@anns), ($n <= 5) ? 0 : 1);
        # Every record is expected to have exactly one "Family" annotation;
        # its first four characters are compared with "A6" for records 1-4
        # and with "CAMK" for the rest.
150         @anns = $seq->annotation->get_Annotations("Family");
151         is (scalar(@anns), 1);
152         is (substr($anns[0]->value,0,4), ($n <= 4) ? "A6" : "CAMK");    
153     }
154     
    # The reader must have returned exactly ten records.
155     is ($n, 10);