partial fix for arp (-end now works and counts ? as ambiguous sequence instead of...
[bioperl-live.git] / t / Signalp2.t
blobcf30015695874c7055fd521a2a5d08c2e48d849b
1 # -*-Perl-*- Test Harness script for Bioperl
2 # $Id$
use strict;
use warnings;
use Data::Dumper;    # handy for dumping a feature while debugging

BEGIN {
    use lib 't/lib';
    use BioperlTest;

    # Fixed assertion count for the whole script. test_begin comes from
    # BioperlTest; presumably it installs the test plan -- TODO confirm.
    test_begin(-tests => 185);

    use_ok('Bio::Tools::Signalp::ExtendedSignalp');
}
###############################################
### TESTS ON SUMMARY OUTPUT FORMAT (NN+HMM) ###
###############################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
my $res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'pred'   => 'Signal peptide',
        'nnpred' => 'signal-peptide',
        'end'    => '22',
        'prob'   => '0.999',
        'anchor' => '0.000',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'pred'   => 'Non-secretory protein',
        'nnpred' => 'signal-peptide',
        'end'    => '83',
        'prob'   => '0.222',
        'anchor' => '0.067',
    },
};

# Test on filtered results
my $facts   = [qw(maxS D)];
my $in      = test_input_file("signalp.summary");
my $signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file    => $in,
    -factors => $facts,
);

ok($signalp, 'summary NN+HMM, factors maxS/D: parser object created');
my $i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    #print Dumper($feat);
    my $tag = "summary NN+HMM, factors maxS/D, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('peptideProb'))[0],       $res->{$i}->{prob},   "$tag: peptideProb");
    is(($feat->get_tag_values('anchorProb'))[0],        $res->{$i}->{anchor}, "$tag: anchorProb");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},   "$tag: signalpPrediction");
    is(($feat->get_tag_values('nnPrediction'))[0],      $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
# Tests without filters.
# By default it should only parse results with Ymax and meanS, to mimic the
# default behavior of Bio::Tools::Signalp.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'pred'   => 'Signal peptide',
        'end'    => '22',
        'prob'   => '0.999',
        'anchor' => '0.000',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'pred'   => 'Non-secretory protein',
        'end'    => '83',
        'prob'   => '0.222',
        'anchor' => '0.067',
    },
};

# No filters required
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);
ok($signalp, 'summary NN+HMM, no factors: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "summary NN+HMM, no factors, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('peptideProb'))[0],       $res->{$i}->{prob},   "$tag: peptideProb");
    is(($feat->get_tag_values('anchorProb'))[0],        $res->{$i}->{anchor}, "$tag: anchorProb");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},   "$tag: signalpPrediction");
    $i++;
}
#############################################
### TESTS ON SHORT OUTPUT FORMAT (NN+HMM) ###
#############################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'pred'   => 'Signal peptide',
        'nnpred' => 'signal-peptide',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'nnpred' => 'signal-peptide',
        'pred'   => 'Non-secretory protein',
        'end'    => '83',
    },
    '3' => {
        'id'     => 'BC1G_00009.1',
        'nnpred' => 'signal-peptide',
        'pred'   => 'Non-secretory protein',
        'end'    => '28',
    },
    '4' => {
        'id'     => 'BC1G_00010.1',
        'nnpred' => 'signal-peptide',
        'pred'   => 'Non-secretory protein',
        'end'    => '15',
    },
};

# Test on filtered results
$facts   = [qw(maxC)];
$in      = test_input_file("signalp.short");
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file    => $in,
    -factors => $facts,
);

ok($signalp, 'short NN+HMM, factor maxC: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "short NN+HMM, factor maxC, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},   "$tag: signalpPrediction");
    is(($feat->get_tag_values('nnPrediction'))[0],      $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
# Tests without filters.
# By default it should only parse results with Ymax and meanS, to mimic the
# default behavior of GPI::Bio::Tools::Signalp.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'pred'   => 'Signal peptide',
        'prob'   => 0.999,
        'nnpred' => 'signal-peptide',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'pred'   => 'Non-secretory protein',
        'prob'   => 0.222,
        'nnpred' => 'signal-peptide',
        'end'    => '83',
    },
};

# No filters required
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);
ok($signalp, 'short NN+HMM, no factors: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "short NN+HMM, no factors, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('peptideProb'))[0], $res->{$i}->{prob}, "$tag: peptideProb");

    # NOTE(review): the expectation table above defines no 'anchor' key, so
    # this compares anchorProb against undef -- confirm that is intended.
    is(($feat->get_tag_values('anchorProb'))[0],        $res->{$i}->{anchor}, "$tag: anchorProb");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},   "$tag: signalpPrediction");
    is(($feat->get_tag_values('nnPrediction'))[0],      $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
###########################################
### TESTS ON SUMMARY OUTPUT FORMAT (NN) ###
###########################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'nnpred' => 'signal-peptide',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'nnpred' => 'signal-peptide',
        'end'    => '83',
    },
};

# Test on filtered results BROKEN
$facts   = [qw(maxC)];
$in      = test_input_file("signalp.nn.summary");
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file    => $in,
    -factors => $facts,
);

ok($signalp, 'summary NN, factor maxC: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "summary NN, factor maxC, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('nnPrediction'))[0], $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
# Tests without filters.
# By default it should only parse results with Ymax and meanS, to mimic the
# default behavior of GPI::Bio::Tools::Signalp.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'nnpred' => 'signal-peptide',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'nnpred' => 'signal-peptide',
        'end'    => '83',
    },
    '3' => {
        'id'     => 'BC1G_00009.1',
        'nnpred' => 'signal-peptide',
        'end'    => '28',
    },
    '4' => {
        'id'     => 'BC1G_00010.1',
        'nnpred' => 'signal-peptide',
        'end'    => '15',
    },
};

# No filters required BROKEN
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);
ok($signalp, 'summary NN, no factors: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "summary NN, no factors, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('nnPrediction'))[0], $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
############################################
### TESTS ON SUMMARY OUTPUT FORMAT (HMM) ###
############################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
$res = {
    '1' => {
        'id'     => 'BC1G_00002.1',
        'prob'   => '0.000',
        'anchor' => '0.000',
        'cleav'  => '0.000',
        'pred'   => 'Non-secretory protein',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00003.1',
        'prob'   => '0.999',
        'anchor' => '0.000',
        'cleav'  => '0.973',
        'pred'   => 'Signal peptide',
        'end'    => '22',
    },
    '3' => {
        'id'     => 'BC1G_00004.1',
        'prob'   => '0.003',
        'anchor' => '0.000',
        'cleav'  => '0.001',
        'pred'   => 'Non-secretory protein',
        'end'    => '19',
    },
    '4' => {
        'id'     => 'BC1G_00005.1',
        'prob'   => '0.008',
        'anchor' => '0.000',
        'cleav'  => '0.007',
        'pred'   => 'Non-secretory protein',
        'end'    => '22',
    },
    '5' => {
        'id'     => 'BC1G_00006.1',
        'prob'   => '0.000',
        'anchor' => '0.000',
        'cleav'  => '0.000',
        'pred'   => 'Non-secretory protein',
        'end'    => '23',
    },
    '6' => {
        'id'     => 'BC1G_00007.1',
        'prob'   => '0.240',
        'anchor' => '0.000',
        'cleav'  => '0.228',
        'pred'   => 'Non-secretory protein',
        'end'    => '22',
    },
    '7' => {
        'id'     => 'BC1G_00008.1',
        'prob'   => '0.222',
        'anchor' => '0.067',
        'cleav'  => '0.061',
        'pred'   => 'Non-secretory protein',
        'end'    => '22',
    },
    '8' => {
        'id'     => 'BC1G_00009.1',
        'prob'   => '0.000',
        'anchor' => '0.000',
        'cleav'  => '0.000',
        'pred'   => 'Non-secretory protein',
        'end'    => '20',
    },
};

# It is impossible to filter with hmm output...
$in      = test_input_file("signalp.hmm.summary");
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);

ok($signalp, 'summary HMM: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "summary HMM, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('peptideProb'))[0],       $res->{$i}->{prob},   "$tag: peptideProb");
    is(($feat->get_tag_values('anchorProb'))[0],        $res->{$i}->{anchor}, "$tag: anchorProb");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},   "$tag: signalpPrediction");
    is(($feat->get_tag_values('cleavageSiteProb'))[0],  $res->{$i}->{cleav},  "$tag: cleavageSiteProb");
    $i++;
}
#########################################
### TESTS ON SHORT OUTPUT FORMAT (NN) ###
#########################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'nnpred' => 'signal-peptide',
        'yprob'  => '0.866',
        'dprob'  => '0.902',
        'cprob'  => '0.934',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'yprob'  => '0.383',
        'dprob'  => '0.436',
        'cprob'  => '0.576',
        'nnpred' => 'signal-peptide',
        'end'    => '83',
    },
};

# Test on filtered results
$facts   = [qw(maxY)];
$in      = test_input_file("signalp.nn.short");
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file    => $in,
    -factors => $facts,
);

ok($signalp, 'short NN, factor maxY: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "short NN, factor maxY, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('maxCprob'))[0],     $res->{$i}->{cprob},  "$tag: maxCprob");
    is(($feat->get_tag_values('Dprob'))[0],        $res->{$i}->{dprob},  "$tag: Dprob");
    is(($feat->get_tag_values('maxYprob'))[0],     $res->{$i}->{yprob},  "$tag: maxYprob");
    is(($feat->get_tag_values('nnPrediction'))[0], $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
# Tests without filters.
# By default it should only parse results with Ymax and meanS, to mimic the
# default behavior of GPI::Bio::Tools::Signalp.
$res = {
    '1' => {
        'id'     => 'BC1G_00003.1',
        'nnpred' => 'signal-peptide',
        'yprob'  => '0.866',
        'dprob'  => '0.902',
        'cprob'  => '0.934',
        'end'    => '22',
    },
    '2' => {
        'id'     => 'BC1G_00008.1',
        'yprob'  => '0.383',
        'dprob'  => '0.436',
        'cprob'  => '0.576',
        'nnpred' => 'signal-peptide',
        'end'    => '83',
    },
};

# No filters required
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);
ok($signalp, 'short NN, no factors: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "short NN, no factors, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('maxCprob'))[0],     $res->{$i}->{cprob},  "$tag: maxCprob");
    is(($feat->get_tag_values('Dprob'))[0],        $res->{$i}->{dprob},  "$tag: Dprob");
    is(($feat->get_tag_values('maxYprob'))[0],     $res->{$i}->{yprob},  "$tag: maxYprob");
    is(($feat->get_tag_values('nnPrediction'))[0], $res->{$i}->{nnpred}, "$tag: nnPrediction");
    $i++;
}
##########################################
### TESTS ON SHORT OUTPUT FORMAT (HMM) ###
##########################################

# Expected values, keyed by the 1-based position at which the parser
# returns each feature.
$res = {
    '1' => {
        'id'    => 'BC1G_00002.1',
        'prob'  => '0.000',
        'cleav' => '0.000',
        'pred'  => 'Non-secretory protein',
        'end'   => '22',
    },
    '2' => {
        'id'    => 'BC1G_00003.1',
        'prob'  => '0.999',
        'cleav' => '0.973',
        'pred'  => 'Signal peptide',
        'end'   => '22',
    },
    '3' => {
        'id'    => 'BC1G_00004.1',
        'prob'  => '0.003',
        'cleav' => '0.001',
        'pred'  => 'Non-secretory protein',
        'end'   => '19',
    },
    '4' => {
        'id'    => 'BC1G_00005.1',
        'prob'  => '0.008',
        'cleav' => '0.007',
        'pred'  => 'Non-secretory protein',
        'end'   => '22',
    },
    '5' => {
        'id'    => 'BC1G_00006.1',
        'prob'  => '0.000',
        'cleav' => '0.000',
        'pred'  => 'Non-secretory protein',
        'end'   => '23',
    },
    '6' => {
        'id'    => 'BC1G_00007.1',
        'prob'  => '0.240',
        'cleav' => '0.228',
        'pred'  => 'Non-secretory protein',
        'end'   => '22',
    },
    '7' => {
        'id'    => 'BC1G_00008.1',
        'prob'  => '0.222',
        'cleav' => '0.061',
        'pred'  => 'Non-secretory protein',
        'end'   => '22',
    },
    '8' => {
        'id'    => 'BC1G_00009.1',
        'prob'  => '0.000',
        'cleav' => '0.000',
        'pred'  => 'Non-secretory protein',
        'end'   => '20',
    },
};

# No filters available with hmm on short output
$in      = test_input_file("signalp.hmm.short");
$signalp = Bio::Tools::Signalp::ExtendedSignalp->new(
    -file => $in,
);

ok($signalp, 'short HMM: parser object created');
$i = 1;

# Compare every returned feature against the expectation stored under
# its position; $i advances once per feature.
while (my $feat = $signalp->next_feature()) {
    my $tag = "short HMM, feature $i";
    is($feat->seq_id(), $res->{$i}->{id},  "$tag: seq_id");
    is($feat->end(),    $res->{$i}->{end}, "$tag: end");
    is(($feat->get_tag_values('peptideProb'))[0],       $res->{$i}->{prob},  "$tag: peptideProb");
    is(($feat->get_tag_values('cleavageSiteProb'))[0],  $res->{$i}->{cleav}, "$tag: cleavageSiteProb");
    is(($feat->get_tag_values('signalpPrediction'))[0], $res->{$i}->{pred},  "$tag: signalpPrediction");
    $i++;
}

exit 0;