Massive check of file open lines. Changed bareword filehandles
[bioperl-live.git] / t / SeqIO / msout.t
blob7b7e8ae66cae55baabef5872abc99e12f9521037
1 #!/usr/bin/perl
2 use version;
3 our $API_VERSION = $Bio::SeqIO::msout::API_VERSION;
4 use strict;
5 use File::Path qw(mkpath rmtree);
7 BEGIN {
8 use lib '.'; # for core package test scripts only
9 use Bio::Root::Test;
11 test_begin(
12 -tests => 165,
13 -requires_modules => [q(Bio::SeqIO::msout)],
14 -requires_networking => 0
17 use_ok('Bio::SeqIO::msout');
21 # skip tests if the msout.pm module is too old.
22 my $api_version = $Bio::SeqIO::msout::API_VERSION;
23 cmp_ok( $api_version, '>=', qv('1.1.5'),
24 "Bio::SeqIO::msout is at least api version 1.1.5" );
26 test_file_1( 0, "msout/msout_infile1" ); # 23 tests
27 test_file_2( 0, "msout/msout_infile2" ); # 22 tests
28 test_file_3( 0, "msout/msout_infile3" ); # 17 tests
30 # tests to run for api versions >= 1.1.6
31 SKIP: {
32 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22
33 unless $api_version >= qv('1.1.6');
34 test_file_1( 0, q(msout/msout_infile4) );
37 # tests to run for api versions >= 1.1.7
38 SKIP: {
39 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4
40 unless $api_version >= qv('1.1.7');
41 bad_test_file_1( 0, q(msout/bad_msout_infile1) ); # 2 tests
42 bad_test_file_2( 0, q(msout/bad_msout_infile2) ); # 2 tests
45 # tests to run for api version >= 1.1.8
46 SKIP: {
47 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.8), 75
48 unless $api_version >= qv('1.1.8');
50 test_file_1( 0, "msout/msout_infile1", 100 );
51 test_file_2( 0, "msout/msout_infile2", 10 );
52 test_file_1( 0, q(msout/msout_infile4), 100 );
53 bad_test_file_1( 0, q(msout/bad_msout_infile1), 1000 );
54 bad_test_file_2( 0, q(msout/bad_msout_infile2), 1000 );
55 bad_n_sites( 0, q(msout/msout_infile1) ); # 2 tests
58 sub create_dir {
60 my $dir = shift;
62 $dir = Bio::Root::Test::test_input_file($dir);
64 unless ( -d $dir ) {
65 mkpath($dir);
69 sub remove_dir {
71 my $dir = shift;
73 $dir = Bio::Root::Test::test_input_file($dir);
75 if ( -d $dir ) {
76 rmtree($dir);
78 else { warn "Tried to remove $dir, but it does not exist" }
81 sub test_file_1 {
82 ##############################################################################
83 ## Test file 1
84 ##############################################################################
86 my $gzip = shift;
87 my $infile = shift;
88 my $n_sites = shift;
89 $infile = Bio::Root::Test::test_input_file($infile);
91 # the files are now part of the git repo and don't have to be printed
92 # print_file1( $infile, $gzip );
94 my $file_sequence = $infile;
95 if ($gzip) {
96 $file_sequence = "gzip -dc <$file_sequence |";
98 my $msout = Bio::SeqIO->new(
99 -file => "$file_sequence",
100 -format => 'msout',
101 -n_sites => $n_sites,
104 isa_ok( $msout, 'Bio::SeqIO::msout' );
106 my $rh_base_conversion_table = $msout->get_base_conversion_table;
108 my %attributes = (
109 RUNS => 3,
110 SEGSITES => 7,
111 N_SITES => $n_sites,
112 SEEDS => [qw(1 1 1)],
113 MS_INFO_LINE => 'ms 6 3 -s 7 -I 3 3 2 1',
114 TOT_RUN_HAPS => 6,
115 POPS => [qw(3 2 1)],
116 NEXT_RUN_NUM => 1,
117 LAST_READ_HAP_NUM => 0,
118 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
119 CURRENT_RUN_SEGSITES => 7
122 foreach my $attribute ( keys %attributes ) {
123 my $func = lc($attribute);
125 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
126 $func = ucfirst($func);
129 $func = 'get_' . $func;
130 my @returns = $msout->$func();
131 my ( $return, $got );
133 # If there were more than one return value, then compare references to
134 # arrays instead of scalars
135 unless ( @returns > 1 ) {
136 $got = shift @returns;
138 else { $got = \@returns }
140 my $expected = $attributes{$attribute};
142 if ( defined $got && defined $expected ) {
143 is_deeply( $got, $expected, "Get $attribute" );
145 else { is_deeply( $got, $expected, "Get $attribute" ) }
148 # Testing next_hap at beginning of run
149 my @data_got =
150 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
151 my @data_expected;
152 if ( !defined($n_sites) ) {
153 @data_expected = qw(1111111);
155 else {
156 @data_expected =
157 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000);
159 is_deeply( \@data_got, \@data_expected,
160 "Get next_hap at beginning of run" );
162 # Testing next_hap after beginning of run
163 @data_got =
164 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
165 if ( !defined($n_sites) ) {
166 @data_expected = qw(5555555);
168 else {
169 @data_expected =
170 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
172 is_deeply( \@data_got, \@data_expected,
173 "Get next_hap after beginning of run" );
175 # Surprise test! testing msout::outgroup
176 my $outgroup = $msout->outgroup;
177 is( $outgroup, 1, "Testing msout::outgroup" );
179 # Testing next_pop after beginning of pop
180 @data_got =
181 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
182 if ( !defined($n_sites) ) {
183 @data_expected = qw(4444444);
185 else {
186 @data_expected =
187 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
189 is_deeply( \@data_got, \@data_expected,
190 "Get next_pop after beginning of pop" );
192 # Testing next_pop at beginning of pop
193 @data_got =
194 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
195 if ( !defined($n_sites) ) {
196 @data_expected = qw(4444444 5555555);
198 else {
199 @data_expected =
200 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
202 is_deeply( \@data_got, \@data_expected,
203 "Get next_pop at beginning of pop" );
205 # Testing next_run after beginning of run
206 @data_got =
207 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
208 if ( !defined($n_sites) ) {
209 @data_expected = qw(4444444);
211 else {
212 @data_expected =
213 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
215 is_deeply( \@data_got, \@data_expected,
216 "Get next_run after beginning of run" );
218 # Testing next_pop at beginning of run
219 @data_got =
220 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
221 if ( !defined($n_sites) ) {
222 @data_expected = qw(5555555 5555555 5555555);
224 else {
225 @data_expected =
226 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
228 is_deeply( \@data_got, \@data_expected,
229 "Get next_pop at beginning of run" );
231 # Testing next_hap after pop
232 @data_got = $msout->get_next_hap;
233 @data_expected = qw(1010101);
234 is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );
236 # Testing next_run after pop and hap
237 @data_got =
238 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
239 if ( !defined($n_sites) ) {
240 @data_expected = qw(1111111 1515151);
242 else {
243 @data_expected =
244 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
246 is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );
248 # Testing next_run at beginning of run
249 @data_got =
250 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
251 if ( !defined($n_sites) ) {
252 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
254 else {
255 @data_expected =
256 qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
258 is_deeply( \@data_got, \@data_expected,
259 "Get next_run at beginning of run" );
261 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
264 sub test_file_2 {
265 ##############################################################################
266 ## Test file 2
267 ##############################################################################
269 my $gzip = shift;
270 my $infile = shift;
271 my $n_sites = shift;
272 $infile = Bio::Root::Test::test_input_file($infile);
274 # the files are now part of the git repo and don't have to be printed
275 # print_file2( $infile, $gzip );
277 my $file_sequence = $infile;
278 if ($gzip) {
279 $file_sequence = "gzip -dc <$file_sequence |";
282 my $msout = Bio::SeqIO->new(
283 -file => "$file_sequence",
284 -format => 'msout',
285 -n_sites => $n_sites,
288 isa_ok( $msout, 'Bio::SeqIO::msout' );
290 my %attributes = (
291 RUNS => 3,
292 SEGSITES => 7,
293 N_SITES => $n_sites,
294 SEEDS => [qw(1 1 1)],
295 MS_INFO_LINE => 'ms 6 3',
296 TOT_RUN_HAPS => 6,
297 POPS => 6,
298 NEXT_RUN_NUM => 1,
299 LAST_READ_HAP_NUM => 0,
300 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
301 CURRENT_RUN_SEGSITES => 7
304 foreach my $attribute ( keys %attributes ) {
305 my $func = lc($attribute);
307 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
308 $func = ucfirst($func);
311 $func = 'get_' . $func;
312 my @returns = $msout->$func();
313 my ( $return, $got );
315 # If there were more than one return value, then compare references to
316 # arrays instead of scalars
317 unless ( @returns > 1 ) {
318 $got = shift @returns;
320 else { $got = \@returns }
322 my $expected = $attributes{$attribute};
324 if ( defined $got && defined $expected ) {
325 is_deeply( $got, $expected, "Get $attribute" );
327 else { is_deeply( $got, $expected, "Get $attribute" ) }
330 my $rh_base_conversion_table = $msout->get_base_conversion_table;
332 # Testing next_hap at beginning of run
333 my @data_got = $msout->get_next_hap;
334 my @data_expected = '1111111';
335 is_deeply( \@data_got, \@data_expected,
336 "Get next_hap at beginning of run" );
338 # Testing next_hap after beginning of run
339 @data_got =
340 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
341 if ( !defined($n_sites) ) {
342 @data_expected = '5555555';
344 else {
345 @data_expected = '5555055500';
347 is_deeply( \@data_got, \@data_expected,
348 "Get next_hap after beginning of run" );
350 # Surprise test! testing msout::outgroup
351 my $outgroup = $msout->outgroup;
352 is( $outgroup, 0, "Testing msout::outgroup" );
354 # Testing next_pop after beginning of pop
355 @data_got =
356 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
357 if ( !defined($n_sites) ) {
358 @data_expected = qw(4444444 4444444 5555555 4444444);
360 else {
361 @data_expected = qw(4444044400 4444044400 5555055500 4444044400);
363 is_deeply( \@data_got, \@data_expected,
364 "Get next_pop after beginning of pop" );
366 # Testing next_pop at beginning of pop/run
367 @data_got =
368 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
369 if ( !defined($n_sites) ) {
370 @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
372 else {
373 @data_expected =
374 qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100);
376 is_deeply( \@data_got, \@data_expected,
377 "Get next_pop at beginning of pop/run" );
379 # Testing next_run at beginning of run
380 @data_got =
381 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
382 if ( !defined($n_sites) ) {
383 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
385 else {
386 @data_expected =
387 qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100);
389 is_deeply( \@data_got, \@data_expected,
390 "Get next_run at beginning of run" );
392 # Testing next_hap at beginning of run 2
393 @data_got =
394 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
395 if ( !defined($n_sites) ) {
396 @data_expected = '1515151';
398 else {
399 @data_expected = '1515015100';
401 is_deeply( \@data_got, \@data_expected,
402 "Get next_hap at beginning of run 2" );
404 # Testing next_run after hap
405 @data_got =
406 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
407 if ( !defined($n_sites) ) {
408 @data_expected = qw(5050505 5151515 5555555 5454545 5454545);
410 else {
411 @data_expected =
412 qw(5050050500 5151051500 5555055500 5454054500 5454054500);
414 is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );
416 is( $msout->get_next_run_num, 5, 'next run should be 5.' );
418 # getting the last hap of the file via next hap
419 # Testing next_run after hap
420 @data_got = $msout->get_next_hap;
421 @data_expected = qw( 5555555 );
422 is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" );
426 sub test_file_3 {
427 ##############################################################################
428 ## Test file 3
429 ##############################################################################
431 my $gzip = shift;
432 my $infile = shift;
433 $infile = Bio::Root::Test::test_input_file($infile);
435 # the files are now part of the git repo and don't have to be printed
436 # print_file3( $infile, $gzip );
438 my $file_sequence = $infile;
439 if ($gzip) {
440 $file_sequence = "gzip -dc <$file_sequence |";
442 my $msout = Bio::SeqIO->new(
443 -file => "$file_sequence",
444 -format => 'msout',
447 isa_ok( $msout, 'Bio::SeqIO::msout' );
449 my $rh_base_conversion_table = $msout->get_base_conversion_table;
451 my %attributes = (
452 RUNS => 1,
453 SEGSITES => 7,
454 SEEDS => [qw(1 1 1)],
455 MS_INFO_LINE => 'ms 3 1',
456 TOT_RUN_HAPS => 3,
457 POPS => 3,
458 NEXT_RUN_NUM => 1,
459 LAST_READ_HAP_NUM => 0,
460 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
461 CURRENT_RUN_SEGSITES => 7
464 foreach my $attribute ( keys %attributes ) {
465 my $func = lc($attribute);
467 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
468 $func = ucfirst($func);
471 $func = 'get_' . $func;
472 my @returns = $msout->$func();
473 my ( $return, $got );
475 # If there were more than one return value, then compare references to
476 # arrays instead of scalars
477 unless ( @returns > 1 ) {
478 $got = shift @returns;
480 else { $got = \@returns }
482 my $expected = $attributes{$attribute};
484 if ( defined $got && defined $expected ) {
485 is_deeply( $got, $expected, "Get $attribute" );
487 else { is_deeply( $got, $expected, "Get $attribute" ) }
490 # Testing next_hap at beginning of run
491 my @data_got =
492 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
493 my @data_expected = qw(1111111 5555555 4444444);
494 is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" );
496 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
498 # Testing what happens when we read from empty stream
499 @data_got = $msout->get_next_pop;
500 @data_expected = ();
501 is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" );
503 # Testing what happens when we read from empty stream
504 @data_got = $msout->get_next_run;
505 @data_expected = ();
506 is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );
508 # Testing what happens when we read from empty stream
509 @data_got = $msout->get_next_hap;
510 @data_expected = undef;
511 is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );
513 # Testing what happens when we read from empty stream
514 @data_got = $msout->get_next_seq;
515 @data_expected = ();
516 is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );
520 sub print_to_file {
521 my ( $ra_in, $out ) = @_;
522 open my $OUT, '>', $out or die "\nCould not write outfile '$out': $!\n";
523 print $OUT ("@$ra_in");
524 close $OUT;
527 sub convert_bases_to_nums {
529 my ( $rh_base_conversion_table, @seqs ) = @_;
531 my @out_seqstrings;
532 foreach my $seq (@seqs) {
533 my $seqstring = $seq->seq;
534 foreach my $base ( keys %{$rh_base_conversion_table} ) {
535 $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
537 push @out_seqstrings, $seqstring;
540 return @out_seqstrings;
544 sub bad_test_file_1 {
545 ##############################################################################
546 ## Bad Test file 1
547 ##############################################################################
549 # This sub tests to see if msout.pm will catch if the msinfo line's
550 # advertized haps are less than are actually in the file
552 my $gzip = shift;
553 my $infile = shift;
554 my $n_sites = shift;
555 $infile = test_input_file($infile);
557 my $file_sequence = $infile;
558 if ($gzip) {
559 $file_sequence = "gunzip -c <$file_sequence |";
561 my $msout = Bio::SeqIO->new(
562 -file => "$file_sequence",
563 -format => 'msout',
564 -n_sites => $n_sites,
567 isa_ok( $msout, 'Bio::SeqIO::msout' );
569 throws_ok { $msout->get_next_run }
570 qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
571 q(Caught error in bad msout file 1);
575 sub bad_test_file_2 {
576 ##############################################################################
577 ## Bad Test file 2
578 ##############################################################################
580 # This sub tests to see if msout.pm will catch if the msinfo line's
581 # advertized haps are more than are actually in the file
583 my $gzip = shift;
584 my $infile = shift;
585 my $n_sites = shift;
586 $infile = test_input_file($infile);
588 my $file_sequence = $infile;
589 if ($gzip) {
590 $file_sequence = "gunzip -c <$file_sequence |";
592 my $msout = Bio::SeqIO->new(
593 -file => "$file_sequence",
594 -format => 'msout',
595 -n_sites => $n_sites,
598 isa_ok( $msout, 'Bio::SeqIO::msout' );
600 throws_ok { $msout->get_next_run }
601 qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
602 q(Caught error in bad msout file 2);
606 sub bad_n_sites {
607 ##############################################################################
608 ## Bad n_sites
609 ##############################################################################
611 # this sub tests if msout.pm dies when n_sites is smaller than segsites
612 my $gzip = shift;
613 my $infile = shift;
614 $infile = Bio::Root::Test::test_input_file($infile);
616 my $file_sequence = $infile;
617 if ($gzip) {
618 $file_sequence = "gzip -dc <$file_sequence |";
620 my $msout = Bio::SeqIO->new(
621 -file => "$file_sequence",
622 -format => 'msout',
625 # test nsites -1
626 throws_ok { $msout->set_n_sites(-1) } qr|first argument needs to be a positive integer. argument supplied: -1|;
628 # test nsites smaller than next hap
629 $msout->set_n_sites(1);
630 throws_ok{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK';