3 our $API_VERSION = $Bio::SeqIO
::msout
::API_VERSION
;
5 use File
::Path
qw(mkpath rmtree);
12 -requires_modules
=> [q
(Bio
::SeqIO
::msout
)],
13 -requires_networking
=> 0
16 use_ok
('Bio::SeqIO::msout');
20 # skip tests if the msout.pm module is too old.
21 my $api_version = $Bio::SeqIO
::msout
::API_VERSION
;
22 cmp_ok
( $api_version, '>=', qv
('1.1.5'),
23 "Bio::SeqIO::msout is at least api version 1.1.5" );
25 test_file_1
( 0, "msout/msout_infile1" ); # 23 tests
26 test_file_2
( 0, "msout/msout_infile2" ); # 22 tests
27 test_file_3
( 0, "msout/msout_infile3" ); # 17 tests
29 # tests to run for api versions >= 1.1.6
31 skip q
($Bio::SeqIO
::msout
::API_VERSION
< 1.1.6), 22
32 unless $api_version >= qv
('1.1.6');
33 test_file_1
( 0, q
(msout
/msout_infile4
) );
36 # tests to run for api versions >= 1.1.7
38 skip q
($Bio::SeqIO
::msout
::API_VERSION
< 1.1.7), 4
39 unless $api_version >= qv
('1.1.7');
40 bad_test_file_1
( 0, q
(msout
/bad_msout_infile1
) ); # 2 tests
41 bad_test_file_2
( 0, q
(msout
/bad_msout_infile2
) ); # 2 tests
44 # tests to run for api version >= 1.1.8
46 skip q
($Bio::SeqIO
::msout
::API_VERSION
< 1.1.8), 75
47 unless $api_version >= qv
('1.1.8');
49 test_file_1
( 0, "msout/msout_infile1", 100 );
50 test_file_2
( 0, "msout/msout_infile2", 10 );
51 test_file_1
( 0, q
(msout
/msout_infile4
), 100 );
52 bad_test_file_1
( 0, q
(msout
/bad_msout_infile1
), 1000 );
53 bad_test_file_2
( 0, q
(msout
/bad_msout_infile2
), 1000 );
54 bad_n_sites
( 0, q
(msout
/msout_infile1
) ); # 2 tests
61 $dir = Bio
::Root
::Test
::test_input_file
($dir);
72 $dir = Bio
::Root
::Test
::test_input_file
($dir);
77 else { warn "Tried to remove $dir, but it does not exist" }
81 ##############################################################################
83 ##############################################################################
88 $infile = Bio
::Root
::Test
::test_input_file
($infile);
90 # the files are now part of the git repo and don't have to be printed
91 # print_file1( $infile, $gzip );
93 my $file_sequence = $infile;
95 $file_sequence = "gzip -dc <$file_sequence |";
97 my $msout = Bio
::SeqIO
->new(
98 -file
=> "$file_sequence",
100 -n_sites
=> $n_sites,
103 isa_ok
( $msout, 'Bio::SeqIO::msout' );
105 my $rh_base_conversion_table = $msout->get_base_conversion_table;
111 SEEDS
=> [qw(1 1 1)],
112 MS_INFO_LINE
=> 'ms 6 3 -s 7 -I 3 3 2 1',
116 LAST_READ_HAP_NUM
=> 0,
117 POSITIONS
=> [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
118 CURRENT_RUN_SEGSITES
=> 7
121 foreach my $attribute ( keys %attributes ) {
122 my $func = lc($attribute);
124 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
125 $func = ucfirst($func);
128 $func = 'get_' . $func;
129 my @returns = $msout->$func();
130 my ( $return, $got );
132 # If there were more than one return value, then compare references to
133 # arrays instead of scalars
134 unless ( @returns > 1 ) {
135 $got = shift @returns;
137 else { $got = \
@returns }
139 my $expected = $attributes{$attribute};
141 if ( defined $got && defined $expected ) {
142 is_deeply
( $got, $expected, "Get $attribute" );
144 else { is_deeply
( $got, $expected, "Get $attribute" ) }
147 # Testing next_hap at beginning of run
149 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_seq );
151 if ( !defined($n_sites) ) {
152 @data_expected = qw(1111111);
156 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000);
158 is_deeply
( \
@data_got, \
@data_expected,
159 "Get next_hap at beginning of run" );
161 # Testing next_hap after beginning of run
163 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_seq );
164 if ( !defined($n_sites) ) {
165 @data_expected = qw(5555555);
169 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
171 is_deeply
( \
@data_got, \
@data_expected,
172 "Get next_hap after beginning of run" );
174 # Surprise test! testing msout::outgroup
175 my $outgroup = $msout->outgroup;
176 is
( $outgroup, 1, "Testing msout::outgroup" );
178 # Testing next_pop after beginning of pop
180 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
181 if ( !defined($n_sites) ) {
182 @data_expected = qw(4444444);
186 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
188 is_deeply
( \
@data_got, \
@data_expected,
189 "Get next_pop after beginning of pop" );
191 # Testing next_pop at beginning of pop
193 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
194 if ( !defined($n_sites) ) {
195 @data_expected = qw(4444444 5555555);
199 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
201 is_deeply
( \
@data_got, \
@data_expected,
202 "Get next_pop at beginning of pop" );
204 # Testing next_run after beginning of run
206 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_run );
207 if ( !defined($n_sites) ) {
208 @data_expected = qw(4444444);
212 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
214 is_deeply
( \
@data_got, \
@data_expected,
215 "Get next_run after beginning of run" );
217 # Testing next_pop at beginning of run
219 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
220 if ( !defined($n_sites) ) {
221 @data_expected = qw(5555555 5555555 5555555);
225 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
227 is_deeply
( \
@data_got, \
@data_expected,
228 "Get next_pop at beginning of run" );
230 # Testing next_hap after pop
231 @data_got = $msout->get_next_hap;
232 @data_expected = qw(1010101);
233 is_deeply
( \
@data_got, \
@data_expected, "Get next_hap after pop" );
235 # Testing next_run after pop and hap
237 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_run );
238 if ( !defined($n_sites) ) {
239 @data_expected = qw(1111111 1515151);
243 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
245 is_deeply
( \
@data_got, \
@data_expected, "Get next_run after pop and hap" );
247 # Testing next_run at beginning of run
249 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_run );
250 if ( !defined($n_sites) ) {
251 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
255 qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
257 is_deeply
( \
@data_got, \
@data_expected,
258 "Get next_run at beginning of run" );
260 is
( $msout->get_next_run_num, undef, 'have all lines been read?' );
264 ##############################################################################
266 ##############################################################################
271 $infile = Bio
::Root
::Test
::test_input_file
($infile);
273 # the files are now part of the git repo and don't have to be printed
274 # print_file2( $infile, $gzip );
276 my $file_sequence = $infile;
278 $file_sequence = "gzip -dc <$file_sequence |";
281 my $msout = Bio
::SeqIO
->new(
282 -file
=> "$file_sequence",
284 -n_sites
=> $n_sites,
287 isa_ok
( $msout, 'Bio::SeqIO::msout' );
293 SEEDS
=> [qw(1 1 1)],
294 MS_INFO_LINE
=> 'ms 6 3',
298 LAST_READ_HAP_NUM
=> 0,
299 POSITIONS
=> [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
300 CURRENT_RUN_SEGSITES
=> 7
303 foreach my $attribute ( keys %attributes ) {
304 my $func = lc($attribute);
306 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
307 $func = ucfirst($func);
310 $func = 'get_' . $func;
311 my @returns = $msout->$func();
312 my ( $return, $got );
314 # If there were more than one return value, then compare references to
315 # arrays instead of scalars
316 unless ( @returns > 1 ) {
317 $got = shift @returns;
319 else { $got = \
@returns }
321 my $expected = $attributes{$attribute};
323 if ( defined $got && defined $expected ) {
324 is_deeply
( $got, $expected, "Get $attribute" );
326 else { is_deeply
( $got, $expected, "Get $attribute" ) }
329 my $rh_base_conversion_table = $msout->get_base_conversion_table;
331 # Testing next_hap at beginning of run
332 my @data_got = $msout->get_next_hap;
333 my @data_expected = '1111111';
334 is_deeply
( \
@data_got, \
@data_expected,
335 "Get next_hap at beginning of run" );
337 # Testing next_hap after beginning of run
339 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_seq );
340 if ( !defined($n_sites) ) {
341 @data_expected = '5555555';
344 @data_expected = '5555055500';
346 is_deeply
( \
@data_got, \
@data_expected,
347 "Get next_hap after beginning of run" );
349 # Surprise test! testing msout::outgroup
350 my $outgroup = $msout->outgroup;
351 is
( $outgroup, 0, "Testing msout::outgroup" );
353 # Testing next_pop after beginning of pop
355 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
356 if ( !defined($n_sites) ) {
357 @data_expected = qw(4444444 4444444 5555555 4444444);
360 @data_expected = qw(4444044400 4444044400 5555055500 4444044400);
362 is_deeply
( \
@data_got, \
@data_expected,
363 "Get next_pop after beginning of pop" );
365 # Testing next_pop at beginning of pop/run
367 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
368 if ( !defined($n_sites) ) {
369 @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
373 qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100);
375 is_deeply
( \
@data_got, \
@data_expected,
376 "Get next_pop at beginning of pop/run" );
378 # Testing next_run at beginning of run
380 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_run );
381 if ( !defined($n_sites) ) {
382 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
386 qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100);
388 is_deeply
( \
@data_got, \
@data_expected,
389 "Get next_run at beginning of run" );
391 # Testing next_hap at beginning of run 2
393 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_seq );
394 if ( !defined($n_sites) ) {
395 @data_expected = '1515151';
398 @data_expected = '1515015100';
400 is_deeply
( \
@data_got, \
@data_expected,
401 "Get next_hap at beginning of run 2" );
403 # Testing next_run after hap
405 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_run );
406 if ( !defined($n_sites) ) {
407 @data_expected = qw(5050505 5151515 5555555 5454545 5454545);
411 qw(5050050500 5151051500 5555055500 5454054500 5454054500);
413 is_deeply
( \
@data_got, \
@data_expected, "Get next_run after hap" );
415 is
( $msout->get_next_run_num, 5, 'next run should be 5.' );
417 # getting the last hap of the file via next hap
418 # Testing next_run after hap
419 @data_got = $msout->get_next_hap;
420 @data_expected = qw( 5555555 );
421 is_deeply
( \
@data_got, \
@data_expected, "Get last hap through next_hap" );
426 ##############################################################################
428 ##############################################################################
432 $infile = Bio
::Root
::Test
::test_input_file
($infile);
434 # the files are now part of the git repo and don't have to be printed
435 # print_file3( $infile, $gzip );
437 my $file_sequence = $infile;
439 $file_sequence = "gzip -dc <$file_sequence |";
441 my $msout = Bio
::SeqIO
->new(
442 -file
=> "$file_sequence",
446 isa_ok
( $msout, 'Bio::SeqIO::msout' );
448 my $rh_base_conversion_table = $msout->get_base_conversion_table;
453 SEEDS
=> [qw(1 1 1)],
454 MS_INFO_LINE
=> 'ms 3 1',
458 LAST_READ_HAP_NUM
=> 0,
459 POSITIONS
=> [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
460 CURRENT_RUN_SEGSITES
=> 7
463 foreach my $attribute ( keys %attributes ) {
464 my $func = lc($attribute);
466 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
467 $func = ucfirst($func);
470 $func = 'get_' . $func;
471 my @returns = $msout->$func();
472 my ( $return, $got );
474 # If there were more than one return value, then compare references to
475 # arrays instead of scalars
476 unless ( @returns > 1 ) {
477 $got = shift @returns;
479 else { $got = \
@returns }
481 my $expected = $attributes{$attribute};
483 if ( defined $got && defined $expected ) {
484 is_deeply
( $got, $expected, "Get $attribute" );
486 else { is_deeply
( $got, $expected, "Get $attribute" ) }
489 # Testing next_hap at beginning of run
491 convert_bases_to_nums
( $rh_base_conversion_table, $msout->get_next_pop );
492 my @data_expected = qw(1111111 5555555 4444444);
493 is_deeply
( \
@data_got, \
@data_expected, "Get next_pop at end of run" );
495 is
( $msout->get_next_run_num, undef, 'have all lines been read?' );
497 # Testing what happens when we read from empty stream
498 @data_got = $msout->get_next_pop;
500 is_deeply
( \
@data_got, \
@data_expected, "Get next_pop at eof" );
502 # Testing what happens when we read from empty stream
503 @data_got = $msout->get_next_run;
505 is_deeply
( \
@data_got, \
@data_expected, "Get next_run at eof" );
507 # Testing what happens when we read from empty stream
508 @data_got = $msout->get_next_hap;
509 @data_expected = undef;
510 is_deeply
( \
@data_got, \
@data_expected, "Get next_hap at eof" );
512 # Testing what happens when we read from empty stream
513 @data_got = $msout->get_next_seq;
515 is_deeply
( \
@data_got, \
@data_expected, "Get next_seq at eof" );
520 my ( $ra_in, $out ) = @_;
521 open my $OUT, '>', $out or die "\nCould not write outfile '$out': $!\n";
522 print $OUT ("@$ra_in");
526 sub convert_bases_to_nums
{
528 my ( $rh_base_conversion_table, @seqs ) = @_;
531 foreach my $seq (@seqs) {
532 my $seqstring = $seq->seq;
533 foreach my $base ( keys %{$rh_base_conversion_table} ) {
534 $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
536 push @out_seqstrings, $seqstring;
539 return @out_seqstrings;
543 sub bad_test_file_1
{
544 ##############################################################################
546 ##############################################################################
548 # This sub tests to see if msout.pm will catch if the msinfo line's
549 # advertized haps are less than are actually in the file
554 $infile = test_input_file
($infile);
556 my $file_sequence = $infile;
558 $file_sequence = "gunzip -c <$file_sequence |";
560 my $msout = Bio
::SeqIO
->new(
561 -file
=> "$file_sequence",
563 -n_sites
=> $n_sites,
566 isa_ok
( $msout, 'Bio::SeqIO::msout' );
568 throws_ok
{ $msout->get_next_run }
569 qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
570 q
(Caught error
in bad msout file
1);
574 sub bad_test_file_2
{
575 ##############################################################################
577 ##############################################################################
579 # This sub tests to see if msout.pm will catch if the msinfo line's
580 # advertized haps are more than are actually in the file
585 $infile = test_input_file
($infile);
587 my $file_sequence = $infile;
589 $file_sequence = "gunzip -c <$file_sequence |";
591 my $msout = Bio
::SeqIO
->new(
592 -file
=> "$file_sequence",
594 -n_sites
=> $n_sites,
597 isa_ok
( $msout, 'Bio::SeqIO::msout' );
599 throws_ok
{ $msout->get_next_run }
600 qr/\'\/\
/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
601 q
(Caught error
in bad msout file
2);
606 ##############################################################################
608 ##############################################################################
610 # this sub tests if msout.pm dies when n_sites is smaller than segsites
613 $infile = Bio
::Root
::Test
::test_input_file
($infile);
615 my $file_sequence = $infile;
617 $file_sequence = "gzip -dc <$file_sequence |";
619 my $msout = Bio
::SeqIO
->new(
620 -file
=> "$file_sequence",
625 throws_ok
{ $msout->set_n_sites(-1) } qr
|first argument needs to be a positive integer
. argument supplied
: -1|;
627 # test nsites smaller than next hap
628 $msout->set_n_sites(1);
629 throws_ok
{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK';