no need to print these out to a file, we just store these in the repo
[bioperl-live.git] / t / SeqIO / msout.t
blobf88b3219f37a922246edf2e92dac727f2092667a
1 #!/usr/bin/perl -w
2 use version;
3 our $API_VERSION = qv('1.1.6');
5 use strict;
6 use File::Path qw(mkpath rmtree);
8 BEGIN {
9 use lib '.'; # for core package test scripts only
10 use Bio::Root::Test;
12 test_begin(
13 -tests => 85,
14 -requires_modules => [q(Bio::SeqIO::msout)],
15 -requires_networking => 0
18 use_ok('Bio::SeqIO::msout');
22 # skip tests if the msout.pm module is too old.
23 my $api_version = $Bio::SeqIO::msout::API_VERSION;
24 cmp_ok( $api_version,
25 '>=', qv('1.1.5'), "Bio::SeqIO::msout is at least api version 1.1.5" );
27 test_file_1( 0, "msout/msout_infile1" );
28 test_file_2( 0, "msout/msout_infile2" );
29 test_file_3( 0, "msout/msout_infile3" );
31 # tests to run for api versions >= 1.1.6
32 SKIP: {
33 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6) , 22 unless $api_version >= qv('1.1.6');
34 test_file_4( 0, q(msout/msout_infile4) );
37 sub create_dir {
39 my $dir = shift;
41 $dir = test_input_file($dir);
43 unless ( -d $dir ) {
44 mkpath($dir);
48 sub remove_dir {
50 my $dir = shift;
52 $dir = test_input_file($dir);
54 if ( -d $dir ) {
55 rmtree($dir);
57 else { warn "Tried to remove $dir, but it does not exist" }
60 sub test_file_1 {
61 ##############################################################################
62 ## Test file 1
63 ##############################################################################
65 my $gzip = shift;
66 my $infile = shift;
67 $infile = test_input_file($infile);
69 #print_file1( $infile, $gzip );
71 my $file_sequence = $infile;
72 if ($gzip) {
73 $file_sequence = "gunzip -c <$file_sequence |";
75 my $msout = Bio::SeqIO->new(
76 -file => "$file_sequence",
77 -format => 'msout',
80 isa_ok( $msout, 'Bio::SeqIO::msout' );
82 my $rh_base_conversion_table = $msout->get_base_conversion_table;
84 my %attributes = (
85 RUNS => 3,
86 SEGSITES => 7,
87 SEEDS => [qw(1 1 1)],
88 MS_INFO_LINE => 'ms 6 3 -s 7 -I 3 3 2 1',
89 TOT_RUN_HAPS => 6,
90 POPS => [qw(3 2 1)],
91 NEXT_RUN_NUM => 1,
92 LAST_READ_HAP_NUM => 0,
93 POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
94 CURRENT_RUN_SEGSITES => 7
97 foreach my $attribute ( keys %attributes ) {
98 my $func = lc($attribute);
100 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
101 $func = ucfirst($func);
104 $func = 'get_' . $func;
105 my @returns = $msout->$func();
106 my ( $return, $got );
108 # If there were more than one return value, then compare references to
109 # arrays instead of scalars
110 unless ( @returns > 1 ) {
111 $got = shift @returns;
113 else { $got = \@returns }
115 my $expected = $attributes{$attribute};
117 if ( defined $got && defined $expected ) {
118 is_deeply( $got, $expected, "Get $attribute" );
120 else { is_deeply( $got, $expected, "Get $attribute" ) }
123 # Testing next_hap at beginning of run
124 my @data_got =
125 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
126 my @data_expected = qw(1111111);
127 is_deeply( \@data_got, \@data_expected,
128 "Get next_hap at beginning of run" );
130 # Testing next_hap after beginning of run
131 @data_got =
132 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
133 @data_expected = qw(5555555);
134 is_deeply( \@data_got, \@data_expected,
135 "Get next_hap after beginning of run" );
137 # Surprise test! testing msout::outgroup
138 my $outgroup = $msout->outgroup;
139 is( $outgroup, 1, "Testing msout::outgroup" );
141 # Testing next_pop after beginning of pop
142 @data_got =
143 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
144 @data_expected = qw(4444444);
145 is_deeply( \@data_got, \@data_expected,
146 "Get next_pop after beginning of pop" );
148 # Testing next_pop at beginning of pop
149 @data_got =
150 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
151 @data_expected = qw(4444444 5555555);
152 is_deeply( \@data_got, \@data_expected,
153 "Get next_pop at beginning of pop" );
155 # Testing next_run after beginning of run
156 @data_got =
157 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
158 @data_expected = qw(4444444);
159 is_deeply( \@data_got, \@data_expected,
160 "Get next_run after beginning of run" );
162 # Testing next_pop at beginning of run
163 @data_got =
164 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
165 @data_expected = qw(5555555 5555555 5555555);
166 is_deeply( \@data_got, \@data_expected,
167 "Get next_pop at beginning of run" );
169 # Testing next_hap after pop
170 @data_got = $msout->get_next_hap;
171 @data_expected = qw(1010101);
172 is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );
174 # Testing next_run after pop and hap
175 @data_got =
176 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
177 @data_expected = qw(1111111 1515151);
178 is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );
180 # Testing next_run at beginning of run
181 @data_got =
182 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
183 @data_expected = qw(
184 1414141
185 1414141
186 1515151
187 1414141
188 1515151
189 1515151);
190 is_deeply( \@data_got, \@data_expected,
191 "Get next_run at beginning of run" );
193 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
196 sub test_file_2 {
197 ##############################################################################
198 ## Test file 2
199 ##############################################################################
201 my $gzip = shift;
202 my $infile = shift;
203 $infile = test_input_file($infile);
205 #print_file2( $infile, $gzip );
207 my $file_sequence = $infile;
208 if ($gzip) {
209 $file_sequence = "gunzip -c <$file_sequence |";
212 my $msout = Bio::SeqIO->new(
213 -file => "$file_sequence",
214 -format => 'msout',
217 isa_ok( $msout, 'Bio::SeqIO::msout' );
219 my %attributes = (
220 RUNS => 3,
221 SEGSITES => 7,
222 SEEDS => [qw(1 1 1)],
223 MS_INFO_LINE => 'ms 6 3',
224 TOT_RUN_HAPS => 6,
225 POPS => 6,
226 NEXT_RUN_NUM => 1,
227 LAST_READ_HAP_NUM => 0,
228 POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
229 CURRENT_RUN_SEGSITES => 7
232 foreach my $attribute ( keys %attributes ) {
233 my $func = lc($attribute);
235 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
236 $func = ucfirst($func);
239 $func = 'get_' . $func;
240 my @returns = $msout->$func();
241 my ( $return, $got );
243 # If there were more than one return value, then compare references to
244 # arrays instead of scalars
245 unless ( @returns > 1 ) {
246 $got = shift @returns;
248 else { $got = \@returns }
250 my $expected = $attributes{$attribute};
252 if ( defined $got && defined $expected ) {
253 is_deeply( $got, $expected, "Get $attribute" );
255 else { is_deeply( $got, $expected, "Get $attribute" ) }
258 my $rh_base_conversion_table = $msout->get_base_conversion_table;
260 # Testing next_hap at beginning of run
261 my @data_got = $msout->get_next_hap;
262 my @data_expected = '1111111';
263 is_deeply( \@data_got, \@data_expected,
264 "Get next_hap at beginning of run" );
266 # Testing next_hap after beginning of run
267 @data_got =
268 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
269 @data_expected = '5555555';
270 is_deeply( \@data_got, \@data_expected,
271 "Get next_hap after beginning of run" );
273 # Surprise test! testing msout::outgroup
274 my $outgroup = $msout->outgroup;
275 is( $outgroup, 0, "Testing msout::outgroup" );
277 # Testing next_pop after beginning of pop
278 @data_got =
279 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
280 @data_expected = qw(
281 4444444
282 4444444
283 5555555
284 4444444);
285 is_deeply( \@data_got, \@data_expected,
286 "Get next_pop after beginning of pop" );
288 # Testing next_pop at beginning of pop/run
289 @data_got =
290 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
291 @data_expected = qw(
292 5555555
293 5555555
294 5555555
295 1010101
296 1111111
297 1515151);
298 is_deeply( \@data_got, \@data_expected,
299 "Get next_pop at beginning of pop/run" );
301 # Testing next_run at beginning of run
302 @data_got =
303 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
304 @data_expected = qw(
305 1414141
306 1414141
307 1515151
308 1414141
309 1515151
310 1515151);
311 is_deeply( \@data_got, \@data_expected,
312 "Get next_run at beginning of run" );
314 # Testing next_hap at beginning of run 2
315 @data_got =
316 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
317 @data_expected = '1515151';
318 is_deeply( \@data_got, \@data_expected,
319 "Get next_hap at beginning of run 2" );
321 # Testing next_run after hap
322 @data_got =
323 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
324 @data_expected = qw(
325 5050505
326 5151515
327 5555555
328 5454545
329 5454545);
330 is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );
332 is( $msout->get_next_run_num, 5, 'next run should be 5.' );
334 # getting the last hap of the file via next hap
335 # Testing next_run after hap
336 @data_got = $msout->get_next_hap;
337 @data_expected = qw( 5555555 );
338 is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" );
342 sub test_file_3 {
343 ##############################################################################
344 ## Test file 3
345 ##############################################################################
347 my $gzip = shift;
348 my $infile = shift;
349 $infile = test_input_file($infile);
351 #print_file3( $infile, $gzip );
353 my $file_sequence = $infile;
354 if ($gzip) {
355 $file_sequence = "gunzip -c <$file_sequence |";
357 my $msout = Bio::SeqIO->new(
358 -file => "$file_sequence",
359 -format => 'msout',
362 isa_ok( $msout, 'Bio::SeqIO::msout' );
364 my $rh_base_conversion_table = $msout->get_base_conversion_table;
366 isa_ok( $msout, 'Bio::SeqIO::msout' );
368 my %attributes = (
369 RUNS => 1,
370 SEGSITES => 7,
371 SEEDS => [qw(1 1 1)],
372 MS_INFO_LINE => 'ms 3 1',
373 TOT_RUN_HAPS => 3,
374 POPS => 3,
375 NEXT_RUN_NUM => 1,
376 LAST_READ_HAP_NUM => 0,
377 POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
378 CURRENT_RUN_SEGSITES => 7
381 foreach my $attribute ( keys %attributes ) {
382 my $func = lc($attribute);
384 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
385 $func = ucfirst($func);
388 $func = 'get_' . $func;
389 my @returns = $msout->$func();
390 my ( $return, $got );
392 # If there were more than one return value, then compare references to
393 # arrays instead of scalars
394 unless ( @returns > 1 ) {
395 $got = shift @returns;
397 else { $got = \@returns }
399 my $expected = $attributes{$attribute};
401 if ( defined $got && defined $expected ) {
402 is_deeply( $got, $expected, "Get $attribute" );
404 else { is_deeply( $got, $expected, "Get $attribute" ) }
407 # Testing next_hap at beginning of run
408 my @data_got =
409 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
410 my @data_expected = qw(1111111 5555555 4444444);
411 is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" );
413 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
415 # Testing what happens when we read from empty stream
416 @data_got = $msout->get_next_pop;
417 @data_expected = ();
418 is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" );
420 # Testing what happens when we read from empty stream
421 @data_got = $msout->get_next_run;
422 @data_expected = ();
423 is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );
425 # Testing what happens when we read from empty stream
426 @data_got = $msout->get_next_hap;
427 @data_expected = undef;
428 is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );
430 # Testing what happens when we read from empty stream
431 @data_got = $msout->get_next_seq;
432 @data_expected = ();
433 is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );
437 sub test_file_4 {
438 ##############################################################################
439 ## Test file 4
440 ##############################################################################
442 # All this does is test to see if Bio::SeqIO::msout can handle ms output files
443 # with multiple newline characters randomly interspersed in the file.
445 my $gzip = shift;
446 my $infile = shift;
447 $infile = test_input_file($infile);
449 #print_file4( $infile, $gzip );
451 my $file_sequence = $infile;
452 if ($gzip) {
453 $file_sequence = "gunzip -c <$file_sequence |";
455 my $msout = Bio::SeqIO->new(
456 -file => "$file_sequence",
457 -format => 'msout',
460 isa_ok( $msout, 'Bio::SeqIO::msout' );
462 my $rh_base_conversion_table = $msout->get_base_conversion_table;
464 my %attributes = (
465 RUNS => 3,
466 SEGSITES => 7,
467 SEEDS => [qw(1 1 1)],
468 MS_INFO_LINE => 'ms 6 3 -s 7 -I 3 3 2 1',
469 TOT_RUN_HAPS => 6,
470 POPS => [qw(3 2 1)],
471 NEXT_RUN_NUM => 1,
472 LAST_READ_HAP_NUM => 0,
473 POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
474 CURRENT_RUN_SEGSITES => 7
477 foreach my $attribute ( keys %attributes ) {
478 my $func = lc($attribute);
480 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
481 $func = ucfirst($func);
484 $func = 'get_' . $func;
485 my @returns = $msout->$func();
486 my ( $return, $got );
488 # If there were more than one return value, then compare references to
489 # arrays instead of scalars
490 unless ( @returns > 1 ) {
491 $got = shift @returns;
493 else { $got = \@returns }
495 my $expected = $attributes{$attribute};
497 if ( defined $got && defined $expected ) {
498 is_deeply( $got, $expected, "Get $attribute" );
500 else { is_deeply( $got, $expected, "Get $attribute" ) }
503 # Testing next_hap at beginning of run
504 my @data_got =
505 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
506 my @data_expected = qw(1111111);
507 is_deeply( \@data_got, \@data_expected,
508 "Get next_hap at beginning of run" );
510 # Testing next_hap after beginning of run
511 @data_got =
512 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
513 @data_expected = qw(5555555);
514 is_deeply( \@data_got, \@data_expected,
515 "Get next_hap after beginning of run" );
517 # Surprise test! testing msout::outgroup
518 my $outgroup = $msout->outgroup;
519 is( $outgroup, 1, "Testing msout::outgroup" );
521 # Testing next_pop after beginning of pop
522 @data_got =
523 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
524 @data_expected = qw(4444444);
525 is_deeply( \@data_got, \@data_expected,
526 "Get next_pop after beginning of pop" );
528 # Testing next_pop at beginning of pop
529 @data_got =
530 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
531 @data_expected = qw(4444444 5555555);
532 is_deeply( \@data_got, \@data_expected,
533 "Get next_pop at beginning of pop" );
535 # Testing next_run after beginning of run
536 @data_got =
537 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
538 @data_expected = qw(4444444);
539 is_deeply( \@data_got, \@data_expected,
540 "Get next_run after beginning of run" );
542 # Testing next_pop at beginning of run
543 @data_got =
544 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
545 @data_expected = qw(5555555 5555555 5555555);
546 is_deeply( \@data_got, \@data_expected,
547 "Get next_pop at beginning of run" );
549 # Testing next_hap after pop
550 @data_got = $msout->get_next_hap;
551 @data_expected = qw(1010101);
552 is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );
554 # Testing next_run after pop and hap
555 @data_got =
556 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
557 @data_expected = qw(1111111 1515151);
558 is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );
560 # Testing next_run at beginning of run
561 @data_got =
562 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
563 @data_expected = qw(
564 1414141
565 1414141
566 1515151
567 1414141
568 1515151
569 1515151);
570 is_deeply( \@data_got, \@data_expected,
571 "Get next_run at beginning of run" );
573 is( $msout->get_next_run_num, undef, 'have all lines been read?' );
576 sub print_file1 {
578 my $destination = shift;
579 my $gzip = shift;
581 my $out = <<END
582 ms 6 3 -s 7 -I 3 3 2 1
583 1 1 1
586 segsites: 7
587 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
588 1111111
589 5555555
590 4444444
591 4444444
592 5555555
593 4444444
595 segsites: 7
596 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
597 5555555
598 5555555
599 5555555
600 1010101
601 1111111
602 1515151
604 segsites: 7
605 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
606 1414141
607 1414141
608 1515151
609 1414141
610 1515151
611 1515151
615 if ($gzip) {
616 $gzip = "| gzip";
618 else { $gzip = ' '; }
619 open OUT, "$gzip >$destination" or die "Unable to open $destination\n";
621 print OUT $out;
622 close OUT;
625 sub print_file2 {
627 my $destination = shift;
628 my $gzip = shift;
630 my $out = <<END
631 ms 6 3
632 1 1 1
635 segsites: 7
636 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
637 1111111
638 5555555
639 4444444
640 4444444
641 5555555
642 4444444
644 segsites: 7
645 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
646 5555555
647 5555555
648 5555555
649 1010101
650 1111111
651 1515151
653 segsites: 7
654 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
655 1414141
656 1414141
657 1515151
658 1414141
659 1515151
660 1515151
662 segsites: 7
663 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
664 1515151
665 5050505
666 5151515
667 5555555
668 5454545
669 5454545
671 segsites: 7
672 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
673 5555555
677 if ($gzip) {
678 $gzip = "| gzip";
680 else { $gzip = ' '; }
682 open OUT, "$gzip >$destination" or die "Unable to open $destination\n";
684 print OUT $out;
685 close OUT;
688 sub print_file3 {
690 my $destination = shift;
691 my $gzip = shift;
693 my $out = <<END ;
694 ms 3 1
695 1 1 1
698 segsites: 7
699 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
700 1111111
701 5555555
702 4444444
705 if ($gzip) {
706 $gzip = "| gzip";
708 else { $gzip = ' '; }
710 open OUT, "$gzip >$destination" or die "Unable to open $destination\n";
712 print OUT $out;
713 close OUT;
716 sub print_file4 {
718 my $destination = shift;
719 my $gzip = shift;
721 my $out = <<END
726 ms 6 3 -s 7 -I 3 3 2 1
730 1 1 1
734 segsites: 7
737 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
740 1111111
741 5555555
744 4444444
746 4444444
747 5555555
749 4444444
754 segsites: 7
755 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
756 5555555
757 5555555
761 5555555
762 1010101
763 1111111
764 1515151
767 segsites: 7
771 positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
773 1414141
774 1414141
775 1515151
776 1414141
777 1515151
778 1515151
787 if ($gzip) {
788 $gzip = "| gzip";
790 else { $gzip = ' '; }
791 open OUT, "$gzip >$destination" or die "Unable to open $destination\n";
793 print OUT $out;
794 close OUT;
798 sub print_to_file {
799 my ( $ra_in, $out ) = @_;
800 unless ( open OUT, ">$out" ) {
801 die "\nCould not open outfile $out!!\n\n";
803 print OUT ("@$ra_in");
804 close OUT;
807 sub convert_bases_to_nums {
809 my ( $rh_base_conversion_table, @seqs ) = @_;
811 my @out_seqstrings;
812 foreach my $seq (@seqs) {
813 my $seqstring = $seq->seq;
814 foreach my $base ( keys %{$rh_base_conversion_table} ) {
815 $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
817 push @out_seqstrings, $seqstring;
820 return @out_seqstrings;