You must actually test data if you want this module to be included in the release...
[bioperl-live.git] / t / Microarray / Tools / ReseqChip.t
blobbee56d1560b89bf03f1bfa887fa83ab6fca027fe
1 #!/usr/bin/perl
3 use strict;
5 BEGIN {
6 use lib '.';
7 use Bio::Root::Test;
8 test_begin(-tests => 7,
9 -requires_modules => [qw(Statistics::Frequency
10 Spreadsheet::ParseExcel
11 Spreadsheet::WriteExcel)]
13 use_ok('Bio::Microarray::Tools::ReseqChip');
16 my $DEBUG = test_debug();
18 sub process_sample($$$$$$$) {
20 my ($myReseqChip, $aln, $ind_id, $options_hash, $newseq_output_filename, $recalls_output_filename, $workbook) = @_;
21 print "process sample $ind_id ...\n" if $DEBUG;
23 $aln->sort_alphabetically;
24 $myReseqChip->write_alignment2xls($aln, $workbook, $ind_id, 'human_mtDNA_RCRS', 1);
25 ok(-e $workbook, 'write_alignment2xls');
26 my ($newseq,$dummy);
27 $newseq=$myReseqChip->calc_sequence($aln, $dummy, $options_hash, $recalls_output_filename);
28 ok(-e $recalls_output_filename, 'calc_sequence');
29 $myReseqChip->write2fasta($newseq, $ind_id, $newseq_output_filename, 1);
30 ok(-e $newseq_output_filename, 'write2fasta');
34 ###input files
35 my $Affy_frags_design_filename = test_input_file('ReseqChip_mtDNA_design_annotation_file_FINAL.xls');
36 my $format = 'affy_mitochip_v2';
37 my $Affy_sample_fasta_file = test_input_file('ReseqChip_ExampleData.fasta');
38 my $Mito_reference_fasta_file = test_input_file('ReseqChip_RefSeq.fasta');
40 #my $tmpdir = File::Spec->catfile(qw(t tmp));
41 #mkdir($tmpdir,0777);
43 ###output files
44 my $xls_filename=test_output_file();
45 my $recalls_output_filename=test_output_file();
46 my $newseq_output_filename=test_output_file();
48 my $in = Bio::SeqIO->new(-file => $Affy_sample_fasta_file,
49 -format => 'Fasta');
50 my $in_refseq = Bio::SeqIO->new(-file => $Mito_reference_fasta_file,
51 -format => 'Fasta');
52 my $refseq = $in_refseq->next_seq();
53 my %ref_seq_max_ins_hash=(3106 => 1);
55 my $myReseqChip = Bio::Microarray::Tools::ReseqChip->new($Affy_frags_design_filename, $format, \%ref_seq_max_ins_hash, $refseq);
57 my %options_hash=(
58 include_main_sequence => 1,
59 insertions => 1,
60 deletions => 1,
61 depth_ins => 1,
62 depth_del => 9,
63 depth => 1,
64 # depth_ins => 0,
65 # depth_del => 0,
66 # depth => 0,
67 consider_context => 1,
68 flank_left => 10,
69 flank_right => 10,
70 allowed_n_in_flank => 0,
71 # allowed_n_in_flank => 20,
72 flank_left_ins => 4,
73 flank_right_ins => 4,
74 allowed_n_in_flank_ins => 1,
75 # allowed_n_in_flank_ins => 21,
76 flank_size_weak => 1,
77 call_threshold => 55,
78 ins_threshold => 35,
79 del_threshold => 75,
80 swap_ins => 1,
81 # start_pos=> 300,
82 # stop_pos=> 320,
86 ##general options
87 $options_hash{include_main_sequence}=1;
88 $options_hash{insertions}=1;
89 $options_hash{deletions}=1;
90 $options_hash{flank_size_weak}=1;
91 $options_hash{consider_context}=1;
92 $options_hash{swap_ins}=1;
94 ##options for diploid model
95 $options_hash{depth}=10;
96 $options_hash{depth_del}=10;
98 $options_hash{call_threshold}=60;
99 $options_hash{del_threshold}=60;
101 $options_hash{flank_left}=1;
102 $options_hash{flank_right}=1;
104 $options_hash{allowed_n_in_flank}=2;
106 ##ins options
107 $options_hash{depth_ins}=1;
108 $options_hash{ins_threshold}=60;
109 $options_hash{flank_left_ins}=2;
110 $options_hash{flank_right_ins}=2;
111 $options_hash{allowed_n_in_flank_ins}=2;
113 ##options for haploid model #1
114 $options_hash{depth}=1;
115 $options_hash{depth_del}=1;
117 $options_hash{call_threshold}=90;
118 $options_hash{del_threshold}=90;
120 $options_hash{flank_left}=5;
121 $options_hash{flank_right}=5;
123 $options_hash{allowed_n_in_flank}=0;
125 ##hap model 2
126 $options_hash{depth}=10;
127 $options_hash{depth_del}=10;
129 $options_hash{call_threshold}=90;
130 $options_hash{del_threshold}=90;
132 $options_hash{flank_left}=1;
133 $options_hash{flank_right}=1;
135 $options_hash{allowed_n_in_flank}=1;
137 my $ind_id="";
138 my $ind_id_old="";
139 my $workbook = Spreadsheet::WriteExcel->new($xls_filename);
140 my $j=1;
141 my $aln = Bio::SimpleAlign->new();
142 while ( (my $seq = $in->next_seq())) {
143 my $locseq;
144 my $test_complete_seq=($seq->id =~ /human_mtDNA_RCRS/);
145 if ($test_complete_seq==1) {
146 $ind_id=$seq->id;
148 else {$test_complete_seq=0;}
149 if (!$test_complete_seq) {
150 $locseq=$myReseqChip->insert_gaps2frag($seq);
151 $aln->add_seq($locseq);
153 else {
154 if ($aln->length>0) {
155 process_sample($myReseqChip, $aln, $ind_id_old, \%options_hash,
156 $newseq_output_filename, $recalls_output_filename, $workbook);
158 $ind_id_old=$ind_id;
160 $aln = new Bio::SimpleAlign();
161 $locseq=$myReseqChip->insert_gaps2reference_sequence($seq);
162 $aln->add_seq($locseq);
163 $j++;
167 process_sample($myReseqChip, $aln, $ind_id_old, \%options_hash,
168 $newseq_output_filename, $recalls_output_filename, $workbook);
170 $workbook->close();