# Test plan: 239 tests are declared, with Bio::Assembly::Tools::ContigSpectrum
# listed as a required module.
# NOTE(review): presumably test_begin skips the whole file when that module
# cannot be loaded - confirm against the test helper that provides it.
6 test_begin( -tests => 239,
7 -requires_modules => [ qw(Bio::Assembly::Tools::ContigSpectrum)] );
# Both modules exercised by this file must load.
8 use_ok 'Bio::Assembly::IO';
9 use_ok 'Bio::Assembly::Tools::ContigSpectrum';
# Open the contigspectrumtest.tigr fixture and read the next assembly from it;
# the result is expected to be a Bio::Assembly::Scaffold.
13 my $in = Bio::Assembly::IO->new(
14 -file => test_input_file('contigspectrumtest.tigr'),
17 isa_ok $in, 'Bio::Assembly::IO';
18 my $sc = $in->next_assembly;
19 isa_ok $sc, 'Bio::Assembly::Scaffold';
21 # Try all the get/set methods
# Each attribute is exercised as a pair of tests: the setter call must return
# a true value (ok), then the getter must hand back the value just stored (is).
22 ok my $csp = Bio::Assembly::Tools::ContigSpectrum->new, 'get/set methods';
23 isa_ok $csp, 'Bio::Assembly::Tools::ContigSpectrum';
26 ok $csp->nof_seq(123);
27 is $csp->nof_seq, 123;
28 ok $csp->nof_rep(456);
29 is $csp->nof_rep, 456;
30 ok $csp->max_size(789);
31 is $csp->max_size, 789;
32 ok $csp->nof_overlaps(111);
33 is $csp->nof_overlaps, 111;
34 ok $csp->min_overlap(50);
35 is $csp->min_overlap, 50;
36 ok $csp->avg_overlap(54.3);
37 is $csp->avg_overlap, 54.3;
38 ok $csp->min_identity(89.1);
39 is $csp->min_identity, 89.1;
40 ok $csp->avg_identity(98.7);
41 is $csp->avg_identity, 98.7;
42 ok $csp->avg_seq_len(123.456);
43 is $csp->avg_seq_len, 123.456;
44 ok $csp->eff_asm_params(1);
45 is $csp->eff_asm_params, 1;
47 # contig spectrum based on simple spectrum
# Only a spectrum hash is supplied here (no assembly), so the overlap and
# identity statistics are expected to stay at their empty values (0 or undef)
# and the assembly list is expected to be empty.
48 ok my $spectrum_csp = Bio::Assembly::Tools::ContigSpectrum->new, 'simple spectrum';
49 ok $spectrum_csp->spectrum({1=>1, 2=>2, 3=>3});
50 is $spectrum_csp->eff_asm_params, 0;
# 14 matches 1*1 + 2*2 + 3*3 for the spectrum {1=>1, 2=>2, 3=>3}.
51 is $spectrum_csp->nof_seq, 14;
52 is $spectrum_csp->max_size, 3;
53 is $spectrum_csp->nof_rep, 1;
54 is $spectrum_csp->nof_overlaps, 0;
55 is $spectrum_csp->min_overlap, undef;
56 is $spectrum_csp->avg_overlap, 0;
57 is $spectrum_csp->min_identity, undef;
58 is $spectrum_csp->avg_identity, 0;
59 is $spectrum_csp->avg_seq_len, 0;
60 is scalar $spectrum_csp->assembly, 0;
# to_string is called with modes 1, 2 and 3. Mode 1 is only checked for a true
# return value here; mode 2 must give the counts tab-separated and mode 3
# newline-separated.
62 ok my $string = $spectrum_csp->to_string(1);
64 ok $string = $spectrum_csp->to_string(2);
65 is $string, "1\t2\t3";
66 ok $string = $spectrum_csp->to_string(3);
67 is $string, "1\n2\n3";
# Contig spectrum score.
# NOTE(review): $test_csp and $spectrum are assigned without `my` in this
# section, and the spectra behind the undef / 0 / 1 expectations are not
# spelled out next to the constructor calls - confirm where they are set.
72 ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum), 'contig spectrum score';
73 is $test_csp->score, undef;
75 ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
76 is $test_csp->score, 0;
78 ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
79 is $test_csp->score, 1;
# Passing 240 to score() must give the same value as the argument-less score
# of the spectrum {1=>120, 120=>1} that is built just below.
80 float_is $test_csp->score(240), 0.248953974895397;
81 $spectrum = {1=>120, 120=>1};
82 ok $test_csp = Bio::Assembly::Tools::ContigSpectrum->new(-spectrum=>$spectrum);
83 float_is $test_csp->score, 0.248953974895397;
# A group of 183 tests is declared to need Graph::Undirected.
# NOTE(review): presumably test_skip skips that many tests when the module is
# missing - confirm against the test helper that provides it.
86 test_skip( -tests => 183, -requires_module => 'Graph::Undirected' ); #####
88 # mixed contig spectrum imported from assembly
# Built with -eff_asm_params => 1, so the overlap and identity statistics are
# expected to be populated in addition to the spectrum itself.
89 ok my $mixed_csp = Bio::Assembly::Tools::ContigSpectrum->new(
91 -eff_asm_params => 1 ), 'mixed contig spectrum';
92 is_deeply $mixed_csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}; # [0 3 0 0 0 1 0 0 1]
93 is $mixed_csp->eff_asm_params, 1;
94 is $mixed_csp->max_size, 9;
95 is $mixed_csp->nof_rep, 1;
# 21 matches 2*3 + 6*1 + 9*1 for the spectrum above.
96 is $mixed_csp->nof_seq, 21;
97 float_is $mixed_csp->avg_seq_len, 303.81;
98 is $mixed_csp->nof_overlaps, 16;
99 is $mixed_csp->min_overlap, 35;
100 float_is $mixed_csp->avg_overlap, 155.875;
101 float_is $mixed_csp->min_identity, 96.8421;
102 float_is $mixed_csp->avg_identity, 98.8826;
103 is scalar $mixed_csp->assembly, 1;
105 # dissolved contig spectrum
# Dissolve $mixed_csp for the identifier 'ZZZ'; the expected spectrum is two
# 1-contigs and one 2-contig (4 sequences in total).
# NOTE(review): 'ZZZ' presumably selects reads by ID prefix (read IDs such as
# 'ZZZ|9962187' appear elsewhere in this file) - confirm.
106 ok my $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
107 -dissolve => [$mixed_csp, 'ZZZ'] ), 'dissolved contig spectrum';
108 is_deeply $dissolved_csp->spectrum, {1=>2, 2=>1}; # [2 1]
109 is $dissolved_csp->eff_asm_params, 0;
110 is $dissolved_csp->max_size, 2;
111 is $dissolved_csp->nof_rep, 1;
112 is $dissolved_csp->nof_seq, 4;
113 float_is $dissolved_csp->avg_seq_len, 321;
114 # eff_asm_params haven't been requested
# ... so the overlap and identity statistics are expected to be 0 or undef.
115 is $dissolved_csp->nof_overlaps, 0;
116 is $dissolved_csp->min_overlap, undef;
117 is $dissolved_csp->avg_overlap, 0;
118 is $dissolved_csp->min_identity, undef;
119 is $dissolved_csp->avg_identity, 0;
# Same dissolution for the identifier 'sdsu'; the expected spectrum is three
# 1-contigs and one 6-contig (9 sequences in total).
121 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
122 -dissolve => [$mixed_csp, 'sdsu'] );
123 is_deeply $dissolved_csp->spectrum, {1=>3, 6=>1}; # [3 0 0 0 0 1]
124 is $dissolved_csp->eff_asm_params, 0;
125 is $dissolved_csp->max_size, 6;
126 is $dissolved_csp->nof_rep, 1;
127 is $dissolved_csp->nof_seq, 9;
128 float_is $dissolved_csp->avg_seq_len, 441.222222222222;
129 # eff_asm_params haven't been requested
# ... so the overlap and identity statistics are expected to be 0 or undef.
130 is $dissolved_csp->nof_overlaps, 0;
131 is $dissolved_csp->min_overlap, undef;
132 is $dissolved_csp->avg_overlap, 0;
133 is $dissolved_csp->min_identity, undef;
134 is $dissolved_csp->avg_identity, 0;
# Same dissolution for the identifier 'ABC'; the expected spectrum is two
# 1-contigs and one 6-contig (8 sequences in total).
136 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
137 -dissolve => [$mixed_csp, 'ABC'] );
138 is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
139 is $dissolved_csp->eff_asm_params, 0;
140 is $dissolved_csp->max_size, 6;
141 is $dissolved_csp->nof_rep, 1;
142 is $dissolved_csp->nof_seq, 8;
143 float_is $dissolved_csp->avg_seq_len, 140.625;
144 # eff_asm_params haven't been requested
# ... so the overlap and identity statistics are expected to be 0 or undef.
145 is $dissolved_csp->nof_overlaps, 0;
146 is $dissolved_csp->min_overlap, undef;
147 is $dissolved_csp->avg_overlap, 0;
148 is $dissolved_csp->min_identity, undef;
149 is $dissolved_csp->avg_identity, 0;
# NOTE(review): the next two constructor calls show the same -dissolve
# argument yet expect different spectra ({1=>2, 6=>1} vs {1=>3, 5=>1});
# presumably further constructor options differ between them - confirm.
151 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
154 -dissolve => [$mixed_csp, 'ABC'] );
155 is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
157 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
160 -dissolve => [$mixed_csp, 'ABC'] );
161 is_deeply $dissolved_csp->spectrum, {1=>3, 5=>1}; # [3 0 0 0 1]
163 # after dissolving, the remaining assembly objects should be 3 singlets and 1 5-contig
164 my @contigs = ($dissolved_csp->assembly);
165 is scalar @contigs, 4;
# Expected and actual lists are both put through the default (string) sort
# before comparison, so the order in which assembly() returns the objects does
# not matter for the id, size and class checks.
166 my @contig_ids = sort qw( 144 652_1 652_2 652_3 );
167 is_deeply [sort map($_->id, @contigs)], \@contig_ids;
168 my @contig_sizes = sort qw( 1 1 1 5 );
169 is_deeply [sort map($_->num_sequences, @contigs)], \@contig_sizes;
170 my @contig_isas = sort qw( Bio::Assembly::Singlet Bio::Assembly::Singlet
171 Bio::Assembly::Singlet Bio::Assembly::Contig );
172 is_deeply [sort map(ref $_, @contigs)], \@contig_isas;
# The reads are taken from the second element of @contigs, so this check
# relies on the 5-read contig being at index 1 of the assembly list.
173 my @reads = ($contigs[1])->each_seq;
174 my @read_ids = sort qw(ABC|9980040 ABC|9937790 ABC|9956706 ABC|9960711 ABC|9976538);
175 is_deeply [sort map($_->id, @reads)], \@read_ids;
# Two more 'ABC' dissolutions, each expected to give the spectrum {1=>2, 6=>1}.
177 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
180 -dissolve => [$mixed_csp, 'ABC'] );
181 is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
183 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
186 -dissolve => [$mixed_csp, 'ABC'] );
187 is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
# Same dissolution with effective assembly parameters requested: the overlap
# and identity statistics are now expected to be populated.
189 ok $dissolved_csp = Bio::Assembly::Tools::ContigSpectrum->new(
190 -dissolve => [$mixed_csp, 'ABC'],
191 -eff_asm_params => 1 );
192 is_deeply $dissolved_csp->spectrum, {1=>2, 6=>1}; # [2 0 0 0 0 1]
193 is $dissolved_csp->eff_asm_params, 1;
194 is $dissolved_csp->max_size, 6;
195 is $dissolved_csp->nof_rep, 1;
196 is $dissolved_csp->nof_seq, 8;
197 float_is $dissolved_csp->avg_seq_len, 140.625;
198 is $dissolved_csp->nof_overlaps, 5;
199 float_is $dissolved_csp->avg_overlap, 76.8;
200 float_is $dissolved_csp->avg_identity, 100.0;
201 # min_overlap and min_identity not explicitly specified for the dissolved csp
202 # min_overlap and min_identity are thus taken from the mixed csp
203 is $dissolved_csp->min_overlap, 35;
204 float_is $dissolved_csp->min_identity, 96.8421;
206 # cross contig spectrum
207 ok my $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
208 -cross => $mixed_csp), 'cross-contig spectrum';
209 is_deeply $cross_csp->spectrum, {1=>7, 2=>2, 9=>1}; # [7 2 0 0 0 0 0 0 1]
211 # assembly should have 2 2-contigs and 1 9-contig (the seven 1-contigs of the spectrum are not part of it)
212 @contigs = $cross_csp->assembly;
213 is scalar @contigs, 3;
# Sizes and classes are compared after the default (string) sort on both
# sides, so these two checks do not depend on the order of @contigs.
214 @contig_sizes = sort qw( 2 2 9 );
215 is_deeply [sort map($_->num_sequences, @contigs)], \@contig_sizes;
216 @contig_isas = sort qw( Bio::Assembly::Contig Bio::Assembly::Contig Bio::Assembly::Contig);
217 is_deeply [sort map(ref $_, @contigs)], \@contig_isas;
# The per-contig read checks index @contigs directly, so they do depend on
# the order in which assembly() returns the contigs.
218 @read_ids = sort qw(sdsu|SDSU_RFPERU_006_E04.x01.phd.1 ZZZ|SDSU_RFPERU_010_B05.x01.phd.1);
219 is_deeply [sort map($_->id, $contigs[0]->each_seq)], \@read_ids;
220 @read_ids = sort qw(sdsu|SDSU_RFPERU_013_H05.x01.phd.1 ABC|SDSU_RFPERU_005_F02.x01.phd.1);
221 is_deeply [sort map($_->id, $contigs[1]->each_seq)], \@read_ids;
222 @read_ids = sort qw( ZZZ|9962187 ABC|9937790 ABC|9944760 ABC|9956706
223 sdsu|9986984 ABC|9960711 ABC|9970175 ABC|9976538 ABC|9980040);
224 is_deeply [sort map($_->id, $contigs[2]->each_seq)], \@read_ids;
226 # effective assembly params
# Same cross-contig spectrum as before, now with -eff_asm_params => 1 so that
# the overlap and identity statistics are computed and checked too.
227 ok $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
228 -cross => $mixed_csp,
229 -eff_asm_params => 1 ), 'cross-contig spectrum';
230 is_deeply $cross_csp->spectrum, {1=>7, 2=>2, 9=>1}; # [7 2 0 0 0 0 0 0 1]
231 is $cross_csp->nof_rep, 1;
232 is $cross_csp->eff_asm_params, 1;
233 is $cross_csp->max_size, 9;
234 is $cross_csp->nof_seq, 13;
235 float_is $cross_csp->avg_seq_len, 206.308;
236 is $cross_csp->nof_overlaps, 10;
237 float_is $cross_csp->avg_overlap, 76.9;
238 float_is $cross_csp->avg_identity, 99.2357;
239 # min_overlap and min_identity not explicitly specified for the cross csp
240 # min_overlap and min_identity are thus taken from the mixed csp
241 is $cross_csp->min_overlap, 35;
242 float_is $cross_csp->min_identity, 96.8421;
244 # with a specified minimum overlap and identity
# NOTE(review): only -min_identity => 98 is visible in the constructor call
# below, while min_overlap is expected to be 50 - confirm that -min_overlap
# is passed as well.
245 ok $cross_csp = Bio::Assembly::Tools::ContigSpectrum->new(
246 -cross => $mixed_csp,
248 -min_identity => 98 ), 'cross-contig spectrum';
249 is_deeply $cross_csp->spectrum, {1=>3, 2=>1, 7=>1}; # [3 1 0 0 0 0 1]
250 is $cross_csp->nof_rep, 1;
# -eff_asm_params is not among the visible arguments, hence the expected 0.
251 is $cross_csp->eff_asm_params, 0;
252 is $cross_csp->max_size, 7;
253 is $cross_csp->nof_seq, 9;
254 float_is $cross_csp->avg_seq_len, 191.222;
255 is $cross_csp->min_overlap, 50;
256 float_is $cross_csp->min_identity, 98;
258 # sum of contig spectra
# $dissolved_csp is the most recently built one ({1=>2, 6=>1}, 8 sequences,
# 5 overlaps, 1 repetition). Adding it and $mixed_csp ({1=>0, 2=>3, 6=>1, 9=>1},
# 21 sequences, 16 overlaps, 1 repetition) to an empty contig spectrum is
# expected to sum the spectra key by key and to sum the counts:
# 29 sequences, 21 overlaps, 2 repetitions.
259 ok my $sum_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'contig spectrum sum';
260 ok $sum_csp->add($dissolved_csp);
261 ok $sum_csp->add($mixed_csp);
262 is_deeply $sum_csp->spectrum, {1=>2, 2=>3, 6=>2, 9=>1}; # [2 3 0 0 0 2 0 0 1]
263 is $sum_csp->eff_asm_params, 1;
264 is $sum_csp->max_size, 9;
265 is $sum_csp->nof_rep, 2;
266 is $sum_csp->nof_seq, 29;
267 float_is $sum_csp->avg_seq_len, 258.7934;
268 is $sum_csp->nof_overlaps, 21;
269 is $sum_csp->min_overlap, 35;
270 float_is $sum_csp->avg_overlap, 137.0476;
271 float_is $sum_csp->min_identity, 96.8421;
272 float_is $sum_csp->avg_identity, 99.1487;
273 is scalar $sum_csp->assembly, 4;
275 # average of contig spectra
# average() is given the same two contig spectra as the sum test; its return
# value replaces $avg_csp. The spectrum, sequence count and overlap count are
# expected to be half of the summed values (e.g. 29/2 = 14.5, 21/2 = 10.5),
# while nof_rep is expected to be 2 and the min/avg statistics are expected
# to equal those of the sum.
276 ok my $avg_csp = Bio::Assembly::Tools::ContigSpectrum->new(-eff_asm_params=>1), 'average contig spectrum';
277 ok $avg_csp = $avg_csp->average([$dissolved_csp, $mixed_csp]);
278 is_deeply $avg_csp->spectrum, {1=>1, 2=>1.5, 6=>1, 9=>0.5}; # [1 1.5 0 0 0 1 0 0 0.5]
279 is $avg_csp->eff_asm_params, 1;
280 is $avg_csp->max_size, 9;
281 is $avg_csp->nof_rep, 2;
282 is $avg_csp->nof_seq, 14.5;
283 float_is $avg_csp->avg_seq_len, 258.7934;
284 is $avg_csp->nof_overlaps, 10.5;
285 is $avg_csp->min_overlap, 35;
286 float_is $avg_csp->avg_overlap, 137.0476;
287 float_is $avg_csp->min_identity, 96.8421;
288 float_is $avg_csp->avg_identity, 99.1487;
289 is scalar $avg_csp->assembly, 4;
291 # drop assembly info from contig spectrum
# drop_assembly() must return a true value, after which assembly() evaluated
# in scalar context is expected to be 0.
292 ok $mixed_csp->drop_assembly(), 'drop assembly';
293 is scalar $mixed_csp->assembly(), 0;
295 # large contig (27 reads)
# $in and $sc are reused for the 27-contig_Newbler.ace fixture.
296 $in = Bio::Assembly::IO->new(
297 -file => test_input_file('27-contig_Newbler.ace'),
300 isa_ok $in, 'Bio::Assembly::IO';
301 $sc = $in->next_assembly;
302 isa_ok $sc, 'Bio::Assembly::Scaffold';
# Built with -eff_asm_params => 1, so the overlap and identity statistics are
# checked in addition to the spectrum (a single 27-contig).
303 ok my $large_csp = Bio::Assembly::Tools::ContigSpectrum->new(
305 -eff_asm_params => 1 ), 'large contig spectrum';
306 is scalar $large_csp->assembly(), 1;
307 is_deeply $large_csp->spectrum, {1=>0, 27=>1};
308 is $large_csp->eff_asm_params, 1;
309 is $large_csp->max_size, 27;
310 is $large_csp->nof_rep, 1;
311 is $large_csp->nof_seq, 27;
312 float_is $large_csp->avg_seq_len, 100;
313 is $large_csp->nof_overlaps, 26;
314 is $large_csp->min_overlap, 54;
315 # operation returns sometimes 88.76923... and sometimes 88.80769...
# ... so avg_overlap is only required to lie within [88.7692, 88.8077].
316 ok $large_csp->avg_overlap >= 88.7692;
317 ok $large_csp->avg_overlap <= 88.8077;
318 float_is $large_csp->min_identity, 33.3333;
# avg_identity is likewise checked against a range (74.7 to 74.9); the
# measured value is passed as the test name so that it shows in the output.
319 cmp_ok($large_csp->avg_identity, '>=', 74.7, $large_csp->avg_identity);
320 cmp_ok($large_csp->avg_identity, '<=', 74.9, $large_csp->avg_identity);
# Cross-contig spectrum of the large contig with effective assembly
# parameters; avg_overlap is range-checked (88.7 to 88.9) with the measured
# value passed as the test name.
322 ok my $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
323 -cross => $large_csp,
324 -eff_asm_params => 1 ), 'large cross-contig spectrum';
325 is $large_xcsp->nof_overlaps, 26;
326 cmp_ok($large_xcsp->avg_overlap, '>=', 88.7, $large_xcsp->avg_overlap);
327 cmp_ok($large_xcsp->avg_overlap, '<=', 88.9, $large_xcsp->avg_overlap);
328 is_deeply $large_xcsp->spectrum, {1=>21, 27=>1};
# Same cross with a minimum overlap of 100: the expected spectrum holds
# 5 + 1 + 1 = 7 contigs of size 2 or more, matching the 7 assembly objects.
330 ok $large_xcsp = Bio::Assembly::Tools::ContigSpectrum->new(
331 -cross => $large_csp,
332 -min_overlap => 100);
333 is_deeply $large_xcsp->spectrum, {1=>18, 2=>5, 3=>1, 7=>1};
# @xcontigs is sorted by id (string comparison), so the id list can be
# compared below without sorting it again.
334 my @xcontigs = sort {$a->id cmp $b->id} $large_xcsp->assembly;
335 is scalar @xcontigs, 7; # the cross-1-contigs are not included
336 my @xcontig_ids = sort qw( contig00001_1 contig00001_2 contig00001_3 contig00001_4
337 contig00001_5 contig00001_6 contig00001_7 );
338 is_deeply [map($_->id, @xcontigs)], \@xcontig_ids;
# Sizes are compared after the default (string) sort on both sides.
339 my @xcontig_sizes = sort qw( 2 2 2 2 2 3 7 );
340 is_deeply [sort map($_->num_sequences, @xcontigs)], \@xcontig_sizes;
342 # Examine largest cross-contig
# Numeric descending sort on num_sequences; the first element is the
# cross-contig with the most reads (expected: 7).
343 my $xcontig = (sort {$b->num_sequences <=> $a->num_sequences} $large_xcsp->assembly)[0];
344 is $xcontig->num_sequences, 7;
# Spot-check the coordinates of three named reads within that contig: two
# start positions and one end position.
345 is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species1635|5973'))->start, 1;
346 is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species158|7890'))->start, 1;
347 is $xcontig->get_seq_coord($xcontig->get_seq_by_name('species2742|48'))->end, 140;
349 # one contig at a time
# Re-read the contigspectrumtest.tigr fixture and feed the scaffold's contigs
# into a single contig spectrum one by one (each assembly() call must return
# a true value). The spectrum and statistics checked afterwards carry the
# same expected values as the mixed contig spectrum tested earlier in this
# file; $in, $sc and $csp are reused from earlier sections.
350 $in = Bio::Assembly::IO->new(
351 -file => test_input_file('contigspectrumtest.tigr'),
354 $sc = $in->next_assembly;
355 ok $csp = Bio::Assembly::Tools::ContigSpectrum->new(
356 -eff_asm_params => 1 ), 'one contig at a time';
357 for my $contig ($sc->all_contigs) {
358 ok $csp->assembly($contig);
361 is scalar $csp->assembly(), 5;
362 is_deeply $csp->spectrum, {1=>0, 2=>3, 6=>1, 9=>1}; # [0 3 0 0 0 1 0 0 1]
363 is $csp->eff_asm_params, 1;
364 is $csp->max_size, 9;
366 is $csp->nof_seq, 21;
367 float_is $csp->avg_seq_len, 303.81;
368 is $csp->nof_overlaps, 16;
369 is $csp->min_overlap, 35;
370 float_is $csp->avg_overlap, 155.875;
371 float_is $csp->min_identity, 96.8421;
372 float_is $csp->avg_identity, 98.8826;