1 # -*-Perl-*- Test Harness script for Bioperl
9 test_begin(-tests => 194);
12 use_ok('Bio::Seq::Quality');
13 use_ok('Bio::PrimarySeq');
14 use_ok('Bio::LocatableSeq');
15 use_ok('Bio::Seq::SimulatedRead');
18 my $VERBOSE = test_debug();
20 my ($ref, $ref2, $ref3, $ref4, $ref5, $read, $errors);
22 $ref = Bio::Seq::Quality->new(-id => 'human_id',
23 -seq => 'TAAAAAAACCCC',
24 -qual => '1 2 3 4 5 6 7 8 9 10 11 12',
25 -trace => '0 5 10 15 20 25 30 35 40 45 50 55',
26 -desc => 'The human genome' );
28 $ref2 = Bio::Seq->new(-id => 'other_genome',
30 -desc => '"Secret" sequence');
32 $ref3 = Bio::PrimarySeq->new(-seq => 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT' );
34 $ref4 = Bio::LocatableSeq->new(-id => 'a_thaliana',
35 -seq => 'CGTATTCTGAGGAGAGCTCT' );
40 ok $read = Bio::Seq::SimulatedRead->new();
41 isa_ok $read, 'Bio::Seq::SimulatedRead';
42 isa_ok $read, 'Bio::LocatableSeq';
43 isa_ok $read, 'Bio::Seq::Quality';
45 $errors->{'1'}->{'+'} = 'T';
46 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
47 is $read->reference, $ref;
49 is $read->errors->{'1'}->{'+'}->[0], 'T';
51 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 0 );
54 is $read->seq, 'TAAAAAAACCCC';
56 is $read->desc, undef;
57 is $read->revcom->seq, 'GGGGTTTTTTTA';
59 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1 );
62 is $read->seq, 'TAAAAAAACCCC';
63 is join(' ',@{$read->qual}), '';
65 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
67 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'bioperl' );
68 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
70 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'genbank' );
71 is $read->desc, 'reference=human_id position=1..12 description="The human genome"';
73 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -qual_levels => [30, 10]);
76 is $read->seq, 'TAAAAAAACCCC';
77 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
79 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
80 is $read->revcom->seq, 'GGGGTTTTTTTA';
82 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 );
85 is $read->seq, 'ACGTACGT';
86 is join(' ',@{$read->qual}), '';
87 is $read->desc, 'reference=other_genome start=1 end=8 strand=+1 description="\"Secret\" sequence"';
89 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 );
92 is $read->seq, 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT';
93 is join(' ',@{$read->qual}), '';
94 is $read->desc, 'start=1 end=37 strand=+1';
96 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10]);
97 is $read->seq, 'GGGGTTTTTTTA';
98 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
99 is $read->desc, 'reference=human_id start=1 end=12 strand=-1 description="The human genome"';
101 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10], -coord_style => 'genbank' );
102 is $read->desc, 'reference=human_id position=complement(1..12) description="The human genome"';
104 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -start => 2, -end => 8, -qual_levels => [30, 10]);
105 is $read->seq, 'AAAAAAA';
106 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
107 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 description="The human genome"';
109 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -qual_levels => [30, 10]);
110 is $read->seq, 'TTTTTTT';
111 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
112 is $read->desc, 'reference=human_id start=2 end=8 strand=-1 description="The human genome"';
115 $errors->{'6'}->{'+'} = 'GG';
116 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
119 is $read->seq, 'TTTTTTGGT';
120 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30';
121 is $read->desc, 'reference=human_id start=2 end=8 strand=-1 errors=6+G,6+G description="The human genome"';
124 $errors->{'6'}->{'+'} = 'GG';
125 $errors->{'1'}->{'%'} = 'T';
126 $errors->{'3'}->{'-'} = undef;
127 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
130 is $read->seq, 'TAAAAGGA';
131 is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
132 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%T,3-,6+G,6+G description="The human genome"';
135 $errors->{'6'}->{'+'} = 'GG';
136 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -qual_levels => [30, 10]);
139 is $read->seq, 'TAAAAAGGAACCCC';
140 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30';
141 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
143 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -mid => 'ACGT', -errors => $errors, -qual_levels => [30, 10]);
146 is $read->seq, 'ACGTTAGGAAAAAACCCC';
147 is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30 30 30 30 30';
148 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT errors=6+G,6+G description="The human genome"';
150 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'TTTAAA', -qual_levels => [30, 10]);
153 is $read->seq, 'TTTAAATAAAAAAACCCC';
154 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
155 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
157 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
160 is $read->seq, 'TAAAAAAACCCC';
161 is join(' ', @{$read->qual}), '';
162 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
168 $errors->{'6'}->{'+'} = ['G', 'G'];
169 $errors->{'1'}->{'%'} = ['A', 'G', 'T'];
170 $errors->{'3'}->{'-'} = [undef, undef];
171 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]), 'redundant errors';
174 is $read->seq, 'TAAAAGGA';
175 is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
176 is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%A,1%G,1%T,3-,3-,6+G,6+G description="The human genome"';
179 # Specifying errors() after new()
181 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
184 is $read->seq, 'TAAAAAAACCCC';
185 is join(' ', @{$read->qual}), '';
186 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
189 ok $read->errors($errors), 'errors()';
192 is $read->seq, 'TAAAAAAACCCC';
193 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
196 $errors->{'6'}->{'+'} = 'GG';
197 ok $read->errors($errors);
198 is $read->seq, 'TAAAAAGGAACCCC';
201 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
204 # More tracking tests
206 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'ACGT', -qual_levels => [], -coord_style => 'genbank' );
207 is $read->desc, 'reference=human_id position=1..12 mid=ACGT description="The human genome"';
209 ok $read->mid('AAAA');
210 is $read->desc, 'reference=human_id position=1..12 mid=AAAA description="The human genome"';
213 $errors->{'6'}->{'+'} = 'GG';
214 ok $read->errors($errors);
215 is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
217 ok not($read->track(0)), 'track()';
219 is $read->desc, undef;
222 is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';
225 # qual_levels() method
227 ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, );
228 ok $read->qual_levels([30, 10]), 'qual_levels()';
229 is join(' ', @{$read->qual_levels}), '30 10';
233 ok $read->reference($ref), 'reference()';
234 is $read->reference(), $ref;
238 ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, ), 'mid()';
239 ok $read->qual_levels([30, 10]);
240 ok $read->reference($ref);
241 ok $read->mid('ACGT');
242 ok $read->mid, 'ACGT';
244 is $read->seq, 'ACGTTAAAAAAACCCC';
245 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
246 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT description="The human genome"';
248 ok $read->mid('TTTAAA');
249 ok $read->mid, 'TTTAAA';
250 is $read->seq, 'TTTAAATAAAAAAACCCC';
251 is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
252 is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';
255 # Edge case... mutation of the last bases of a simulated read with MID
258 $errors->{'18'}->{'%'} = 'T';
259 $read->errors($errors);
260 is $read->seq, 'TTTAAATAAAAAAACCCT';
263 # Try different BioPerl object types
265 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref ), 'Bio::Seq::Quality';
266 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 ), 'Bio::Seq';
267 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 ), 'Bio::PrimarySeq';
268 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref4 ), 'Bio::LocatableSeq';
271 # More detailed tests of the error specifications
274 $errors->{'0'}->{'-'} = undef;
275 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
276 qr/Positions of substitutions and deletions have to be strictly positive but got 0/;
277 is $read->seq, 'TAAAAAAACCCC';
280 $errors->{'1'}->{'-'} = undef;
281 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
282 is $read->seq, 'AAAAAAACCCC';
285 $errors->{'12'}->{'-'} = undef;
286 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
287 is $read->seq, 'TAAAAAAACCC';
290 $errors->{'13'}->{'-'} = undef;
291 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
292 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
293 is $read->seq, 'TAAAAAAACCCC';
296 $errors->{'0'}->{'%'} = 'G';
297 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
298 qr/Positions of substitutions and deletions have to be strictly positive/; # there should be a warning too
299 is $read->seq, 'TAAAAAAACCCC';
302 $errors->{'1'}->{'%'} = 'G';
303 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
304 is $read->seq, 'GAAAAAAACCCC';
307 $errors->{'12'}->{'%'} = 'G';
308 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
309 is $read->seq, 'TAAAAAAACCCG';
312 $errors->{'13'}->{'%'} = 'G';
313 warning_like { $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
314 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
315 is $read->seq, 'TAAAAAAACCCC';
318 $errors->{'0'}->{'+'} = 'A';
319 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
320 is $read->seq, 'ATAAAAAAACCCC';
323 $errors->{'1'}->{'+'} = 'A';
324 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
325 is $read->seq, 'TAAAAAAAACCCC';
328 $errors->{'12'}->{'+'} = 'A';
329 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
330 is $read->seq, 'TAAAAAAACCCCA';
333 $errors->{'13'}->{'+'} = 'A';
334 warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
335 qr/Position 13 is beyond end of read \(12 residues\)/; # there should be a warning too
336 is $read->seq, 'TAAAAAAACCCC';
339 $errors->{'1'}->{'%'} = 'G';
340 $errors->{'2'}->{'%'} = 'G';
341 $errors->{'3'}->{'%'} = 'G';
342 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
343 is $read->seq, 'GGGAAAAACCCC';
346 $errors->{'1'}->{'+'} = 'G';
347 $errors->{'2'}->{'+'} = 'G';
348 $errors->{'3'}->{'+'} = 'G';
349 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
350 is $read->seq, 'TGAGAGAAAAACCCC';
353 $errors->{'1'}->{'-'} = undef;
354 $errors->{'2'}->{'-'} = undef;
355 $errors->{'3'}->{'-'} = undef;
356 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
357 is $read->seq, 'AAAAACCCC';
360 $errors->{'1'}->{'+'} = 'GGG';
361 $errors->{'2'}->{'-'} = undef;
362 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
363 is $read->seq, 'TGGGAAAAAACCCC';
366 $errors->{'2'}->{'+'} = 'CC';
367 $errors->{'2'}->{'-'} = undef;
368 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
369 is $read->seq, 'TCCAAAAAACCCC';
372 $errors->{'2'}->{'%'} = 'C';
373 $errors->{'2'}->{'-'} = undef;
374 ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
375 is $read->seq, 'TAAAAAACCCC';