2 #$Id: BEDTools.t kortsch $
9 my $v = 0; # private verbosity - this module only
12 $home = '.'; # set to '.' for Build use,
13 # '..' for debugging from .t file
16 test_begin(-tests => 423,
17 -requires_modules => [qw(IPC::Run Bio::Tools::Run::BEDTools)]);
20 use Bio::Tools::Run::WrapperBase;
22 use Bio::Tools::GuessSeqFormat;
24 # test command functionality
26 ok my $bedtoolsfac = Bio::Tools::Run::BEDTools->new, "make a default factory";
27 is $bedtoolsfac->command, 'bam_to_bed', "default to command 'bam_to_bed'";
30 annotate fasta_from_bed overlap
31 bam_to_bed genome_coverage pair_to_pair
32 bed_to_bam graph_union pair_to_bed
33 bed_to_IGV group_by shuffle
34 b12_to_b6 intersect slop
36 complement mask_fasta_from_bed subtract
50 'fasta_from_bed' => 0,
51 'genome_coverage' => 2,
56 'mask_fasta_from_bed' => 0,
77 'fasta_from_bed' => 3,
78 'genome_coverage' => 4,
83 'mask_fasta_from_bed' => 1,
95 my $bam_file = test_input_file('Ft.bam');
96 my $bed_file = test_input_file('Ft.bed');
97 my $bed12_file = test_input_file('Ft.bed12');
98 my $fas_file = test_input_file('Ft.frag.fas');
99 my $bedpe1_file = test_input_file('e_coli_1.bedpe');
100 my $bedpe2_file = test_input_file('e_coli_2.bedpe');
101 my $bed3_file = test_input_file('e_coli.bed3');
102 my $bg1_file = test_input_file('1.bg');
103 my $bg2_file = test_input_file('2.bg');
104 my $bg3_file = test_input_file('3.bg');
106 my %format_lookup = (
108 'bam_to_bed' => 'bed',
109 'bed_to_bam' => 'bam',
110 'bed_to_IGV' => 'igv',
111 'b12_to_b6' => 'bed',
112 'closest' => 'bedpe',
113 'complement' => 'bed',
115 'fasta_from_bed' => 'fasta',
116 'genome_coverage' => 'tab',
117 'graph_union' => 'bg',
119 'intersect' => 'bed|bam',
121 'mask_fasta_from_bed' => 'fasta',
124 'pair_to_bed' => 'bedpe|bam',
125 'pair_to_pair' => 'bedpe',
133 my %result_lookup = (
134 'annotate' => 1385, # OK
135 'bam_to_bed' => 1385, # OK
136 'fasta_from_bed' => 1385, # OK
137 'mask_fasta_from_bed' => 1, # OK
138 'shuffle' => 828, # OK
139 'window' => 74998, # OK
140 'closest' => 845, # OK
141 'genome_coverage' => 38, # OK
144 'complement' => 291, # OK - change in data provided by BEDTools or behaviour of complementBed? was 243
145 'intersect' => 72534, # OK
146 'pair_to_bed' => 2, # OK
148 'coverage' => 57261, # OK
149 'links' => 11603, # OK
150 'pair_to_pair' => 497, # OK
151 'subtract' => 57959, # OK
152 'group_by' => 1, # OK
153 'b12_to_b6' => 1385, # OK
154 'overlap' => 500 # OK
158 test_skip( -requires_executable => $bedtoolsfac,
161 # Sorry to all those out there who don't have a find command
162 # - perhaps someone can add an alternative
169 if ($^O =~ m/mswin/i) {
173 # Assume files are in drive C:\, change location if this is not true
174 chdir 'C:\\'; # Search in all C:
175 ($rmsk_bed) = `cmd.exe /C dir /B /S rmsk.hg18.chr21.bed 2>NUL`;
176 ($gene_bed) = `cmd.exe /C dir /B /S knownGene.hg18.chr21.bed 2>NUL`;
177 ($mm8_genome) = `cmd.exe /C dir /B /S mouse.mm8.genome 2>NUL`;
178 ($mm9_genome) = `cmd.exe /C dir /B /S mouse.mm9.genome 2>NUL`;
179 ($hg18_genome) = `cmd.exe /C dir /B /S human.hg18.genome 2>NUL`;
180 ($hg19_genome) = `cmd.exe /C dir /B /S human.hg19.genome 2>NUL`;
182 chdir $dir; # Go back to $dir;
185 ($rmsk_bed) = `find /usr -name 'rmsk.hg18.chr21.bed' 2>/dev/null`;
186 ($gene_bed) = `find /usr -name 'knownGene.hg18.chr21.bed' 2>/dev/null`;
187 ($mm8_genome) = `find /usr -name 'mouse.mm8.genome' 2>/dev/null`;
188 ($mm9_genome) = `find /usr -name 'mouse.mm9.genome' 2>/dev/null`;
189 ($hg18_genome) = `find /usr -name 'human.hg18.genome' 2>/dev/null`;
190 ($hg19_genome) = `find /usr -name 'human.hg19.genome' 2>/dev/null`;
192 chomp $rmsk_bed if $rmsk_bed;
193 chomp $gene_bed if $gene_bed;
194 chomp $mm8_genome if $mm8_genome;
195 chomp $mm9_genome if $mm9_genome;
196 chomp $hg18_genome if $hg18_genome;
197 chomp $hg19_genome if $hg19_genome;
199 COMMAND : for (@commands) {
201 $v && diag("Testing command: '$_'");
202 ok( my $bedtoolsfac = Bio::Tools::Run::BEDTools->new(-command => $_),
203 "make a factory using command '$_'" );
204 $v && diag(" return command");
205 is( my $command = $bedtoolsfac->command, $_, "factory command for '$_' is correct" );
206 $v && diag(" return switches and params");
207 is( scalar $bedtoolsfac->available_parameters, $p{$_}+1+$s{$_}, "all available options for '$_'" );
208 is( scalar $bedtoolsfac->available_parameters('params'), $p{$_}+1, "available parameters for '$_'" );
209 is( scalar $bedtoolsfac->available_parameters('switches'), $s{$_}, "available switches for '$_'" );
210 $v && diag(" return executable version");
211 ok( $bedtoolsfac->version, "get version for '$_'" );
214 $v && diag(" run command as default");
215 m/^annotate$/ && do {
216 ok( my $result = $bedtoolsfac->run( -bgv => $bed_file,
217 -ann => [$bed3_file] ),
218 "can run command '$command'" );
221 m/^bam_to_bed$/ && do {
222 ok( my $result = $bedtoolsfac->run( -bam => $bam_file ),
223 "can run command '$command'" );
226 m/^(?:fasta_from_bed|mask_fasta_from_bed)$/ && do {
227 ok( my $result = $bedtoolsfac->run( -seq => $fas_file,
229 "can run command '$command'" );
232 m/^(?:bed_to_IGV|merge|sort|links)$/ && do {
233 ok( my $result = $bedtoolsfac->run( -bgv => $gene_bed ),
234 "can run command '$command'" );
237 m/^(?:bed_to_bam|shuffle|slop|complement)$/ && do {
238 is( $bedtoolsfac->add_bidirectional(100), 100,
239 "can set parameter -add_bidirectional => 100 " ) if $command eq 'slop';
240 ok( my $result = $bedtoolsfac->run( -bgv => $gene_bed,
241 -genome => $hg18_genome ),
242 "can run command '$command'" );
245 m/^genome_coverage$/ && do {
246 ok( my $result = $bedtoolsfac->run( -bed => $gene_bed,
247 -genome => $hg18_genome ),
248 "can run command '$command'" );
251 m/^(?:window|closest|coverage|subtract|intersect)$/ && do {
252 ok( my $result = $bedtoolsfac->run( -bgv1 => $gene_bed,
253 -bgv2 => $rmsk_bed ),
254 "can run command '$command'" );
257 m/^pair_to_pair$/ && do {
258 is( $bedtoolsfac->type('neither'), 'neither',
259 "can set parameter -type => 'neither'" );
260 ok( my $result = $bedtoolsfac->run( -bedpe1 => $bedpe1_file,
261 -bedpe2 => $bedpe2_file ),
262 "can run command '$command'" );
265 m/^pair_to_bed$/ && do {
266 ok( my $result = $bedtoolsfac->run( -bedpe => $bedpe1_file,
267 -bgv => $bed3_file ),
268 "can run command '$command'" );
272 is( $bedtoolsfac->columns('2,3,5,6'), '2,3,5,6',
273 "can set parameter -columns => '2,3,5,6' " );
274 ok( my $result = $bedtoolsfac->run( -bed => $bedpe1_file, ),
275 "can run command '$command'" );
278 m/^b12_to_b6$/ && do {
279 ok( my $result = $bedtoolsfac->run( -bed => $bed12_file, ),
280 "can run command '$command'" );
283 m/^group_by$/ && do {
284 is( $bedtoolsfac->group(1), 1,
285 "can set parameter -group => 1 " );
286 is( $bedtoolsfac->columns('2,2,3,3'), '2,2,3,3',
287 "can set parameter -columns => '2,2,3,3' " );
288 is( $bedtoolsfac->operations('min,max,min,max'), 'min,max,min,max',
289 "can set parameter -operations => 'min,max,min,max' " );
290 ok( my $result = $bedtoolsfac->run( -bed => $bed3_file ),
291 "can run command '$command'" );
294 m/^graph_union$/ && do {
295 ok( my $result = $bedtoolsfac->run( -bg => [$bg1_file, $bg2_file, $bg2_file] ),
296 "can run command '$command'" );
300 # we should never get here - internal test
301 fail( "all commands tested - missed '$_'");
305 $v && diag(" check file has been written");
306 ok( eval { (-e $bedtoolsfac->result && -r _) }, "result files exists for command '$command'");
307 $v && diag(" check can get internal result format description and confirm it");
308 ok( my $format = $bedtoolsfac->result( -want => 'format' ),
309 "can return output format for command '$command'" );
310 like( $format, qr/(?:$format_lookup{$command})/,
311 "result claims to be in correct format for command '$command'" );
312 $v && diag(" check can get internal result file name value");
313 ok( my $file = $bedtoolsfac->result(-want=>'raw'),
314 "can return output file for command '$command'" );
315 if ($command eq 'links') {
316 $v && diag(" check result file is html and correct size");
317 ok eval { (-e $file)&&(-r _) }, "make readable output";
319 my $lines =(my $first_line)= <FILE>;
321 like( $first_line, qr/\<html\>/, " - html tag line");
322 is( $lines, $result_lookup{$command}, " - number of lines");
323 } elsif ($command eq 'genome_coverage') {
324 $v && diag(" check result file is correct size");
325 ok eval { (-e $file)&&(-r _) }, "make readable output";
327 my $lines =()= <FILE>;
329 is( $lines, $result_lookup{$command}, " - number of lines");
331 $v && diag(" check can get internal result format matches result file");
332 my $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
333 for ($format_lookup{$command}) {
334 m/^(?:bed|bedpe|tab)$/ && do {
335 is( $guesser->guess, 'tab', "file format of '$file' consistent with claim for '$command'" );
339 is( $guesser->guess, 'fasta', "file format consistent with claim for '$command'" );
343 $v && diag(" check can get and set wanted result type");
344 is( $bedtoolsfac->want('Bio::Root::IO'), 'Bio::Root::IO',
345 "can set want to IO object for command '$command'" );
346 $v && diag(" check can get a Bio::Root::IO object");
347 ok( my $objres = $bedtoolsfac->result, "can get the basic object result for command '$command'" );
348 $v && diag(" - check can it is actually a Bio::Root::IO object");
349 isa_ok( $objres, 'Bio::Root::IO', "returned object is correct for command '$command'" );
350 for ($format_lookup{$command}) {
351 $v && diag(" check can can get format-specific result object if supported");
352 m/(?:bed|bedpe)/ && do {
353 $v && diag(" - Bio::SeqFeature::Collection");
354 ok( my $objres = $bedtoolsfac->result( -want => 'Bio::SeqFeature::Collection' ),
355 "can get the specific object result for command '$command'" );
356 isa_ok( $objres, 'Bio::SeqFeature::Collection',
357 "returned object is correct for command '$command'" );
358 $v && diag(" - correct number of features");
359 is( scalar $objres->get_all_features, $result_lookup{$command},
360 "correct number of features for command '$command'" );
364 $v && diag(" - Bio::SeqIO");
365 ok( my $objres = $bedtoolsfac->result( -want => 'Bio::SeqIO' ),
366 "can get the specific object result for command '$command'" );
367 isa_ok( $objres, 'Bio::SeqIO',
368 "returned object is correct for command '$command'" );
370 while( my $seq = $objres->next_seq ) {
373 $v && diag(" - correct number of sequences");
374 is( $seq_count, $result_lookup{$command}, "correct number of sequences for command '$command'" );