2 #$Id: BEDTools.t kortsch $
9 my $v = 0; # private verbosity - this module only
12 $home = '.'; # set to '.' for Build use,
13 # '..' for debugging from .t file
16 test_begin(-tests => 423,
17 -requires_modules => [qw(IPC::Run Bio::Tools::Run::BEDTools)]);
20 use Bio::Tools::Run::WrapperBase;
22 use Bio::Tools::GuessSeqFormat;
24 # test command functionality
26 ok my $bedtoolsfac = Bio::Tools::Run::BEDTools->new, "make a default factory";
27 is $bedtoolsfac->command, 'bam_to_bed', "default to command 'bam_to_bed'";
30 annotate fasta_from_bed overlap
31 bam_to_bed genome_coverage pair_to_pair
32 bed_to_bam graph_union pair_to_bed
33 bed_to_IGV group_by shuffle
34 b12_to_b6 intersect slop
36 complement mask_fasta_from_bed subtract
50 'fasta_from_bed' => 0,
51 'genome_coverage' => 2,
56 'mask_fasta_from_bed' => 0,
77 'fasta_from_bed' => 3,
78 'genome_coverage' => 4,
83 'mask_fasta_from_bed' => 1,
95 # Sorry to all those out there who don't have a find command
96 # - perhaps someone can add an alternative
97 my ($rmsk_bed) = `find /usr -name 'rmsk.hg18.chr21.bed' 2>/dev/null`;
98 chomp $rmsk_bed if $rmsk_bed;
99 my ($gene_bed) = `find /usr -name 'knownGene.hg18.chr21.bed' 2>/dev/null`;
100 chomp $gene_bed if $gene_bed;
102 my ($mm8_genome) = `find /usr -name 'mouse.mm8.genome' 2>/dev/null`;
103 chomp $mm8_genome if $mm8_genome;
104 my ($mm9_genome) = `find /usr -name 'mouse.mm9.genome' 2>/dev/null`;
105 chomp $mm9_genome if $mm9_genome;
106 my ($hg18_genome) = `find /usr -name 'human.hg18.genome' 2>/dev/null`;
107 chomp $hg18_genome if $hg18_genome;
108 my ($hg19_genome) = `find /usr -name 'human.hg19.genome' 2>/dev/null`;
109 chomp $hg19_genome if $hg19_genome;
111 my $bam_file = test_input_file('Ft.bam');
112 my $bed_file = test_input_file('Ft.bed');
113 my $bed12_file = test_input_file('Ft.bed12');
114 my $fas_file = test_input_file('Ft.frag.fas');
115 my $bedpe1_file = test_input_file('e_coli_1.bedpe');
116 my $bedpe2_file = test_input_file('e_coli_2.bedpe');
117 my $bed3_file = test_input_file('e_coli.bed3');
118 my $bg1_file = test_input_file('1.bg');
119 my $bg2_file = test_input_file('2.bg');
120 my $bg3_file = test_input_file('3.bg');
122 my %format_lookup = (
124 'bam_to_bed' => 'bed',
125 'bed_to_bam' => 'bam',
126 'bed_to_IGV' => 'igv',
127 'b12_to_b6' => 'bed',
128 'closest' => 'bedpe',
129 'complement' => 'bed',
131 'fasta_from_bed' => 'fasta',
132 'genome_coverage' => 'tab',
133 'graph_union' => 'bg',
135 'intersect' => 'bed|bam',
137 'mask_fasta_from_bed' => 'fasta',
140 'pair_to_bed' => 'bedpe|bam',
141 'pair_to_pair' => 'bedpe',
149 my %result_lookup = (
150 'annotate' => 1385, # OK
151 'bam_to_bed' => 1385, # OK
152 'fasta_from_bed' => 1385, # OK
153 'mask_fasta_from_bed' => 1, # OK
154 'shuffle' => 828, # OK
155 'window' => 74998, # OK
156 'closest' => 845, # OK
157 'genome_coverage' => 38, # OK
160 'complement' => 291, # OK - change in data provided by BEDTools or behaviour of complementBed? was 243
161 'intersect' => 72534, # OK
162 'pair_to_bed' => 2, # OK
164 'coverage' => 57261, # OK
165 'links' => 11603, # OK
166 'pair_to_pair' => 497, # OK
167 'subtract' => 57959, # OK
168 'group_by' => 1, # OK
169 'b12_to_b6' => 1385, # OK
170 'overlap' => 500 # OK
174 test_skip( -requires_executable => $bedtoolsfac,
177 COMMAND : for (@commands) {
179 $v && diag("Testing command: '$_'");
180 ok( my $bedtoolsfac = Bio::Tools::Run::BEDTools->new(-command => $_),
181 "make a factory using command '$_'" );
182 $v && diag(" return command");
183 is( my $command = $bedtoolsfac->command, $_, "factory command for '$_' is correct" );
184 $v && diag(" return switches and params");
185 is( scalar $bedtoolsfac->available_parameters, $p{$_}+1+$s{$_}, "all available options for '$_'" );
186 is( scalar $bedtoolsfac->available_parameters('params'), $p{$_}+1, "available parameters for '$_'" );
187 is( scalar $bedtoolsfac->available_parameters('switches'), $s{$_}, "available switches for '$_'" );
188 $v && diag(" return executable version");
189 ok( $bedtoolsfac->version, "get version for '$_'" );
192 $v && diag(" run command as default");
193 m/^annotate$/ && do {
194 ok( my $result = $bedtoolsfac->run( -bgv => $bed_file,
195 -ann => [$bed3_file] ),
196 "can run command '$command'" );
199 m/^bam_to_bed$/ && do {
200 ok( my $result = $bedtoolsfac->run( -bam => $bam_file ),
201 "can run command '$command'" );
204 m/^(?:fasta_from_bed|mask_fasta_from_bed)$/ && do {
205 ok( my $result = $bedtoolsfac->run( -seq => $fas_file,
207 "can run command '$command'" );
210 m/^(?:bed_to_IGV|merge|sort|links)$/ && do {
211 ok( my $result = $bedtoolsfac->run( -bgv => $gene_bed ),
212 "can run command '$command'" );
215 m/^(?:bed_to_bam|shuffle|slop|complement)$/ && do {
216 is( $bedtoolsfac->add_bidirectional(100), 100,
217 "can set parameter -add_bidirectional => 100 " ) if $command eq 'slop';
218 ok( my $result = $bedtoolsfac->run( -bgv => $gene_bed,
219 -genome => $hg18_genome ),
220 "can run command '$command'" );
223 m/^genome_coverage$/ && do {
224 ok( my $result = $bedtoolsfac->run( -bed => $gene_bed,
225 -genome => $hg18_genome ),
226 "can run command '$command'" );
229 m/^(?:window|closest|coverage|subtract|intersect)$/ && do {
230 ok( my $result = $bedtoolsfac->run( -bgv1 => $gene_bed,
231 -bgv2 => $rmsk_bed ),
232 "can run command '$command'" );
235 m/^pair_to_pair$/ && do {
236 is( $bedtoolsfac->type('neither'), 'neither',
237 "can set parameter -type => 'neither'" );
238 ok( my $result = $bedtoolsfac->run( -bedpe1 => $bedpe1_file,
239 -bedpe2 => $bedpe2_file ),
240 "can run command '$command'" );
243 m/^pair_to_bed$/ && do {
244 ok( my $result = $bedtoolsfac->run( -bedpe => $bedpe1_file,
245 -bgv => $bed3_file ),
246 "can run command '$command'" );
250 is( $bedtoolsfac->columns('2,3,5,6'), '2,3,5,6',
251 "can set parameter -columns => '2,3,5,6' " );
252 ok( my $result = $bedtoolsfac->run( -bed => $bedpe1_file, ),
253 "can run command '$command'" );
256 m/^b12_to_b6$/ && do {
257 ok( my $result = $bedtoolsfac->run( -bed => $bed12_file, ),
258 "can run command '$command'" );
261 m/^group_by$/ && do {
262 is( $bedtoolsfac->group(1), 1,
263 "can set parameter -group => 1 " );
264 is( $bedtoolsfac->columns('2,2,3,3'), '2,2,3,3',
265 "can set parameter -columns => '2,2,3,3' " );
266 is( $bedtoolsfac->operations('min,max,min,max'), 'min,max,min,max',
267 "can set parameter -operations => 'min,max,min,max' " );
268 ok( my $result = $bedtoolsfac->run( -bed => $bed3_file ),
269 "can run command '$command'" );
272 m/^graph_union$/ && do {
273 ok( my $result = $bedtoolsfac->run( -bg => [$bg1_file, $bg2_file, $bg2_file] ),
274 "can run command '$command'" );
278 # we should never get here - internal test
279 fail( "all commands tested - missed '$_'");
283 $v && diag(" check file has been written");
284 ok( eval { (-e $bedtoolsfac->result && -r _) }, "result files exists for command '$command'");
285 $v && diag(" check can get internal result format description and confirm it");
286 ok( my $format = $bedtoolsfac->result( -want => 'format' ),
287 "can return output format for command '$command'" );
288 like( $format, qr/(?:$format_lookup{$command})/,
289 "result claims to be in correct format for command '$command'" );
290 $v && diag(" check can get internal result file name value");
291 ok( my $file = $bedtoolsfac->result(-want=>'raw'),
292 "can return output file for command '$command'" );
293 if ($command eq 'links') {
294 $v && diag(" check result file is html and correct size");
295 ok eval { (-e $file)&&(-r _) }, "make readable output";
297 my $lines =(my $first_line)= <FILE>;
299 like( $first_line, qr/\<html\>/, " - html tag line");
300 is( $lines, $result_lookup{$command}, " - number of lines");
301 } elsif ($command eq 'genome_coverage') {
302 $v && diag(" check result file is correct size");
303 ok eval { (-e $file)&&(-r _) }, "make readable output";
305 my $lines =()= <FILE>;
307 is( $lines, $result_lookup{$command}, " - number of lines");
309 $v && diag(" check can get internal result format matches result file");
310 my $guesser = Bio::Tools::GuessSeqFormat->new( -file => $file );
311 for ($format_lookup{$command}) {
312 m/^(?:bed|bedpe|tab)$/ && do {
313 is( $guesser->guess, 'tab', "file format of '$file' consistent with claim for '$command'" );
317 is( $guesser->guess, 'fasta', "file format consistent with claim for '$command'" );
321 $v && diag(" check can get and set wanted result type");
322 is( $bedtoolsfac->want('Bio::Root::IO'), 'Bio::Root::IO',
323 "can set want to IO object for command '$command'" );
324 $v && diag(" check can get a Bio::Root::IO object");
325 ok( my $objres = $bedtoolsfac->result, "can get the basic object result for command '$command'" );
326 $v && diag(" - check can it is actually a Bio::Root::IO object");
327 isa_ok( $objres, 'Bio::Root::IO', "returned object is correct for command '$command'" );
328 for ($format_lookup{$command}) {
329 $v && diag(" check can can get format-specific result object if supported");
330 m/(?:bed|bedpe)/ && do {
331 $v && diag(" - Bio::SeqFeature::Collection");
332 ok( my $objres = $bedtoolsfac->result( -want => 'Bio::SeqFeature::Collection' ),
333 "can get the specific object result for command '$command'" );
334 isa_ok( $objres, 'Bio::SeqFeature::Collection',
335 "returned object is correct for command '$command'" );
336 $v && diag(" - correct number of features");
337 is( scalar $objres->get_all_features, $result_lookup{$command},
338 "correct number of features for command '$command'" );
342 $v && diag(" - Bio::SeqIO");
343 ok( my $objres = $bedtoolsfac->result( -want => 'Bio::SeqIO' ),
344 "can get the specific object result for command '$command'" );
345 isa_ok( $objres, 'Bio::SeqIO',
346 "returned object is correct for command '$command'" );
348 while( my $seq = $objres->next_seq ) {
351 $v && diag(" - correct number of sequences");
352 is( $seq_count, $result_lookup{$command}, "correct number of sequences for command '$command'" );