speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Bowtie.pm
blobc2d5a51d226a3a6e9dd454af2e2fb20cc3611f19
1 # $Id: Bowtie.pm kortsch $
3 # BioPerl module for Bio::Tools::Run::Bowtie
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Dan Kortschak <dan.kortschak@adelaide.edu.au>
9 # Copyright Dan Kortschak and Mark A. Jensen
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::Run::Bowtie - Run wrapper for the Bowtie short-read assembler *BETA*
19 =head1 SYNOPSIS
21 # create an index
22 $bowtie_build = Bio::Tools::Run::Bowtie->new();
23 $index = $bowtie_fac->run( 'reference.fasta', 'index_base' );
25 # or with named args...
27 $index = $bowtie_fac->run( -ref => 'reference.fasta', -ind => 'index_base' );
29 # get the base name of the last index from an index builder
30 $index = $bowtie_fac->result;
32 # create an assembly
33 $bowtie_fac = Bio::Tools::Run::Bowtie->new();
34 $bowtie_fac->want('Bio::Assembly::Scaffold');
35 $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base' );
37 # if IO::Uncompress::Gunzip is available and with named args...
38 $bowtie_assy = $bowtie_fac->run( -seq => 'reads.fastq.gz', -ind => 'index_base' );
40 # paired-end
41 $bowtie_fac = Bio::Tools::Run::Bowtie->new(-command => 'paired',
42 -want => 'Bio::Assembly::Scaffold');
43 $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base', 'paired-reads.fastq' );
45 # be more strict
46 $bowtie_fac->set_parameters( -max_qual_mismatch => 50 );
48 # create a Bio::Assembly::Scaffold object
49 $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base', 'paired-reads.fastq' );
51 # print consensus sequences from assembly object
52 for $contig ($bowtie_assy->all_contigs) {
53 print $contig->get_consensus_sequence->seq,"\n";
56 # get the file object of the last assembly
57 $io = $bowtie_fac->result( -want => 'Bio::Root::IO' );
59 # get a merged SeqFeature::Collection of all hits
60 # - currently only available with SAM format
61 $io = $bowtie_fac->result( -want => 'Bio::SeqFeature::Collection' );
63 #... or the file name directly
64 $filename = $bowtie_fac->result( -want => 'raw' );
66 =head1 DESCRIPTION
68 This module provides a wrapper interface for Ben Langmead and Col
69 Trapnell's ultrafast memory-efficient short read aligner C<bowtie>
70 (see L<http://bowtie-bio.sourceforge.net/> for manuals and downloads).
73 =head1 OPTIONS
75 C<bowtie> is complex, with many command-line options. This module attempts to
76 provide and options comprehensively. You can browse the choices like so:
78 $bowtiefac = Bio::Tools::Run::Bowtie->new( -command => 'single' );
79 # all bowtie commands
80 @all_commands = $bowtiefac->available_parameters('commands');
81 @all_commands = $bowtiefac->available_commands; # alias
82 # just for single
83 @assemble_params = $bowtiefac->available_parameters('params');
84 @assemble_switches = $bowtiefac->available_parameters('switches');
85 @assemble_all_options = $bowtiefac->available_parameters();
87 Reasonably mnemonic names have been assigned to the single-letter
88 command line options. These are the names returned by
89 C<available_parameters>, and can be used in the factory constructor
90 like typical BioPerl named parameters.
92 As a number of options are mutually exclusive, and the interpretation of
93 intent is based on last-pass option reaching bowtie with potentially unpredicted
94 results. This module will prevent inconsistent switches and parameters
95 from being passed.
97 See L<http://bowtie.sourceforge.net/bowtie-manpage.shtml> for details of bowtie
98 options.
100 =head1 FILES
102 When a command requires filenames, these are provided to the C<run> method, not
103 the constructor (C<new()>). To see the set of files required by a command, use
104 C<available_parameters('filespec')> or the alias C<filespec()>:
106 $bowtiefac = Bio::Tools::Run::Bowtie->new( -command => 'paired' );
107 @filespec = $bowtiefac->filespec;
109 This example returns the following array:
113 seq2
114 #out
116 This indicates that ind (C<bowtie> index file base name), seq (fasta/fastq),and seq2
117 (fasta/fastq) files MUST be specified, and that the out file MAY be specified. Use
118 these in the C<run> call like so:
120 $bowtiefac->run( -ind => 'index_base', -seq => 'seq-a.fq',
121 -seq2 => 'seq-b.fq', -out => 'align.out' );
123 Note that named parameters in this form allow you to specify the location of the outfile;
124 without named parameters, the outfile is located in a tempdir and does not persist beyond
125 the life of the object - with the exception of index creation.
127 The object will store the programs STDOUT and STDERR output for you in the C<stdout()>
128 and C<stderr()> attributes:
130 handle_map_warning($bowtiefac) if ($bowtiefac->stderr =~ /warning/);
132 =head1 FEEDBACK
134 =head2 Mailing Lists
136 User feedback is an integral part of the evolution of this and other
137 Bioperl modules. Send your comments and suggestions preferably to
138 the Bioperl mailing list. Your participation is much appreciated.
140 bioperl-l@bioperl.org - General discussion
141 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
143 =head2 Support
145 Please direct usage questions or support issues to the mailing list:
147 L<bioperl-l@bioperl.org>
149 Rather than to the module maintainer directly. Many experienced and
150 reponsive experts will be able look at the problem and quickly
151 address it. Please include a thorough description of the problem
152 with code and data examples if at all possible.
154 =head2 Reporting Bugs
156 Report bugs to the Bioperl bug tracking system to help us keep track
157 of the bugs and their resolution. Bug reports can be submitted via
158 the web:
160 http://redmine.open-bio.org/projects/bioperl/
162 =head1 AUTHOR - Dan Kortschak
164 Email dan.kortschak adelaide.edu.au
166 =head1 CONTRIBUTORS
168 Mark A. Jensen (maj -at- fortinbras -dot- us)
170 =head1 APPENDIX
172 The rest of the documentation details each of the object methods.
173 Internal methods are usually preceded with a _
175 =cut
177 # Let the code begin...
180 package Bio::Tools::Run::Bowtie;
181 use strict;
182 our $HAVE_IO_UNCOMPRESS;
184 BEGIN {
185 eval 'require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1';
188 use IPC::Run;
190 # Object preamble - inherits from Bio::Root::Root
192 use lib '../../..';
193 use Bio::Tools::Run::Bowtie::Config;
194 use Bio::Tools::Run::WrapperBase;
195 use Bio::Tools::Run::WrapperBase::CommandExts;
196 use Bio::Tools::GuessSeqFormat;
197 use Bio::Tools::Run::Samtools;
198 use Bio::Seq;
199 use File::Basename;
201 use base qw( Bio::Tools::Run::WrapperBase Bio::Tools::Run::AssemblerBase );
203 ## bowtie
204 our $program_name = '*bowtie';
205 our $default_cmd = 'single';
207 our $asm_format; # this is determined dynamically
209 # Note:
210 # other globals required by Bio::Tools::Run::AssemblerBase are
211 # imported from Bio::Tools::Run::Bowtie::Config
213 our $qual_param = undef;
214 our $use_dash = 'mixed';
215 our $join = ' ';
217 =head2 new()
219 Title : new
220 Usage : my $obj = new Bio::Tools::Run::Bowtie();
221 Function: Builds a new Bio::Tools::Run::Bowtie object
222 Returns : an instance of Bio::Tools::Run::Bowtie
223 Args :
225 =cut
227 sub new {
228 my ($class,@args) = @_;
229 unless (grep /command/, @args) {
230 push @args, '-command', $default_cmd;
232 #default to SAM output if no other format specified and we are running an alignment
233 my %args=@args;
234 if ($args{'-command'} =~ m/(?:single|paired|crossbow)/) {
235 unless (grep /(?:sam_format|concise|quiet|refout|refidx)/, @args) {
236 push @args, ('-sam_format', 1);
239 my $self = $class->SUPER::new(@args);
240 foreach (keys %command_executables) {
241 $self->executables($_, $self->_find_executable($command_executables{$_}));
243 my ($want) = $self->_rearrange([qw(WANT)],@args);
244 $self->want($want);
245 $asm_format = $self->_assembly_format;
246 $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
247 return $self;
250 =head2 run()
252 Title : run
253 Usage : $assembly = $bowtie_assembler->run($read1_fastq_file,
254 $index_location,
255 $read2_fastq_file);
256 $assembly = $bowtie_assembler->run(%params);
257 Function: Run the bowtie assembly pipeline.
258 Returns : Assembly results (file, IO object or Assembly object)
259 Args : - fastq file containing single-end reads
260 - name of the base of the bowtie index
261 - [optional] fastq file containing paired-end reads
262 Named params are also available with args:
263 -seq, -seq2, -ind (bowtie index), -ref (fasta reference) and -out
264 Note : gzipped inputs are allowed if IO::Uncompress::Gunzip
265 is available
266 The behaviour for locating indexes follows the definition in
267 the bowtie manual - you may use the environment variable
268 BOWTIE_INDEXES to specify the index path or use an 'indexes'
269 directory under the directory where the bowtie executable
270 is located
272 =cut
274 sub run {
275 my $self = shift;
277 my ($arg1, $arg2, $arg3); # these are useless names because the different
278 # programs take very different arguments
279 my ($index, $seq, $seq2, $ref, $out); # these are the meaningful names that are used
280 # with named args
282 if (!(@_ % 2)) {
283 my %args = @_;
284 if ((grep /^-\w+/, keys %args) == keys %args) {
285 ($index, $seq, $seq2, $ref, $out) =
286 $self->_rearrange([qw( IND SEQ SEQ2 REF OUT )], @_);
287 } elsif (grep /^-\w+/, keys %args) {
288 $self->throw("Badly formed named args: ".join(' ',@_));
289 } else {
290 ($arg1, $arg2) = @_;
292 } else {
293 if (grep /^-\w+/, @_) {
294 $self->throw("Badly formed named args: ".join(' ',@_));
295 } else {
296 ($arg1, $arg2, $arg3) = @_;
300 # Sanity checks
301 $self->_check_executable();
302 my $cmd = $self->command if $self->can('command');
304 for ($cmd) {
305 m/(?:single|paired|crossbow)/ && do {
306 $seq ||= $arg1;
307 $index ||= $arg2;
308 $seq2 ||= $arg3;
309 $seq or $self->throw("Fasta/fastq/raw read(s) file/Bio::Seq required at arg 1/-seq");
310 $index or $self->throw("Bowtie index base required at arg 2/-index");
312 # expand gzipped files as nec.
313 for ($seq, $seq2) {
314 next unless $_;
315 if (/\.gz[^.]*$/) {
316 unless ($HAVE_IO_UNCOMPRESS) {
317 croak( "IO::Uncompress::Gunzip not available, can't expand '$_'" );
319 my ($tfh, $tf) = $self->io->tempfile;
320 my $z = IO::Uncompress::Gunzip->new($_);
321 while (<$z>) { print $tfh $_ }
322 close $tfh;
323 $_ = $tf;
327 # confirm index files exist
328 $self->_validate_file_input( -ind => $index ) or
329 ($self->_validate_file_input( -ind => $self->io->catfile(dirname($self->executable),'indexes',$index)) and
330 $index = $self->io->catfile(dirname($self->executable),'indexes',$index)) or
331 ($self->_validate_file_input( -ind => $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) and
332 $index = $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) or
333 $self->throw("Incorrect filetype (expecting bowtie index) or absent file arg 2/-index");
335 # bowtie prepare the multiple input types
336 $seq = $self->_prepare_input_sequences($seq);
337 if ($cmd =~ m/^p/) {
338 $seq2 && ($seq2 = $self->_prepare_input_sequences($seq2));
339 } else {
340 $seq2 && $self->throw("Second sequence input not wanted for command: $cmd");
343 # Assemble
344 my $format = $self->_assembly_format;
345 my $suffix = '.'.$format;
347 if ($out) {
348 $out .= $suffix;
349 } else {
350 my ($bowtieh, $bowtief) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix );
351 $bowtieh->close;
352 $out = $bowtief;
355 my %params = ( -ind => $index, -seq => $seq, -seq2 => $seq2, -out => $out );
356 map {
357 delete $params{$_} unless defined $params{$_}
358 } keys %params;
359 $self->_run(%params);
361 $self->{'_result'}->{'index'} = $index;
362 $self->{'_result'}->{'file_name'} = $out;
363 $self->{'_result'}->{'format'} = $format;
364 $self->{'_result'}->{'file'} = Bio::Root::IO->new( -file => $out );
366 return $self->result;
369 m/build/ && do {
370 $ref ||= $arg1;
371 $index ||= $arg2;
372 $ref or $self->throw("Fasta read(s) file/Bio::Seq required at arg 1/-ref");
373 $index ||= $self->io->tempdir(CLEANUP => 1).'/index'; # we want a new one each time
374 $arg3 && $self->throw("Second sequence input not wanted for command: $cmd");
376 my $format = $self->_assembly_format;
378 # expand gzipped file as nec.
379 if ($ref =~ (m/\.gz[^.]*$/)) {
380 unless ($HAVE_IO_UNCOMPRESS) {
381 croak( "IO::Uncompress::Gunzip not available, can't expand '$_'" );
383 my ($tfh, $tf) = $self->io->tempfile;
384 my $z = IO::Uncompress::Gunzip->new($_);
385 while (<$z>) { print $tfh $_ }
386 close $tfh;
387 $ref = $tf;
390 # bowtie prepare the two input types for the first argument
391 $ref = $self->_prepare_input_sequences($ref);
393 # Build index
394 $self->_run( -ref => $ref, -out => $index );
395 $self->{'_result'}->{'format'} = $format;
396 $self->{'_result'}->{'file_name'} = $index;
398 return $index;
401 m/inspect/ && do {
402 $index ||= $arg1;
403 $out ||= $arg2;
404 $index or $self->throw("Bowtie index required at arg 1");
406 $self->_validate_file_input( -ind => $index ) or
407 ($self->_validate_file_input( -ind => $self->io->catfile(dirname($self->executable),'indexes',$index)) and
408 $index = $self->io->catfile(dirname($self->executable),'indexes',$index)) or
409 ($self->_validate_file_input( -ind => $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) and
410 $index = $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) or
411 $self->throw("'$index' doesn't look like a bowtie index or index component is missing at arg 1/-ind");
412 $arg3 && $self->throw("Second sequence input not wanted for command: $cmd");
414 # Inspect index
415 my $format = $self->_assembly_format;
416 my $suffix = '.'.$format;
418 if ($out) {
419 $out .= $suffix;
420 } else {
421 my ($desch, $descf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix );
422 $desch->close;
423 $out = $descf;
426 $self->_run( -ind => $index, -out => $out );
428 $self->{'_result'}->{'file_name'} = $out;
429 $self->{'_result'}->{'format'} = $format;
430 $self->{'_result'}->{'file'} = Bio::Root::IO->new( -file => $out );
432 return $self->result;
437 =head2 want()
439 Title : want
440 Usage : $bowtiefac->want( $class )
441 Function: make factory return $class, or raw (scalar) results in file
442 Returns : return wanted type
443 Args : [optional] string indicating class or raw of wanted result
445 =cut
447 sub want {
448 my $self = shift;
449 return $self->{'_want'} = shift if @_;
450 return $self->{'_want'};
453 =head2 result()
455 Title : result
456 Usage : $bowtiefac->result( [-want => $type|$format] )
457 Function: return result in wanted format
458 Returns : results
459 Args : [optional] hashref of wanted type
461 =cut
463 sub result {
464 my ($self, @args) = @_;
466 my $want = $self->want ? $self->want : $self->want($self->_rearrange([qw(WANT)],@args));
467 my $cmd = $self->command if $self->can('command');
468 my $format = $self->{'_result'}->{'format'};
470 return $self->{'_result'}->{'format'} if (defined $want && $want eq 'format');
471 return $self->{'_result'}->{'file_name'} if (!$want || $want eq 'raw' || $cmd eq 'build');
472 return $self->{'_result'}->{'file'} if ($want =~ m/^Bio::Root::IO/);
474 for ($cmd) {
475 m/(?:single|paired|crossbow)/ && do {
476 my $scaffold;
477 for ($format) {
478 m/^bowtie/i && $want =~ m/^Bio::Assembly::Scaffold/ && do {
479 unless (defined $self->{'_result'}->{'object'} &&
480 ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
481 $self->{'_result'}->{'object'} =
482 $self->_export_results( $self->{'_result'}->{'file_name'},
483 -index => $self->{'_result'}->{'index'},
484 -keep_asm => 1 );
486 last;
488 m/^bowtie/i && $want =~ m/^Bio::SeqFeature::Collection/ && do {
489 $self->warn("Don't know how to create a $want object for $cmd with bowtie format - try SAM format.");
490 last;
492 m/^sam/i && $want =~ m/^Bio::Assembly::Scaffold/ && do {
493 unless (defined $self->{'_result'}->{'object'} &&
494 ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
495 my $bamf = $self->_make_bam($self->{'_result'}->{'file_name'}, 1);
496 my $inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
497 my $refdb = $inspector->run($self->{'_result'}->{'index'});
498 $self->{'_result'}->{'object'} =
499 $self->_export_results($bamf, -refdb => $refdb, -keep_asm => 1 );
501 last;
503 m/^sam/i && $want =~ m/^Bio::SeqFeature::Collection/ && do {
504 unless (defined $self->{'_result'}->{'object'} &&
505 ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
506 my $bamf = $self->_make_bam($self->{'_result'}->{'file_name'}, 0);
507 my $convert = Bio::Tools::Run::BEDTools->new( -command => 'bam_to_bed' );
508 my $bedf = $convert->run( -bed => $bamf );
509 my $merge = Bio::Tools::Run::BEDTools->new( -command => 'merge' );
510 $merge->run($self->{'_result'}->{'index'});
511 $self->{'_result'}->{'object'} = $merge->result( -want => $want );
513 last;
515 do {
516 $self->warn("Don't know how to create a $want object for $cmd.");
517 return;
520 last;
522 m/inspect/ && do {
523 for ($want) {
524 m/^Bio::SeqIO/ && $format eq 'fasta' && do {
525 unless (defined $self->{'_result'}->{'object'} &&
526 ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqIO/) {
527 $self->{'_result'}->{'object'} =
528 Bio::SeqIO->new(-file => $self->{'_result'}->{'file'},
529 -format => 'fasta');
531 last;
533 m/^Bio::SeqIO/ && $format ne 'fasta' && do {
534 $self->warn("Don't know how to create a $want object for names only - try -want => 'Bio::Root::IO'.");
535 return;
537 do {
538 $self->warn("Don't know how to create a $want object for $cmd.");
539 return;
545 return $self->{'_result'}->{'object'};
548 =head2 _determine_format()
550 Title : _determine_format
551 Usage : $bowtiefac->_determine_format
552 Function: determine the format of output for current options
553 Returns : format of bowtie output
554 Args :
556 =cut
558 sub _determine_format {
559 my ($self) = shift;
561 my $cmd = $self->command if $self->can('command');
562 for ($cmd) {
563 m/build/ && do {
564 return 'ebwt';
566 m/inspect/ && do {
567 $self->{'_summary'} && return 'text';
568 return $self->{'_names_only'} ? 'text' : 'fasta';
570 m/(?:single|paired|crossbow)/ && do {
571 my $format = 'bowtie'; # this is our default position
572 for (keys %format_lookup) {
573 $format = $format_lookup{$_} if $self->{'_'.$_};
575 return $format;
580 =head2 _make_bam()
582 Title : _make_bam
583 Usage : $bowtiefac->_make_bam( $file, $sort )
584 Function: make a sorted BAM format file from SAM file
585 Returns : sorted BAM file name
586 Args : SAM file name and boolean flag to select sorted BAM format
588 =cut
590 sub _make_bam {
591 my ($self, $file, $sort) = @_;
593 $self->throw("'$file' does not exist or is not readable")
594 unless ( -e $file && -r _ );
596 # make a sorted bam file from a sam file input
597 my ($bamh, $bamf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => '.bam' );
598 $bamh->close;
600 my $samt = Bio::Tools::Run::Samtools->new( -command => 'view',
601 -sam_input => 1,
602 -bam_output => 1 );
604 $samt->run( -bam => $file, -out => $bamf );
606 if ($sort) {
607 my ($srth, $srtf) = $self->io->tempfile( -dir => $self->io->tempdir(CLEANUP=>1), -suffix => '.srt' );
608 # shared tempdir, so make new - otherwise it is scrubbed during Bio::DB::Sam
609 $srth->close;
611 $samt = Bio::Tools::Run::Samtools->new( -command => 'sort' );
612 $samt->run( -bam => $bamf, -pfx => $srtf);
614 return $srtf.'.bam';
615 } else {
616 return $bamf;
620 =head2 _validate_file_input()
622 Title : _validate_file_input
623 Usage : $bowtiefac->_validate_file_input( -type => $file )
624 Function: validate file type for file spec
625 Returns : file type if valid type for file spec
626 Args : hash of filespec => file_name
628 =cut
630 sub _validate_file_input {
631 my ($self, @args) = @_;
632 my (%args);
633 if (grep (/^-/, @args)) { # named parms
634 $self->throw("Wrong number of args - requires one named arg") if (@args > 2);
635 s/^-// for @args;
636 %args = @args;
637 } else {
638 $self->throw("Must provide named filespec");
641 for (keys %args) {
642 m/^seq|seq2|ref$/ && do {
643 return unless ( -e $args{$_} && -r _ );
644 my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$args{$_});
645 return $guesser->guess if grep {$guesser->guess =~ m/$_/} @{$accepted_types{$_}};
647 m/^ind$/ && do {
648 return 'ebwt' if
649 (-e $args{$_}.'.1.ebwt' && -e $args{$_}.'.2.ebwt' && -e $args{$_}.'.3.ebwt' &&
650 -e $args{$_}.'.4.ebwt' && -e $args{$_}.'.rev.1.ebwt' && -e $args{$_}.'.rev.2.ebwt');
653 return;
656 =head1 Bio::Tools::Run::AssemblerBase overrides
658 =head2 _assembly_format()
660 Title : _assembly_format
661 Usage : $bowtiefac->_determine_format
662 Function: set the format of output for current options
663 Returns : format of bowtie output
664 Args :
666 =cut
668 sub _assembly_format {
669 my $self = shift;
671 my $format = $self->_determine_format;
672 return $self->SUPER::_assembly_format($format);
676 =head2 _check_sequence_input()
678 No-op.
680 =cut
682 sub _check_sequence_input {
683 return 1;
686 =head2 _check_optional_quality_input()
688 No-op.
690 =cut
692 sub _check_optional_quality_input {
693 return 1;
696 =head2 _prepare_input_sequences()
698 Prepare and check input sequences for bowtie.
700 =cut
702 sub _prepare_input_sequences {
704 my ($self, @args) = @_;
705 my (%args, $read);
706 if (grep (/^-/, @args)) { # named parms
707 $self->throw("Input args not an even number") unless !(@args % 2);
708 %args = @args;
709 ($read) = @args{qw( -sequence )};
710 } else {
711 ($read) = @args;
714 # Could use the AssemblerBase routine for this, except that would not permit
715 # an array of strings
716 if ($self->inline) { # expect inline data
717 if (UNIVERSAL::isa($read,'can') && $read->isa("Bio::PrimarySeqI")) { # we have a Bio::*Seq*
718 $read=$read->seq();
719 } else { # we have something else
720 if (ref($read) =~ /ARRAY/i) {
721 my @ts;
722 foreach my $seq (@$read) {
723 if ($seq->isa("Bio::PrimarySeqI")) {
724 $seq=$seq->seq();
725 } else {
726 next if $read=~m/[[^:alpha:]]/;
728 push @ts,$seq;
730 $self->throw("bowtie requires at least one sequence read") unless (@ts);
731 if (@ts>1) {
732 $read="'".join(',',@ts)."'";
733 } else {
734 ($read)=@ts;
736 } else { #must be a string... fail if non-alpha
737 $self->throw("bowtie requires at least one valid sequence read") if $read=~m/[[^:alpha:]]/;
740 } else { # expect file(s) - so test whether it's/they're appropriate
741 # and make a comma-separated list of filenames
742 my @ts = (ref($read) =~ /ARRAY/i) ? @$read : ($read);
743 for my $file (@ts) {
744 if ( -e $file ) {
745 my $cmd = $self->command if $self->can('command');
746 my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
747 for ($guesser->guess) {
748 m/^fasta$/ && do {
749 $cmd =~ m/^b/ && last;
750 ($cmd =~ m/^c/ or $self->fastq or $self->raw) and $self->throw("Fasta reads file '$file' inappropriate");
751 $self->fasta(1);
752 last;
754 m/^fastq$/ && do {
755 ($cmd =~ m/^[cb]/ or $self->fasta or $self->raw) and $self->throw("Fastq reads file '$file' inappropriate");
756 $self->fastq(1);
757 last;
759 m/^tab$/ && do {
760 $cmd =~ m/^c/ or $self->throw("Crossbow reads file '$file' inappropriate"); # this is unrecoverable since the object has default program defined
761 last;
763 m/^raw$/ && do {
764 ($cmd =~ m/^[cb]/ or $self->fasta or $self->fastq) and $self->throw("Raw reads file '$file' inappropriate");
765 $self->raw(1);
766 last;
768 do {
769 $self->throw("File '$file' not a recognised bowtie input filetype");
772 } else {
773 $self->throw("Sequence read file '$file' does not exist");
776 if (@ts>1) {
777 $read="'".join(',',@ts)."'";
778 } else {
779 ($read)=@ts;
783 return $read;
786 =head2 set_parameters()
788 Title : set_parameters
789 Usage : $bowtiefac->set_parameters(%params);
790 Function: sets the parameters listed in the hash or array,
791 maintaining sane options.
792 Returns : true on success
793 Args : [optional] hash or array of parameter/values.
794 Note : This will unset conflicts and set required options,
795 but will not prevent non-sane requests in the arguments
797 =cut
799 sub set_parameters {
800 my ($self, @args) = @_;
802 # Mutually exclusive switches/params prevented from being set to
803 # avoid confusion resulting from setting incompatible switches.
805 $self->throw("Input args not an even number") if (@args % 2);
806 my %args = @args;
809 foreach (keys %args) {
810 my @added;
811 my @removed;
812 s/^-//;
813 foreach my $conflict (@{$incompat_params{$_}}) {
814 return if grep /$conflict/, @added;
815 delete $args{'-'.$conflict};
816 $args{'-'.$conflict} = undef if $self->{'_'.$conflict};
817 push @removed, $conflict;
819 foreach my $requirement (@{$corequisite_switches{$_}}) {
820 return if grep /$requirement/, @removed;
821 $args{'-'.$requirement}=1 if $args{$_};
822 push @added, $requirement;
826 return $self->SUPER::set_parameters(%args);
829 =head2 version()
831 Title : version
832 Usage : $version = $bowtiefac->version()
833 Function: Returns the program version (if available)
834 Returns : string representing location and version of the program
836 =cut
838 sub version{
839 my ($self) = @_;
841 my $cmd = $self->command if $self->can('command');
843 defined $cmd or $self->throw("No command defined - cannot determine program executable");
845 my ($in, $out, $err);
846 my $dum;
847 $in = \$dum;
848 $out = \$self->{'stdout'};
849 $err = \$self->{'stderr'};
851 # Get program executable
852 my $exe = $self->executable;
853 # Get version switch from switches, translate and dash it
854 my $version_switch = $param_translation{"$command_prefixes{$cmd}|version"};
855 $version_switch = $self->_dash_switch( $version_switch );
857 my @ipc_args = ( $exe, $version_switch );
859 eval {
860 IPC::Run::run(\@ipc_args, $in, $out, $err) or
861 die ("There was a problem running $exe : $!");
863 if ($@) {
864 $self->throw("$exe call crashed: $@");
867 my @details = split("\n",$self->stdout);
868 (my $version) = grep /$exe version [[:graph:]]*$/, @details;
869 $version =~ s/version //;
870 (my $addressing) = grep /-bit$/, @details;
872 return $version.' '.$addressing;
875 sub available_commands { shift->available_parameters('commands') };
877 sub filespec { shift->available_parameters('filespec') };