speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Minimo.pm
blob0329ba9c00e5c93d9f08b44243a6502ccca769b7
1 # BioPerl module for Bio::Tools::Run::Minimo
3 # Copyright Florent E Angly <florent-dot-angly-at-gmail-dot-com>
5 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
9 =head1 NAME
11 Bio::Tools::Run::Minimo - Wrapper for local execution of the Minimo assembler
13 =head1 SYNOPSIS
15 use Bio::Tools::Run::Minimo;
16 # Run Minimo using an input FASTA file
17 my $factory = Bio::Tools::Run::Minimo->new( -minimum_overlap_length => 35 );
18 my $asm_obj = $factory->run($fasta_file, $qual_file);
19 # An assembly object is returned by default
20 for my $contig ($asm_obj->all_contigs) {
21 ... do something ...
24 # Read some sequences
25 use Bio::SeqIO;
26 my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
27 my @seqs;
28 while (my $seq = $sio->next_seq()) {
29 push @seqs,$seq;
32 # Run Minimo using input sequence objects and returning an assembly file
33 my $asm_file = 'results.ace';
34 $factory->out_type($asm_file);
35 $factory->run(\@seqs);
37 =head1 DESCRIPTION
39 Wrapper module for the local execution of the DNA assembly program Minimo.
40 Minimo is based on AMOS (http://sourceforge.net/apps/mediawiki/amos/) and
41 implements the same conservative assembly algorithm as Minimus
42 (http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus).
44 =head1 FEEDBACK
46 =head2 Mailing Lists
48 User feedback is an integral part of the evolution of this and other Bioperl
49 modules. Send your comments and suggestions preferably to one of the Bioperl
50 mailing lists. Your participation is much appreciated.
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 =head2 Support
57 Please direct usage questions or support issues to the mailing list:
59 I<bioperl-l@bioperl.org>
61 rather than to the module maintainer directly. Many experienced and
62 responsive experts will be able to look at the problem and quickly
63 address it. Please include a thorough description of the problem
64 with code and data examples if at all possible.
66 =head2 Reporting Bugs
68 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
69 and their resolution. Bug reports can be submitted via the web:
71 http://redmine.open-bio.org/projects/bioperl/
73 =head1 AUTHOR - Florent E Angly
75 Email: florent-dot-angly-at-gmail-dot-com
77 =head1 APPENDIX
79 The rest of the documentation details each of the object methods. Internal
80 methods are usually preceded with a _
82 =cut
85 package Bio::Tools::Run::Minimo;
87 use strict;
88 use IPC::Run;
89 use File::Copy;
90 use File::Spec;
91 use File::Basename;
93 use base qw( Bio::Root::Root Bio::Tools::Run::AssemblerBase );
# Package-level configuration describing the Minimo command line. new() passes
# @program_params, @program_switches, %param_translation, $qual_param,
# $use_dash and $join to _set_program_options(), and $asm_format to
# _assembly_format().
95 our $program_name = 'Minimo'; # name of the executable
# Names of the options that take a value.
96 our @program_params = (qw( qual_in good_qual bad_qual min_len min_ident aln_wiggle out_prefix ace_exp ));
# Declared but left empty: no value-less switches are listed for Minimo here.
97 our @program_switches;
# Maps each option name above to the 'D NAME' text used on the command line
# (_run notes that options are of the style: -D PARAM=VAL).
98 our %param_translation = (
99 'qual_in' => 'D QUAL_IN',
100 'good_qual' => 'D GOOD_QUAL',
101 'bad_qual' => 'D BAD_QUAL',
102 'min_len' => 'D MIN_LEN',
103 'min_ident' => 'D MIN_IDENT',
104 'aln_wiggle' => 'D ALN_WIGGLE',
105 'out_prefix' => 'D OUT_PREFIX',
106 'ace_exp' => 'D ACE_EXP'
# Name of the option that carries the quality-score file.
109 our $qual_param = 'qual_in';
# NOTE(review): presumably $use_dash adds the leading '-' and $join is the
# separator placed between option and value, giving '-D PARAM=VAL' -- confirm
# against Bio::Tools::Run::AssemblerBase.
110 our $use_dash = 1;
111 our $join = '=';
# Assembly format registered through _assembly_format() in new().
112 our $asm_format = 'ace';
115 =head2 new
117 Title : new
118 Usage : $assembler->new( -min_len => 50,
119 -min_ident => 95 );
120 Function: Creates a Minimo factory
121 Returns : A Bio::Tools::Run::Minimo object
122 Args : Minimo options available in this module:
123 qual_in Input quality score file
124 good_qual Quality score to set for bases within the clear
125 range if no quality file was given (default: 30)
126 bad_qual Quality score to set for bases outside clear range
127 if no quality file was given (default: 10). If your
128 sequences are trimmed, try the same value as GOOD_QUAL.
129 min_len / minimum_overlap_length
130 Minimum contig overlap length (between 20 and 100 bp,
131 default: 35)
132 min_ident / minimum_overlap_similarity
133 Minimum contig overlap identity percentage (between 0
134 and 100 %, default: 98)
135 aln_wiggle Alignment wiggle value when determining the consensus
136 sequence (default: 2 bp)
137 out_prefix Prefix to use for the output file path and name
139 =cut
# Constructor: builds the object through the parent class, registers the
# Minimo options, installs the long-name aliases and sets defaults.
# Args    : the -option => value list given by the caller
# Returns : the new Bio::Tools::Run::Minimo object
141 sub new {
142 my ($class,@args) = @_;
143 my $self = $class->SUPER::new(@args);
# Hand the caller's arguments and the package-level option tables to the
# inherited option machinery.
144 $self->_set_program_options(\@args, \@program_params, \@program_switches,
145 \%param_translation, $qual_param, $use_dash, $join);
# Make minimum_overlap_length / minimum_overlap_similarity package-level
# aliases of min_len / min_ident. The glob assignment runs on every call to
# new().
# NOTE(review): min_len and min_ident are not defined in this file; presumably
# they are accessors provided via _set_program_options -- confirm.
146 *minimum_overlap_length = \&min_len;
147 *minimum_overlap_similarity = \&min_ident;
# Only apply the default executable name when none is set yet.
148 $self->program_name($program_name) if not defined $self->program_name();
149 $self->_assembly_format($asm_format);
150 return $self;
154 =head2 out_type
156 Title : out_type
157 Usage : $factory->out_type('Bio::Assembly::ScaffoldI')
158 Function: Get/set the desired type of output
159 Returns : The type of results to return
160 Args : Desired type of results to return (optional):
161 'Bio::Assembly::IO' object
162 'Bio::Assembly::ScaffoldI' object (default)
163 The name of a file to save the results in
165 =cut
168 =head2 run
170 Title : run
171 Usage : $factory->run($fasta_file);
172 Function: Run Minimo
173 Returns : - a Bio::Assembly::ScaffoldI object, a Bio::Assembly::IO
174 object, a filename, or undef if all sequences were too small to
175 be usable
176 Returns : Assembly results (file, IO object or assembly object)
177 Args : - sequence input (FASTA file or sequence object arrayref)
178 - optional quality score input (QUAL file or quality score object
179 arrayref)
180 =cut
183 =head2 _run
185 Title : _run
186 Usage : $factory->_run()
187 Function: Make a system call and run Minimo
188 Returns : An assembly file
189 Args : - FASTA file
190 - optional QUAL file
192 =cut
sub _run {
    # Run the Minimo executable on a FASTA file and return the path of the
    # resulting ACE assembly file.
    #
    # Args   : $fasta_file - path of the FASTA input handed to Minimo
    #          $qual_file  - accepted for interface compatibility; it is not
    #                        referenced in this method (NOTE(review): presumably
    #                        the quality file reaches Minimo through the qual_in
    #                        option -- confirm against the caller)
    # Returns: path of the output file that holds the cleaned ACE assembly
    # Throws : via $self->throw() when the program fails or when the ACE file
    #          cannot be moved to its destination
    my ($self, $fasta_file, $qual_file) = @_;

    # qual_in    Input quality score file
    # fasta_exp  Export results in FASTA format (0:no 1:yes, default: 1)
    # ace_exp    Export results in ACE format (0:no 1:yes, default: 1)

    # Specify that we want an ACE output file
    $self->ace_exp(1);

    # Setup needed files and filehandles first
    my ($output_fh, $output_file) = $self->_prepare_output_file( );
    my ($stdout_fh, $stdout_file) = $self->io->tempfile( -dir => $self->tempdir() );

    # Get program executable
    my $exe = $self->executable;

    # Get command-line options
    my $options = $self->_translate_params();

    # Usage: Minimo FASTA_IN [options]
    # Options are of the style: -D PARAM=VAL
    my @program_args = ( $exe, $fasta_file, @$options );
    my @ipc_args     = ( \@program_args, '>', $stdout_file );

    # Print command for debugging
    if ($self->verbose() >= 0) {
        my $cmd = join ' ', @program_args;
        # Append the redirection part; scalar references are shown by value.
        for my $element ( @ipc_args[ 1 .. $#ipc_args ] ) {
            my $ref   = ref($element);
            my $value = ( $ref && $ref eq 'SCALAR' ) ? $$element : $element;
            $cmd .= " $value";
        }
        $self->debug( "$exe command = $cmd\n" );
    }

    # Execute command. IPC::Run::run() returns false when the child exits with
    # a non-zero status; that status lives in $?, not in $! (errno), so the
    # message reports $?. The eval block ends with a true value so that success
    # is detected from its return value rather than from $@ alone.
    my $log_file = "$fasta_file.runAmos.log";
    my $ran_ok = eval {
        if ( not IPC::Run::run(@ipc_args) ) {
            my $wait_status = $?;
            my $exit_code   = $wait_status >> 8;
            die("There was a problem running $exe. It exited with status ".
                "$exit_code (wait status $wait_status). Check the log file ".
                "$log_file for possible causes.");
        }
        1;
    };
    if (not $ran_ok) {
        $self->throw("$exe call crashed: $@");
    }

    # Close filehandles
    close($output_fh);
    close($stdout_fh);

    # Result files: Minimo names them after out_prefix or, when that is not
    # set, after the FASTA file path with its last extension removed.
    my $base = $self->out_prefix();
    if (not defined $base) {
        my $dirname  = dirname($fasta_file);
        my $basename = basename($fasta_file);
        $basename =~ s/^(.+)\..+$/$1/;
        $base = File::Spec->catfile($dirname, $basename);
    }
    my $ace_file  = "$base-contigs.ace";
    my $amos_file = "$base-contigs.afg";

    # Remove all files except for the ACE file
    for my $file ($log_file, $stdout_file, $amos_file) {
        unlink $file;
    }

    # Clean the ACE file
    $self->_clean_file($ace_file);

    # Move the ACE file to its final destination
    move ($ace_file, $output_file) or $self->throw("Could not move file ".
        "'$ace_file' to '$output_file': $!");

    return $output_file;
}
278 =head2 _clean_file
280 Title : _clean_file
281 Usage : $factory->_clean_file($file)
282 Function: Clean file in place by removing NULL characters. NULL characters
283 can be present in the output files of AMOS 2.0.8 but they do not
284 validate as proper sequence characters in Bioperl.
285 Returns : 1 for success
286 Args : Filename
288 =cut
sub _clean_file {
    # Strip NULL characters from a file, editing it in place.
    #
    # Args   : $file - path of the file to clean
    # Returns: 1 for success
    #
    # Perl's in-place edit mode is used with a backup suffix. The backup copy
    # ("$file~") is deleted afterwards so that no stray file is left next to
    # the cleaned one.
    my ($self, $file) = @_;
    my $backup_suffix = '~';
    {
        # Set in-place file editing mode for this scope only; $_ is localized
        # because the <> loop below assigns to the global $_.
        local $^I   = $backup_suffix;
        local @ARGV = ( $file );
        local $_;
        # Replace lines in file: print goes to the rewritten file while <>
        # reads from the backup copy.
        while (<>) {
            s/\x0//g;
            print;
        }
    }
    # Drop the backup created by the in-place edit.
    unlink "$file$backup_suffix";
    return 1;
}