1 # BioPerl module for Bio::Tools::Run::Minimo
3 # Copyright Florent E Angly <florent-dot-angly-at-gmail-dot-com>
5 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
11 Bio::Tools::Run::Minimo - Wrapper for local execution of the Minimo assembler
15 use Bio::Tools::Run::Minimo;
16 # Run Minimo using an input FASTA file
17 my $factory = Bio::Tools::Run::Minimo->new( -minimum_overlap_length => 35 );
18 my $asm_obj = $factory->run($fasta_file, $qual_file);
19 # An assembly object is returned by default
20 for my $contig ($asm_obj->all_contigs) {
26 my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
28 while (my $seq = $sio->next_seq()) {
32 # Run Minimo using input sequence objects and returning an assembly file
33 my $asm_file = 'results.ace';
34 $factory->out_type($asm_file);
35 $factory->run(\@seqs);
39 Wrapper module for the local execution of the DNA assembly program Minimo.
40 Minimo is based on AMOS (http://sourceforge.net/apps/mediawiki/amos/) and
41 implements the same conservative assembly algorithm as Minimus
42 (http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus).
48 User feedback is an integral part of the evolution of this and other Bioperl
49 modules. Send your comments and suggestions preferably to one of the Bioperl
50 mailing lists. Your participation is much appreciated.
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
57 Please direct usage questions or support issues to the mailing list:
59 I<bioperl-l@bioperl.org>
61 rather than to the module maintainer directly. Many experienced and
62 responsive experts will be able to look at the problem and quickly
63 address it. Please include a thorough description of the problem
64 with code and data examples if at all possible.
68 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
69 and their resolution. Bug reports can be submitted via the web:
71 http://redmine.open-bio.org/projects/bioperl/
73 =head1 AUTHOR - Florent E Angly
75 Email: florent-dot-angly-at-gmail-dot-com
79 The rest of the documentation details each of the object methods. Internal
80 methods are usually preceded with a _
85 package Bio
::Tools
::Run
::Minimo
;
93 use base
qw( Bio::Root::Root Bio::Tools::Run::AssemblerBase );
95 our $program_name = 'Minimo'; # name of the executable
96 our @program_params = (qw( qual_in good_qual bad_qual min_len min_ident aln_wiggle out_prefix ace_exp ));
97 our @program_switches;
98 our %param_translation = (
99 'qual_in' => 'D QUAL_IN',
100 'good_qual' => 'D GOOD_QUAL',
101 'bad_qual' => 'D BAD_QUAL',
102 'min_len' => 'D MIN_LEN',
103 'min_ident' => 'D MIN_IDENT',
104 'aln_wiggle' => 'D ALN_WIGGLE',
105 'out_prefix' => 'D OUT_PREFIX',
106 'ace_exp' => 'D ACE_EXP'
109 our $qual_param = 'qual_in';
112 our $asm_format = 'ace';
118 Usage : $assembler->new( -min_len => 50,
120 Function: Creates a Minimo factory
121 Returns : A Bio::Tools::Run::Minimo object
122 Args : Minimo options available in this module:
123 qual_in Input quality score file
124 good_qual Quality score to set for bases within the clear
125 range if no quality file was given (default: 30)
126 bad_qual Quality score to set for bases outside clear range
127 if no quality file was given (default: 10). If your
128 sequences are trimmed, try the same value as GOOD_QUAL.
129 min_len / minimum_overlap_length
130 Minimum contig overlap length (between 20 and 100 bp,
132 min_ident / minimum_overlap_similarity
133 Minimum contig overlap identity percentage (between 0
134 and 100 %, default: 98)
135 aln_wiggle Alignment wiggle value when determining the consensus
136 sequence (default: 2 bp)
137 out_prefix Prefix to use for the output file path and name
142 my ($class,@args) = @_;
143 my $self = $class->SUPER::new
(@args);
144 $self->_set_program_options(\
@args, \
@program_params, \
@program_switches,
145 \
%param_translation, $qual_param, $use_dash, $join);
146 *minimum_overlap_length
= \
&min_len
;
147 *minimum_overlap_similarity
= \
&min_ident
;
148 $self->program_name($program_name) if not defined $self->program_name();
149 $self->_assembly_format($asm_format);
157 Usage : $factory->out_type('Bio::Assembly::ScaffoldI')
158 Function: Get/set the desired type of output
159 Returns : The type of results to return
160 Args : Desired type of results to return (optional):
161 'Bio::Assembly::IO' object
162 'Bio::Assembly::ScaffoldI' object (default)
163 The name of a file to save the results in
171 Usage : $factory->run($fasta_file);
172 Function: Run Minimo
173 Returns : - a Bio::Assembly::ScaffoldI object, a Bio::Assembly::IO
174 object, a filename, or undef if all sequences were too small to
176 Returns : Assembly results (file, IO object or assembly object)
177 Args : - sequence input (FASTA file or sequence object arrayref)
178 - optional quality score input (QUAL file or quality score object
186 Usage : $factory->_run()
187 Function: Make a system call and run Minimo
188 Returns : An assembly file
196 my ($self, $fasta_file, $qual_file) = @_;
198 # qual_in Input quality score file
199 # fasta_exp Export results in FASTA format (0:no 1:yes, default: 1)
200 # ace_exp Export results in ACE format (0:no 1:yes, default: 1)
202 # Specify that we want an ACE output file
205 # Setup needed files and filehandles first
206 my ($output_fh, $output_file) = $self->_prepare_output_file( );
207 my ($stdout_fh, $stdout_file) = $self->io->tempfile( -dir
=> $self->tempdir() );
209 # Get program executable
210 my $exe = $self->executable;
212 # Get command-line options
213 my $options = $self->_translate_params();
215 # Usage: Minimo FASTA_IN [options]
216 # Options are of the style: -D PARAM=VAL
217 my @program_args = ( $exe, $fasta_file, @
$options);
218 my @ipc_args = ( \
@program_args, '>', $stdout_file);
220 # Print command for debugging
221 if ($self->verbose() >= 0) {
223 $cmd .= join ( ' ', @program_args );
224 for ( my $i = 1 ; $i < scalar @ipc_args ; $i++ ) {
225 my $element = $ipc_args[$i];
226 my $ref = ref($element);
228 if ( $ref && $ref eq 'SCALAR') {
235 $self->debug( "$exe command = $cmd\n" );
239 my $log_file = "$fasta_file.runAmos.log";
241 IPC
::Run
::run
(@ipc_args) || die("There was a problem running $exe. The ".
242 "error message is: $!. Check the log file $log_file for possible causes.");
245 $self->throw("$exe call crashed: $@");
253 my $base = $self->out_prefix();
254 if (not defined $base) {
255 my $dirname = dirname
($fasta_file);
256 my $basename = basename
($fasta_file);
257 $basename =~ s/^(.+)\..+$/$1/;
258 $base = File
::Spec
->catfile($dirname, $basename);
260 my $ace_file = "$base-contigs.ace";
261 my $amos_file = "$base-contigs.afg";
263 # Remove all files except for the ACE file
264 for my $file ($log_file, $stdout_file, $amos_file) {
269 $self->_clean_file($ace_file);
271 # Move the ACE file to its final destination
272 move
($ace_file, $output_file) or $self->throw("Could not move file ".
273 "'$ace_file' to '$output_file': $!");
281 Usage : $factory->_clean_file($file)
282 Function: Clean file in place by removing NULL characters. NULL characters
283 can be present in the output files of AMOS 2.0.8 but they do not
284 validate as proper sequence characters in Bioperl.
285 Returns : 1 for success
291 my ($self, $file) = @_;
292 # Set in-place file editing mode
294 local @ARGV = ( $file );
295 # Replace lines in file