some tweaks to get rid of warnings
[bioperl-run.git] / lib / Bio / Tools / Run / Cap3.pm
blobd48b59c1300b604dddfc0d708659addb5a51a085
1 # $Id$
2 # You may distribute this module under the same terms as perl itself
4 # POD documentation - main docs before the code
6 =head1 NAME
8 Bio::Tools::Run::Cap3 - wrapper for CAP3
10 =head1 SYNOPSIS
12 use Bio::Tools::Run::Cap3;
13 # Run Cap3 using an input FASTA file
14 my $factory = Bio::Tools::Run::Cap3->new( -clipping_range => 150 );
15 my $asm_obj = $factory->run($fasta_file, $qual_file);
16 # An assembly object is returned by default
17 for my $contig ($assembly->all_contigs) {
18 ... do something ...
21 # Read some sequences
22 use Bio::SeqIO;
23 my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
24 my @seqs;
25 while (my $seq = $sio->next_seq()) {
26 push @seqs,$seq;
29 # Run Cap3 using input sequence objects and returning an assembly file
30 my $asm_file = 'results.ace';
31 $factory->out_type($asm_file);
32 $factory->run(\@seqs);
35 =head1 DESCRIPTION
37 Wrapper module for CAP3 program
39 =head1 FEEDBACK
41 =head2 Mailing Lists
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to one
45 of the Bioperl mailing lists. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 the bugs and their resolution. Bug reports can be submitted via the
65 web:
67 http://bugzilla.open-bio.org/
69 =head1 AUTHORS
71 Marc Logghe
73 =head1 APPENDIX
75 The rest of the documentation details each of the object
76 methods. Internal methods are usually preceded with a _
78 =cut
80 package Bio::Tools::Run::Cap3;
82 use strict;
83 use File::Copy;
85 use base qw(Bio::Root::Root Bio::Tools::Run::AssemblerBase);
87 our $program_name = 'cap3';
88 our @program_params = (qw( band_expansion_size differences_quality_cutoff
89 clipping_quality_cutoff max_qscore_sum extra_nof_differences max_gap_length
90 gap_penalty_factor max_overhang_percent match_score_factor mismatch_score_factor
91 overlap_length_cutoff overlap_identity_cutoff reverse_orientation_value
92 overlap_score_cutoff max_word_occurrences min_correction_constraints
93 min_linking_constraints clipping_info_file output_prefix_string clipping_range
94 min_clip_good_reads ));
95 our @program_switches;
96 our %param_translation = (
97 'band_expansion_size' => 'a',
98 'differences_quality_cutoff' => 'b',
99 'clipping_quality_cutoff' => 'c',
100 'max_qscore_sum' => 'd',
101 'extra_nof_differences' => 'e',
102 'max_gap_length' => 'f',
103 'gap_penalty_factor' => 'g',
104 'max_overhang_percent' => 'h',
105 'match_score_factor' => 'm',
106 'mismatch_score_factor' => 'n',
107 'overlap_length_cutoff' => 'o',
108 'overlap_identity_cutoff' => 'p',
109 'reverse_orientation_value' => 'r',
110 'overlap_score_cutoff' => 's',
111 'max_word_occurrences' => 't',
112 'min_correction_constraints' => 'u',
113 'min_linking_constraints' => 'v',
114 'clipping_info_file' => 'w',
115 'output_prefix_string' => 'x',
116 'clipping_range' => 'y',
117 'min_clip_good_reads' => 'z'
119 our $qual_param;
120 our $use_dash = 1;
121 our $join = ' ';
122 our $asm_format = 'ace';
124 =head2 new
126 Title : new
127 Usage : $factory->new(
128 -overlap_length_cutoff => 35,
129 -overlap_identity_cutoff => 98 # %
131 Function: Create a new Cap3 factory
132 Returns : A Bio::Tools::Run::Cap3 object
133 Args : Cap3 options available in this module:
134 band_expansion_size specify band expansion size N > 10 (20)
135 differences_quality_cutoff specify base quality cutoff for differences N > 15 (20)
136 clipping_quality_cutoff specify base quality cutoff for clipping N > 5 (12)
137 max_qscore_sum specify max qscore sum at differences N > 20 (200)
138 extra_nof_differences specify clearance between no. of diff N > 10 (30)
139 max_gap_length specify max gap length in any overlap N > 1 (20)
140 gap_penalty_factor specify gap penalty factor N > 0 (6)
141 max_overhang_percent specify max overhang percent length N > 2 (20)
142 match_score_factor specify match score factor N > 0 (2)
143 mismatch_score_factor specify mismatch score factor N < 0 (-5)
144 overlap_length_cutoff / minimum_overlap_length
145 specify overlap length cutoff > 20 (40)
146 overlap_identity_cutoff / minimum_overlap_similarity
147 specify overlap percent identity cutoff N > 65 (80)
148 reverse_orientation_value specify reverse orientation value N >= 0 (1)
149 overlap_score_cutoff specify overlap similarity score cutoff N > 400 (900)
150 max_word_occurrences specify max number of word matches N > 30 (300)
151 min_correction_constraints specify min number of constraints for correction N > 0 (3)
152 min_linking_constraints specify min number of constraints for linking N > 0 (2)
153 clipping_info_file specify file name for clipping information (none)
154 output_prefix_string specify prefix string for output file names (cap)
155 clipping_range specify clipping range N > 5 (250)
156 min_clip_good_reads specify min no. of good reads at clip pos N > 0 (3)
158 =cut
160 sub new {
161 my ($class,@args) = @_;
162 my $self = $class->SUPER::new(@args);
163 $self->_set_program_options(\@args, \@program_params, \@program_switches, \%param_translation,
164 $qual_param, $use_dash, $join);
165 *minimum_overlap_length = \&overlap_length_cutoff;
166 *minimum_overlap_similarity = \&overlap_identity_cutoff;
167 $self->program_name($program_name) if not defined $self->program_name();
168 $self->_assembly_format($asm_format);
169 return $self;
173 =head2 out_type
175 Title : out_type
176 Usage : $assembler->out_type('Bio::Assembly::ScaffoldI')
177 Function: Get/set the desired type of output
178 Returns : The type of results to return
179 Args : Desired type of results to return (optional):
180 'Bio::Assembly::IO' object
181 'Bio::Assembly::ScaffoldI' object (default)
182 The name of a file to save the results in
184 =cut
187 =head2 run
189 Title : run
190 Usage : $asm = $factory->run($fasta_file);
191 Function: Run CAP3
192 Returns : Assembly results (file, IO object or assembly object)
193 Args : - sequence input (FASTA file or sequence object arrayref)
194 - optional quality score input (QUAL file or quality score object
195 arrayref)
196 =cut
199 =head2 _run
201 Title : _run
202 Usage : $factory->_run()
203 Function: Make a system call and run Cap3
204 Returns : An assembly file
205 Args : - FASTA file
206 - optional QUAL file
208 =cut
210 sub _run {
211 my ($self, $fasta_file, $qual_file) = @_;
213 # Move quality file to proper place
214 my $tmp_qual_file = "$fasta_file.qual";
215 if ($qual_file && not $qual_file eq $tmp_qual_file) {
216 $tmp_qual_file = "$fasta_file.qual"; # by Cap3 convention
217 link ($qual_file, $tmp_qual_file) or copy ($qual_file, $tmp_qual_file) or
218 $self->throw("Could not copy file '$qual_file' to '$tmp_qual_file': $!");
221 # Setup needed files and filehandles
222 my ($output_fh, $output_file) = $self->_prepare_output_file( );
224 # Get program executable
225 my $exe = $self->executable;
227 # Get command-line options
228 my $options = join ' ', @{$self->_translate_params()};
230 # Usage: cap3 File_of_reads [options]
231 my $commandstring = "$exe $fasta_file $options";
233 if ($self->verbose() >= 0) {
234 $self->debug( "$exe command = $commandstring\n" );
237 open(CAP3, "$commandstring |") ||
238 $self->throw(sprintf("%s call crashed: %s %s\n", $self->program_name, $!, $commandstring));
239 local $/ = undef;
240 #my ($result) = <CAP3>; # standard output of the program
241 <CAP3>;
242 close CAP3;
243 close $output_fh;
245 # Result files
246 my $prefix = $self->output_prefix_string() || 'cap';
247 my $ace_file = "$fasta_file.$prefix.ace";
248 my $contigs_file = "$fasta_file.$prefix.contigs";
249 $qual_file = "$fasta_file.$prefix.contigs.links";
250 my $links_file = "$fasta_file.$prefix.contigs.qual";
251 my $info_file = "$fasta_file.$prefix.info";
252 my $singlet_file = "$fasta_file.$prefix.singlets";
254 # Remove all files except for the ACE file
255 for my $file ($contigs_file, $qual_file, $links_file, $info_file, $singlet_file, $tmp_qual_file) {
256 unlink $file;
259 # Move the ACE file to its final destination
260 move ($ace_file, $output_file) or $self->throw("Could not move file '$ace_file' to '$output_file': $!");
262 return $output_file;