2 # You may distribute this module under the same terms as perl itself
4 # POD documentation - main docs before the code
8 Bio::Tools::Run::Cap3 - wrapper for CAP3
12 use Bio::Tools::Run::Cap3;
13 # Run Cap3 using an input FASTA file
14 my $factory = Bio::Tools::Run::Cap3->new( -clipping_range => 150 );
15 my $asm_obj = $factory->run($fasta_file, $qual_file);
16 # An assembly object is returned by default
17 for my $contig ($assembly->all_contigs) {
23 my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
25 while (my $seq = $sio->next_seq()) {
29 # Run Cap3 using input sequence objects and returning an assembly file
30 my $asm_file = 'results.ace';
31 $factory->out_type($asm_file);
32 $factory->run(\@seqs);
37 Wrapper module for CAP3 program
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to one
45 of the Bioperl mailing lists. Your participation is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 the bugs and their resolution. Bug reports can be submitted via the
67 http://bugzilla.open-bio.org/
75 The rest of the documentation details each of the object
76 methods. Internal methods are usually preceded with a _
80 package Bio
::Tools
::Run
::Cap3
;
85 use base
qw(Bio::Root::Root Bio::Tools::Run::AssemblerBase);
87 our $program_name = 'cap3';
88 our @program_params = (qw( band_expansion_size differences_quality_cutoff
89 clipping_quality_cutoff max_qscore_sum extra_nof_differences max_gap_length
90 gap_penalty_factor max_overhang_percent match_score_factor mismatch_score_factor
91 overlap_length_cutoff overlap_identity_cutoff reverse_orientation_value
92 overlap_score_cutoff max_word_occurrences min_correction_constraints
93 min_linking_constraints clipping_info_file output_prefix_string clipping_range
94 min_clip_good_reads ));
95 our @program_switches;
96 our %param_translation = (
97 'band_expansion_size' => 'a',
98 'differences_quality_cutoff' => 'b',
99 'clipping_quality_cutoff' => 'c',
100 'max_qscore_sum' => 'd',
101 'extra_nof_differences' => 'e',
102 'max_gap_length' => 'f',
103 'gap_penalty_factor' => 'g',
104 'max_overhang_percent' => 'h',
105 'match_score_factor' => 'm',
106 'mismatch_score_factor' => 'n',
107 'overlap_length_cutoff' => 'o',
108 'overlap_identity_cutoff' => 'p',
109 'reverse_orientation_value' => 'r',
110 'overlap_score_cutoff' => 's',
111 'max_word_occurrences' => 't',
112 'min_correction_constraints' => 'u',
113 'min_linking_constraints' => 'v',
114 'clipping_info_file' => 'w',
115 'output_prefix_string' => 'x',
116 'clipping_range' => 'y',
117 'min_clip_good_reads' => 'z'
122 our $asm_format = 'ace';
127 Usage : $factory->new(
128 -overlap_length_cutoff => 35,
129 -overlap_identity_cutoff => 98 # %
131 Function: Create a new Cap3 factory
132 Returns : A Bio::Tools::Run::Cap3 object
133 Args : Cap3 options available in this module:
134 band_expansion_size specify band expansion size N > 10 (20)
135 differences_quality_cutoff specify base quality cutoff for differences N > 15 (20)
136 clipping_quality_cutoff specify base quality cutoff for clipping N > 5 (12)
137 max_qscore_sum specify max qscore sum at differences N > 20 (200)
138 extra_nof_differences specify clearance between no. of diff N > 10 (30)
139 max_gap_length specify max gap length in any overlap N > 1 (20)
140 gap_penalty_factor specify gap penalty factor N > 0 (6)
141 max_overhang_percent specify max overhang percent length N > 2 (20)
142 match_score_factor specify match score factor N > 0 (2)
143 mismatch_score_factor specify mismatch score factor N < 0 (-5)
144 overlap_length_cutoff / minimum_overlap_length
145 specify overlap length cutoff > 20 (40)
146 overlap_identity_cutoff / minimum_overlap_similarity
147 specify overlap percent identity cutoff N > 65 (80)
148 reverse_orientation_value specify reverse orientation value N >= 0 (1)
149 overlap_score_cutoff specify overlap similarity score cutoff N > 400 (900)
150 max_word_occurrences specify max number of word matches N > 30 (300)
151 min_correction_constraints specify min number of constraints for correction N > 0 (3)
152 min_linking_constraints specify min number of constraints for linking N > 0 (2)
153 clipping_info_file specify file name for clipping information (none)
154 output_prefix_string specify prefix string for output file names (cap)
155 clipping_range specify clipping range N > 5 (250)
156 min_clip_good_reads specify min no. of good reads at clip pos N > 0 (3)
161 my ($class,@args) = @_;
162 my $self = $class->SUPER::new
(@args);
163 $self->_set_program_options(\
@args, \
@program_params, \
@program_switches, \
%param_translation,
164 $qual_param, $use_dash, $join);
165 *minimum_overlap_length
= \
&overlap_length_cutoff
;
166 *minimum_overlap_similarity
= \
&overlap_identity_cutoff
;
167 $self->program_name($program_name) if not defined $self->program_name();
168 $self->_assembly_format($asm_format);
176 Usage : $assembler->out_type('Bio::Assembly::ScaffoldI')
177 Function: Get/set the desired type of output
178 Returns : The type of results to return
179 Args : Desired type of results to return (optional):
180 'Bio::Assembly::IO' object
181 'Bio::Assembly::ScaffoldI' object (default)
182 The name of a file to save the results in
190 Usage : $asm = $factory->run($fasta_file);
192 Returns : Assembly results (file, IO object or assembly object)
193 Args : - sequence input (FASTA file or sequence object arrayref)
194 - optional quality score input (QUAL file or quality score object
202 Usage : $factory->_run()
203 Function: Make a system call and run Cap3
204 Returns : An assembly file
211 my ($self, $fasta_file, $qual_file) = @_;
213 # Move quality file to proper place
214 my $tmp_qual_file = "$fasta_file.qual";
215 if ($qual_file && not $qual_file eq $tmp_qual_file) {
216 $tmp_qual_file = "$fasta_file.qual"; # by Cap3 convention
217 link ($qual_file, $tmp_qual_file) or copy
($qual_file, $tmp_qual_file) or
218 $self->throw("Could not copy file '$qual_file' to '$tmp_qual_file': $!");
221 # Setup needed files and filehandles
222 my ($output_fh, $output_file) = $self->_prepare_output_file( );
224 # Get program executable
225 my $exe = $self->executable;
227 # Get command-line options
228 my $options = join ' ', @
{$self->_translate_params()};
230 # Usage: cap3 File_of_reads [options]
231 my $commandstring = "$exe $fasta_file $options";
233 if ($self->verbose() >= 0) {
234 $self->debug( "$exe command = $commandstring\n" );
237 open(CAP3
, "$commandstring |") ||
238 $self->throw(sprintf("%s call crashed: %s %s\n", $self->program_name, $!, $commandstring));
240 #my ($result) = <CAP3>; # standard output of the program
246 my $prefix = $self->output_prefix_string() || 'cap';
247 my $ace_file = "$fasta_file.$prefix.ace";
248 my $contigs_file = "$fasta_file.$prefix.contigs";
249 $qual_file = "$fasta_file.$prefix.contigs.links";
250 my $links_file = "$fasta_file.$prefix.contigs.qual";
251 my $info_file = "$fasta_file.$prefix.info";
252 my $singlet_file = "$fasta_file.$prefix.singlets";
254 # Remove all files except for the ACE file
255 for my $file ($contigs_file, $qual_file, $links_file, $info_file, $singlet_file, $tmp_qual_file) {
259 # Move the ACE file to its final destination
260 move
($ace_file, $output_file) or $self->throw("Could not move file '$ace_file' to '$output_file': $!");