speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Phylo / Semphy.pm
blobd4faebfd8ccde7f350e2a7dd81ad3c6e0e3a2c9b
1 # $Id: Semphy.pm,v 1.3 2007/05/25 10:14:55 sendu Exp $
3 # BioPerl module for Bio::Tools::Run::Phylo::Semphy
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
9 # Copyright Sendu Bala
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::Run::Phylo::Semphy - Wrapper for Semphy
19 =head1 SYNOPSIS
21 use Bio::Tools::Run::Phylo::Semphy;
23 # Make a Semphy factory
24 $factory = Bio::Tools::Run::Phylo::Semphy->new();
26 # Run Semphy with an alignment
27 my $tree = $factory->run($alignfilename);
29 # or with alignment object
30 $tree = $factory->run($bio_simplalign);
32 # you can supply an initial tree as well, which can be a newick tree file,
33 # Bio::Tree::Tree object...
34 $tree = $factory->run($bio_simplalign, $tree_obj);
36 # ... or Bio::DB::Taxonomy object
37 $tree = $factory->run($bio_simplalign, $bio_db_taxonomy);
39 # (mixtures of all the above are possible)
41 # $tree isa Bio::Tree::Tree
43 =head1 DESCRIPTION
45 This is a wrapper for running the Semphy application by N. Friedman et a.. You
46 can get details here: http://compbio.cs.huji.ac.il/semphy/. Semphy is used for
47 phylogenetic reconstruction (making a tree with branch lengths from an aligned
48 set of input sequences).
50 You can try supplying normal Semphy command-line arguments to new(), eg.
51 new(-hky => 1) or calling arg-named methods (excluding the initial hyphen(s),
52 eg. $factory->hky(1) to set the --hky switch to true).
53 Note that Semphy args are case-sensitive. To distinguish between Bioperl's
54 -verbose and the Semphy's --verbose, you must set Semphy's verbosity with
55 -semphy_verbose or the semphy_verbose() method.
58 You will need to enable this Semphy wrapper to find the Semphy program.
59 This can be done in (at least) three ways:
61 1. Make sure the Semphy executable is in your path.
62 2. Define an environmental variable SEMPHYDIR which is a
63 directory which contains the Semphy application:
64 In bash:
66 export SEMPHYDIR=/home/username/semphy/
68 In csh/tcsh:
70 setenv SEMPHYDIR /home/username/semphy
72 3. Include a definition of an environmental variable SEMPHYDIR in
73 every script that will use this Semphy wrapper module, e.g.:
75 BEGIN { $ENV{SEMPHYDIR} = '/home/username/semphy/' }
76 use Bio::Tools::Run::Phylo::Semphy;
78 =head1 FEEDBACK
80 =head2 Mailing Lists
82 User feedback is an integral part of the evolution of this and other
83 Bioperl modules. Send your comments and suggestions preferably to
84 the Bioperl mailing list. Your participation is much appreciated.
86 bioperl-l@bioperl.org - General discussion
87 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
89 =head2 Support
91 Please direct usage questions or support issues to the mailing list:
93 I<bioperl-l@bioperl.org>
95 rather than to the module maintainer directly. Many experienced and
96 reponsive experts will be able look at the problem and quickly
97 address it. Please include a thorough description of the problem
98 with code and data examples if at all possible.
100 =head2 Reporting Bugs
102 Report bugs to the Bioperl bug tracking system to help us keep track
103 of the bugs and their resolution. Bug reports can be submitted via
104 the web:
106 http://redmine.open-bio.org/projects/bioperl/
108 =head1 AUTHOR - Sendu Bala
110 Email bix@sendu.me.uk
112 =head1 APPENDIX
114 The rest of the documentation details each of the object methods.
115 Internal methods are usually preceded with a _
117 =cut
119 package Bio::Tools::Run::Phylo::Semphy;
120 use strict;
122 use File::Spec;
123 use Bio::AlignIO;
124 use Bio::TreeIO;
126 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
128 our $PROGRAM_NAME = 'semphy';
129 our $PROGRAM_DIR = $ENV{'SEMPHYDIR'};
131 # methods for the semphy args we support
132 our %PARAMS = (outputfile => 'o',
133 treeoutputfile => 'T',
134 constraint => 'c',
135 gaps => 'g',
136 seed => 'r',
137 Logfile => 'l',
138 alphabet => 'a',
139 ratio => 'z',
140 ACGprob => 'p',
141 BPrepeats => 'BPrepeats',
142 BPconsensus => 'BPconsensus',
143 SEMPHY => 'S',
144 modelfile => 'modelfile',
145 alpha => 'A',
146 categories => 'C',
147 semphy_verbose => 'semphy_verbose');
148 our %SWITCHES = (homogeneousRatesDTME => 'homogeneousRatesDTME',
149 NJ => 'J',
150 pairwiseGammaDTME => 'pairwiseGammaDTME',
151 commonAlphaDTME => 'commonAlphaDTME',
152 rate4siteDTME => 'rate4siteDTME',
153 posteriorDTME => 'posteriorDTME',
154 BPonUserTree => 'BPonUserTree',
155 nucjc => 'nucjc',
156 aaJC => 'aaJC',
157 k2p => 'k2p',
158 hky => 'hky',
159 day => 'day',
160 jtt => 'jtt',
161 rev => 'rev',
162 wag => 'wag',
163 cprev => 'cprev',
164 homogeneous => 'H',
165 optimizeAlpha => 'O',
166 bbl => 'n',
167 likelihood => 'L',
168 PerPosLike => 'P',
169 PerPosPosterior => 'B',
170 rate => 'R');
172 # just to be explicit, args we don't support (yet) or we handle ourselves
173 our @UNSUPPORTED = qw(h help full-help s sequence t tree);
176 =head2 program_name
178 Title : program_name
179 Usage : $factory>program_name()
180 Function: holds the program name
181 Returns : string
182 Args : None
184 =cut
186 sub program_name {
187 return $PROGRAM_NAME;
190 =head2 program_dir
192 Title : program_dir
193 Usage : $factory->program_dir(@params)
194 Function: returns the program directory, obtained from ENV variable.
195 Returns : string
196 Args : None
198 =cut
200 sub program_dir {
201 return $PROGRAM_DIR;
204 =head2 new
206 Title : new
207 Usage : $factory = Bio::Tools::Run::Phylo::Semphy->new()
208 Function: creates a new Semphy factory
209 Returns : Bio::Tools::Run::Phylo::Semphy
210 Args : Most options understood by Semphy can be supplied as key =>
211 value pairs, with a true value for switches.
213 These options can NOT be used with this wrapper (they are handled
214 internally or don't make sense in this context):
215 -h | --help | --fill-help
216 -s | --sequence
217 -t | --tree
219 To distinguish between Bioperl's -verbose and the Semphy's --verbose,
220 you must set Semphy's verbosity with -semphy_verbose
222 =cut
224 sub new {
225 my ($class, @args) = @_;
226 my $self = $class->SUPER::new(@args);
228 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
229 (map { $_ => $SWITCHES{$_} } keys %SWITCHES),
230 quiet => 'quiet'},
231 -create => 1,
232 -case_sensitive => 1);
234 return $self;
237 =head2 run
239 Title : run
240 Usage : $result = $factory->run($fasta_align_file);
241 -or-
242 $result = $factory->run($align_object);
243 -or-
244 $result = $factory->run($fasta_align_file, $newick_tree_file);
245 -or-
246 $result = $factory->run($align_object, $tree_object);
247 -or-
248 $result = $factory->run($align_object, $db_taxonomy_object);
249 Function: Runs Semphy on an alignment.
250 Returns : Bio::Tree::Tree
251 Args : The first argument represents an alignment, the second (optional)
252 argument a species tree (to set an initial tree: normally the -t
253 option to Semphy).
254 The alignment can be provided as a multi-fasta format alignment
255 filename, or a Bio::Align::AlignI compliant object (eg. a
256 Bio::SimpleAlign).
257 The species tree can be provided as a newick format tree filename
258 or a Bio::Tree::TreeI compliant object. Alternatively a
259 Bio::DB::Taxonomy object can be supplied, in which case the species
260 tree will be generated by using the alignment sequence names as
261 species names and looking for those in the supplied database.
263 In all cases where an initial tree was supplied, the alignment
264 sequence names must correspond to node ids in the species tree.
266 =cut
268 sub run {
269 my ($self, $aln, $tree) = @_;
271 $aln || $self->throw("alignment must be supplied");
272 $self->_alignment($aln);
274 if ($tree) {
275 $self->_tree($tree);
277 # check node and seq names match
278 $self->_check_names;
281 return $self->_run;
284 sub _run {
285 my $self = shift;
287 my $exe = $self->executable || return;
289 my $aln_file = $self->_write_alignment;
291 # generate a semphy-friendly tree file
292 my $tree = $self->_tree;
293 my $tree_file = '';
294 if ($tree) {
295 $tree = $self->_write_tree;
298 unless ($self->T) {
299 my ($tfh, $tempfile) = $self->io->tempfile(-dir => $self->tempdir);
300 $self->T($tempfile);
301 close($tfh);
304 my $command = $exe.$self->_setparams($aln_file, $tree_file);
305 $self->debug("semphy command = $command\n");
307 open(my $pipe, "$command |") || $self->throw("semphy call ($command) failed to start: $? | $!");
308 my $error = '';
309 while (<$pipe>) {
310 print unless $self->quiet;
311 $error .= $_;
313 close($pipe) || ($error ? $self->warn("semphy call ($command) failed: $error") : $self->throw("semphy call ($command) crashed: $?"));
315 my $result_file = $self->T();
316 my $tio = Bio::TreeIO->new(-format => 'newick', -file => $result_file);
317 my $result_tree = $tio->next_tree;
319 return $result_tree;
322 =head2 _setparams
324 Title : _setparams
325 Usage : Internal function, not to be called directly
326 Function: Creates a string of params to be used in the command string
327 Returns : string of params
328 Args : alignment and tree file names
330 =cut
332 sub _setparams {
333 my ($self, $aln_file, $tree_file) = @_;
335 my $param_string = ' -s '.$aln_file;
336 $param_string .= ' -t '.$tree_file if $tree_file;
338 my %methods = map { $_ => $_ } keys %PARAMS;
339 $methods{'semphy_verbose'} = 'verbose';
340 $param_string .= $self->SUPER::_setparams(-params => \%methods,
341 -switches => [keys %SWITCHES],
342 -double_dash => 1);
344 $param_string .= ' 2>&1';
345 $param_string .= ' 1>/dev/null' if $self->quiet;
347 return $param_string;