Updating Mauricio's email
[bioperl-run.git] / lib / Bio / Tools / Run / Phylo / Semphy.pm
blobd8fce97e75fdf20386c8d9b59584b3ae4fc27ac0
2 # BioPerl module for Bio::Tools::Run::Phylo::Semphy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tools::Run::Phylo::Semphy - Wrapper for Semphy
18 =head1 SYNOPSIS
20 use Bio::Tools::Run::Phylo::Semphy;
22 # Make a Semphy factory
23 $factory = Bio::Tools::Run::Phylo::Semphy->new();
25 # Run Semphy with an alignment
26 my $tree = $factory->run($alignfilename);
28 # or with alignment object
29 $tree = $factory->run($bio_simplalign);
31 # you can supply an initial tree as well, which can be a newick tree file,
32 # Bio::Tree::Tree object...
33 $tree = $factory->run($bio_simplalign, $tree_obj);
35 # ... or Bio::DB::Taxonomy object
36 $tree = $factory->run($bio_simplalign, $bio_db_taxonomy);
38 # (mixtures of all the above are possible)
40 # $tree isa Bio::Tree::Tree
42 =head1 DESCRIPTION
44 This is a wrapper for running the Semphy application by N. Friedman et a.. You
45 can get details here: http://compbio.cs.huji.ac.il/semphy/. Semphy is used for
46 phylogenetic reconstruction (making a tree with branch lengths from an aligned
47 set of input sequences).
49 You can try supplying normal Semphy command-line arguments to new(), eg.
50 new(-hky => 1) or calling arg-named methods (excluding the initial hyphen(s),
51 eg. $factory->hky(1) to set the --hky switch to true).
52 Note that Semphy args are case-sensitive. To distinguish between Bioperl's
53 -verbose and the Semphy's --verbose, you must set Semphy's verbosity with
54 -semphy_verbose or the semphy_verbose() method.
57 You will need to enable this Semphy wrapper to find the Semphy program.
58 This can be done in (at least) three ways:
60 1. Make sure the Semphy executable is in your path.
61 2. Define an environmental variable SEMPHYDIR which is a
62 directory which contains the Semphy application:
63 In bash:
65 export SEMPHYDIR=/home/username/semphy/
67 In csh/tcsh:
69 setenv SEMPHYDIR /home/username/semphy
71 3. Include a definition of an environmental variable SEMPHYDIR in
72 every script that will use this Semphy wrapper module, e.g.:
74 BEGIN { $ENV{SEMPHYDIR} = '/home/username/semphy/' }
75 use Bio::Tools::Run::Phylo::Semphy;
77 =head1 FEEDBACK
79 =head2 Mailing Lists
81 User feedback is an integral part of the evolution of this and other
82 Bioperl modules. Send your comments and suggestions preferably to
83 the Bioperl mailing list. Your participation is much appreciated.
85 bioperl-l@bioperl.org - General discussion
86 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
88 =head2 Support
90 Please direct usage questions or support issues to the mailing list:
92 I<bioperl-l@bioperl.org>
94 rather than to the module maintainer directly. Many experienced and
95 reponsive experts will be able look at the problem and quickly
96 address it. Please include a thorough description of the problem
97 with code and data examples if at all possible.
99 =head2 Reporting Bugs
101 Report bugs to the Bioperl bug tracking system to help us keep track
102 of the bugs and their resolution. Bug reports can be submitted via
103 the web:
105 http://redmine.open-bio.org/projects/bioperl/
107 =head1 AUTHOR - Sendu Bala
109 Email bix@sendu.me.uk
111 =head1 APPENDIX
113 The rest of the documentation details each of the object methods.
114 Internal methods are usually preceded with a _
116 =cut
118 package Bio::Tools::Run::Phylo::Semphy;
119 use strict;
121 use File::Spec;
122 use Bio::AlignIO;
123 use Bio::TreeIO;
125 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
127 our $PROGRAM_NAME = 'semphy';
128 our $PROGRAM_DIR = $ENV{'SEMPHYDIR'};
130 # methods for the semphy args we support
131 our %PARAMS = (outputfile => 'o',
132 treeoutputfile => 'T',
133 constraint => 'c',
134 gaps => 'g',
135 seed => 'r',
136 Logfile => 'l',
137 alphabet => 'a',
138 ratio => 'z',
139 ACGprob => 'p',
140 BPrepeats => 'BPrepeats',
141 BPconsensus => 'BPconsensus',
142 SEMPHY => 'S',
143 modelfile => 'modelfile',
144 alpha => 'A',
145 categories => 'C',
146 semphy_verbose => 'semphy_verbose');
147 our %SWITCHES = (homogeneousRatesDTME => 'homogeneousRatesDTME',
148 NJ => 'J',
149 pairwiseGammaDTME => 'pairwiseGammaDTME',
150 commonAlphaDTME => 'commonAlphaDTME',
151 rate4siteDTME => 'rate4siteDTME',
152 posteriorDTME => 'posteriorDTME',
153 BPonUserTree => 'BPonUserTree',
154 nucjc => 'nucjc',
155 aaJC => 'aaJC',
156 k2p => 'k2p',
157 hky => 'hky',
158 day => 'day',
159 jtt => 'jtt',
160 rev => 'rev',
161 wag => 'wag',
162 cprev => 'cprev',
163 homogeneous => 'H',
164 optimizeAlpha => 'O',
165 bbl => 'n',
166 likelihood => 'L',
167 PerPosLike => 'P',
168 PerPosPosterior => 'B',
169 rate => 'R');
171 # just to be explicit, args we don't support (yet) or we handle ourselves
172 our @UNSUPPORTED = qw(h help full-help s sequence t tree);
175 =head2 program_name
177 Title : program_name
178 Usage : $factory>program_name()
179 Function: holds the program name
180 Returns : string
181 Args : None
183 =cut
185 sub program_name {
186 return $PROGRAM_NAME;
189 =head2 program_dir
191 Title : program_dir
192 Usage : $factory->program_dir(@params)
193 Function: returns the program directory, obtained from ENV variable.
194 Returns : string
195 Args : None
197 =cut
199 sub program_dir {
200 return $PROGRAM_DIR;
203 =head2 new
205 Title : new
206 Usage : $factory = Bio::Tools::Run::Phylo::Semphy->new()
207 Function: creates a new Semphy factory
208 Returns : Bio::Tools::Run::Phylo::Semphy
209 Args : Most options understood by Semphy can be supplied as key =>
210 value pairs, with a true value for switches.
212 These options can NOT be used with this wrapper (they are handled
213 internally or don't make sense in this context):
214 -h | --help | --fill-help
215 -s | --sequence
216 -t | --tree
218 To distinguish between Bioperl's -verbose and the Semphy's --verbose,
219 you must set Semphy's verbosity with -semphy_verbose
221 =cut
223 sub new {
224 my ($class, @args) = @_;
225 my $self = $class->SUPER::new(@args);
227 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
228 (map { $_ => $SWITCHES{$_} } keys %SWITCHES),
229 quiet => 'quiet'},
230 -create => 1,
231 -case_sensitive => 1);
233 return $self;
236 =head2 run
238 Title : run
239 Usage : $result = $factory->run($fasta_align_file);
240 -or-
241 $result = $factory->run($align_object);
242 -or-
243 $result = $factory->run($fasta_align_file, $newick_tree_file);
244 -or-
245 $result = $factory->run($align_object, $tree_object);
246 -or-
247 $result = $factory->run($align_object, $db_taxonomy_object);
248 Function: Runs Semphy on an alignment.
249 Returns : Bio::Tree::Tree
250 Args : The first argument represents an alignment, the second (optional)
251 argument a species tree (to set an initial tree: normally the -t
252 option to Semphy).
253 The alignment can be provided as a multi-fasta format alignment
254 filename, or a Bio::Align::AlignI compliant object (eg. a
255 Bio::SimpleAlign).
256 The species tree can be provided as a newick format tree filename
257 or a Bio::Tree::TreeI compliant object. Alternatively a
258 Bio::DB::Taxonomy object can be supplied, in which case the species
259 tree will be generated by using the alignment sequence names as
260 species names and looking for those in the supplied database.
262 In all cases where an initial tree was supplied, the alignment
263 sequence names must correspond to node ids in the species tree.
265 =cut
267 sub run {
268 my ($self, $aln, $tree) = @_;
270 $aln || $self->throw("alignment must be supplied");
271 $self->_alignment($aln);
273 if ($tree) {
274 $self->_tree($tree);
276 # check node and seq names match
277 $self->_check_names;
280 return $self->_run;
283 sub _run {
284 my $self = shift;
286 my $exe = $self->executable || return;
288 my $aln_file = $self->_write_alignment;
290 # generate a semphy-friendly tree file
291 my $tree = $self->_tree;
292 my $tree_file = '';
293 if ($tree) {
294 $tree = $self->_write_tree;
297 unless ($self->T) {
298 my ($tfh, $tempfile) = $self->io->tempfile(-dir => $self->tempdir);
299 $self->T($tempfile);
300 close($tfh);
303 my $command = $exe.$self->_setparams($aln_file, $tree_file);
304 $self->debug("semphy command = $command\n");
306 open(my $pipe, "$command |") || $self->throw("semphy call ($command) failed to start: $? | $!");
307 my $error = '';
308 while (<$pipe>) {
309 print unless $self->quiet;
310 $error .= $_;
312 close($pipe) || ($error ? $self->warn("semphy call ($command) failed: $error") : $self->throw("semphy call ($command) crashed: $?"));
314 my $result_file = $self->T();
315 my $tio = Bio::TreeIO->new(-format => 'newick', -file => $result_file);
316 my $result_tree = $tio->next_tree;
318 return $result_tree;
321 =head2 _setparams
323 Title : _setparams
324 Usage : Internal function, not to be called directly
325 Function: Creates a string of params to be used in the command string
326 Returns : string of params
327 Args : alignment and tree file names
329 =cut
331 sub _setparams {
332 my ($self, $aln_file, $tree_file) = @_;
334 my $param_string = ' -s '.$aln_file;
335 $param_string .= ' -t '.$tree_file if $tree_file;
337 my %methods = map { $_ => $_ } keys %PARAMS;
338 $methods{'semphy_verbose'} = 'verbose';
339 $param_string .= $self->SUPER::_setparams(-params => \%methods,
340 -switches => [keys %SWITCHES],
341 -double_dash => 1);
343 $param_string .= ' 2>&1';
344 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
345 $param_string .= " 1>$null" if $self->quiet;
347 return $param_string;