speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Phylo / Phast / PhyloFit.pm
blob940851d9cf34ecb1237fcceabff50c2f53463feb
1 # $Id$
3 # BioPerl module for Bio::Tools::Run::Phylo::Phast::PhyloFit
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
9 # Copyright Sendu Bala
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::Run::Phylo::Phast::PhyloFit - Wrapper for phyloFit
19 =head1 SYNOPSIS
21 use Bio::Tools::Run::Phylo::Phast::PhyloFit;
23 # Make a PhyloFit factory
24 $factory = Bio::Tools::Run::Phylo::Phast::PhastCons->new();
26 # Generate an init.mod file for use by phastCons
27 my $init_file = $factory->run($alignment, $tree);
29 =head1 DESCRIPTION
31 This is a wrapper for running the phyloFit application by Adam Siepel. You
32 can get details here: http://compgen.bscb.cornell.edu/~acs/software.html
34 Currently the interface is extremely simplified. Only the --tree form of usage
35 is allowed (not --init-model), which means a tree must be supplied with the
36 alignment (to run()). You can try supplying normal phyloFit arguments to new(),
37 or calling arg-named methods (excluding initial hyphens and converting others
38 to underscores, eg. $factory-E<gt>gaps_as_bases(1) to set the --gaps-as-bases arg).
40 WARNING: the API may change in the future to allow for greater flexability and
41 access to more phyloFit features.
44 You will need to enable this PhyloFit wrapper to find the phast programs (at
45 least phyloFit itself).
46 This can be done in (at least) three ways:
48 1. Make sure the phyloFit executable is in your path.
49 2. Define an environmental variable PHASTDIR which is a
50 directory which contains the phyloFit application:
51 In bash:
53 export PHASTDIR=/home/username/phast/bin
55 In csh/tcsh:
57 setenv PHASTDIR /home/username/phast/bin
59 3. Include a definition of an environmental variable PHASTDIR in
60 every script that will use this PhyloFit wrapper module, e.g.:
62 BEGIN { $ENV{PHASTDIR} = '/home/username/phast/bin' }
63 use Bio::Tools::Run::Phylo::Phast::PhyloFit;
65 =head1 FEEDBACK
67 =head2 Mailing Lists
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to
71 the Bioperl mailing list. Your participation is much appreciated.
73 bioperl-l@bioperl.org - General discussion
74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
76 =head2 Support
78 Please direct usage questions or support issues to the mailing list:
80 I<bioperl-l@bioperl.org>
82 rather than to the module maintainer directly. Many experienced and
83 reponsive experts will be able look at the problem and quickly
84 address it. Please include a thorough description of the problem
85 with code and data examples if at all possible.
87 =head2 Reporting Bugs
89 Report bugs to the Bioperl bug tracking system to help us keep track
90 of the bugs and their resolution. Bug reports can be submitted via
91 the web:
93 http://redmine.open-bio.org/projects/bioperl/
95 =head1 AUTHOR - Sendu Bala
97 Email bix@sendu.me.uk
99 =head1 APPENDIX
101 The rest of the documentation details each of the object methods.
102 Internal methods are usually preceded with a _
104 =cut
106 package Bio::Tools::Run::Phylo::Phast::PhyloFit;
107 use strict;
109 use Cwd;
110 use File::Spec;
111 use Bio::AlignIO;
112 use Bio::TreeIO;
114 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
116 our $PROGRAM_NAME = 'phyloFit';
117 our $PROGRAM_DIR = $ENV{'PHASTDIR'};
119 # methods and their synonyms from the phastCons args we support
120 our %PARAMS = (subst_mod => 's',
121 min_informative => 'I',
122 precision => 'p',
123 log => 'l',
124 ancestor => 'A',
125 nrates => 'k',
126 alpha => 'a',
127 rate_constants => 'K',
128 features => 'g',
129 catmap => 'c',
130 do_cats => 'C',
131 reverse_groups => 'R');
133 our %SWITCHES = (gaps_as_bases => 'G',
134 quiet => 'q',
135 EM => 'E',
136 init_random => 'r',
137 estimate_freqs => 'F',
138 markov => 'N',
139 non_overlapping => 'V');
141 # just to be explicit, args we don't support (yet) or we handle ourselves
142 our %UNSUPPORTED = (msa_format => 'i',
143 out_root => 'o',
144 tree => 't',
145 help => 'h',
146 lnl => 'L',
147 init_model => 'M',
148 scale_only => 'B',
149 scale_subtree => 'S',
150 no_freqs => 'f',
151 no_rates => 'n',
152 post_probs => 'P',
153 expected_subs => 'X',
154 expected_total_subs => 'Z',
155 column_probs => 'U',
156 windows => 'w',
157 windows_explicit => 'v');
159 =head2 program_name
161 Title : program_name
162 Usage : $factory>program_name()
163 Function: holds the program name
164 Returns : string
165 Args : None
167 =cut
169 sub program_name {
170 return $PROGRAM_NAME;
173 =head2 program_dir
175 Title : program_dir
176 Usage : $factory->program_dir(@params)
177 Function: returns the program directory, obtained from ENV variable.
178 Returns : string
179 Args : None
181 =cut
183 sub program_dir {
184 return $PROGRAM_DIR;
187 =head2 new
189 Title : new
190 Usage : $factory = Bio::Tools::Run::Phylo::Phast::PhyloFit->new()
191 Function: creates a new PhyloFit factory
192 Returns : Bio::Tools::Run::Phylo::Phast::PhyloFit
193 Args : Most options understood by phastCons can be supplied as key =>
194 value pairs. Options that don't normally take a value
195 should be given a value of 1. You can type the keys as you would on
196 the command line (eg. '--gaps-as-bases' => 1) or with only a single
197 hyphen to start and internal hyphens converted to underscores (eg.
198 -gaps_as_bases => 1) to avoid having to quote the key.
200 These options can NOT be used with this wrapper currently:
201 msa_format / i
202 out_root / o
203 tree / t
204 help / h
205 lnl / L
206 init_model / M
207 scale_only / B
208 scale_subtree / S
209 no_freqs / f
210 no_rates / n
211 post_probs / P
212 expected_subs / X
213 expected_total_subs / Z
214 column_probs / U
215 windows / w
216 windows_explicit / v
218 =cut
220 sub new {
221 my ($class, @args) = @_;
222 my $self = $class->SUPER::new(@args);
224 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
225 (map { $_ => $SWITCHES{$_} } keys %SWITCHES)},
226 -create => 1);
228 return $self;
231 =head2 run
233 Title : run
234 Usage : $result = $factory->run($fasta_align_file, $newick_tree_file);
235 -or-
236 $result = $factory->run($align_object, $tree_object);
237 -or-
238 $result = $factory->run($align_object, $db_taxonomy_object);
239 Function: Runs phyloFit on an alignment.
240 Returns : filename of init.mod file produced
241 Args : The first argument represents an alignment, the second argument
242 a species tree.
243 The alignment can be provided as a multi-fasta format alignment
244 filename, or a Bio::Align::AlignI compliant object (eg. a
245 Bio::SimpleAlign).
246 The species tree can be provided as a newick format tree filename
247 or a Bio::Tree::TreeI compliant object. Alternatively a
248 Bio::DB::Taxonomy object can be supplied, in which case the species
249 tree will be generated by using the alignment sequence names as
250 species names and looking for those in the supplied database.
252 In all cases, the alignment sequence names must correspond to node
253 ids in the species tree. Multi-word species names should be joined
254 with underscores to form the sequence names, eg. Homo_sapiens
256 =cut
258 sub run {
259 my ($self, $aln, $tree) = @_;
261 ($aln && $tree) || $self->throw("alignment and tree must be supplied");
262 $self->_alignment($aln);
263 $tree = $self->_tree($tree);
265 $tree->force_binary;
267 # adjust tree node ids to convert spaces to underscores (eg. if tree
268 # generated from taxonomy)
269 foreach my $node ($tree->get_leaf_nodes) {
270 my $id = $node->id;
271 $id =~ s/ /_/g;
272 $node->id($id);
275 # check node and seq names match
276 $self->_check_names;
278 return $self->_run;
281 sub _run {
282 my $self = shift;
284 my $exe = $self->executable || return;
286 # cd to a temp dir
287 my $temp_dir = $self->tempdir;
288 my $cwd = Cwd->cwd();
289 chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
291 my $aln_file = $self->_write_alignment;
292 my $tree_file = $self->_write_tree;
294 #...phyloFit --tree "(human,(mouse,rat))" --msa-format FASTA --out-root init alignment.fa
295 my $command = $exe.$self->_setparams($aln_file, $tree_file);
296 $self->debug("phyloFit command = $command\n");
297 system($command) && $self->throw("phyloFit call ($command) crashed: $?");
299 # cd back again
300 chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
302 return File::Spec->catfile($temp_dir, 'init.mod');
305 =head2 _setparams
307 Title : _setparams
308 Usage : Internal function, not to be called directly
309 Function: Creates a string of params to be used in the command string
310 Returns : string of params
311 Args : alignment and tree file names
313 =cut
315 sub _setparams {
316 my ($self, $aln_file, $tree_file) = @_;
318 my $param_string = ' --tree '.$tree_file;
319 $param_string .= ' --msa-format FASTA';
320 $param_string .= ' --out-root init';
322 # --min-informative defaults to 50, but must not be greater than the number
323 # of bases in the alignment
324 my $aln = $self->_alignment;
325 my $length = $aln->length;
326 my $min_informative = $self->min_informative || 50;
327 if ($length < $min_informative) {
328 $self->min_informative($length);
331 $param_string .= $self->SUPER::_setparams(-params => [keys %PARAMS],
332 -switches => [keys %SWITCHES],
333 -double_dash => 1,
334 -underscore_to_dash => 1);
335 $param_string .= ' '.$aln_file;
337 return $param_string;