3 # BioPerl module for Bio::Tools::Run::Phylo::Phast::PhyloFit
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Tools::Run::Phylo::Phast::PhyloFit - Wrapper for phyloFit
21 use Bio::Tools::Run::Phylo::Phast::PhyloFit;
23 # Make a PhyloFit factory
24 $factory = Bio::Tools::Run::Phylo::Phast::PhastCons->new();
26 # Generate an init.mod file for use by phastCons
27 my $init_file = $factory->run($alignment, $tree);
31 This is a wrapper for running the phyloFit application by Adam Siepel. You
32 can get details here: http://compgen.bscb.cornell.edu/~acs/software.html
34 Currently the interface is extremely simplified. Only the --tree form of usage
35 is allowed (not --init-model), which means a tree must be supplied with the
36 alignment (to run()). You can try supplying normal phyloFit arguments to new(),
37 or calling arg-named methods (excluding initial hyphens and converting others
38 to underscores, eg. $factory-E<gt>gaps_as_bases(1) to set the --gaps-as-bases arg).
40 WARNING: the API may change in the future to allow for greater flexability and
41 access to more phyloFit features.
44 You will need to enable this PhyloFit wrapper to find the phast programs (at
45 least phyloFit itself).
46 This can be done in (at least) three ways:
48 1. Make sure the phyloFit executable is in your path.
49 2. Define an environmental variable PHASTDIR which is a
50 directory which contains the phyloFit application:
53 export PHASTDIR=/home/username/phast/bin
57 setenv PHASTDIR /home/username/phast/bin
59 3. Include a definition of an environmental variable PHASTDIR in
60 every script that will use this PhyloFit wrapper module, e.g.:
62 BEGIN { $ENV{PHASTDIR} = '/home/username/phast/bin' }
63 use Bio::Tools::Run::Phylo::Phast::PhyloFit;
69 User feedback is an integral part of the evolution of this and other
70 Bioperl modules. Send your comments and suggestions preferably to
71 the Bioperl mailing list. Your participation is much appreciated.
73 bioperl-l@bioperl.org - General discussion
74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 Please direct usage questions or support issues to the mailing list:
80 I<bioperl-l@bioperl.org>
82 rather than to the module maintainer directly. Many experienced and
83 reponsive experts will be able look at the problem and quickly
84 address it. Please include a thorough description of the problem
85 with code and data examples if at all possible.
89 Report bugs to the Bioperl bug tracking system to help us keep track
90 of the bugs and their resolution. Bug reports can be submitted via
93 http://redmine.open-bio.org/projects/bioperl/
95 =head1 AUTHOR - Sendu Bala
101 The rest of the documentation details each of the object methods.
102 Internal methods are usually preceded with a _
106 package Bio
::Tools
::Run
::Phylo
::Phast
::PhyloFit
;
114 use base
qw(Bio::Tools::Run::Phylo::PhyloBase);
116 our $PROGRAM_NAME = 'phyloFit';
117 our $PROGRAM_DIR = $ENV{'PHASTDIR'};
119 # methods and their synonyms from the phastCons args we support
120 our %PARAMS = (subst_mod
=> 's',
121 min_informative
=> 'I',
127 rate_constants
=> 'K',
131 reverse_groups
=> 'R');
133 our %SWITCHES = (gaps_as_bases
=> 'G',
137 estimate_freqs
=> 'F',
139 non_overlapping
=> 'V');
141 # just to be explicit, args we don't support (yet) or we handle ourselves
142 our %UNSUPPORTED = (msa_format
=> 'i',
149 scale_subtree
=> 'S',
153 expected_subs
=> 'X',
154 expected_total_subs
=> 'Z',
157 windows_explicit
=> 'v');
162 Usage : $factory>program_name()
163 Function: holds the program name
170 return $PROGRAM_NAME;
176 Usage : $factory->program_dir(@params)
177 Function: returns the program directory, obtained from ENV variable.
190 Usage : $factory = Bio::Tools::Run::Phylo::Phast::PhyloFit->new()
191 Function: creates a new PhyloFit factory
192 Returns : Bio::Tools::Run::Phylo::Phast::PhyloFit
193 Args : Most options understood by phastCons can be supplied as key =>
194 value pairs. Options that don't normally take a value
195 should be given a value of 1. You can type the keys as you would on
196 the command line (eg. '--gaps-as-bases' => 1) or with only a single
197 hyphen to start and internal hyphens converted to underscores (eg.
198 -gaps_as_bases => 1) to avoid having to quote the key.
200 These options can NOT be used with this wrapper currently:
213 expected_total_subs / Z
221 my ($class, @args) = @_;
222 my $self = $class->SUPER::new
(@args);
224 $self->_set_from_args(\
@args, -methods
=> {(map { $_ => $PARAMS{$_} } keys %PARAMS),
225 (map { $_ => $SWITCHES{$_} } keys %SWITCHES)},
234 Usage : $result = $factory->run($fasta_align_file, $newick_tree_file);
236 $result = $factory->run($align_object, $tree_object);
238 $result = $factory->run($align_object, $db_taxonomy_object);
239 Function: Runs phyloFit on an alignment.
240 Returns : filename of init.mod file produced
241 Args : The first argument represents an alignment, the second argument
243 The alignment can be provided as a multi-fasta format alignment
244 filename, or a Bio::Align::AlignI compliant object (eg. a
246 The species tree can be provided as a newick format tree filename
247 or a Bio::Tree::TreeI compliant object. Alternatively a
248 Bio::DB::Taxonomy object can be supplied, in which case the species
249 tree will be generated by using the alignment sequence names as
250 species names and looking for those in the supplied database.
252 In all cases, the alignment sequence names must correspond to node
253 ids in the species tree. Multi-word species names should be joined
254 with underscores to form the sequence names, eg. Homo_sapiens
259 my ($self, $aln, $tree) = @_;
261 ($aln && $tree) || $self->throw("alignment and tree must be supplied");
262 $self->_alignment($aln);
263 $tree = $self->_tree($tree);
267 # adjust tree node ids to convert spaces to underscores (eg. if tree
268 # generated from taxonomy)
269 foreach my $node ($tree->get_leaf_nodes) {
275 # check node and seq names match
284 my $exe = $self->executable || return;
287 my $temp_dir = $self->tempdir;
288 my $cwd = Cwd
->cwd();
289 chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
291 my $aln_file = $self->_write_alignment;
292 my $tree_file = $self->_write_tree;
294 #...phyloFit --tree "(human,(mouse,rat))" --msa-format FASTA --out-root init alignment.fa
295 my $command = $exe.$self->_setparams($aln_file, $tree_file);
296 $self->debug("phyloFit command = $command\n");
297 system($command) && $self->throw("phyloFit call ($command) crashed: $?");
300 chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
302 return File
::Spec
->catfile($temp_dir, 'init.mod');
308 Usage : Internal function, not to be called directly
309 Function: Creates a string of params to be used in the command string
310 Returns : string of params
311 Args : alignment and tree file names
316 my ($self, $aln_file, $tree_file) = @_;
318 my $param_string = ' --tree '.$tree_file;
319 $param_string .= ' --msa-format FASTA';
320 $param_string .= ' --out-root init';
322 # --min-informative defaults to 50, but must not be greater than the number
323 # of bases in the alignment
324 my $aln = $self->_alignment;
325 my $length = $aln->length;
326 my $min_informative = $self->min_informative || 50;
327 if ($length < $min_informative) {
328 $self->min_informative($length);
331 $param_string .= $self->SUPER::_setparams
(-params
=> [keys %PARAMS],
332 -switches
=> [keys %SWITCHES],
334 -underscore_to_dash
=> 1);
335 $param_string .= ' '.$aln_file;
337 return $param_string;