support new exported Menu variable
[bioperl-run.git] / Bio / Tools / Run / Phylo / Phylip / SeqBoot.pm
blobfad0ddae6b16bb3d5c7e12a5b526d1a3a77f3092
1 # BioPerl module for Bio::Tools::Run::Phylo::Phylip::SeqBoot
3 # Created by
5 # Shawn Hoon
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Tools::Run::Phylo::Phylip::SeqBoot - Wrapper for the phylip
14 program SeqBoot
16 =head1 SYNOPSIS
18 #Create a SimpleAlign object
19 @params = ('ktuple' => 2, 'matrix' => 'BLOSUM');
20 $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
21 $inputfilename = 't/data/cysprot.fa';
22 $aln = $factory->align($inputfilename); # $aln is a SimpleAlign object.
24 # Use seqboot to generate bootstrap alignments
25 my @params = ('datatype'=>'SEQUENCE','replicates'=>100);
26 my $seq = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
28 my $aln_ref = $seq->run($aln);
30 my $aio = Bio::AlignIO->new(-file=>">alignment.bootstrap",-format=>"phylip");
31 foreach my $ai(@{$aln_ref}){
32 $aio->write_aln($ai);
35 =head1 DESCRIPTION
37 Wrapper for seqboot from the phylip package by Joseph Felsenstein.
39 Taken from phylip doc...
41 "SEQBOOT is a general bootstrapping tool. It is intended to allow you to
42 generate multiple data sets that are resampled versions of the input data set.
43 SEQBOOT can handle molecular sequences, binary characters,
44 restriction sites, or gene frequencies."
46 More documentation on using seqboot and setting parameters may be found
47 in the phylip package.
49 VERSION Support
50 This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
51 this is still experimental as v3.6 is still under alpha release and not all functionalities may be supported.
53 =head1 PARAMETERS FOR SEQBOOT
55 =head2 DATATYPE
57 Title : DATATYPE
58 Description : (optional)
60 This program supports 4 different datatypes
61 SEQUENCE: Molecular Sequences
62 MORPH : Discrete Morphological Characters
63 REST : Restriction Sites
64 GENEFREQ: Gene Frequencies
66 Defaults to SEQUENCE
68 =head2 PERMUTE
70 Title: PERMUTE
71 Description: (optional)
73 3 different resampling methods are available:
75 BOOTSTRAP : creating a new data set by sampling N
76 characters randomly with replacement The
77 resulting data set has the same size as the
78 original, but some characters have been left
79 out and others are duplicated
81 JACKKNIFE : Delete-half-jackknifing. It involves sampling
82 a random half of the characters, and
83 including them in the data but dropping the
84 others The resulting data sets are half the
85 size of the original, and no characters are
86 duplicated.
88 PERMUTE : Permuting species within characters. It
89 involves permuting the columns of the data
90 matrix separately. This produces data matrices
91 that have the same number and kinds of
92 characters but no taxonomic structure.
94 Defaults to BOOTSTRAP
96 =head2 REPLICATES
98 Title : REPLICATES
99 Description : (optional)
101 This option allows the user to set the number of
102 replicate data sets. Most statisticians would be
103 happiest with 1000 to 10,000 replicates in a
104 bootstrap, but 100 gives a good rough picture
106 Defaults to 100
108 =head2 ALLELES
110 Title : ALLELES
111 Description : (optional)
113 This option is to be used with gene frequencies datatype
114 option to specify that all alleles at each locus are in
115 the input file.
117 Defaults to NULL
119 =head1 FEEDBACK
121 =head2 Mailing Lists
123 User feedback is an integral part of the evolution of this and other
124 Bioperl modules. Send your comments and suggestions preferably to one
125 of the Bioperl mailing lists. Your participation is much appreciated.
127 bioperl-l@bioperl.org - General discussion
128 http://bio.perl.org/MailList.html - About the mailing lists
130 =head2 Reporting Bugs
132 Report bugs to the Bioperl bug tracking system to help us keep track
133 of the bugs and their resolution. Bug reports can be submitted via the
134 web:
136 http://bugzilla.bioperl.org/
138 =head1 AUTHOR - Shawn Hoon
140 Email shawnh@fugu-sg.org
142 =head1 APPENDIX
144 The rest of the documentation details each of the object
145 methods. Internal methods are usually preceded with a _
147 =cut
152 package Bio::Tools::Run::Phylo::Phylip::SeqBoot;
154 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
155 @SEQBOOT_PARAMS @OTHER_SWITCHES
156 %OK_FIELD);
157 use strict;
158 use Bio::SimpleAlign;
159 use Bio::AlignIO;
160 use Bio::TreeIO;
161 use Bio::Tools::Run::Phylo::Phylip::Base;
162 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
163 use Bio::Matrix::PhylipDist;
164 use Cwd;
167 # inherit from Phylip::Base which has some methods for dealing with
168 # Phylip specifics
169 @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
171 # You will need to enable the SeqBoot program. This
172 # can be done in (at least) 3 ways:
174 # 1. define an environmental variable PHYLIPDIR:
175 # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
177 # 2. include a definition of an environmental variable PHYLIPDIR in
178 # every script that will use SeqBoot.pm.
179 # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
181 # 3. You can set the path to the program through doing:
182 # my @params = ('executable'=>'/usr/local/bin/seqboot');
183 # my $SeqBoot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
# Build the table of permitted accessor names at compile time; AUTOLOAD
# consults %OK_FIELD to decide whether a requested parameter is allowed.
BEGIN {
    # Parameters that _setparams() iterates over when building program input.
    @SEQBOOT_PARAMS = qw(DATATYPE PERMUTE BLOCKSIZE REPLICATES READWEIGHTS READCAT);

    # Additional switches accepted by the wrapper.
    @OTHER_SWITCHES = qw(QUIET);

    # Register every known name with a true value.
    $OK_FIELD{$_}++ for ( @SEQBOOT_PARAMS, @OTHER_SWITCHES );
}
195 =head2 program_name
197 Title : program_name
198 Usage : $obj->program_name()
199 Function: holds the program name
200 Returns: string
201 Args : None
203 =cut
sub program_name {
    # Name of the executable this wrapper drives.
    my $name = 'seqboot';
    return $name;
}
209 =head2 program_dir
211 Title : program_dir
212 Usage : ->program_dir()
213 Function: returns the program directory, obtained from ENV variable.
214 Returns: string
215 Args :
217 =cut
sub program_dir {
    # With PHYLIPDIR unset (or empty) there is no directory to report.
    # Return explicitly (undef in scalar context, empty list in list
    # context) instead of falling off the end of the sub and leaking the
    # value of the failed condition.
    return unless $ENV{PHYLIPDIR};

    # Hand the configured directory to Bio::Root::IO->catfile, as before.
    return Bio::Root::IO->catfile( $ENV{PHYLIPDIR} );
}
# Constructor: builds the object through the parent class, then applies
# every non-dashed key/value pair as a parameter.
sub new {
    my ( $class, @args ) = @_;
    my $self = $class->SUPER::new(@args);

    # Consume the argument list two items at a time: name, then value.
    while (@args) {
        my ( $name, $setting ) = splice( @args, 0, 2 );

        # Dash-prefixed names are named parameters; they are not applied here.
        next if $name =~ /^-/;

        if ( $name =~ /PROGRAM/i ) {
            # Path to the program binary.
            $self->executable($setting);
        }
        elsif ( $name =~ /IDLENGTH/i ) {
            $self->idlength($setting);
        }
        else {
            # Anything else is dispatched by method name (AUTOLOAD validates it
            # unless a real method of that name exists).
            $self->$name($setting);
        }
    }
    return $self;
}
# Generic get/set accessor for every name registered in %OK_FIELD.
# The method name is upper-cased, so $obj->replicates and $obj->REPLICATES
# address the same slot. Throws for names that are not registered.
sub AUTOLOAD {
    my $self = shift;

    # $AUTOLOAD holds the fully qualified name of the method that was
    # called; keep only its last component, upper-cased.
    ( my $attr = $AUTOLOAD ) =~ s/.*:://;
    $attr = uc $attr;

    # Perl routes object destruction through AUTOLOAD when no DESTROY method
    # is found in the class hierarchy. Never treat that as a parameter.
    return if $attr eq 'DESTROY';

    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};

    # Called with an argument: store it. Always hand back the current value.
    $self->{$attr} = shift if @_;
    return $self->{$attr};
}
255 =head2 idlength
257 Title : idlength
258 Usage : $obj->idlength ($newval)
259 Function:
260 Returns : value of idlength
261 Args : newvalue (optional)
264 =cut
sub idlength {
    my ( $self, @new_value ) = @_;

    # Any argument at all (even undef) replaces the stored value.
    $self->{'idlength'} = $new_value[0] if @new_value;

    return $self->{'idlength'};
}
277 =head2 run
279 Title : run
280 Usage :
281 $inputfilename = 't/data/prot.phy';
282 $matrix= $seqboot_factory->run($inputfilename);
284 $seq_array_ref = \@seq_array; @seq_array is array of Seq objs
285 $aln = $clustalw_factory->align($seq_array_ref);
286 $aln_ref = $SeqBootfactory->run($aln);
288 Function: Create bootstrap sets of alignments
289 Example :
290 Returns : an array ref of L<Bio::SimpleAlign>
291 Args : Name of a file containing a multiple alignment in Phylip format
292 or an SimpleAlign object
294 Throws an exception if argument is not either a string (eg a
295 filename) or a Bio::SimpleAlign object. If
296 argument is string, throws exception if file corresponding to string
297 name can not be found.
299 =cut
sub run {
    my ( $self, $input ) = @_;

    # Turn the argument (file name or alignment object) into a file name;
    # a false result means the input could not be used.
    my $infilename = $self->_setinput($input);
    $self->throw("Problems setting up for seqboot. Probably bad input data in $input !")
        unless $infilename;

    # Build the text that is fed to the program, then run it.
    my $param_string = $self->_setparams();
    my $alignments   = $self->_run( $infilename, $param_string );

    return $alignments;
}
317 #################################################
319 =head2 _run
321 Title : _run
322 Usage : Internal function, not to be called directly
323 Function: makes actual system call to SeqBoot program
324 Example :
325 Returns : an array ref of <Bio::SimpleAlign>
326 Args : Name of a file containing a set of multiple alignments in Phylip format
327 and a parameter string to be passed to SeqBoot
330 =cut
# Runs the program on $infile, feeding it $param_string through a pipe,
# and returns a reference to an array of the alignments parsed from the
# program's output file.
# Throws if the temporary directory cannot be entered, the program cannot
# be started, the original directory cannot be restored, or no output
# file was produced.
sub _run {
    my ( $self, $infile, $param_string ) = @_;

    # The program is run from the temporary directory, so the input file
    # must be addressed by an absolute path.
    my $curpath = cwd;
    unless ( File::Spec->file_name_is_absolute($infile) ) {
        $infile = $self->io->catfile( $curpath, $infile );
    }

    # odd random seed
    my $rand = 2 * int( rand(10000) ) + 1;

    # The seed goes before the parameters for version 3.5 and after them
    # otherwise.
    my $instring = $self->version == 3.5
        ? $infile . "\n$rand\n$param_string"
        : $infile . "\n$param_string$rand\n";
    $self->debug( "Program " . $self->executable . " $instring\n" );

    my $tempdir = $self->tempdir;
    chdir($tempdir)
        or $self->throw("Cannot change to temporary directory $tempdir: $!");

    # Open a pipe to the program to bypass its interactive menus; its
    # standard output is discarded when running quietly.
    my $command = $self->executable;
    $command .= ">/dev/null" if $self->quiet() || $self->verbose() < 0;

    my $seqboot_fh;
    unless ( open( $seqboot_fh, '|-', $command ) ) {
        my $reason = $!;
        chdir($curpath);    # do not leave the caller in the temp directory
        $self->throw( "Cannot run " . $self->executable . ": $reason" );
    }
    print $seqboot_fh $instring;

    # A failing close on a pipe reports the child's exit status. Whether
    # the run produced usable output is decided by the file check below,
    # so this is only logged.
    close($seqboot_fh)
        or $self->debug("Pipe to seqboot closed abnormally (status $?)\n");

    # get the results
    my $outfile = $self->io->catfile( $tempdir, $self->outfile );
    chdir($curpath)
        or $self->throw("Cannot change back to directory $curpath: $!");
    $self->throw("SeqBoot did not create files correctly ($outfile)")
        unless ( -e $outfile );

    # parse the alignments; version 3.6 output is read as interleaved
    my @parse_params;
    push @parse_params, ( '-interleaved' => 1 ) if $self->version == 3.6;
    my $aio = Bio::AlignIO->new(
        -file   => $outfile,
        -format => "phylip",
        @parse_params
    );
    my @aln;
    while ( my $aln = $aio->next_aln ) {
        push @aln, $aln;
    }

    # Release the reader before the file is removed.
    $aio->close();

    # Clean up the temporary files created along the way...
    unlink $outfile unless $self->save_tempfiles;

    return \@aln;
}
384 =head2 _setinput()
386 Title : _setinput
387 Usage : Internal function, not to be called directly
388 Function: Create input file for SeqBoot program
389 Example :
390 Returns : name of file containing a multiple alignment in Phylip format
391 Args : SimpleAlign object reference or input file name
394 =cut
# Resolves the input of run() to the name of a Phylip-format file.
#   - a plain string is taken as a file name: returned unchanged if the
#     file exists, otherwise 0 is returned;
#   - a Bio::Align::AlignI object, or an array reference of such objects,
#     is written to a temporary file whose name is returned.
# Throws if any supplied reference is not a Bio::Align::AlignI object.
sub _setinput {
    my ( $self, $input ) = @_;

    # a phy formatted alignment file
    unless ( ref $input ) {
        # undef is treated like a missing file
        return 0 unless defined $input && -e $input;
        return $input;
    }

    my @alignments = ref($input) eq 'ARRAY' ? @{$input} : ($input);

    # Validate everything before creating the temporary file. The eval
    # keeps unblessed references and undefined elements from dying with a
    # raw "Can't call method" error instead of the intended exception.
    foreach my $aln (@alignments) {
        my $is_alignment = do {
            local $@;
            eval { $aln->isa("Bio::Align::AlignI") };
        };
        $self->throw("Expecting a Bio::Align::AlignI object")
            unless $is_alignment;
    }

    # Write all alignments, in order, to one temporary Phylip file.
    my ( $tfh, $alnfilename ) = $self->io->tempfile( -dir => $self->tempdir );
    my $alnIO = Bio::AlignIO->new(
        -fh       => $tfh,
        -format   => 'phylip',
        -idlength => $self->idlength()
    );
    $alnIO->write_aln($_) for @alignments;

    $alnIO->close();
    close($tfh);    # kept from the original; harmless if already closed
    return $alnfilename;
}
424 =head2 _setparams()
426 Title : _setparams
427 Usage : Internal function, not to be called directly
428 Function: Create parameter inputs for SeqBoot program
429 Example :
430 Returns : parameter string to be passed to SeqBoot
431 Args : name of calling object
433 =cut
# Builds the text fed to the program's interactive menu from the
# parameters that are set on this object.
# Returns the parameter string, always terminated by the menu's SUBMIT
# entry. Options that cannot be translated are reported with warn() and
# left out.
sub _setparams {
    my $self = shift;

    my $param_string = "";
    my $gene_freq    = 0;

    # Menu entries for the configured program version.
    my %menu = %{ $Menu{ $self->version }->{'SEQBOOT'} };

    foreach my $attr (@SEQBOOT_PARAMS) {
        my $value = $self->$attr();
        next unless defined $value;

        if ( $attr =~ /REPLICATES/i ) {
            # The replicate count must be a whole number; the pattern is
            # anchored so that values such as "abc1" or "10.5" are rejected.
            if ( $value !~ /\A\d+\z/ ) {
                $self->warn("Expected a number in $attr\n");
                next;
            }
            $param_string .= $menu{'REPLICATES'} . "$value\n";
            next;
        }

        # Remember a gene-frequency datatype for the ALLELES check below.
        $gene_freq = 1 if $attr =~ /DATATYPE/i && $value =~ /GENEFREQ/i;

        if ( $attr =~ /ALLELES/i && !$gene_freq ) {
            $self->warn("The alleles option can only be used with the GENEFREQ datatype");
            next;
        }

        # A menu entry is either a plain string or, as for DATATYPE, a hash
        # keyed by the upper-cased option value.
        # NOTE(review): whether PERMUTE is stored as such a hash cannot be
        # seen from this file; handling both shapes is safe either way.
        my $keystrokes = $menu{ uc $attr };
        $keystrokes = $keystrokes->{ uc $value } if ref($keystrokes) eq 'HASH';

        unless ( defined $keystrokes ) {
            $self->warn("No seqboot menu entry for $attr=$value; option ignored");
            next;
        }
        $param_string .= $keystrokes;
    }

    $param_string .= $menu{'SUBMIT'};

    return $param_string;
}
476 =head1 Bio::Tools::Run::Wrapper methods
478 =cut
480 =head2 no_param_checks
482 Title : no_param_checks
483 Usage : $obj->no_param_checks($newval)
484 Function: Boolean flag as to whether or not we should
485 trust the sanity checks for parameter values
486 Returns : value of no_param_checks
487 Args : newvalue (optional)
490 =cut
492 =head2 save_tempfiles
494 Title : save_tempfiles
495 Usage : $obj->save_tempfiles($newval)
496 Function:
497 Returns : value of save_tempfiles
498 Args : newvalue (optional)
501 =cut
503 =head2 outfile_name
505 Title : outfile_name
506 Usage : my $outfile = $SeqBoot->outfile_name();
507 Function: Get/Set the name of the output file for this run
508 (if you wanted to do something special)
509 Returns : string
510 Args : [optional] string to set value to
513 =cut
516 =head2 tempdir
518 Title : tempdir
519 Usage : my $tmpdir = $self->tempdir();
520 Function: Retrieve a temporary directory name (which is created)
521 Returns : string which is the name of the temporary directory
522 Args : none
525 =cut
527 =head2 cleanup
529 Title : cleanup
530 Usage : $seqboot->cleanup();
531 Function: Will cleanup the tempdir directory after a SeqBoot run
532 Returns : none
533 Args : none
536 =cut
538 =head2 io
540 Title : io
541 Usage : $obj->io($newval)
542 Function: Gets a L<Bio::Root::IO> object
543 Returns : L<Bio::Root::IO>
544 Args : none
547 =cut
549 1; # Needed to keep compiler happy