1 # BioPerl module for Bio::Tools::Run::Phylo::Phylip::SeqBoot
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
13 Bio::Tools::Run::Phylo::Phylip::SeqBoot - Wrapper for the phylip
18 #Create a SimpleAlign object
19 @params = ('ktuple' => 2, 'matrix' => 'BLOSUM');
20 $factory = Bio::Tools::Run::Alignment::Clustalw->new(@params);
21 $inputfilename = 't/data/cysprot.fa';
22 $aln = $factory->align($inputfilename); # $aln is a SimpleAlign object.
24 # Use seqboot to generate bootstrap alignments
25 my @params = ('datatype'=>'SEQUENCE','replicates'=>100);
26 my $seq = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
28 my $aln_ref = $seq->run($aln);
30 my $aio = Bio::AlignIO->new(-file=>">alignment.bootstrap",-format=>"phylip");
31 foreach my $ai(@{$aln_ref}){
37 Wrapper for seqboot from the phylip package by Joseph Felsenstein.
39 Taken from phylip doc...
41 "SEQBOOT is a general bootstrapping tool. It is intended to allow you to
42 generate multiple data sets that are resampled versions of the input data set.
43 SEQBOOT can handle molecular sequences, binary characters,
44 restriction sites, or gene frequencies."
46 More documentation on using seqboot and setting parameters may be found
47 in the phylip package.
50 This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
51 this is still experimental as v3.6 is still under alpha release and not all functionalities may be supported.
53 =head1 PARAMETERS FOR SEQBOOT
58 Description : (optional)
60 This program supports 3 different datatypes
61 SEQUENCE: Molecular Sequences
62 MORPH : Discrete Morphological Characters
63 REST : Restriction Sites
64 GENEFREQ: Gene Frequencies
71 Description: (optional)
73 3 different resampling methods are available:
75 BOOTSTRAP : creating a new data set by sampling N
76 characters randomly with replacement. The
77 resulting data set has the same size as the
78 original, but some characters have been left
79 out and others are duplicated
81 JACKKNIFE : Delete-half-jackknifing. It involves sampling
82 a random half of the characters, and
83 including them in the data but dropping the
84 others. The resulting data sets are half the
85 size of the original, and no characters are
88 PERMUTE : Permuting species within characters. It
89 involves permuting the columns of the data
90 matrix separately. This produces data matrices
91 that have the same number and kinds of
92 characters but no taxonomic structure.
99 Description : (optional)
101 This option allows the user to set the number of
102 replicate data sets. Most statisticians would be
103 happiest with 1000 to 10,000 replicates in a
104 bootstrap, but 100 gives a good rough picture
111 Description : (optional)
113 This option is to be used with gene frequencies datatype
114 option to specify that all alleles at each locus are in
123 User feedback is an integral part of the evolution of this and other
124 Bioperl modules. Send your comments and suggestions preferably to one
125 of the Bioperl mailing lists. Your participation is much appreciated.
127 bioperl-l@bioperl.org - General discussion
128 http://bio.perl.org/MailList.html - About the mailing lists
130 =head2 Reporting Bugs
132 Report bugs to the Bioperl bug tracking system to help us keep track
133 of the bugs and their resolution. Bug reports can be submitted via the
136 http://bugzilla.bioperl.org/
138 =head1 AUTHOR - Shawn Hoon
140 Email shawnh@fugu-sg.org
144 The rest of the documentation details each of the object
145 methods. Internal methods are usually preceded with a _
152 package Bio
::Tools
::Run
::Phylo
::Phylip
::SeqBoot
;
154 use vars
qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
155 @SEQBOOT_PARAMS @OTHER_SWITCHES
158 use Bio::SimpleAlign;
161 use Bio::Tools::Run::Phylo::Phylip::Base;
162 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
163 use Bio::Matrix::PhylipDist;
167 # inherit from Phylip::Base which has some methods for dealing with
169 @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
171 # You will need to enable the SeqBoot program. This
172 # can be done in (at least) 3 ways:
174 # 1. define an environmental variable PHYLIPDIR:
175 # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
177 # 2. include a definition of an environmental variable PHYLIPDIR in
178 # every script that will use SeqBoot.pm.
179 # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
181 # 3. You can set the path to the program through doing:
182 # my @params = ('executable'=>'/usr/local/bin/seqboot');
183 # my $SeqBoot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
188 @SEQBOOT_PARAMS = qw(DATATYPE PERMUTE BLOCKSIZE REPLICATES READWEIGHTS READCAT);
189 @OTHER_SWITCHES = qw(QUIET);
190 foreach my $attr(@SEQBOOT_PARAMS,@OTHER_SWITCHES) {
198 Usage : ->program_name()
199 Function: holds the program name
212 Usage : ->program_dir()
213 Function: returns the program directory, obtained from ENV variable.
220 return Bio
::Root
::IO
->catfile($ENV{PHYLIPDIR
}) if $ENV{PHYLIPDIR
};
224 my ($class,@args) = @_;
225 my $self = $class->SUPER::new
(@args);
230 $value = shift @args;
231 next if( $attr =~ /^-/ ); # don't want named parameters
232 if ($attr =~/PROGRAM/i) {
233 $self->executable($value);
236 if ($attr =~ /IDLENGTH/i){
237 $self->idlength($value);
240 $self->$attr($value);
247 my $attr = $AUTOLOAD;
250 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
251 $self->{$attr} = shift if @_;
252 return $self->{$attr};
258 Usage : $obj->idlength ($newval)
260 Returns : value of idlength
261 Args : newvalue (optional)
270 $self->{'idlength'} = $value;
272 return $self->{'idlength'};
281 $inputfilename = 't/data/prot.phy';
282 $matrix= $seqboot_factory->run($inputfilename);
284 $seq_array_ref = \@seq_array; # @seq_array is array of Seq objs
285 $aln = $clustalw_factory->align($seq_array_ref);
286 $aln_ref = $SeqBootfactory->run($aln);
288 Function: Create bootstrap sets of alignments
290 Returns : an array ref of L<Bio::SimpleAlign>
291 Args : Name of a file containing a multiple alignment in Phylip format
292 or a SimpleAlign object
294 Throws an exception if argument is not either a string (eg a
295 filename) or a Bio::SimpleAlign object. If
296 argument is string, throws exception if file corresponding to string
297 name can not be found.
303 my ($self,$input) = @_;
306 # Create input file pointer
307 $infilename = $self->_setinput($input);
308 if (!$infilename) {$self->throw("Problems setting up for seqboot. Probably bad input data in $input !");}
310 # Create parameter string to pass to SeqBoot program
311 my $param_string = $self->_setparams();
313 my $aln = $self->_run($infilename,$param_string);
317 #################################################
322 Usage : Internal function, not to be called directly
323 Function: makes actual system call to SeqBoot program
325 Returns : an array ref of L<Bio::SimpleAlign>
326 Args : Name of a file containing a set of multiple alignments in Phylip format
327 and a parameter string to be passed to SeqBoot
333 my ($self,$infile,$param_string) = @_;
336 unless( File
::Spec
->file_name_is_absolute($infile) ) {
337 $infile = $self->io->catfile($curpath,$infile);
340 my $rand = (2 * int(rand(10000)) + 1);
341 if ($self->version == 3.5){
342 $instring = $infile."\n$rand\n$param_string";
345 $instring = $infile."\n$param_string$rand\n";
347 $self->debug( "Program ".$self->executable." $instring\n");
349 chdir($self->tempdir);
350 #open a pipe to run SeqBoot to bypass interactive menus
351 if ($self->quiet() || $self->verbose() < 0) {
352 open(SeqBoot
,"|".$self->executable .">/dev/null");
355 open(SeqBoot
,"|".$self->executable);
357 print SeqBoot
$instring;
361 my $outfile = $self->io->catfile($self->tempdir,$self->outfile);
363 $self->throw("SeqBoot did not create files correctly ($outfile)")
364 unless (-e
$outfile);
366 #parse the alignments
370 push @parse_params, ('-interleaved' => 1) if $self->version == 3.6;
371 my $aio = Bio
::AlignIO
->new(-file
=>$outfile,-format
=>"phylip",
373 while (my $aln = $aio->next_aln){
377 # Clean up the temporary files created along the way...
378 unlink $outfile unless $self->save_tempfiles;
387 Usage : Internal function, not to be called directly
388 Function: Create input file for SeqBoot program
390 Returns : name of file containing a multiple alignment in Phylip format
391 Args : SimpleAlign object reference or input file name
397 my ($self, $input) = @_;
398 my ($alnfilename,$tfh);
400 # a phy formatted alignment file
401 unless (ref $input) {
402 # check that file exists or throw
403 $alnfilename= $input;
404 unless (-e
$input) {return 0;}
407 my @input = ref($input) eq 'ARRAY' ? @
{$input}: ($input);
409 ($tfh,$alnfilename) = $self->io->tempfile(-dir
=>$self->tempdir);
410 my $alnIO = Bio
::AlignIO
->new(-fh
=> $tfh,
412 -idlength
=>$self->idlength());
413 foreach my $input(@input){
414 # $input should be a Bio::Align::AlignI
415 $input->isa("Bio::Align::AlignI") || $self->throw("Expecting a Bio::Align::AlignI object");
416 # Open temporary file for both reading & writing of BioSeq array
417 $alnIO->write_aln($input);
427 Usage : Internal function, not to be called directly
428 Function: Create parameter inputs for SeqBoot program
430 Returns : parameter string to be passed to SeqBoot
431 Args : name of calling object
436 my ($attr, $value, $self);
440 my $param_string = "";
443 my %menu = %{$Menu{$self->version}->{'SEQBOOT'}};
445 foreach my $attr ( @SEQBOOT_PARAMS) {
446 $value = $self->$attr();
447 next unless (defined $value);
448 if ($attr =~/REPLICATES/i){
449 if( $value !~ /(\d+(\.\d+)?)/ ) {
450 $self->warn("Expected a number in $attr\n");
453 $param_string .= $menu{'REPLICATES'}."$value\n";
455 elsif($attr=~/DATATYPE/i){
456 $gene_freq = 1 if $value =~/GENEFREQ/i;
457 $param_string .= $menu{'DATATYPE'}{uc $value};
460 if($attr =~/ALLELES/i){
462 $self->warn("Alleles options only be used with alleles option");
465 $param_string .=$menu{uc $attr};
469 $param_string .= $menu{'SUBMIT'};
471 return $param_string;
476 =head1 Bio::Tools::Run::Wrapper methods
480 =head2 no_param_checks
482 Title : no_param_checks
483 Usage : $obj->no_param_checks($newval)
484 Function: Boolean flag as to whether or not we should
485 trust the sanity checks for parameter values
486 Returns : value of no_param_checks
487 Args : newvalue (optional)
492 =head2 save_tempfiles
494 Title : save_tempfiles
495 Usage : $obj->save_tempfiles($newval)
497 Returns : value of save_tempfiles
498 Args : newvalue (optional)
506 Usage : my $outfile = $SeqBoot->outfile_name();
507 Function: Get/Set the name of the output file for this run
508 (if you wanted to do something special)
510 Args : [optional] string to set value to
519 Usage : my $tmpdir = $self->tempdir();
520 Function: Retrieve a temporary directory name (which is created)
521 Returns : string which is the name of the temporary directory
530 Usage : $SeqBoot->cleanup();
531 Function: Will cleanup the tempdir directory after a SeqBoot run
541 Usage : $obj->io($newval)
542 Function: Gets a L<Bio::Root::IO> object
543 Returns : L<Bio::Root::IO>
549 1; # Needed to keep compiler happy