3 # BioPerl module for Bio::Tools::Run::Alignment::MAFFT
5 # Cared for by Jason Stajich
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Tools::Run::Alignment::MAFFT - run the MAFFT alignment tools
19 use Bio::Tools::Run::Alignment::MAFFT;
24 You can get MAFFT from
25 http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
27 Basically fftnsi is the default
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to one
35 of the Bioperl mailing lists. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bio.perl.org/MailList.html - About the mailing lists
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 of the bugs and their resolution. Bug reports can be submitted via the web:
44 http://bugzilla.open-bio.org/
46 =head1 AUTHOR - Jason Stajich
48 Email jason-at-bioperl.org
52 The rest of the documentation details each of the object
53 methods. Internal methods are usually preceded with a _
57 package Bio
::Tools
::Run
::Alignment
::MAFFT
;
59 use vars
qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
60 @MAFFT_PARAMS @MAFFT_SWITCHES @OTHER_SWITCHES %OK_FIELD
70 use Bio::Factory::ApplicationFactoryI;
71 use Bio::Tools::Run::WrapperBase;
72 @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
73 Bio::Factory::ApplicationFactoryI);
76 %DEFAULTS = ( 'OUTPUT' => 'fasta',
79 @MAFFT_PARAMS =qw( METHOD CYCLES );
80 @MAFFT_SWITCHES = qw( NJ ALL_POSITIVE);
81 @OTHER_SWITCHES = qw(QUIET ALIGN OUTPUT OUTFILE);
82 @MAFFT_ALN_METHODS = qw(fftnsi fftns nwnsi nwns fftnsrough nwnsrough);
83 # Authorize attribute fields
84 foreach my $attr ( @MAFFT_SWITCHES,@MAFFT_PARAMS,@OTHER_SWITCHES ) {
92 Usage : $factory->program_name()
93 Function: holds the program name
106 Usage : my $exe = $factory->executable('fftnsi');
107 Function: Finds the full path to the named executable
108 Returns : string representing the full path to the exe
109 Args : [optional] name of executable to set path to
110 [optional] boolean flag whether or not to warn when exe is not found
116 my ($self, $exename, $exe,$warn) = @_;
117 $exename = $self->program_name unless (defined $exename );
119 if( defined $exe && -x
$exe ) {
120 $self->{'_pathtoexe'}->{$exename} = $exe;
122 unless( defined $self->{'_pathtoexe'}->{$exename} ) {
123 my $f = $self->program_path($exename);
124 $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e
$f && -x
$f );
126 # This is how I meant to split up these conditionals --jason
127 # if exe is null we will execute this (handle the case where
128 # PROGRAMDIR pointed to something invalid)
129 unless( $exe ) { # we didn't find it in that last conditional
130 if( ($exe = $self->io->exists_exe($exename)) && -x
$exe ) {
131 $self->{'_pathtoexe'}->{$exename} = $exe;
133 $self->warn("Cannot find executable for $exename") if $warn;
134 $self->{'_pathtoexe'}->{$exename} = undef;
138 return $self->{'_pathtoexe'}->{$exename};
145 Usage : my $path = $factory->program_path();
146 Function: Builds path for executable
147 Returns : string representing the full path to the exe
153 my ($self,$program_name) = @_;
155 push @path, $self->program_dir if $self->program_dir;
156 push @path, $program_name .($^O
=~ /mswin/i ?
'.exe':'');
158 return Bio
::Root
::IO
->catfile(@path);
164 Usage : $factory->program_dir(@params)
165 Function: returns the program directory, obtained from ENV variable.
172 return Bio
::Root
::IO
->rel2abs($ENV{MAFFTDIR
}) if $ENV{MAFFTDIR
};
176 my ($class,@args) = @_;
177 my $self = $class->SUPER::new
(@args);
182 $value = shift @args;
183 next if( $attr =~ /^-/); # don't want named parameters
184 $self->$attr($value);
187 $self->output($DEFAULTS{'OUTPUT'}) unless( $self->output );
188 $self->method($DEFAULTS{'METHOD'}) unless( $self->method );
194 my $attr = $AUTOLOAD;
198 $attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME';
199 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
201 $self->{$attr} = shift if @_;
202 return $self->{$attr};
208 Usage : $obj->error_string($newval)
209 Function: Where the output from the last analysis run is stored.
210 Returns : value of error_string
211 Args : newvalue (optional)
217 my ($self,$value) = @_;
218 if( defined $value) {
219 $self->{'error_string'} = $value;
221 return $self->{'error_string'};
228 Usage : exit if $prog->version() < 1.8
229 Function: Determine the version number of the program
231 Returns : float or undef
239 return undef unless $exe = $self->executable;
240 # this is a bit of a hack, but MAFFT is just a gawk script
241 if( open(NAME
, "grep 'MAFFT version' $exe |") ) {
242 if( <NAME
> =~ /MAFFT\s+version\s+([\d.]+)/ ) {
252 Usage : my $output = $application->run(\@seqs);
253 Function: Generic run of an application
254 Returns : Bio::SimpleAlign object
255 Args : array ref of Bio::PrimarySeqI objects OR
256 filename of sequences to run with
261 my ($self,$seqs) = @_;
262 return $self->align($seqs);
269 $inputfilename = 't/data/cysprot.fa';
270 $aln = $factory->align($inputfilename);
272 $seq_array_ref = \@seq_array;
273 # @seq_array is an array of Seq objs
274 $aln = $factory->align($seq_array_ref);
275 Function: Perform a multiple sequence alignment
276 Returns : Reference to a SimpleAlign object containing the
278 Args : Name of a file containing a set of unaligned fasta sequences
279 or else an array of references to Bio::Seq objects.
281 Throws an exception if argument is not either a string (eg a
282 filename) or a reference to an array of Bio::Seq objects. If
283 argument is string, throws exception if file corresponding to string
284 name can not be found. If argument is Bio::Seq array, throws
285 exception if less than two sequence objects are in array.
290 my ($self,$input) = @_;
291 # Create input file pointer
292 $self->io->_io_cleanup();
293 my ($infilename,$type) = $self->_setinput($input);
295 $self->throw("Bad input data or less than 2 sequences in $input !");
298 my ($param_string,$outstr) = $self->_setparams();
301 return &_run
($self, $infilename, $param_string,$outstr);
307 Usage : Internal function, not to be called directly
308 Function: makes actual system call to mafft program
310 Returns : nothing; mafft output is written to a
311 temporary file OR specified output file
312 Args : Name of a file containing a set of unaligned fasta sequences
313 and hash of parameters to be passed to mafft
319 my ($self,$infilename,$paramstr,$outstr) = @_;
320 my $commandstring = $self->executable($self->method)." $paramstr $infilename $outstr";
322 $self->debug( "mafft command = $commandstring \n");
324 my $status = system($commandstring);
325 my $outfile = $self->outfile();
326 if( !-e
$outfile || -z
$outfile ) {
327 $self->warn( "MAFFT call crashed: $? [command $commandstring]\n");
331 my $in = Bio
::AlignIO
->new('-file' => $outfile,
332 '-format' => $self->output);
333 my $aln = $in->next_aln();
341 Usage : Internal function, not to be called directly
342 Function: Create input file for mafft programs
344 Returns : name of file containing mafft data input
345 Args : Seq or Align object reference or input file name
351 my ($self,$input) = @_;
352 my ($infilename, $seq, $temp, $tfh);
354 # check that file exists or throw
355 $infilename = $input;
356 unless (-e
$input) {return 0;}
357 return ($infilename);
358 } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
359 # array of BioSeq objects...
360 # Open temporary file for both reading & writing of array
361 ($tfh,$infilename) = $self->io->tempfile();
362 if( ! ref($input->[0]) ) {
363 $self->warn("passed an array ref which did not contain objects to _setinput");
365 } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
366 $temp = Bio
::SeqIO
->new('-fh' => $tfh,
367 '-format' => 'fasta');
369 foreach $seq (@
$input) {
370 return 0 unless ( ref($seq) &&
371 $seq->isa("Bio::PrimarySeqI") );
372 if( ! defined $seq->display_id ||
373 $seq->display_id =~ /^\s+$/) {
374 $seq->display_id( "Seq".$ct++);
376 $temp->write_seq($seq);
383 $self->warn( "got an array ref with 1st entry ".
385 " and don't know what to do with it\n");
388 return ($infilename);
390 $self->warn("Got $input and don't know what to do with it\n");
399 Usage : Internal function, not to be called directly
400 Function: Create parameter inputs for mafft program
402 Returns : parameter string to be passed to mafft program
403 Args : name of calling object
409 my ($outfile,$param_string) = ('','');
411 # Set default output file if no explicit output file selected
412 unless (defined($outfile = $self->outfile) ) {
414 ($tfh, $outfile) = $self->io->tempfile(-dir
=>$self->tempdir());
417 $self->outfile($outfile);
421 for $attr ( @MAFFT_SWITCHES) {
422 $value = $self->$attr();
423 next unless ($value);
424 my $attr_key = lc $attr; #put switches in format expected by mafft
425 $attr_key = ' --'.$attr_key;
426 $param_string .= $attr_key ;
428 my $method = $self->method;
429 $self->throw("no method ") unless defined $method;
430 if( $method !~ /(rough|nsi)$/ &&
431 defined $self->cycles) {
432 $param_string .= " ".$self->cycles;
434 my $outputstr = " 1>$outfile" ;
436 if ($self->quiet() || $self->verbose < 0) {
437 $outputstr .= ' 2>/dev/null';
439 return ($param_string, $outputstr);
445 Usage : my @methods = $self->methods()
446 Function: Get/Set Alignment methods - NOT VALIDATED
447 Returns : array of strings
448 Args : arrayref of strings
455 return @MAFFT_ALN_METHODS;
459 =head1 Bio::Tools::Run::WrapperBase methods
463 =head2 no_param_checks
465 Title : no_param_checks
466 Usage : $obj->no_param_checks($newval)
467 Function: Boolean flag as to whether or not we should
468 trust the sanity checks for parameter values
469 Returns : value of no_param_checks
470 Args : newvalue (optional)
475 =head2 save_tempfiles
477 Title : save_tempfiles
478 Usage : $obj->save_tempfiles($newval)
480 Returns : value of save_tempfiles
481 Args : newvalue (optional)
489 Usage : my $outfile = $mafft->outfile_name();
490 Function: Get/Set the name of the output file for this run
491 (if you wanted to do something special)
493 Args : [optional] string to set value to
502 Usage : my $tmpdir = $self->tempdir();
503 Function: Retrieve a temporary directory name (which is created)
504 Returns : string which is the name of the temporary directory
513 Usage : $mafft->cleanup();
514 Function: Will cleanup the tempdir directory
524 Usage : $obj->io($newval)
525 Function: Gets a L<Bio::Root::IO> object
526 Returns : L<Bio::Root::IO>
532 1; # Needed to keep compiler happy