more, better tests for the new implementation for tree/bootstrap -- still some things...
[bioperl-run.git] / Bio / Tools / Run / Alignment / MAFFT.pm
blob8272a2d769789ecaa28ee7035a9668842d7f089f
1 # $Id$
3 # BioPerl module for Bio::Tools::Run::Alignment::MAFFT
5 # Cared for by Jason Stajich
7 # Copyright Jason Stajich
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tools::Run::Alignment::MAFFT - run the MAFFT alignment tools
17 =head1 SYNOPSIS
19 use Bio::Tools::Run::Alignment::MAFFT;
22 =head1 DESCRIPTION
24 You can get MAFFT from
25 http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
27 Basically fftnsi is the default
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to one
35 of the Bioperl mailing lists. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bio.perl.org/MailList.html - About the mailing lists
40 =head2 Reporting Bugs
42 Report bugs to the Bioperl bug tracking system to help us keep track
43 the bugs and their resolution. Bug reports can be submitted via the web:
44 http://bugzilla.open-bio.org/
46 =head1 AUTHOR - Jason Stajich
48 Email jason-at-bioperl.org
50 =head1 APPENDIX
52 The rest of the documentation details each of the object
53 methods. Internal methods are usually preceded with a _
55 =cut
57 package Bio::Tools::Run::Alignment::MAFFT;
59 use vars qw($AUTOLOAD @ISA $PROGRAMNAME $PROGRAM %DEFAULTS
60 @MAFFT_PARAMS @MAFFT_SWITCHES @OTHER_SWITCHES %OK_FIELD
61 @MAFFT_ALN_METHODS
63 use strict;
64 use Bio::Seq;
65 use Bio::SeqIO;
66 use Bio::SimpleAlign;
67 use Bio::AlignIO;
68 use Bio::Root::Root;
69 use Bio::Root::IO;
70 use Bio::Factory::ApplicationFactoryI;
71 use Bio::Tools::Run::WrapperBase;
72 @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
73 Bio::Factory::ApplicationFactoryI);
75 BEGIN {
76 %DEFAULTS = ( 'OUTPUT' => 'fasta',
77 'METHOD' => 'fftnsi',
78 'CYCLES' => 2);
79 @MAFFT_PARAMS =qw( METHOD CYCLES );
80 @MAFFT_SWITCHES = qw( NJ ALL_POSITIVE);
81 @OTHER_SWITCHES = qw(QUIET ALIGN OUTPUT OUTFILE);
82 @MAFFT_ALN_METHODS = qw(fftnsi fftns nwnsi nwns fftnsrough nwnsrough);
83 # Authorize attribute fields
84 foreach my $attr ( @MAFFT_SWITCHES,@MAFFT_PARAMS,@OTHER_SWITCHES ) {
85 $OK_FIELD{$attr}++;
89 =head2 program_name
91 Title : program_name
92 Usage : $factory->program_name()
93 Function: holds the program name
94 Returns: string
95 Args : None
97 =cut
99 sub program_name {
100 return 'mafft';
103 =head2 executable
105 Title : executable
106 Usage : my $exe = $blastfactory->executable('blastall');
107 Function: Finds the full path to the 'codeml' executable
108 Returns : string representing the full path to the exe
109 Args : [optional] name of executable to set path to
110 [optional] boolean flag whether or not warn when exe is not found
113 =cut
115 sub executable {
116 my ($self, $exename, $exe,$warn) = @_;
117 $exename = $self->program_name unless (defined $exename );
119 if( defined $exe && -x $exe ) {
120 $self->{'_pathtoexe'}->{$exename} = $exe;
122 unless( defined $self->{'_pathtoexe'}->{$exename} ) {
123 my $f = $self->program_path($exename);
124 $exe = $self->{'_pathtoexe'}->{$exename} = $f if(-e $f && -x $f );
126 # This is how I meant to split up these conditionals --jason
127 # if exe is null we will execute this (handle the case where
128 # PROGRAMDIR pointed to something invalid)
129 unless( $exe ) { # we didn't find it in that last conditional
130 if( ($exe = $self->io->exists_exe($exename)) && -x $exe ) {
131 $self->{'_pathtoexe'}->{$exename} = $exe;
132 } else {
133 $self->warn("Cannot find executable for $exename") if $warn;
134 $self->{'_pathtoexe'}->{$exename} = undef;
138 return $self->{'_pathtoexe'}->{$exename};
142 =head2 program_path
144 Title : program_path
145 Usage : my $path = $factory->program_path();
146 Function: Builds path for executable
147 Returns : string representing the full path to the exe
148 Args : none
150 =cut
152 sub program_path {
153 my ($self,$program_name) = @_;
154 my @path;
155 push @path, $self->program_dir if $self->program_dir;
156 push @path, $program_name .($^O =~ /mswin/i ?'.exe':'');
158 return Bio::Root::IO->catfile(@path);
161 =head2 program_dir
163 Title : program_dir
164 Usage : $factory->program_dir(@params)
165 Function: returns the program directory, obtiained from ENV variable.
166 Returns: string
167 Args :
169 =cut
171 sub program_dir {
172 return Bio::Root::IO->rel2abs($ENV{MAFFTDIR}) if $ENV{MAFFTDIR};
175 sub new {
176 my ($class,@args) = @_;
177 my $self = $class->SUPER::new(@args);
178 my ($attr, $value);
180 while (@args) {
181 $attr = shift @args;
182 $value = shift @args;
183 next if( $attr =~ /^-/); # don't want named parameters
184 $self->$attr($value);
187 $self->output($DEFAULTS{'OUTPUT'}) unless( $self->output );
188 $self->method($DEFAULTS{'METHOD'}) unless( $self->method );
189 return $self;
192 sub AUTOLOAD {
193 my $self = shift;
194 my $attr = $AUTOLOAD;
195 $attr =~ s/.*:://;
196 $attr = uc $attr;
197 # aliasing
198 $attr = 'OUTFILE' if $attr eq 'OUTFILE_NAME';
199 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
201 $self->{$attr} = shift if @_;
202 return $self->{$attr};
205 =head2 error_string
207 Title : error_string
208 Usage : $obj->error_string($newval)
209 Function: Where the output from the last analysus run is stored.
210 Returns : value of error_string
211 Args : newvalue (optional)
214 =cut
216 sub error_string{
217 my ($self,$value) = @_;
218 if( defined $value) {
219 $self->{'error_string'} = $value;
221 return $self->{'error_string'};
225 =head2 version
227 Title : version
228 Usage : exit if $prog->version() < 1.8
229 Function: Determine the version number of the program
230 Example :
231 Returns : float or undef
232 Args : none
234 =cut
236 sub version {
237 my ($self) = @_;
238 my $exe;
239 return undef unless $exe = $self->executable;
240 # this is a bit of a hack, but MAFFT is just a gawk script
241 if( open(NAME, "grep 'MAFFT version' $exe |") ) {
242 if( <NAME> =~ /MAFFT\s+version\s+([\d.]+)/ ) {
243 return $1;
246 return undef;
249 =head2 run
251 Title : run
252 Usage : my $output = $application->run(\@seqs);
253 Function: Generic run of an application
254 Returns : Bio::SimpleAlign object
255 Args : array ref of Bio::PrimarySeqI objects OR
256 filename of sequences to run with
258 =cut
260 sub run {
261 my ($self,$seqs) = @_;
262 return $self->align($seqs);
265 =head2 align
267 Title : align
268 Usage :
269 $inputfilename = 't/data/cysprot.fa';
270 $aln = $factory->align($inputfilename);
272 $seq_array_ref = \@seq_array;
273 # @seq_array is an array of Seq objs
274 $aln = $factory->align($seq_array_ref);
275 Function: Perform a multiple sequence alignment
276 Returns : Reference to a SimpleAlign object containing the
277 sequence alignment.
278 Args : Name of a file containing a set of unaligned fasta sequences
279 or else an array of references to Bio::Seq objects.
281 Throws an exception if argument is not either a string (eg a
282 filename) or a reference to an array of Bio::Seq objects. If
283 argument is string, throws exception if file corresponding to string
284 name can not be found. If argument is Bio::Seq array, throws
285 exception if less than two sequence objects are in array.
287 =cut
289 sub align {
290 my ($self,$input) = @_;
291 # Create input file pointer
292 $self->io->_io_cleanup();
293 my ($infilename,$type) = $self->_setinput($input);
294 if (! $infilename) {
295 $self->throw("Bad input data or less than 2 sequences in $input !");
298 my ($param_string,$outstr) = $self->_setparams();
300 # run mafft
301 return &_run($self, $infilename, $param_string,$outstr);
304 =head2 _run
306 Title : _run
307 Usage : Internal function, not to be called directly
308 Function: makes actual system call to tcoffee program
309 Example :
310 Returns : nothing; tcoffee output is written to a
311 temporary file OR specified output file
312 Args : Name of a file containing a set of unaligned fasta sequences
313 and hash of parameters to be passed to tcoffee
316 =cut
318 sub _run {
319 my ($self,$infilename,$paramstr,$outstr) = @_;
320 my $commandstring = $self->executable($self->method)." $paramstr $infilename $outstr";
322 $self->debug( "mafft command = $commandstring \n");
324 my $status = system($commandstring);
325 my $outfile = $self->outfile();
326 if( !-e $outfile || -z $outfile ) {
327 $self->warn( "MAFFT call crashed: $? [command $commandstring]\n");
328 return undef;
331 my $in = Bio::AlignIO->new('-file' => $outfile,
332 '-format' => $self->output);
333 my $aln = $in->next_aln();
334 return $aln;
338 =head2 _setinput
340 Title : _setinput
341 Usage : Internal function, not to be called directly
342 Function: Create input file for mafft programs
343 Example :
344 Returns : name of file containing mafft data input
345 Args : Seq or Align object reference or input file name
348 =cut
350 sub _setinput {
351 my ($self,$input) = @_;
352 my ($infilename, $seq, $temp, $tfh);
353 if (! ref $input) {
354 # check that file exists or throw
355 $infilename = $input;
356 unless (-e $input) {return 0;}
357 return ($infilename);
358 } elsif (ref($input) =~ /ARRAY/i ) { # $input may be an
359 # array of BioSeq objects...
360 # Open temporary file for both reading & writing of array
361 ($tfh,$infilename) = $self->io->tempfile();
362 if( ! ref($input->[0]) ) {
363 $self->warn("passed an array ref which did not contain objects to _setinput");
364 return undef;
365 } elsif( $input->[0]->isa('Bio::PrimarySeqI') ) {
366 $temp = Bio::SeqIO->new('-fh' => $tfh,
367 '-format' => 'fasta');
368 my $ct = 1;
369 foreach $seq (@$input) {
370 return 0 unless ( ref($seq) &&
371 $seq->isa("Bio::PrimarySeqI") );
372 if( ! defined $seq->display_id ||
373 $seq->display_id =~ /^\s+$/) {
374 $seq->display_id( "Seq".$ct++);
376 $temp->write_seq($seq);
378 $temp->close();
379 undef $temp;
380 close($tfh);
381 $tfh = undef;
382 } else {
383 $self->warn( "got an array ref with 1st entry ".
384 $input->[0].
385 " and don't know what to do with it\n");
388 return ($infilename);
389 } else {
390 $self->warn("Got $input and don't know what to do with it\n");
392 return 0;
396 =head2 _setparams
398 Title : _setparams
399 Usage : Internal function, not to be called directly
400 Function: Create parameter inputs for mafft program
401 Example :
402 Returns : parameter string to be passed to mafft program
403 Args : name of calling object
405 =cut
407 sub _setparams {
408 my ($self) = @_;
409 my ($outfile,$param_string) = ('','');
411 # Set default output file if no explicit output file selected
412 unless (defined($outfile = $self->outfile) ) {
413 my $tfh;
414 ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
415 close($tfh);
416 undef $tfh;
417 $self->outfile($outfile);
419 my ($attr,$value);
421 for $attr ( @MAFFT_SWITCHES) {
422 $value = $self->$attr();
423 next unless ($value);
424 my $attr_key = lc $attr; #put switches in format expected by mafft
425 $attr_key = ' --'.$attr_key;
426 $param_string .= $attr_key ;
428 my $method = $self->method;
429 $self->throw("no method ") unless defined $method;
430 if( $method !~ /(rough|nsi)$/ &&
431 defined $self->cycles) {
432 $param_string .= " ".$self->cycles;
434 my $outputstr = " 1>$outfile" ;
436 if ($self->quiet() || $self->verbose < 0) {
437 $outputstr .= ' 2>/dev/null';
439 return ($param_string, $outputstr);
442 =head2 methods
444 Title : methods
445 Usage : my @methods = $self->methods()
446 Function: Get/Set Alignment methods - NOT VALIDATED
447 Returns : array of strings
448 Args : arrayref of strings
451 =cut
453 sub methods {
454 my ($self) = shift;
455 return @MAFFT_ALN_METHODS;
459 =head1 Bio::Tools::Run::BaseWrapper methods
461 =cut
463 =head2 no_param_checks
465 Title : no_param_checks
466 Usage : $obj->no_param_checks($newval)
467 Function: Boolean flag as to whether or not we should
468 trust the sanity checks for parameter values
469 Returns : value of no_param_checks
470 Args : newvalue (optional)
473 =cut
475 =head2 save_tempfiles
477 Title : save_tempfiles
478 Usage : $obj->save_tempfiles($newval)
479 Function:
480 Returns : value of save_tempfiles
481 Args : newvalue (optional)
484 =cut
486 =head2 outfile_name
488 Title : outfile_name
489 Usage : my $outfile = $mafft->outfile_name();
490 Function: Get/Set the name of the output file for this run
491 (if you wanted to do something special)
492 Returns : string
493 Args : [optional] string to set value to
496 =cut
499 =head2 tempdir
501 Title : tempdir
502 Usage : my $tmpdir = $self->tempdir();
503 Function: Retrieve a temporary directory name (which is created)
504 Returns : string which is the name of the temporary directory
505 Args : none
508 =cut
510 =head2 cleanup
512 Title : cleanup
513 Usage : $mafft->cleanup();
514 Function: Will cleanup the tempdir directory
515 Returns : none
516 Args : none
519 =cut
521 =head2 io
523 Title : io
524 Usage : $obj->io($newval)
525 Function: Gets a L<Bio::Root::IO> object
526 Returns : L<Bio::Root::IO>
527 Args : none
530 =cut
532 1; # Needed to keep compiler happy