can't declare are expect a variable at the same time
[bioperl-run.git] / Bio / Tools / Run / Alignment / Lagan.pm
blobcfc4553db4aef30d09f2e8ecc4fb23f6bd756d75
1 # BioPerl module for Bio::Tools::Run::Alignment::Lagan
3 # Cared for by Bioperl
5 # Copyright Bioperl, Stephen Montgomery <smontgom@bcgsc.bc.ca>
7 # Special thanks to Jason Stajich.
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tools::Run::Alignment::Lagan - Object for the local execution of the LAGAN suite of tools (including MLAGAN for multiple sequence alignments)
17 =head1 SYNOPSIS
19 use Bio::Tools::Run::Alignment::Lagan;
21 @params =
22 ('chaos' => "The contents of this string will be passed as args to chaos",
24 #Read your chaos README file for more info. This functionality
25 #has not been tested and will be integrated in future versions.
27 'order' => "\"-gs -7 -gc -2 -mt 2 -ms -1\"",
28 #That is, a gap start penalty of -7, gap continue of -2, match of 2,
29 #and mismatch of -1.
31 'recurse' => "\"(12,25),(7,25),(4,30)\"",
32 #A list of (wordlength,score cutoff) pairs to be used in the
33 #recursive anchoring
35 'tree' => "\"(sample1 (sample2 sample3))\"",
36 #Used by mlagan / tree can also be passed when calling mlagan directly
38 #SCORING PARAMETERS FOR MLAGAN:
39 'match' => 12,
40 'mismatch' => -8,
41 'gapstart' => -50,
42 'gapend' => -50,
43 'gapcont' => -2,
47 =head1 DESCRIPTION
49 To run mlagan/lagan, the executables "mlagan" and "lagan.pl" must be
50 in your path or you must have an environment variable that points to
51 the executable directory "LAGANDIR=/opt/lagan_executables/"
53 All lagan and mlagan parameters listed in their Readmes can be set
54 except for the mfa flag which has been turned on by default to prevent
55 parsing of the alignment format.
57 TO USE LAGAN:
59 my $lagan = new Bio::Tools::Run::Alignment::Lagan(@params);
60 my $report_out = $lagan->lagan($seq1, $seq2);
62 A SimpleAlign object is returned.
64 TO USE MLAGAN:
66 my $lagan = new Bio::Tools::Run::Alignment::Lagan();
67 my $tree = "(($seqname1 $seqname2) $seqname3)";
68 my @sequence_objs; #an array of bioperl Seq objects
70 ##If you use an unblessed seq array
71 my $seq_ref = \@sequence_objs;
72 bless $seq_ref, "ARRAY";
74 my $report_out = $lagan->mlagan($seq_ref, $tree);
76 A SimpleAlign object is returned
78 Only basic mlagan/lagan functionality has been implemented due to the
79 iterative development of their project. Future maintenance upgrades
80 will include enhanced features and scoring.
82 =head1 FEEDBACK
84 =head2 Mailing Lists
86 User feedback is an integral part of the evolution of this and other
87 Bioperl modules. Send your comments and suggestions preferably to
88 the Bioperl mailing list. Your participation is much appreciated.
91 bioperl-l@bioperl.org - General discussion
92 http://bioperl.org/MailList.shtml - About the mailing lists
94 =head2 Reporting Bugs
96 Report bugs to the Bioperl bug tracking system to help us keep track
97 of the bugs and their resolution. Bug reports can be submitted via
98 email or the web:
100 bioperl-bugs@bioperl.org
101 http://bugzilla.bioperl.org/
103 =head1 AUTHOR - Stephen Montgomery
105 Email smontgom@bcgsc.bc.ca
107 Genome Sciences Centre in beautiful Vancouver, British Columbia CANADA
109 =head1 CONTRIBUTORS
111 MLagan/Lagan is the hard work of Michael Brudno et al.
113 =head1 APPENDIX
115 The rest of the documentation details each of the object methods.
116 Internal methods are usually preceded with a _
118 =cut
120 package Bio::Tools::Run::Alignment::Lagan;
122 use vars qw(@ISA $PROGRAM_DIR @LAGAN_PARAMS @MLAGAN_PARAMS @LAGAN_SWITCHES @OTHER_PARAMS
123 %OK_FIELD $AUTOLOAD);
125 use strict;
126 use Bio::Root::Root;
127 use Bio::Root::IO;
128 use Bio::Seq;
129 use Bio::SeqIO;
130 use Bio::AlignIO;
131 use Bio::AlignIO::fasta;
132 use Bio::SimpleAlign;
133 use Bio::Tools::Run::WrapperBase;
135 @ISA = qw( Bio::Root::Root
136 Bio::Tools::Run::WrapperBase);
BEGIN {
    # Option names that _setparams forwards to lagan.pl.
    @LAGAN_PARAMS = qw(chaos order recurse mfa out lazy maskedonly
                       usebounds rc translate draft info fastreject);

    # Option names that _setparams forwards to mlagan.
    @MLAGAN_PARAMS = qw(nested postir translate lazy verbose tree match mismatch
                        gapstart gapend gapcont out version);

    # Names accepted by the object but never forwarded as command-line
    # options: 'outfile' is only consulted by new(), and 'silent'/'quiet'
    # only decide whether _runlagan discards stderr.
    @OTHER_PARAMS   = qw(outfile);
    @LAGAN_SWITCHES = qw(silent quiet);

    # Not all of these parameters are useful in this context; care
    # should be taken to set only the standard ones.

    # Authorize attribute fields: AUTOLOAD rejects any name not seen here.
    $OK_FIELD{$_}++
        for (@LAGAN_PARAMS, @LAGAN_SWITCHES, @MLAGAN_PARAMS, @OTHER_PARAMS);

    # The LAGANDIR environment variable should be set if the lagan
    # executables are not in your path.
    $PROGRAM_DIR = $ENV{'LAGANDIR'} || '';
}
# Constructor.
# Args    : flat list of attribute => value pairs; every attribute name
#           must be one of the authorized fields or AUTOLOAD will throw.
# Returns : the new object, with 'out' always set to a file name: the
#           caller's 'out', else the caller's 'outfile', else a temp file.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    # Feed each pair through the accessor of the same name.
    while (@args) {
        my ($name, $setting) = splice(@args, 0, 2);
        $self->$name($setting);
    }

    # A temp file is reserved unconditionally; it only becomes the output
    # target when neither 'out' nor 'outfile' holds a true value.
    my ($tmp_fh, $tmp_name) = $self->io->tempfile();
    my $target = $self->out;
    $target = $self->outfile unless $target;
    $target = $tmp_name      unless $target;
    $self->out($target);

    # Only the name is needed from here on, so release the handle.
    close($tmp_fh);
    undef $tmp_fh;

    return $self;
}
# Generic get/set accessor for every authorized parameter.
# Called as $obj->name() to read or $obj->name($value) to write.
# Returns : the current value stored under that name (undef if unset).
# Throws  : via $self->throw when the name is not in %OK_FIELD.
sub AUTOLOAD {
    my $self = shift;
    my $attr = $AUTOLOAD;
    $attr =~ s/.*:://;    # strip the package qualifier

    # Object destruction must never be mistaken for a parameter access:
    # if no ancestor class provides DESTROY, Perl dispatches it here and
    # the check below would throw while the object is being torn down.
    return if $attr eq 'DESTROY';

    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
    $self->{$attr} = shift if @_;
    return $self->{$attr};
}
186 =head2 lagan
188 Runs the Lagan pairwise alignment algorithm
189 Inputs should be two PrimarySeq objects.
191 Returns a SimpleAlign object / preloaded with the tmp file of the
192 Lagan multifasta output.
194 =cut
# Runs the pairwise aligner 'lagan.pl' on two sequences.
# Args    : $input1 - a Bio::PrimarySeqI object
#           $input2 - a Bio::PrimarySeqI object, or a plain scalar that
#                     _setinput passes through unchanged
# Returns : whatever _generic_lagan returns for the run, or undef (after
#           a warning) when the inputs could not be turned into two
#           usable input file names.
sub lagan {
    my ($self, $input1, $input2) = @_;
    $self->io->_io_cleanup();
    my $executable = 'lagan.pl';

    my ($infile1, $infile2) = $self->_setinput($executable, $input1, $input2);

    # _setinput yields undef, 0 or an array reference when an input is
    # not something a pairwise run can use; refuse to build a command
    # line out of that instead of running lagan.pl on garbage.
    foreach my $infile ($infile1, $infile2) {
        next if $infile && !ref($infile);
        $self->warn("lagan requires two sequence inputs; "
                  . "could not prepare input files for $executable");
        return undef;
    }

    my $lagan_report = _generic_lagan($self, $executable, $infile1, $infile2);
    return $lagan_report;
}
210 =head2 mlagan
212 Runs the Mlagan multiple sequence alignment algorithm
213 Inputs should be an Array of Primary Seq objects and a Phylogenetic Tree in String format
214 Returns a SimpleAlign object / preloaded with the tmp file of the Mlagan multifasta output.
216 =cut
# Runs the multiple aligner 'mlagan' on a set of sequences.
# Args    : $input1 - reference to an array of Bio::PrimarySeqI objects
#           $tree   - optional tree description as a string
# Returns : whatever _generic_lagan returns for the run, or undef (after
#           a warning) when the sequences could not be written out.
sub mlagan {
    my ($self, $input1, $tree) = @_;
    $self->io->_io_cleanup();
    my $executable = 'mlagan';

    my ($infiles, $tree_arg) = $self->_setinput($executable, $input1, $tree);

    # _setinput returns 0 when an array element is not a PrimarySeqI and
    # a non-array value when $input1 was not an array reference at all.
    # _runlagan dereferences this value as an array, so check it here and
    # report the real problem.
    unless (ref($infiles) eq 'ARRAY' && @$infiles) {
        $self->warn("mlagan requires a reference to a non-empty array of "
                  . "Bio::PrimarySeqI objects");
        return undef;
    }

    my $lagan_report = _generic_lagan($self, $executable, $infiles, $tree_arg);
    return $lagan_report;
}
230 =head2 _setinput
232 Title : _setinput
233 Usage : Internal function, not to be called directly
234 Function: Create input file(s) for Lagan executables
235 Returns : name of files containing Lagan data input /
236 or array of files and phylo tree for Mlagan data input
238 =cut
# Writes the given sequences to temporary FASTA files.
# Args    : $executable - name of the program being prepared for (unused here)
#           $input1     - array reference of sequence objects (mlagan) or
#                         a single Bio::PrimarySeqI object (lagan)
#           $input2     - a Bio::PrimarySeqI object (lagan) or a plain
#                         scalar such as a tree string (mlagan)
# Returns : ($infile1, $infile2) where $infile1 is a file name or a
#           reference to an array of file names, and $infile2 is a file
#           name or the untouched scalar; returns 0 as soon as an array
#           element turns out not to be a Bio::PrimarySeqI.
sub _setinput {
    my ($self, $executable, $input1, $input2) = @_;
    my ($infile1, $infile2);

    $self->io->_io_cleanup();

    # Used for both pairwise inputs. The sequence string is stripped of
    # every run of non-word characters (this happens when the sequence was
    # taken from an alignment) and stored back into the caller's object
    # before that object is written out.
    my $write_cleaned = sub {
        my ($seq_obj) = @_;
        my ($handle, $path) = $self->io->tempfile();

        my $residues = $seq_obj->seq();
        $residues =~ s/\W+//g;
        $seq_obj->seq($residues);

        my $writer = Bio::SeqIO->new( -fh     => $handle,
                                      -format => 'Fasta' );
        $writer->write_seq($seq_obj);
        close $handle;
        return $path;
    };

    if (ref($input1) =~ /ARRAY/i) {
        ## Inputs to mlagan / will have to be changed if lagan ever
        ## supports multi-input.
        my @paths;
        for my $member (@$input1) {
            my ($handle, $path) = $self->io->tempfile();
            my $writer = Bio::SeqIO->new( -fh     => $handle,
                                          -format => 'Fasta' );
            return 0 unless $member->isa("Bio::PrimarySeqI");
            $writer->write_seq($member);
            close $handle;
            push @paths, $path;
        }
        $infile1 = \@paths;
    }
    elsif ($input1->isa("Bio::PrimarySeqI")) {
        ## Input to lagan.
        $infile1 = $write_cleaned->($input1);
    }

    if (ref($input2)) {
        # A reference that is not a PrimarySeqI leaves $infile2 undefined.
        if ($input2->isa("Bio::PrimarySeqI")) {
            $infile2 = $write_cleaned->($input2);
        }
    } else {
        ## A tree as a scalar has been passed, pass it through.
        $infile2 = $input2;
    }

    return ($infile1, $infile2);
}
314 =head2 _generic_lagan
316 Title : _generic_lagan
317 Usage : internal function not called directly
318 Returns : SimpleAlign object
320 =cut
# Builds the option string for $executable and hands everything to
# _runlagan.
# Args    : $executable, followed by the two prepared inputs
# Returns : the value produced by _runlagan
sub _generic_lagan {
    my ($self, $executable, $input1, $input2) = @_;

    my $options = $self->_setparams($executable);
    my $report  = _runlagan($self, $executable, $options, $input1, $input2);

    return $report;
}
330 =head2 _setparams
332 Title : _setparams
333 Usage : Internal function, not to be called directly
334 Function: Create parameter inputs for (m)Lagan program
335 Returns : parameter string to be passed to Lagan
336 Args : Reference to calling object and name of (m)Lagan executable
338 =cut
# Assembles the command-line option string for one executable.
# Args    : $executable - 'lagan.pl' or 'mlagan'; any other name selects
#                         no options at all
# Returns : a string holding " -name value " for every option of that
#           executable whose accessor returns a defined value, always
#           followed by " -mfa "
sub _setparams {
    my ($self, $executable) = @_;

    my @option_names;
    @option_names = @LAGAN_PARAMS  if $executable eq 'lagan.pl';
    @option_names = @MLAGAN_PARAMS if $executable eq 'mlagan';
    ## Expand other lagan suite programs here.

    my @pieces;
    foreach my $name (@option_names) {
        my $setting = $self->$name();
        next unless defined $setting;
        push @pieces, " -$name $setting ";
    }

    return join('', @pieces) . " -mfa ";
}
364 =head2 _runlagan
366 Title : _runlagan
367 Usage : Internal function, not to be called directly
368 Function: makes actual system call to (m)Lagan program
369 Example :
370 Returns : Report object in the SimpleAlign object
372 =cut
# Builds the shell command for lagan.pl or mlagan, runs it and reads the
# result back.
# Args    : $executable   - 'lagan.pl' or 'mlagan'
#           $param_string - option string from _setparams
#           $input1       - input file name (lagan.pl) or reference to an
#                           array of input file names (mlagan)
#           $input2       - second input file name (lagan.pl) or optional
#                           tree string (mlagan)
# Returns : the first alignment Bio::AlignIO reads (fasta format) from
#           the file named by $self->out, or undef after a warning when
#           the executable cannot be found, the executable name is not
#           supported, or the command exits with a non-zero status.
sub _runlagan {
    my ($self, $executable, $param_string, $input1, $input2) = @_;

    my $exe = $self->executable($executable);
    unless ($exe) {
        $self->warn("cannot find path to $executable");
        return undef;
    }

    my $command_string;
    if ($executable eq 'lagan.pl') {
        $command_string = $exe . " " . $input1 . " " . $input2 . $param_string;
    }
    elsif ($executable eq 'mlagan') {
        $command_string = join(' ', $exe, @$input1);
        if (defined $input2) {
            $command_string .= " -tree " . "\"" . $input2 . "\"";
        }
        $command_string .= " " . $param_string;
    }

    unless (defined $command_string) {
        $self->warn("do not know how to run $executable");
        return undef;
    }

    # Discard stderr on request; skipped on platforms where this
    # redirection is not used.
    if (($self->silent || $self->quiet) &&
        ($^O !~ /os2|dos|MSWin32|amigaos/)) {
        $command_string .= ' 2> /dev/null';
    }

    $self->debug("$command_string\n");

    # The output file name is fixed for the lifetime of the object, so
    # after a failed run it may still hold the result of an earlier call.
    # Do not parse it in that case.
    my $status = system($command_string);
    if ($status != 0) {
        $self->warn("$executable call ($command_string) failed with status $status");
        return undef;
    }

    my $outfile = $self->out();
    my $align = Bio::AlignIO->new( '-file'   => $outfile,
                                   '-format' => 'fasta' );
    my $aln = $align->next_aln();

    return $aln;
}
413 =head2 executable
415 Title : executable
416 Usage : my $exe = $lagan->executable('mlagan');
417 Function: Finds the full path to the 'lagan' executable
418 Returns : string representing the full path to the exe
419 Args : [optional] name of executable to set path to
420 [optional] boolean flag whether or not warn when exe is not found
422 Thanks to Jason Stajich for providing the framework for this subroutine
424 =cut
# Locates one of the lagan executables and remembers the answer per name.
# Args    : $exename - program name, defaults to 'lagan.pl'
#           $exe     - optional explicit path; cached if it is executable
#           $warn    - optional flag: warn when nothing can be found
# Returns : the cached path for $exename, or undef when none was found
sub executable {
    my ($self, $exename, $exe, $warn) = @_;
    $exename = 'lagan.pl' unless defined $exename;

    # An explicit, executable path always replaces the cached one.
    if (defined $exe && -x $exe) {
        $self->{'_pathtoexe'}->{$exename} = $exe;
    }

    # Search only while nothing is cached, so that a path found earlier
    # is never discarded by a later lookup. An explicit path that is not
    # executable does not prevent this search.
    unless (defined $self->{'_pathtoexe'}->{$exename}) {
        # First choice: the configured program directory.
        my $candidate = $self->program_path($exename);

        # Otherwise ask the IO helper whether it knows the executable.
        unless (-e $candidate && -x $candidate) {
            $candidate = $self->io->exists_exe($exename);
        }

        if ($candidate && -x $candidate) {
            $self->{'_pathtoexe'}->{$exename} = $candidate;
        } else {
            $self->warn("Cannot find executable for $exename") if $warn;
            $self->{'_pathtoexe'}->{$exename} = undef;
        }
    }

    return $self->{'_pathtoexe'}->{$exename};
}
450 =head2 program_path
452 Title : program_path
453 Usage : my $path = $lagan->program_path();
454 Function: Builds path for executable
455 Returns : string representing the full path to the exe
457 Thanks to Jason Stajich for providing the framework for this subroutine
459 =cut
# Joins the program directory (when one is set) and the program name
# into a single path using Bio::Root::IO->catfile.
# Args    : $program_name - name of the executable
# Returns : the value returned by catfile
sub program_path {
    my ($self, $program_name) = @_;

    # Option for Windows variants / none so far: the suffix stays empty
    # on every platform.
    my $suffix = '';

    my @parts = ($program_name . $suffix);
    unshift @parts, $self->program_dir if $self->program_dir;

    return Bio::Root::IO->catfile(@parts);
}
471 =head2 program_dir
473 Title : program_dir
474 Usage : my $dir = $lagan->program_dir();
475 Function: Abstract get method for dir of program. To be implemented
476 by wrapper.
477 Returns : string representing program directory
479 Thanks to Jason Stajich for providing the framework for this subroutine
481 =cut
# Returns the package-wide program directory held in $PROGRAM_DIR.
sub program_dir {
    return $PROGRAM_DIR;
}