speelink fixes, patch courtesy Charles Plessy, fixes #3256
[bioperl-run.git] / lib / Bio / Tools / Run / Phylo / Phylip / Consense.pm
blob5aaa208a3992adff4b9efc9cd70f222cdc1e66a0
1 # BioPerl module for Bio::Tools::Run::Phylo::Phylip::Consense
3 # Created by
5 # Shawn Hoon
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Tools::Run::Phylo::Phylip::Consense - Wrapper for the phylip
14 program Consense
16 =head1 SYNOPSIS
19 use Bio::Tools::Run::Phylo::Phylip::Consense;
20 use Bio::Tools::Run::Phylo::Phylip::SeqBoot;
21 use Bio::Tools::Run::Phylo::Phylip::ProtDist;
22 use Bio::Tools::Run::Phylo::Phylip::Neighbor;
23 use Bio::Tools::Run::Phylo::Phylip::DrawTree;
26 #first get an alignment
27 my $aio= Bio::AlignIO->new(-file=>$ARGV[0],-format=>"clustalw");
28 my $aln = $aio->next_aln;
30 # To prevent truncation of sequence names by PHYLIP runs, use set_displayname_safe
31 my ($aln_safe, $ref_name)=$aln->set_displayname_safe();
33 #next use seqboot to generate multiple aligments
34 my @params = ('datatype'=>'SEQUENCE','replicates'=>10);
35 my $seqboot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
37 my $aln_ref= $seqboot_factory->run($aln);
39 Or, for long sequence names:
41 my $aln_ref= $seqboot_factory->run($aln_safe);
43 #next build distance matrices and construct trees
44 my $pd_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new();
45 my $ne_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new();
47 foreach my $a (@{$aln_ref}){
48 my $mat = $pd_factory->create_distance_matrix($a);
49 push @tree, $ne_factory->create_tree($mat);
52 #now use consense to get a final tree
53 my $con_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new();
55 #you may set outgroup either by the number representing the order in
56 #which species are entered or by the name of the species
58 $con_factory->outgroup(1);
59 $con_factory->outgroup('HUMAN');
61 my $tree = $con_factory->run(\@tree);
63 # Restore original sequence names, after ALL phylip runs:
64 my @nodes = $tree->get_nodes();
65 foreach my $nd (@nodes){
66 $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf;
69 #now draw the tree
70 my $draw_factory = Bio::Tools::Run::Phylo::Phylip::DrawTree->new();
71 my $image_filename = $draw_factory->draw_tree($tree);
73 =head1 DESCRIPTION
75 Wrapper for phylip consense program
77 Taken from phylip documentation...
79 CONSENSE reads a file of computer-readable trees and prints out
80 (and may also write out onto a file) a consensus tree. At the moment
81 it carries out a family of consensus tree methods called the M[l] methods
82 (Margush and McMorris, 1981). These include strict consensus
83 and majority rule consensus. Basically the consensus tree consists of monophyletic
84 groups that occur as often as possible in the data.
86 More documentation on using Consense and setting parameters may be found
87 in the phylip package.
89 VERSION Support
91 This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
92 this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.
94 =head1 PARAMETERS FOR Consense
96 =head2 TYPE
98 Title : TYPE
99 Description : (optional)
100 Only available in phylip v3.6
102 This program supports 3 types of consensus generation
104 MRe : Majority Rule (extended) Any set of species that
105 appears in more than 50% of the trees is included.
106 The program then considers the other sets of species
107 in order of the frequency with which they have appeared,
108 adding to the consensus tree any which are compatible
109 with it until
111 STRICT: A set of species must appear in all input trees to be
112 included in the strict consensus tree.
114 MR : A set of species is included in the consensus tree
115 if it is present in more than half of the input trees.
117 Ml : The user is asked for a fraction between 0.5 and 1, and
118 the program then includes in the consensus tree any set
119 of species that occurs among the input trees more than
120 that fraction of then time. The Strict consensus and the
121 Majority Rule consensus are extreme cases of the M[l] consensus,
122 being for fractions of 1 and 0.5 respectively
124 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-type=>"Ml 0.7");
127 Defaults to MRe
129 =head2 ROOTED
131 Title: ROOTED
132 Description: (optional)
134 toggles between the default assumption that the input trees are unrooted trees and
135 the selection that specifies that the tree is to be treated as a rooted tree and not
136 re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be
137 re-rooted automatically at the first species encountered on the first tree
138 (or at a species designated by the Outgroup option)
140 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-rooted=>1);
142 Defaults to unrooted
144 =head2 OUTGROUP
146 Title : OUTGROUP
147 Description : (optional)
149 It is in effect only if the Rooted option selection is not in effect.
150 The trees will be re-rooted with a species of your choosing.
152 usage my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-outgroup=>2);
154 Defaults to first species encountered.
156 =head1 FEEDBACK
158 =head2 Mailing Lists
160 User feedback is an integral part of the evolution of this and other
161 Bioperl modules. Send your comments and suggestions preferably to one
162 of the Bioperl mailing lists. Your participation is much appreciated.
164 bioperl-l@bioperl.org - General discussion
165 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
167 =head2 Support
169 Please direct usage questions or support issues to the mailing list:
171 I<bioperl-l@bioperl.org>
173 rather than to the module maintainer directly. Many experienced and
174 reponsive experts will be able look at the problem and quickly
175 address it. Please include a thorough description of the problem
176 with code and data examples if at all possible.
178 =head2 Reporting Bugs
180 Report bugs to the Bioperl bug tracking system to help us keep track
181 the bugs and their resolution. Bug reports can be submitted via the
182 web:
184 http://redmine.open-bio.org/projects/bioperl/
186 =head1 AUTHOR - Shawn Hoon
188 Email shawnh@fugu-sg.org
190 =head1 APPENDIX
192 The rest of the documentation details each of the object
193 methods. Internal methods are usually preceded with a _
195 =cut
200 package Bio::Tools::Run::Phylo::Phylip::Consense;
202 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
203 @CONSENSE_PARAMS @OTHER_SWITCHES
204 %OK_FIELD);
205 use strict;
206 use Bio::SimpleAlign;
207 use Bio::TreeIO;
208 use Bio::Tools::Run::Phylo::Phylip::Base;
209 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
210 use IO::String;
211 use Cwd;
214 # inherit from Phylip::Base which has some methods for dealing with
215 # Phylip specifics
216 @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
218 # You will need to enable the Consense program. This
219 # can be done in (at least) 3 ways:
221 # 1. define an environmental variable PHYLIPDIR:
222 # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
224 # 2. include a definition of an environmental variable PHYLIPDIR in
225 # every script that will use Consense.pm.
226 # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
228 # 3. You can set the path to the program through doing:
229 # my @params('executable'=>'/usr/local/bin/consense');
230 # my $Consense_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(@params);
234 BEGIN {
235 @CONSENSE_PARAMS = qw(TYPE OUTGROUP ROOTED);
236 @OTHER_SWITCHES = qw(QUIET);
237 foreach my $attr(@CONSENSE_PARAMS,@OTHER_SWITCHES) {
238 $OK_FIELD{$attr}++;
242 =head2 program_name
244 Title : program_name
245 Usage : $obj->program_name()
246 Function: holds the program name
247 Returns: string
248 Args : None
250 =cut
252 sub program_name {
253 return 'consense';
256 =head2 program_dir
258 Title : program_dir
259 Usage : ->program_dir()
260 Function: returns the program directory, obtained from ENV variable.
261 Returns: string
262 Args :
264 =cut
266 sub program_dir {
267 return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};
270 sub new {
271 my ($class,@args) = @_;
272 my $self = $class->SUPER::new(@args);
274 my ($attr, $value);
275 while (@args) {
276 $attr = shift @args;
277 $value = shift @args;
279 next if( $attr =~ /^-/ ); # don't want named parameters
280 if ($attr =~/PROGRAM/i) {
281 $self->executable($value);
282 next;
284 if ($attr =~ /IDLENGTH/i){
285 $self->idlength($value);
286 next;
288 $self->$attr($value);
290 return $self;
293 sub AUTOLOAD {
294 my $self = shift;
295 my $attr = $AUTOLOAD;
296 $attr =~ s/.*:://;
297 $attr = uc $attr;
298 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
299 $self->{$attr} = shift if @_;
300 return $self->{$attr};
303 =head2 idlength
305 Title : idlength
306 Usage : $obj->idlength ($newval)
307 Function:
308 Returns : value of idlength
309 Args : newvalue (optional)
312 =cut
314 sub idlength{
315 my $self = shift;
316 if( @_ ) {
317 my $value = shift;
318 $self->{'idlength'} = $value;
320 return $self->{'idlength'};
325 =head2 run
327 Title : run
328 Usage :
329 $inputfilename = 't/data/prot.treefile';
330 $tree= $Consense_factory->run($inputfilename);
333 $tree= $consense_factory->run(\@tree);
335 Function: Create bootstrap sets of alignments
336 Example :
337 Returns : a L<Bio::Tree::Tree>
338 Args : either a file containing trees in newick format
339 or an array ref of L<Bio::Tree::Tree>
341 Throws an exception if argument is not either a string (eg a
342 filename) or a Bio::Tree::TreeI object. If
343 argument is string, throws exception if file corresponding to string
344 name can not be found.
346 =cut
348 sub run{
350 my ($self,$input) = @_;
351 my ($infilename);
353 # Create input file pointer
354 $infilename = $self->_setinput($input);
355 if (!$infilename) {
356 $self->throw("Problems setting up for Consense. Probably bad input data in $input !");
359 # Create parameter string to pass to Consense program
360 my $param_string = $self->_setparams();
361 # run Consense
362 my $aln = $self->_run($infilename,$param_string);
365 #################################################
367 =head2 _run
369 Title : _run
370 Usage : Internal function, not to be called directly
371 Function: makes actual system call to Consense program
372 Example :
373 Returns : an array ref of <Bio::Tree::Tree>
374 Args : Name of a file containing a set of tree in newick format
375 and a parameter string to be passed to Consense
378 =cut
380 sub _run {
381 my ($self,$infile,$param_string) = @_;
382 my $instring;
383 my $curpath = cwd;
384 unless( File::Spec->file_name_is_absolute($infile) ) {
385 $infile = $self->io->catfile($curpath,$infile);
387 my $tmpdir = $self->tempdir;
388 chdir($self->tempdir);
389 # open a pipe to run Consense to bypass interactive menus
390 if ($self->quiet() || $self->verbose() < 0) {
391 open(Consense,"| ".$self->executable .">/dev/null");
393 else {
394 open(Consense,"| ".$self->executable);
396 $instring = $infile."\n".$param_string;
397 $self->debug( "Program ".$self->executable." $instring\n");
398 print Consense $instring;
399 close(Consense);
401 # get the results
402 my $outfile = $self->io->catfile($self->tempdir,$self->treefile);
403 chdir($curpath);
405 $self->throw("Consense did not create files correctly ($outfile)")
406 unless (-e $outfile);
408 #parse the alignments
410 my @aln;
411 my $tio = Bio::TreeIO->new(-file=>$outfile,-format=>"newick");
412 my $tree = $tio->next_tree;
414 # Clean up the temporary files created along the way...
415 unlink $outfile unless $self->save_tempfiles;
417 return $tree;
420 sub _set_names_from_tree {
421 my ($self,$tree) = @_;
422 my $newick;
423 my $ios = IO::String->new($newick);
424 my $tio = Bio::TreeIO->new(-fh=>$ios,-format=>'newick');
425 $tio->write_tree($tree);
426 my @names = $newick=~/(\w+):\d+/g;
427 my %names;
428 for(my $i=0; $i < $#names; $i++){
429 $names{$names[$i]} = $i+1;
431 $self->names(\%names);
433 return;
437 =head2 _setinput()
439 Title : _setinput
440 Usage : Internal function, not to be called directly
441 Function: Create input file for Consense program
442 Example :
443 Returns : name of file containing a trees in newick format
444 Args : an array ref of Bio::Tree::Tree object or input file name
447 =cut
449 sub _setinput {
450 my ($self, $input) = @_;
451 my ($alnfilename,$tfh);
453 # a phy formatted alignment file
454 unless (ref $input) {
455 # check that file exists or throw
456 $alnfilename= $input;
457 unless (-e $input) {return 0;}
458 my $tio = Bio::TreeIO->new(-file=>$alnfilename,-format=>'newick');
459 my $tree = $tio->next_tree;
460 $self->_set_names_from_tree($tree);
461 return $alnfilename;
464 # $input may be a SimpleAlign Object
465 my @input = ref($input) eq "ARRAY" ? @{$input} : ($input);
466 ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
467 my $treeIO = Bio::TreeIO->new(-fh => $tfh,
468 -format=>'newick');
470 foreach my $tree(@input){
471 $tree->isa('Bio::Tree::TreeI') || $self->throw('Expected a Bio::TreeI object');
472 $treeIO->write_tree($tree);
474 #get the species names in order, using the first one
475 $self->_set_names_from_tree($input[0]);
476 $treeIO->close();
477 close($tfh);
478 undef $tfh;
479 return $alnfilename;
482 =head2 names()
484 Title : names
485 Usage : $tree->names(\%names)
486 Function: get/set for a hash ref for storing names in matrix
487 with rank as values.
488 Example :
489 Returns : hash reference
490 Args : hash reference
492 =cut
494 sub names {
495 my ($self,$name) = @_;
496 if($name){
497 $self->{'_names'} = $name;
499 return $self->{'_names'};
502 =head2 _setparams()
504 Title : _setparams
505 Usage : Internal function, not to be called directly
506 Function: Create parameter inputs for Consense program
507 Example :
508 Returns : parameter string to be passed to Consense
509 Args : name of calling object
511 =cut
513 sub _setparams {
514 my ($attr, $value, $self);
516 #do nothing for now
517 $self = shift;
518 my $param_string = "";
519 my $rooted = 0;
521 #for case where type is Ml
522 my $Ml = 0;
523 my $frac = 0.5;
524 my %menu = %{$Menu{$self->version}->{'CONSENSE'}};
526 foreach my $attr ( @CONSENSE_PARAMS) {
527 $value = $self->$attr();
528 next unless (defined $value);
529 if ($attr =~/ROOTED/i){
530 $rooted = 1;
531 $param_string .= $menu{'ROOTED'};
533 elsif($attr =~/OUTGROUP/i){
534 if($rooted == 1){
535 $self->warn("Outgroup option cannot be used with a rooted tree");
536 next;
538 if($value !~/^\d+$/){ # is a name
539 my %names = %{$self->names};
540 $names{$value} || $self->throw("Outgroup $value not found");
541 $value = $names{$value};
543 $param_string .=$menu{'OUTGROUP'}."$value\n";
545 elsif($attr=~/TYPE/i){
546 if($value=~/Ml/i){
547 ($value,$frac) = split(/\s+/,$value);
548 #default if not given
549 $frac ||= 0.5;
550 if($frac <= 0.5 || $frac > 1){
551 $self->warn("fraction given is out of range 0.5<frac<1, setting to 0.5");
552 $frac = 0.5;
554 $Ml=1;
556 $param_string.=$menu{'TYPE'}{uc $value};
558 else {
559 $param_string.=$menu{uc $attr};
562 $param_string .= $menu{'SUBMIT'};
563 if($Ml){
564 $param_string.=$frac."\n";
567 return $param_string;
572 =head1 Bio::Tools::Run::Wrapper methods
574 =cut
576 =head2 no_param_checks
578 Title : no_param_checks
579 Usage : $obj->no_param_checks($newval)
580 Function: Boolean flag as to whether or not we should
581 trust the sanity checks for parameter values
582 Returns : value of no_param_checks
583 Args : newvalue (optional)
586 =cut
588 =head2 save_tempfiles
590 Title : save_tempfiles
591 Usage : $obj->save_tempfiles($newval)
592 Function:
593 Returns : value of save_tempfiles
594 Args : newvalue (optional)
597 =cut
599 =head2 outfile_name
601 Title : outfile_name
602 Usage : my $outfile = $Consense->outfile_name();
603 Function: Get/Set the name of the output file for this run
604 (if you wanted to do something special)
605 Returns : string
606 Args : [optional] string to set value to
609 =cut
612 =head2 tempdir
614 Title : tempdir
615 Usage : my $tmpdir = $self->tempdir();
616 Function: Retrieve a temporary directory name (which is created)
617 Returns : string which is the name of the temporary directory
618 Args : none
621 =cut
623 =head2 cleanup
625 Title : cleanup
626 Usage : $codeml->cleanup();
627 Function: Will cleanup the tempdir directory after a Consense run
628 Returns : none
629 Args : none
632 =cut
634 =head2 io
636 Title : io
637 Usage : $obj->io($newval)
638 Function: Gets a L<Bio::Root::IO> object
639 Returns : L<Bio::Root::IO>
640 Args : none
643 =cut
645 1; # Needed to keep compiler happy