1 # BioPerl module for Bio::Tools::Run::Phylo::Phylip::Consense
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
13 Bio::Tools::Run::Phylo::Phylip::Consense - Wrapper for the phylip
19 use Bio::Tools::Run::Phylo::Phylip::Consense;
20 use Bio::Tools::Run::Phylo::Phylip::SeqBoot;
21 use Bio::Tools::Run::Phylo::Phylip::ProtDist;
22 use Bio::Tools::Run::Phylo::Phylip::Neighbor;
23 use Bio::Tools::Run::Phylo::Phylip::DrawTree;
26 #first get an alignment
27 my $aio= Bio::AlignIO->new(-file=>$ARGV[0],-format=>"clustalw");
28 my $aln = $aio->next_aln;
30 # To prevent truncation of sequence names by PHYLIP runs, use set_displayname_safe
31 my ($aln_safe, $ref_name)=$aln->set_displayname_safe();
33 #next use seqboot to generate multiple aligments
34 my @params = ('datatype'=>'SEQUENCE','replicates'=>10);
35 my $seqboot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
37 my $aln_ref= $seqboot_factory->run($aln);
39 Or, for long sequence names:
41 my $aln_ref= $seqboot_factory->run($aln_safe);
43 #next build distance matrices and construct trees
44 my $pd_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new();
45 my $ne_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new();
47 foreach my $a (@{$aln_ref}){
48 my $mat = $pd_factory->create_distance_matrix($a);
49 push @tree, $ne_factory->create_tree($mat);
52 #now use consense to get a final tree
53 my $con_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new();
55 #you may set outgroup either by the number representing the order in
56 #which species are entered or by the name of the species
58 $con_factory->outgroup(1);
59 $con_factory->outgroup('HUMAN');
61 my $tree = $con_factory->run(\@tree);
63 # Restore original sequence names, after ALL phylip runs:
64 my @nodes = $tree->get_nodes();
65 foreach my $nd (@nodes){
66 $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf;
70 my $draw_factory = Bio::Tools::Run::Phylo::Phylip::DrawTree->new();
71 my $image_filename = $draw_factory->draw_tree($tree);
75 Wrapper for phylip consense program
77 Taken from phylip documentation...
79 CONSENSE reads a file of computer-readable trees and prints out
80 (and may also write out onto a file) a consensus tree. At the moment
81 it carries out a family of consensus tree methods called the M[l] methods
82 (Margush and McMorris, 1981). These include strict consensus
83 and majority rule consensus. Basically the consensus tree consists of monophyletic
84 groups that occur as often as possible in the data.
86 More documentation on using Consense and setting parameters may be found
87 in the phylip package.
91 This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
92 this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.
94 =head1 PARAMETERS FOR Consense
99 Description : (optional)
100 Only available in phylip v3.6
102 This program supports 3 types of consensus generation
104 MRe : Majority Rule (extended) Any set of species that
105 appears in more than 50% of the trees is included.
106 The program then considers the other sets of species
107 in order of the frequency with which they have appeared,
108 adding to the consensus tree any which are compatible
111 STRICT: A set of species must appear in all input trees to be
112 included in the strict consensus tree.
114 MR : A set of species is included in the consensus tree
115 if it is present in more than half of the input trees.
117 Ml : The user is asked for a fraction between 0.5 and 1, and
118 the program then includes in the consensus tree any set
119 of species that occurs among the input trees more than
120 that fraction of then time. The Strict consensus and the
121 Majority Rule consensus are extreme cases of the M[l] consensus,
122 being for fractions of 1 and 0.5 respectively
124 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-type=>"Ml 0.7");
132 Description: (optional)
134 toggles between the default assumption that the input trees are unrooted trees and
135 the selection that specifies that the tree is to be treated as a rooted tree and not
136 re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be
137 re-rooted automatically at the first species encountered on the first tree
138 (or at a species designated by the Outgroup option)
140 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-rooted=>1);
147 Description : (optional)
149 It is in effect only if the Rooted option selection is not in effect.
150 The trees will be re-rooted with a species of your choosing.
152 usage my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-outgroup=>2);
154 Defaults to first species encountered.
160 User feedback is an integral part of the evolution of this and other
161 Bioperl modules. Send your comments and suggestions preferably to one
162 of the Bioperl mailing lists. Your participation is much appreciated.
164 bioperl-l@bioperl.org - General discussion
165 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
169 Please direct usage questions or support issues to the mailing list:
171 I<bioperl-l@bioperl.org>
173 rather than to the module maintainer directly. Many experienced and
174 reponsive experts will be able look at the problem and quickly
175 address it. Please include a thorough description of the problem
176 with code and data examples if at all possible.
178 =head2 Reporting Bugs
180 Report bugs to the Bioperl bug tracking system to help us keep track
181 the bugs and their resolution. Bug reports can be submitted via the
184 http://redmine.open-bio.org/projects/bioperl/
186 =head1 AUTHOR - Shawn Hoon
188 Email shawnh@fugu-sg.org
192 The rest of the documentation details each of the object
193 methods. Internal methods are usually preceded with a _
200 package Bio
::Tools
::Run
::Phylo
::Phylip
::Consense
;
202 use vars
qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
203 @CONSENSE_PARAMS @OTHER_SWITCHES
206 use Bio::SimpleAlign;
208 use Bio::Tools::Run::Phylo::Phylip::Base;
209 use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
214 # inherit from Phylip::Base which has some methods for dealing with
216 @ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
218 # You will need to enable the Consense program. This
219 # can be done in (at least) 3 ways:
221 # 1. define an environmental variable PHYLIPDIR:
222 # export PHYLIPDIR=/home/shawnh/PHYLIP/bin
224 # 2. include a definition of an environmental variable PHYLIPDIR in
225 # every script that will use Consense.pm.
226 # $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
228 # 3. You can set the path to the program through doing:
229 # my @params('executable'=>'/usr/local/bin/consense');
230 # my $Consense_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(@params);
235 @CONSENSE_PARAMS = qw(TYPE OUTGROUP ROOTED);
236 @OTHER_SWITCHES = qw(QUIET);
237 foreach my $attr(@CONSENSE_PARAMS,@OTHER_SWITCHES) {
245 Usage : $obj->program_name()
246 Function: holds the program name
259 Usage : ->program_dir()
260 Function: returns the program directory, obtained from ENV variable.
267 return Bio
::Root
::IO
->catfile($ENV{PHYLIPDIR
}) if $ENV{PHYLIPDIR
};
271 my ($class,@args) = @_;
272 my $self = $class->SUPER::new
(@args);
277 $value = shift @args;
279 next if( $attr =~ /^-/ ); # don't want named parameters
280 if ($attr =~/PROGRAM/i) {
281 $self->executable($value);
284 if ($attr =~ /IDLENGTH/i){
285 $self->idlength($value);
288 $self->$attr($value);
295 my $attr = $AUTOLOAD;
298 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
299 $self->{$attr} = shift if @_;
300 return $self->{$attr};
306 Usage : $obj->idlength ($newval)
308 Returns : value of idlength
309 Args : newvalue (optional)
318 $self->{'idlength'} = $value;
320 return $self->{'idlength'};
329 $inputfilename = 't/data/prot.treefile';
330 $tree= $Consense_factory->run($inputfilename);
333 $tree= $consense_factory->run(\@tree);
335 Function: Create bootstrap sets of alignments
337 Returns : a L<Bio::Tree::Tree>
338 Args : either a file containing trees in newick format
339 or an array ref of L<Bio::Tree::Tree>
341 Throws an exception if argument is not either a string (eg a
342 filename) or a Bio::Tree::TreeI object. If
343 argument is string, throws exception if file corresponding to string
344 name can not be found.
350 my ($self,$input) = @_;
353 # Create input file pointer
354 $infilename = $self->_setinput($input);
356 $self->throw("Problems setting up for Consense. Probably bad input data in $input !");
359 # Create parameter string to pass to Consense program
360 my $param_string = $self->_setparams();
362 my $aln = $self->_run($infilename,$param_string);
365 #################################################
370 Usage : Internal function, not to be called directly
371 Function: makes actual system call to Consense program
373 Returns : an array ref of <Bio::Tree::Tree>
374 Args : Name of a file containing a set of tree in newick format
375 and a parameter string to be passed to Consense
381 my ($self,$infile,$param_string) = @_;
384 unless( File
::Spec
->file_name_is_absolute($infile) ) {
385 $infile = $self->io->catfile($curpath,$infile);
387 my $tmpdir = $self->tempdir;
388 chdir($self->tempdir);
389 # open a pipe to run Consense to bypass interactive menus
390 if ($self->quiet() || $self->verbose() < 0) {
391 open(Consense
,"| ".$self->executable .">/dev/null");
394 open(Consense
,"| ".$self->executable);
396 $instring = $infile."\n".$param_string;
397 $self->debug( "Program ".$self->executable." $instring\n");
398 print Consense
$instring;
402 my $outfile = $self->io->catfile($self->tempdir,$self->treefile);
405 $self->throw("Consense did not create files correctly ($outfile)")
406 unless (-e
$outfile);
408 #parse the alignments
411 my $tio = Bio
::TreeIO
->new(-file
=>$outfile,-format
=>"newick");
412 my $tree = $tio->next_tree;
414 # Clean up the temporary files created along the way...
415 unlink $outfile unless $self->save_tempfiles;
420 sub _set_names_from_tree
{
421 my ($self,$tree) = @_;
423 my $ios = IO
::String
->new($newick);
424 my $tio = Bio
::TreeIO
->new(-fh
=>$ios,-format
=>'newick');
425 $tio->write_tree($tree);
426 my @names = $newick=~/(\w+):\d+/g;
428 for(my $i=0; $i < $#names; $i++){
429 $names{$names[$i]} = $i+1;
431 $self->names(\
%names);
440 Usage : Internal function, not to be called directly
441 Function: Create input file for Consense program
443 Returns : name of file containing a trees in newick format
444 Args : an array ref of Bio::Tree::Tree object or input file name
450 my ($self, $input) = @_;
451 my ($alnfilename,$tfh);
453 # a phy formatted alignment file
454 unless (ref $input) {
455 # check that file exists or throw
456 $alnfilename= $input;
457 unless (-e
$input) {return 0;}
458 my $tio = Bio
::TreeIO
->new(-file
=>$alnfilename,-format
=>'newick');
459 my $tree = $tio->next_tree;
460 $self->_set_names_from_tree($tree);
464 # $input may be a SimpleAlign Object
465 my @input = ref($input) eq "ARRAY" ? @
{$input} : ($input);
466 ($tfh,$alnfilename) = $self->io->tempfile(-dir
=>$self->tempdir);
467 my $treeIO = Bio
::TreeIO
->new(-fh
=> $tfh,
470 foreach my $tree(@input){
471 $tree->isa('Bio::Tree::TreeI') || $self->throw('Expected a Bio::TreeI object');
472 $treeIO->write_tree($tree);
474 #get the species names in order, using the first one
475 $self->_set_names_from_tree($input[0]);
485 Usage : $tree->names(\%names)
486 Function: get/set for a hash ref for storing names in matrix
489 Returns : hash reference
490 Args : hash reference
495 my ($self,$name) = @_;
497 $self->{'_names'} = $name;
499 return $self->{'_names'};
505 Usage : Internal function, not to be called directly
506 Function: Create parameter inputs for Consense program
508 Returns : parameter string to be passed to Consense
509 Args : name of calling object
514 my ($attr, $value, $self);
518 my $param_string = "";
521 #for case where type is Ml
524 my %menu = %{$Menu{$self->version}->{'CONSENSE'}};
526 foreach my $attr ( @CONSENSE_PARAMS) {
527 $value = $self->$attr();
528 next unless (defined $value);
529 if ($attr =~/ROOTED/i){
531 $param_string .= $menu{'ROOTED'};
533 elsif($attr =~/OUTGROUP/i){
535 $self->warn("Outgroup option cannot be used with a rooted tree");
538 if($value !~/^\d+$/){ # is a name
539 my %names = %{$self->names};
540 $names{$value} || $self->throw("Outgroup $value not found");
541 $value = $names{$value};
543 $param_string .=$menu{'OUTGROUP'}."$value\n";
545 elsif($attr=~/TYPE/i){
547 ($value,$frac) = split(/\s+/,$value);
548 #default if not given
550 if($frac <= 0.5 || $frac > 1){
551 $self->warn("fraction given is out of range 0.5<frac<1, setting to 0.5");
556 $param_string.=$menu{'TYPE'}{uc $value};
559 $param_string.=$menu{uc $attr};
562 $param_string .= $menu{'SUBMIT'};
564 $param_string.=$frac."\n";
567 return $param_string;
572 =head1 Bio::Tools::Run::Wrapper methods
576 =head2 no_param_checks
578 Title : no_param_checks
579 Usage : $obj->no_param_checks($newval)
580 Function: Boolean flag as to whether or not we should
581 trust the sanity checks for parameter values
582 Returns : value of no_param_checks
583 Args : newvalue (optional)
588 =head2 save_tempfiles
590 Title : save_tempfiles
591 Usage : $obj->save_tempfiles($newval)
593 Returns : value of save_tempfiles
594 Args : newvalue (optional)
602 Usage : my $outfile = $Consense->outfile_name();
603 Function: Get/Set the name of the output file for this run
604 (if you wanted to do something special)
606 Args : [optional] string to set value to
615 Usage : my $tmpdir = $self->tempdir();
616 Function: Retrieve a temporary directory name (which is created)
617 Returns : string which is the name of the temporary directory
626 Usage : $codeml->cleanup();
627 Function: Will cleanup the tempdir directory after a Consense run
637 Usage : $obj->io($newval)
638 Function: Gets a L<Bio::Root::IO> object
639 Returns : L<Bio::Root::IO>
645 1; # Needed to keep compiler happy