1 # $Id: MCS.pm,v 1.3 2007/05/25 10:14:55 sendu Exp $
3 # BioPerl module for Bio::Tools::Run::MCS
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Sendu Bala <bix@sendu.me.uk>
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Tools::Run::MCS - Wrapper for MCS
21 use Bio::Tools::Run::MCS;
24 $factory = Bio::Tools::Run::MCS->new();
26 # Run MCS on an alignment
27 my @results = $factory->run($alignfilename);
29 # or with alignment object
30 @results = $factory->run($bio_simplalign);
33 foreach my $feat (@results) {
34 my $seq_id = $feat->seq_id;
35 my $start = $feat->start;
37 my $score = $feat->score;
38 my ($pvalue) = $feat->get_tag_values('pvalue');
39 my ($kind) = $feat->get_tag_values('kind'); # 'all', 'exon' or 'nonexon'
44 This is a wrapper for running the MCS (binCons) scripts by Elliott H Margulies.
45 You can get details here: http://zoo.nhgri.nih.gov/elliott/mcs_doc/. MCS is used
46 for the prediciton of transcription factor binding sites and other regions of
47 the genome conserved amongst different species.
49 Note that this wrapper assumes you already have alignments, so only uses MCS
50 for the latter stages (the stages involving align2binomial.pl,
51 generate_phyloMAX_score.pl and generate_mcs_beta.pl).
53 You can try supplying normal MCS command-line arguments to new(), eg.
55 $factory->new(-percentile => 95)
57 or calling arg-named methods (excluding the initial
60 $factory->percentile(95)
62 to set the --percentile arg).
65 You will need to enable this MCS wrapper to find the MCS scripts.
66 This can be done in (at least) three ways:
68 1. Make sure the MCS scripts are in your path.
69 2. Define an environmental variable MCSDIR which is a
70 directory which contains the MCS scripts:
73 export MCSDIR=/home/username/mcs/
77 setenv MCSDIR /home/username/mcs
79 3. Include a definition of an environmental variable MCSDIR in
80 every script that will use this MCS wrapper module, e.g.:
82 BEGIN { $ENV{MCSDIR} = '/home/username/mcs/' }
83 use Bio::Tools::Run::MCS;
89 User feedback is an integral part of the evolution of this and other
90 Bioperl modules. Send your comments and suggestions preferably to
91 the Bioperl mailing list. Your participation is much appreciated.
93 bioperl-l@bioperl.org - General discussion
94 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
98 Please direct usage questions or support issues to the mailing list:
100 I<bioperl-l@bioperl.org>
102 rather than to the module maintainer directly. Many experienced and
103 reponsive experts will be able look at the problem and quickly
104 address it. Please include a thorough description of the problem
105 with code and data examples if at all possible.
107 =head2 Reporting Bugs
109 Report bugs to the Bioperl bug tracking system to help us keep track
110 of the bugs and their resolution. Bug reports can be submitted via
113 http://redmine.open-bio.org/projects/bioperl/
115 =head1 AUTHOR - Sendu Bala
117 Email bix@sendu.me.uk
121 The rest of the documentation details each of the object methods.
122 Internal methods are usually preceded with a _
126 package Bio
::Tools
::Run
::MCS
;
133 use Bio
::Annotation
::SimpleValue
;
135 use base
qw(Bio::Tools::Run::Phylo::PhyloBase);
137 our $PROGRAM_NAME = 'align2binomial.pl';
140 # methods for the mcs args we support
141 our @PARAMS = qw(neutral percentile mcs specificity sensitivity name);
142 our @SWITCHES = qw(neg-score);
144 # just to be explicit, args we don't support (yet) or we handle ourselves
145 our @UNSUPPORTED = qw(ucsc gtf neutral-only fourd-align align-only ar);
148 # lets add all the mcs scripts to the path so that when we call
149 # align2binomial.pl it can find its siblings
150 $PROGRAM_DIR = $ENV{'MCSDIR'};
151 $ENV{PATH
} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR;
157 Usage : $factory>program_name()
158 Function: holds the program name
165 return $PROGRAM_NAME;
171 Usage : $factory->program_dir(@params)
172 Function: returns the program directory, obtained from ENV variable.
185 Usage : $factory = Bio::Tools::Run::MCS->new()
186 Function: creates a new MCS factory
187 Returns : Bio::Tools::Run::MCS
188 Args : Many options understood by MCS can be supplied as key =>
191 These options can NOT be used with this wrapper:
192 ucsc gtf neutral-only fourd-align align-only ar
197 my ($class, @args) = @_;
198 my $self = $class->SUPER::new
(@args);
200 $self->_set_from_args(\
@args, -methods
=> [@PARAMS, @SWITCHES, 'quiet'],
209 Usage : $result = $factory->run($align_file_or_object, Bio::Location::Atomic,
211 Function: Runs the MCS scripts on an alignment.
212 Returns : list of Bio::SeqFeatureI feature objects (with coordinates corrected
213 according to the supplied offset, if any)
214 Args : The first argument represents an alignment, the optional second
215 argument represents the chromosome, stand and end and the optional
216 third argument represents annotation of the exons in the alignment.
218 The alignment can be provided as a multi-fasta format alignment
219 filename, or a Bio::Align::AlignI compliant object (eg. a
222 The position in the genome can be provided as a Bio::Location::Atomic
223 with start, end and seq_id set.
225 The annnotation can be provided as an array of Bio::SeqFeatureI
231 my ($self, $aln, $offset, $exon_feats) = @_;
232 $self->_alignment($aln || $self->alignment || $self->throw("An alignment must be supplied"));
234 return $self->_run($offset, $exon_feats);
238 my ($self, $atomic, $exon_feats) = @_;
240 my $exe = $self->executable || return;
243 my $temp_dir = $self->tempdir;
244 my $cwd = Cwd
->cwd();
245 chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
248 my $start_adjust = 0;
250 $start_adjust = $atomic->start;
251 $offset = '--ucsc '.$atomic->seq_id.':'.$start_adjust.'-'.$atomic->end;
255 my $gtf_file = 'exons.gtf';
257 my $fout = Bio
::FeatureIO
->new(-file
=> ">$gtf_file", -format
=> 'gtf');
258 foreach my $feat (@
{$exon_feats}) {
259 $fout->write_feature($feat);
262 my $gtf = $exon_feats ?
"--gtf $gtf_file" : '';
264 # step '2' (http://zoo.nhgri.nih.gov/elliott/mcs_doc/node1.html) of MCS:
265 # run align2binomial.pl to calculate individual species binomial scores
266 my $aln_file = $self->_write_alignment;
267 my $error_file = 'stderr';
268 my $binomial_file = 'align_name.binomial';
269 my $cmd = "align2binomial.pl $offset $gtf $aln_file > $binomial_file 2> $error_file";
270 #system("rm -fr $cwd/mcs_dir; cp -R $temp_dir $cwd/mcs_dir");
271 my $throw = system($cmd);
272 open(my $efh, "<", $error_file) || $self->throw("Could not open error file '$error_file'");
276 $throw = 1 if /not divisible by 3/;
279 $self->throw($error) if $throw;
281 # step '3': run generate_phyloMAX_score.pl to combine the individual
282 # binomial scores and generate the final Multi-species Conservation Score
283 my $phylo_file = 'align_name.phylo';
284 system("generate_phyloMAX_score.pl $binomial_file > $phylo_file") && $self->throw("generate_phyloMAX_score.pl call failed: $?, $!");
286 # step '4': Generate MCSs from the conservation score using
287 # generate_mcs_beta.pl
288 my $mcs_file = 'mcs_result.stdout';
289 my $bed_file = 'align_name.bed'; # hardcoded in generate_mcs_beta.pl
290 system("generate_mcs_beta.pl $offset $gtf $phylo_file > $mcs_file") && $self->throw("generate_mcs_beta.pl failed: $?, $!");
293 my $fin = Bio
::FeatureIO
->new(-file
=> $bed_file, -format
=> 'bed');
294 my $source = Bio
::Annotation
::SimpleValue
->new(-value
=> 'MCS');
295 while (my $feat = $fin->next_feature()) {
296 # convert coords given offset
298 $feat->start($feat->start + $start_adjust);
299 $feat->end($feat->end + $start_adjust);
301 $feat->source($source);
306 chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
314 Usage : Internal function, not to be called directly
315 Function: Creates a string of params to be used in the command string
316 Returns : string of params
324 my $param_string = $self->SUPER::_setparams
(-params
=> \
@PARAMS,
325 -switches
=> \
@SWITCHES,
328 $param_string .= ' 1>/dev/null' if $self->quiet;
330 return $param_string;