3 # BioPerl module for Bio::Tools::MZEF
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::Tools::MZEF - Results of one MZEF run
21 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
23 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
24 # to indicate that the sequence was reversed prior to feeding it to MZEF
25 # and that you want to have this reflected in the strand() attribute of
26 # the exons, as well as have the coordinates translated to the non-reversed
28 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
32 # note: this class is-a Bio::Tools::AnalysisResult which implements
33 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
34 while($gene = $mzef->next_prediction()) {
35 # $gene is an instance of Bio::Tools::Prediction::Gene
37 # $gene->exons() returns an array of
38 # Bio::Tools::Prediction::Exon objects
40 @exon_arr = $gene->exons();
43 @intrl_exons = $gene->exons('Internal');
44 # note that presently MZEF predicts only internal exons!
47 # essential if you gave a filename at initialization (otherwise the file
53 The MZEF module provides a parser for MZEF gene structure prediction
56 This module inherits from L<Bio::Tools::AnalysisResult> and therefore
57 implements L<Bio::SeqAnalysisParserI>.
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 responsive experts will be able to look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 of the bugs and their resolution. Bug reports can be submitted via the
87 http://bugzilla.open-bio.org/
89 =head1 AUTHOR - Hilmar Lapp
91 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
95 The rest of the documentation details each of the object
96 methods. Internal methods are usually preceded with a _
101 # Let the code begin...
104 package Bio
::Tools
::MZEF
;
107 use Bio
::Tools
::Prediction
::Gene
;
108 use Bio
::Tools
::Prediction
::Exon
;
110 use base
qw(Bio::Tools::AnalysisResult);
112 sub _initialize_state
{
113 my($self,@args) = @_;
115 # first call the inherited method!
116 my $make = $self->SUPER::_initialize_state
(@args);
118 # handle our own parameters
119 my ($strand, $params) =
120 $self->_rearrange([qw(STRAND
124 # our private state variables
125 $strand = 1 unless defined($strand);
126 $self->{'_strand'} = $strand;
127 $self->{'_preds_parsed'} = 0;
128 $self->{'_has_cds'} = 0;
129 # array of pre-parsed predictions
130 $self->{'_preds'} = [];
133 =head2 analysis_method
135 Usage : $mzef->analysis_method();
136 Purpose : Inherited method. Overridden to ensure that the name matches
144 sub analysis_method
{
146 my ($self, $method) = @_;
147 if($method && ($method !~ /mzef/i)) {
148 $self->throw("method $method not supported in " . ref($self));
150 return $self->SUPER::analysis_method
($method);
156 Usage : while($gene = $mzef->next_feature()) {
159 Function: Returns the next gene structure prediction of the MZEF result
160 file. Call this method repeatedly until FALSE is returned.
162 The returned object is actually a SeqFeatureI implementing object.
163 This method is required for classes implementing the
164 SeqAnalysisParserI interface, and is merely an alias for
165 next_prediction() at present.
167 Note that with the present version of MZEF there will only be one
168 object returned, because MZEF does not predict individual genes
169 but just potential internal exons.
171 Returns : A Bio::Tools::Prediction::Gene object.
177 my ($self,@args) = @_;
178 # even though next_prediction doesn't expect any args (and this method
179 # does neither), we pass on args in order to be prepared if this changes
181 return $self->next_prediction(@args);
184 =head2 next_prediction
186 Title : next_prediction
187 Usage : while($gene = $mzef->next_prediction()) {
190 Function: Returns the next gene structure prediction of the MZEF result
191 file. Call this method repeatedly until FALSE is returned.
193 Note that with the present version of MZEF there will only be one
194 object returned, because MZEF does not predict individual genes
195 but just potential internal exons.
197 Returns : A Bio::Tools::Prediction::Gene object.
202 sub next_prediction
{
206 # if the prediction section hasn't been parsed yet, we do this now
207 $self->_parse_predictions() unless $self->_predictions_parsed();
209 # return the next gene structure (transcript)
210 return $self->_prediction();
213 =head2 _parse_predictions
215 Title : _parse_predictions()
216 Usage : $obj->_parse_predictions()
217 Function: Parses the prediction section. Automatically called by
218 next_prediction() if not yet done.
224 sub _parse_predictions
{
226 my ($method); # set but not used presently
227 my $exon_tag = "InternalExon";
229 # my $seqname; # name given in output is poorly formatted
233 while(defined($_ = $self->_readline())) {
234 if(/^\s*(\d+)\s*-\s*(\d+)\s+/) {
236 if(! defined($gene)) {
237 $gene = Bio
::Tools
::Prediction
::Gene
->new(
238 '-primary' => "GenePrediction$prednr",
239 '-source' => 'MZEF');
241 # we handle start-end first because may not be space delimited
243 my ($start,$end) = ($1,$2);
244 s/^\s*(\d+)\s*-\s*(\d+)\s+//;
245 # split the rest into fields
247 # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
248 # index: 0 1 2 3 4 5 6 7
249 my @flds = split(' ', $_);
250 # create the feature object depending on the type of signal --
251 # which is always an (internal) exon for MZEF
252 my $predobj = Bio
::Tools
::Prediction
::Exon
->new();
254 $predobj->source_tag('MZEF');
255 $predobj->significance($flds[0]);
256 $predobj->score($flds[0]); # what shall we set as overall score?
257 $predobj->strand($self->{'_strand'}); # MZEF searches only one
258 if($predobj->strand() == 1) {
259 $predobj->start($start);
262 $predobj->start($seqlen-$end+1);
263 $predobj->end($seqlen-$start+1);
266 $predobj->start_signal_score($flds[5]);
267 $predobj->end_signal_score($flds[7]);
268 $predobj->coding_signal_score($flds[6]);
269 # frame -- we simply extract the one with highest score from the
270 # orf field, and store the individual scores for now
271 my $frm = index($flds[4], "1");
272 $predobj->frame(($frm < 0) ?
undef : $frm);
273 $predobj->primary_tag($exon_tag);
274 $predobj->is_coding(1);
275 # add to gene structure (should be done only when start and end
276 # are set, in order to allow for proper expansion of the range)
277 $gene->add_exon($predobj);
280 if(/^\s*Internal .*(MZEF)/) {
281 $self->analysis_method($1);
284 if(/^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
285 # $seqname = $1; # this is too poor currently (file name truncated
286 # to 10 chars) in order to be sensible enough
291 # $gene->seq_id($seqname);
292 $self->_add_prediction($gene) if defined($gene);
293 $self->_predictions_parsed(1);
298 Title : _prediction()
299 Usage : $gene = $obj->_prediction()
309 return unless(exists($self->{'_preds'}) && @
{$self->{'_preds'}});
310 return shift(@
{$self->{'_preds'}});
313 =head2 _add_prediction
315 Title : _add_prediction()
316 Usage : $obj->_add_prediction($gene)
323 sub _add_prediction
{
324 my ($self, $gene) = @_;
326 if(! exists($self->{'_preds'})) {
327 $self->{'_preds'} = [];
329 push(@
{$self->{'_preds'}}, $gene);
332 =head2 _predictions_parsed
334 Title : _predictions_parsed
335 Usage : $obj->_predictions_parsed
338 Returns : TRUE or FALSE
342 sub _predictions_parsed
{
343 my ($self, $val) = @_;
345 $self->{'_preds_parsed'} = $val if $val;
346 if(! exists($self->{'_preds_parsed'})) {
347 $self->{'_preds_parsed'} = 0;
349 return $self->{'_preds_parsed'};