[bug 2621]
[bioperl-live.git] / Bio / Tools / MZEF.pm
blobb5dd53f2122ffe0b1def7162a1b7902ef93202a6
1 # $Id$
3 # BioPerl module for Bio::Tools::MZEF
5 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
7 # Copyright Hilmar Lapp
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tools::MZEF - Results of one MZEF run
=head1 SYNOPSIS

   $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
   # filehandle:
   $mzef = Bio::Tools::MZEF->new( -fh  => \*INPUT );
   # to indicate that the sequence was reversed prior to feeding it to MZEF
   # and that you want to have this reflected in the strand() attribute of
   # the exons, as well as have the coordinates translated to the
   # non-reversed sequence
   $mzef = Bio::Tools::MZEF->new( -file   => 'result.mzef',
                                  -strand => -1 );

   # parse the results
   # note: this class is-a Bio::Tools::AnalysisResult which implements
   # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
   while($gene = $mzef->next_prediction()) {
       # $gene is an instance of Bio::Tools::Prediction::Gene

       # $gene->exons() returns an array of
       # Bio::Tools::Prediction::Exon objects
       # all exons:
       @exon_arr = $gene->exons();

       # internal exons only
       @intrl_exons = $gene->exons('Internal');
       # note that presently MZEF predicts only internal exons!
   }

   # essential if you gave a filename at initialization (otherwise the file
   # will stay open)
   $mzef->close();

49 =head1 DESCRIPTION
51 The MZEF module provides a parser for MZEF gene structure prediction
52 output.
54 This module inherits off L<Bio::Tools::AnalysisResult> and therefore
55 implements L<Bio::SeqAnalysisParserI>.
57 =head1 FEEDBACK
59 =head2 Mailing Lists
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to one
63 of the Bioperl mailing lists. Your participation is much appreciated.
65 bioperl-l@bioperl.org - General discussion
66 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:

  http://bugzilla.open-bio.org/

76 =head1 AUTHOR - Hilmar Lapp
78 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
80 =head1 APPENDIX
82 The rest of the documentation details each of the object
83 methods. Internal methods are usually preceded with a _
85 =cut
88 # Let the code begin...
91 package Bio::Tools::MZEF;
92 use strict;
94 use Bio::Tools::Prediction::Gene;
95 use Bio::Tools::Prediction::Exon;
97 use base qw(Bio::Tools::AnalysisResult);
# Sets up the parser's private state.
#
# Calls the inherited _initialize_state() with the unmodified argument list
# first, then reads this class's own named parameters from the same list.
#
# Args    : -strand => strand value stored for the predicted exons
#                      (optional; defaults to 1 when not given)
#           -params => recognised in the argument list, but its value is
#                      not used by this method
# Returns : the result of the final assignment (the fresh, empty
#           prediction queue); nothing in this file relies on it
sub _initialize_state {
    my ($self, @args) = @_;

    # first call the inherited method!
    $self->SUPER::_initialize_state(@args);

    # handle our own parameters; only the strand is needed here
    my ($strand) = $self->_rearrange([qw(STRAND PARAMS)], @args);

    # our private state variables
    $strand = 1 unless defined($strand);
    $self->{'_strand'}       = $strand;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # array of pre-parsed predictions
    $self->{'_preds'} = [];
}
=head2 analysis_method

 Usage     : $mzef->analysis_method();
 Purpose   : Inherited method. Overridden to ensure that the name matches
             /mzef/i.
 Returns   : Whatever the inherited analysis_method() returns
             (documented as a string)
 Argument  : Optional method name. A true value that does not match
             /mzef/i is rejected.
 Throws    : An exception via throw() if a non-matching name is passed

=cut

#-------------
sub analysis_method {
#-------------
    my ($self, $method) = @_;

    # Only validate when a (true) name was actually supplied; a plain
    # getter call passes straight through to the parent class.
    if($method && ($method !~ /mzef/i)) {
        $self->throw("method $method not supported in " . ref($self));
    }
    return $self->SUPER::analysis_method($method);
}
=head2 next_feature

 Title   : next_feature
 Usage   : while($gene = $mzef->next_feature()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           The returned object is actually a SeqFeatureI implementing object.
           This method is required for classes implementing the
           SeqAnalysisParserI interface, and is merely an alias for
           next_prediction() at present.

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    : None required; any arguments given are handed on unchanged to
           next_prediction().

=cut

sub next_feature {
    my ($self, @args) = @_;

    # Neither this method nor next_prediction() currently expects any
    # arguments, but they are passed along anyway so that callers keep
    # working should next_prediction() ever start to accept some.
    return $self->next_prediction(@args);
}
=head2 next_prediction

 Title   : next_prediction
 Usage   : while($gene = $mzef->next_prediction()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    : None

=cut

sub next_prediction {
    my ($self) = @_;

    # if the prediction section hasn't been parsed yet, we do this now
    $self->_parse_predictions() unless $self->_predictions_parsed();

    # return the next gene structure (transcript) from the queue
    return $self->_prediction();
}
=head2 _parse_predictions

 Title   : _parse_predictions()
 Usage   : $obj->_parse_predictions()
 Function: Parses the prediction section. Automatically called by
           next_prediction() if not yet done.

           Reads lines via _readline() until it returns undef. Every exon
           line is turned into a Bio::Tools::Prediction::Exon and added to
           a single Bio::Tools::Prediction::Gene, which is queued with
           _add_prediction() once the input is exhausted.
 Example :
 Returns : The result of marking the predictions as parsed
           (_predictions_parsed(1))
 Throws  : If the stored strand is not 1 and an exon line is read before
           the sequence length is known, because the coordinates cannot
           be translated without it

=cut

sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # my $seqname; # name given in output is poorly formatted
    my $seqlen;
    my $prednr = 1;

    # A lexical line buffer is used so that the caller's $_ is left alone.
    while(defined(my $line = $self->_readline())) {
        # exon or signal
        # we handle start-end first because may not be space delimited
        # for large numbers; the substitution also strips the coordinates
        # off the line so only the score columns remain
        if($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start, $end) = ($1, $2);

            if(! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }

            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);

            # create the feature object depending on the type of signal --
            # which is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # Coordinates are translated back using the sequence
                # length, so refuse to continue without it rather than
                # silently producing meaningless positions.
                if(! defined($seqlen)) {
                    $self->throw("sequence length not found before exon " .
                                 "$start - $end; cannot translate " .
                                 "coordinates to the non-reversed sequence");
                }
                $predobj->start($seqlen-$end+1);
                $predobj->end($seqlen-$start+1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- taken as the position of the first "1" in the Orf
            # field; undef if there is none (or the field is missing)
            my $frm = defined($flds[4]) ? index($flds[4], "1") : -1;
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        if($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        if($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            # $seqname = $1; # this is too poor currently (file name truncated
                             # to 10 chars) in order to be sensible enough
            $seqlen = $2;
            next;
        }
    }
    # $gene->seq_id($seqname);
    $self->_add_prediction($gene) if defined($gene);
    return $self->_predictions_parsed(1);
}
=head2 _prediction

 Title   : _prediction()
 Usage   : $gene = $obj->_prediction()
 Function: internal; removes and returns the oldest entry of the queue
           of parsed predictions
 Example :
 Returns : The next queued prediction, or nothing (undef in scalar
           context, the empty list in list context) if the queue is
           missing or empty

=cut

sub _prediction {
    my ($self) = @_;

    # Look before touching the queue so that a missing or undefined
    # '_preds' entry is neither autovivified nor dereferenced.
    my $queue = exists($self->{'_preds'}) ? $self->{'_preds'} : undef;
    return unless($queue && @{$queue});
    return shift(@{$queue});
}
=head2 _add_prediction

 Title   : _add_prediction()
 Usage   : $obj->_add_prediction($gene)
 Function: internal; appends $gene to the queue of parsed predictions,
           creating the queue if it does not exist yet
 Example :
 Returns : The number of queued predictions after adding (the result
           of push)

=cut

sub _add_prediction {
    my ($self, $gene) = @_;

    # push autovivifies the array reference, so no separate existence
    # check is needed before appending.
    return push(@{$self->{'_preds'}}, $gene);
}
=head2 _predictions_parsed

 Title   : _predictions_parsed
 Usage   : $obj->_predictions_parsed
 Function: internal; gets or sets the flag recording whether the
           prediction section has been parsed already
 Example :
 Returns : TRUE or FALSE
 Args    : Optional new value for the flag. Any defined value is
           stored, including a false one, so the flag can be reset.

=cut

sub _predictions_parsed {
    my ($self, $val) = @_;

    # Test for definedness rather than truth, otherwise the flag could
    # never be switched back off.
    $self->{'_preds_parsed'} = $val if defined($val);
    $self->{'_preds_parsed'} = 0 unless exists($self->{'_preds_parsed'});
    return $self->{'_preds_parsed'};
}