tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tools / MZEF.pm
blob216e557e3890dbc252c871cc915b338e91cdbb5e
1 # $Id$
3 # BioPerl module for Bio::Tools::MZEF
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
9 # Copyright Hilmar Lapp
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tools::MZEF - Results of one MZEF run
19 =head1 SYNOPSIS
21 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
22 # filehandle:
23 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
24 # to indicate that the sequence was reversed prior to feeding it to MZEF
25 # and that you want to have this reflected in the strand() attribute of
26 # the exons, as well as have the coordinates translated to the non-reversed
27 # sequence
28 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
29 -strand => -1 );
31 # parse the results
32 # note: this class is-a Bio::Tools::AnalysisResult which implements
33 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
34 while($gene = $mzef->next_prediction()) {
35 # $gene is an instance of Bio::Tools::Prediction::Gene
37 # $gene->exons() returns an array of
38 # Bio::Tools::Prediction::Exon objects
39 # all exons:
40 @exon_arr = $gene->exons();
42 # internal exons only
43 @intrl_exons = $gene->exons('Internal');
44 # note that presently MZEF predicts only internal exons!
47 # essential if you gave a filename at initialization (otherwise the file
48 # will stay open)
49 $mzef->close();
51 =head1 DESCRIPTION
53 The MZEF module provides a parser for MZEF gene structure prediction
54 output.
56 This module inherits off L<Bio::Tools::AnalysisResult> and therefore
57 implements L<Bio::SeqAnalysisParserI>.
59 =head1 FEEDBACK
61 =head2 Mailing Lists
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to one
65 of the Bioperl mailing lists. Your participation is much appreciated.
67 bioperl-l@bioperl.org - General discussion
68 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 =head2 Support
72 Please direct usage questions or support issues to the mailing list:
74 I<bioperl-l@bioperl.org>
76 rather than to the module maintainer directly. Many experienced and
77 responsive experts will be able to look at the problem and quickly
78 address it. Please include a thorough description of the problem
79 with code and data examples if at all possible.
81 =head2 Reporting Bugs
83 Report bugs to the Bioperl bug tracking system to help us keep track
84 of the bugs and their resolution. Bug reports can be submitted via the
85 web:
87 http://bugzilla.open-bio.org/
89 =head1 AUTHOR - Hilmar Lapp
91 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
93 =head1 APPENDIX
95 The rest of the documentation details each of the object
96 methods. Internal methods are usually preceded with a _
98 =cut
101 # Let the code begin...
104 package Bio::Tools::MZEF;
105 use strict;
107 use Bio::Tools::Prediction::Gene;
108 use Bio::Tools::Prediction::Exon;
110 use base qw(Bio::Tools::AnalysisResult);
# Sets up the parser's private state from the constructor arguments.
# Recognised named argument: STRAND (defaults to 1 when not given).
sub _initialize_state {
    my ($self, @args) = @_;

    # Let the parent class initialise its share of the state first. The
    # result is not used here; it is only captured so the call keeps being
    # made in scalar context.
    my $make = $self->SUPER::_initialize_state(@args);

    # Pick out the one named argument this class handles itself.
    my ($strand) = $self->_rearrange(['STRAND'], @args);

    # Private state variables.
    $self->{'_strand'}       = defined($strand) ? $strand : 1;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # Queue of predictions that were parsed but not yet handed out.
    $self->{'_preds'} = [];
}
133 =head2 analysis_method
135 Usage : $mzef->analysis_method();
136 Purpose : Inherited method. Overridden to ensure that the name matches
137 /mzef/i.
138 Returns : String
139 Argument : n/a
141 =cut
143 #-------------
sub analysis_method {
    my ($self, $method) = @_;

    # A true method name must mention "mzef" (any letter case); a false
    # value, e.g. from a plain getter call, skips the check.
    $self->throw("method $method not supported in " . ref($self))
        if $method && $method !~ /mzef/i;

    # The inherited accessor does the actual get/set.
    return $self->SUPER::analysis_method($method);
}
153 =head2 next_feature
155 Title : next_feature
156 Usage : while($gene = $mzef->next_feature()) {
157 # do something
159 Function: Returns the next gene structure prediction of the MZEF result
160 file. Call this method repeatedly until FALSE is returned.
162 The returned object is actually a SeqFeatureI implementing object.
163 This method is required for classes implementing the
164 SeqAnalysisParserI interface, and is merely an alias for
165 next_prediction() at present.
167 Note that with the present version of MZEF there will only be one
168 object returned, because MZEF does not predict individual genes
169 but just potential internal exons.
170 Example :
171 Returns : A Bio::Tools::Prediction::Gene object.
172 Args :
174 =cut
sub next_feature {
    my $self = shift;

    # Neither this method nor next_prediction() currently expects any
    # arguments, but whatever was passed is forwarded anyway so callers keep
    # working should that ever change.
    return $self->next_prediction(@_);
}
184 =head2 next_prediction
186 Title : next_prediction
187 Usage : while($gene = $mzef->next_prediction()) {
188 # do something
190 Function: Returns the next gene structure prediction of the MZEF result
191 file. Call this method repeatedly until FALSE is returned.
193 Note that with the present version of MZEF there will only be one
194 object returned, because MZEF does not predict individual genes
195 but just potential internal exons.
196 Example :
197 Returns : A Bio::Tools::Prediction::Gene object.
198 Args :
200 =cut
sub next_prediction {
    my $self = shift;

    # Parse the prediction section on first use.
    if (!$self->_predictions_parsed()) {
        $self->_parse_predictions();
    }

    # Hand out the next queued gene structure, if any is left.
    return $self->_prediction();
}
213 =head2 _parse_predictions
215 Title : _parse_predictions()
216 Usage : $obj->_parse_predictions()
217 Function: Parses the prediction section. Automatically called by
218 next_prediction() if not yet done.
219 Example :
220 Returns :
222 =cut
# Reads the result via $self->_readline() until it returns undef, turning
# every exon line ("start - end  P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss") into a
# Bio::Tools::Prediction::Exon. All exons are collected in one
# Bio::Tools::Prediction::Gene, which is queued with _add_prediction() if at
# least one exon line was seen. Finally the predictions are flagged as
# parsed.
#
# Throws (via $self->throw) if exons have to be mapped back from a reversed
# sequence but no "Sequence_length:" header line was seen beforehand.
sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # my $seqname; # name given in output is poorly formatted
    my $seqlen;    # taken from the "File_Name: ... Sequence_length:" line
    my $prednr = 1;

    # A lexical line buffer is used instead of $_ so that the caller's
    # global $_ is not clobbered by parsing.
    while (defined(my $line = $self->_readline())) {
        # Exon line. Start-end is stripped off first because the numbers may
        # not be space delimited for large values; the substitution both
        # tests for the coordinates and removes them.
        if ($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start, $end) = ($1, $2);

            if (!defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }

            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);

            # create the feature object -- which is always an (internal)
            # exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if ($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # Translating back to the non-reversed sequence needs the
                # sequence length; without it the coordinates would be
                # silently wrong.
                $self->throw("no Sequence_length found before exon " .
                             "$start - $end; cannot translate coordinates " .
                             "of a reversed sequence")
                    unless defined($seqlen);
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- the position of the first "1" in the Orf field, or
            # undef if there is none (or the field is missing altogether)
            my $frm = defined($flds[4]) ? index($flds[4], "1") : -1;
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        # Header line naming the program: record it as the analysis method.
        if ($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        # Header line with file name and sequence length. The file name is
        # ignored (truncated to 10 chars by MZEF, hence too poor to be
        # sensible as a sequence name).
        if ($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            $seqlen = $2;
            next;
        }
    }
    # $gene->seq_id($seqname);
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
296 =head2 _prediction
298 Title : _prediction()
299 Usage : $gene = $obj->_prediction()
300 Function: internal
301 Example :
302 Returns :
304 =cut
sub _prediction {
    my $self = shift;

    # Take the oldest queued prediction off the front of the queue; the
    # exists() test comes first so a missing queue is not autovivified.
    if (exists $self->{'_preds'} and @{ $self->{'_preds'} }) {
        return shift @{ $self->{'_preds'} };
    }

    # Nothing queued (or no queue at all).
    return;
}
313 =head2 _add_prediction
315 Title : _add_prediction()
316 Usage : $obj->_add_prediction($gene)
317 Function: internal
318 Example :
319 Returns :
321 =cut
sub _add_prediction {
    my ($self, $gene) = @_;

    # Create the queue on demand, then append the new prediction to its end.
    $self->{'_preds'} = [] unless exists $self->{'_preds'};
    return push @{ $self->{'_preds'} }, $gene;
}
332 =head2 _predictions_parsed
334 Title : _predictions_parsed
335 Usage : $obj->_predictions_parsed
336 Function: internal
337 Example :
338 Returns : TRUE or FALSE
340 =cut
sub _predictions_parsed {
    my ($self, $val) = @_;

    if ($val) {
        # Only a true value is stored; a false argument leaves the flag as is.
        $self->{'_preds_parsed'} = $val;
    } elsif (!exists $self->{'_preds_parsed'}) {
        # First access without a flag present: start out as "not parsed".
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}