3 # BioPerl module for Bio::Tools::MZEF
5 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
7 # Copyright Hilmar Lapp
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Tools::MZEF - Results of one MZEF run
19 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
21 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
22 # to indicate that the sequence was reversed prior to feeding it to MZEF
23 # and that you want to have this reflected in the strand() attribute of
24 # the exons, as well as have the coordinates translated to the non-reversed
26 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
30 # note: this class is-a Bio::Tools::AnalysisResult which implements
31 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
32 while($gene = $mzef->next_prediction()) {
33 # $gene is an instance of Bio::Tools::Prediction::Gene
35 # $gene->exons() returns an array of
36 # Bio::Tools::Prediction::Exon objects
38 @exon_arr = $gene->exons();
41 @intrl_exons = $gene->exons('Internal');
42 # note that presently MZEF predicts only internal exons!
45 # essential if you gave a filename at initialization (otherwise the file
51 The MZEF module provides a parser for MZEF gene structure prediction
54 This module inherits from L<Bio::Tools::AnalysisResult> and therefore
55 implements L<Bio::SeqAnalysisParserI>.
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to one
63 of the Bioperl mailing lists. Your participation is much appreciated.
65 bioperl-l@bioperl.org - General discussion
66 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70 Report bugs to the Bioperl bug tracking system to help us keep track
71 of the bugs and their resolution. Bug reports can be submitted via the
74 http://bugzilla.open-bio.org/
76 =head1 AUTHOR - Hilmar Lapp
78 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
82 The rest of the documentation details each of the object
83 methods. Internal methods are usually preceded with a _
88 # Let the code begin...
91 package Bio
::Tools
::MZEF
;
94 use Bio
::Tools
::Prediction
::Gene
;
95 use Bio
::Tools
::Prediction
::Exon
;
97 use base
qw(Bio::Tools::AnalysisResult);
# _initialize_state(@args)
#
# Sets up the parser's private state after letting the parent class
# initialize its own.
#
# Args    : the named constructor arguments; only -strand is consumed here.
# Returns : whatever the inherited _initialize_state() returned.
sub _initialize_state {
    my ($self, @args) = @_;

    # first call the inherited method!
    my $make = $self->SUPER::_initialize_state(@args);

    # handle our own parameters
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);

    # our private state variables; the strand defaults to forward (1)
    $self->{'_strand'}       = defined($strand) ? $strand : 1;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # array of pre-parsed predictions
    $self->{'_preds'}        = [];

    return $make;
}
120 =head2 analysis_method
122 Usage : $mzef->analysis_method();
123 Purpose : Inherited method. Overridden to ensure that the name matches
# analysis_method([$method])
#
# Guarded accessor: a true $method that does not contain "mzef"
# (case-insensitive) is rejected via throw(); anything else is handed to the
# inherited analysis_method(), whose return value is passed back unchanged.
sub analysis_method {
    my ($self, $method) = @_;

    if ($method && ($method !~ /mzef/i)) {
        $self->throw("method $method not supported in " . ref($self));
    }
    return $self->SUPER::analysis_method($method);
}
143 Usage : while($gene = $mzef->next_feature()) {
146 Function: Returns the next gene structure prediction of the MZEF result
147 file. Call this method repeatedly until FALSE is returned.
149 The returned object is actually a SeqFeatureI implementing object.
150 This method is required for classes implementing the
151 SeqAnalysisParserI interface, and is merely an alias for
152 next_prediction() at present.
154 Note that with the present version of MZEF there will only be one
155 object returned, because MZEF does not predict individual genes
156 but just potential internal exons.
158 Returns : A Bio::Tools::Prediction::Gene object.
# next_feature(@args)
#
# Entry point required by the SeqAnalysisParserI interface; at present it is
# merely an alias for next_prediction() and returns whatever that returns.
sub next_feature {
    my ($self, @args) = @_;

    # Neither this method nor next_prediction() expects arguments today, but
    # any that are given are passed on so that nothing has to change here if
    # next_prediction() ever starts accepting some.
    return $self->next_prediction(@args);
}
171 =head2 next_prediction
173 Title : next_prediction
174 Usage : while($gene = $mzef->next_prediction()) {
177 Function: Returns the next gene structure prediction of the MZEF result
178 file. Call this method repeatedly until FALSE is returned.
180 Note that with the present version of MZEF there will only be one
181 object returned, because MZEF does not predict individual genes
182 but just potential internal exons.
184 Returns : A Bio::Tools::Prediction::Gene object.
# next_prediction()
#
# Returns the next queued gene prediction, or nothing once the queue is
# empty. The report is parsed on the first call only.
sub next_prediction {
    my ($self) = @_;

    # if the prediction section hasn't been parsed yet, we do this now
    $self->_parse_predictions() unless $self->_predictions_parsed();

    # return the next gene structure (transcript)
    return $self->_prediction();
}
200 =head2 _parse_predictions
202 Title : _parse_predictions()
203 Usage : $obj->_parse_predictions()
204 Function: Parses the prediction section. Automatically called by
205 next_prediction() if not yet done.
# _parse_predictions()
#
# Reads the report line by line through _readline() until it returns undef.
# Every exon row becomes a Bio::Tools::Prediction::Exon that is added to a
# single Bio::Tools::Prediction::Gene; the gene is created lazily on the
# first exon row, queued with _add_prediction() once the input is exhausted,
# and the parsed flag is then set. If no exon row is seen, nothing is queued.
#
# Returns : the value of _predictions_parsed(1).
sub _parse_predictions {
    my ($self) = @_;

    my $exon_tag = "InternalExon";
    # MZEF reports potential internal exons rather than individual genes, so
    # there is only ever one gene structure to number.
    my $prednr = 1;
    my $gene;
    # Length from the "File_Name: ... Sequence_length: N" header line; needed
    # to translate coordinates when the input sequence was reversed.
    # (The sequence name on that line is not used: the file name is truncated
    # to 10 chars, which is too poor to be sensible.)
    my $seqlen;

    # A lexical line buffer is used so the caller's $_ is left alone.
    while (defined(my $line = $self->_readline())) {
        # Exon row. Start-end is stripped off first because it may not be
        # space delimited; the substitution both tests for and removes it.
        if ($line =~ s/^\s*(\d+)\s*-\s*(\d+)\s+//) {
            my ($start, $end) = ($1, $2);

            if (!defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }

            # split the rest into fields
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);

            # the feature is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one

            if ($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            }
            else {
                # sequence was reversed before the run: mirror the
                # coordinates back onto the non-reversed sequence
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }

            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);

            # frame -- position of the first "1" in the Orf field, or undef
            # when the field contains none
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);

            # add to gene structure (done only now that start and end are
            # set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }

        # banner line naming the program
        if ($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }

        # header line carrying the sequence length
        if ($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            $seqlen = $2;
            next;
        }
    }

    $self->_add_prediction($gene) if defined($gene);
    return $self->_predictions_parsed(1);
}
285 Title : _prediction()
286 Usage : $gene = $obj->_prediction()
# _prediction()
#
# Removes and returns the oldest entry of the '_preds' queue. Returns
# nothing (undef in scalar context, the empty list in list context) when the
# queue is missing or empty.
sub _prediction {
    my ($self) = @_;

    return unless (exists($self->{'_preds'}) && @{$self->{'_preds'}});
    return shift(@{$self->{'_preds'}});
}
300 =head2 _add_prediction
302 Title : _add_prediction()
303 Usage : $obj->_add_prediction($gene)
# _add_prediction($gene)
#
# Appends $gene to the end of the '_preds' queue, creating the queue first
# if the object does not have one yet. Returns the new queue length (the
# value of push).
sub _add_prediction {
    my ($self, $gene) = @_;

    if (!exists($self->{'_preds'})) {
        $self->{'_preds'} = [];
    }
    return push(@{$self->{'_preds'}}, $gene);
}
319 =head2 _predictions_parsed
321 Title : _predictions_parsed
322 Usage : $obj->_predictions_parsed
325 Returns : TRUE or FALSE
# _predictions_parsed([$val])
#
# Reads, and optionally raises, the flag recording that the prediction
# section has been parsed. Only a true $val is stored, so a false argument
# never resets the flag. A missing flag is initialized to 0.
#
# Returns : the current value of the flag (TRUE or FALSE).
sub _predictions_parsed {
    my ($self, $val) = @_;

    $self->{'_preds_parsed'} = $val if $val;
    if (!exists($self->{'_preds_parsed'})) {
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}