[bug 2714]
[bioperl-live.git] / Bio / FeatureIO.pm
blob2decefe2a5acaecfdafe685a69536834663c58c6
1 # $Id$
3 # BioPerl module for Bio::FeatureIO
5 # Cared for by Allen Day <allenday@ucla.edu>
7 # Copyright Allen Day
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::FeatureIO - Handler for FeatureIO
17 =head1 SYNOPSIS
19 use Bio::FeatureIO;
21 #read from a file
22 $in = Bio::FeatureIO->new(-file => "my.gff" , -format => 'GFF');
24 #read from a filehandle
25 $in = Bio::FeatureIO->new(-fh => \*GFF , -format => 'GFF');
27 #read features already attached to a sequence
28 my $feat = Bio::FeatureIO->new(-seq => $seq , -format => 'features');
30 #read new features for existing sequence
31 my $seq = Bio::FeatureIO->new(-seq => $seq , -format => 'Das');
33 #write out features
34 $out = Bio::FeatureIO->new(-file => ">outputfilename" ,
35 -format => 'GFF' ,
36 -version => 3);
38 while ( my $feature = $in->next_feature() ) {
39 $out->write_feature($feature);
42 =head1 DESCRIPTION
44 An I/O iterator subsystem for genomic sequence features.
46 Bio::FeatureIO is a handler module for the formats in the FeatureIO set (eg,
47 Bio::FeatureIO::GFF). It is the officially sanctioned way of getting at
48 the format objects, which most people should use.
50 The Bio::FeatureIO system can be thought of like biological file handles.
51 They are attached to filehandles with smart formatting rules (eg,
52 GFF format, or BED format) and
53 can either read or write feature objects (Bio::SeqFeature objects, or
54 more correctly, Bio::FeatureHolderI implementing objects, of which
55 Bio::SeqFeature is one such object). If you want to know what to
56 do with a Bio::SeqFeatureI object, read L<Bio::SeqFeatureI>.
58 The idea is that you request a stream object for a particular format.
59 All the stream objects have a notion of an internal file that is read
60 from or written to. A particular FeatureIO object instance is configured
61 for either input or output. A specific example of a stream object is
62 the Bio::FeatureIO::gff object.
64 Each stream object has functions:
66 $stream->next_feature();
67 $stream->write_feature($feature);
69 =head1 SUPPORTED FORMATS
71 name module
72 -----------------------------------
73 BED bed.pm
74 GFF gff.pm
75 GTF gtf.pm
76 InterPro (IPRScan 4.0) interpro.pm
77 PTT (NCBI protein table) ptt.pm
80 =head1 CONSTRUCTORS
82 =head2 Bio::FeatureIO-E<gt>new()
84 $featureIO = Bio::FeatureIO->new(-file => 'filename', -format=>$format);
85 $featureIO = Bio::FeatureIO->new(-fh => \*FILEHANDLE, -format=>$format);
86 $featureIO = Bio::FeatureIO->new(-seq => $seq, -format=>$format);
88 The new() class method constructs a new Bio::FeatureIO object. The
89 returned object can be used to retrieve or print feature objects. new()
90 accepts the following parameters:
92 =over 4
94 =item -file
96 A file path to be opened for reading or writing. The usual Perl
97 conventions apply:
99 'file' # open file for reading
100 '>file' # open file for writing
101 '>>file' # open file for appending
102 '+<file' # open file read/write
103 'command |' # open a pipe from the command
104 '| command' # open a pipe to the command
106 =item -fh
108 You may provide new() with a previously-opened filehandle. For
109 example, to read from STDIN:
111 $featio = Bio::FeatureIO->new(-fh => \*STDIN);
113 Note that you must pass filehandles as references to globs.
115 If neither a filehandle nor a filename is specified, then the module
116 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
117 semantics.
119 A string filehandle is handy if you want to modify the output in
120 memory, before printing it out. The following program reads in GFF
121 formatted features from a file and prints them out in GTF format with
122 some HTML tags:
124 use Bio::FeatureIO;
125 use IO::String;
126 my $in = Bio::FeatureIO->new('-file' => "my.gff" ,
127 '-format' => 'GFF');
128 while ( my $f = $in->next_feature() ) {
129 # the output handle is reset for every file
130 my $stringio = IO::String->new($string);
131 my $out = Bio::FeatureIO->new('-fh' => $stringio,
132 '-format' => 'gtf');
133 # output goes into $string
134 $out->write_feature($f);
135 # modify $string
136 $string =~ s|(>)(\w+)|$1<font color="Red">$2</font>|g;
137 # print into STDOUT
138 print $string;
141 =item -format
143 Specify the format of the file. See above for list of supported formats
145 =item -flush
147 By default, all files (or filehandles) opened for writing features
148 will be flushed after each write_feature() (making the file immediately
149 usable). If you don't need this facility and would like to marginally
150 improve the efficiency of writing multiple features to the same file
151 (or filehandle), pass the -flush option '0' or any other value that
152 evaluates as defined but false:
154 my $f1 = Bio::FeatureIO->new(-file => "<a.f1",
155 -format => "f1");
156 my $f2 = Bio::FeatureIO->new(-file => ">a.f2",
157 -format => "f2",
158 -flush => 0); # go as fast as we can!
160 while($feature = $f1->next_feature) { $f2->write_feature($feature) }
162 =back
164 =head2 Bio::FeatureIO-E<gt>newFh()
166 $fh = Bio::FeatureIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
167 $fh = Bio::FeatureIO->newFh(-format => $format);
168 # etc.
170 This constructor behaves like new(), but returns a tied filehandle
171 rather than a Bio::FeatureIO object. You can read features from this
172 object using the familiar E<lt>E<gt> operator, and write to it using
173 print(). The usual array and $_ semantics work. For example, you can
174 read all feature objects into an array like this:
176 @features = <$fh>;
178 Other operations, such as read(), sysread(), write(), close(), and printf()
179 are not supported.
181 =head1 OBJECT METHODS
183 See below for more detailed summaries. The main methods are:
185 =head2 $feature = $featureIO-E<gt>next_feature()
187 Fetch the next feature from the stream.
189 =head2 $featureIO-E<gt>write_feature($feature [,$another_feature,...])
191 Write the specified feature(s) to the stream.
193 =head2 TIEHANDLE(), READLINE(), PRINT()
195 These provide the tie interface. See L<perltie> for more details.
197 =head1 FEEDBACK
199 =head2 Mailing Lists
201 User feedback is an integral part of the evolution of this
202 and other Bioperl modules. Send your comments and suggestions preferably
203 to one of the Bioperl mailing lists.
205 Your participation is much appreciated.
207 bioperl-l@bioperl.org - General discussion
208 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
210 =head2 Reporting Bugs
212 Report bugs to the Bioperl bug tracking system to help us keep track
213 of the bugs and their resolution. Bug reports can be submitted via the
214 web:
216 http://bugzilla.open-bio.org/
218 =head1 AUTHOR - Allen Day
220 Email allenday@ucla.edu
222 =head1 APPENDIX
224 The rest of the documentation details each of the object
225 methods. Internal methods are usually preceded with a _
227 =cut
229 #' Let the code begin...
231 package Bio::FeatureIO;
233 use strict;
235 use Symbol();
237 use base qw(Bio::Root::Root Bio::Root::IO);
239 =head2 new
241 Title : new
242 Usage : $stream = Bio::FeatureIO->new(-file => $filename, -format => 'Format')
243 Function: Returns a new feature stream
244 Returns : A Bio::FeatureIO stream initialised with the appropriate format
245 Args : Named parameters:
246 -file => $filename
247 -fh => filehandle to attach to
248 -format => format
250 =cut
252 my $entry = 0;
# Factory constructor.
#
# Called on a concrete driver class (any class name matching
# Bio::FeatureIO::<something>) it builds the object through the parent
# constructor and then runs _initialize() on it.  Called on any other class
# it works out which format driver is wanted, loads that driver on demand
# and delegates construction to "Bio::FeatureIO::<format>".
#
# Args    : named parameters; keys are matched case-insensitively
#             -format => format name (takes precedence over guessing)
#             -file   => file name, also used to guess the format
#             -fh     => filehandle
#           When neither -format nor -file is given, $ARGV[0] is used for
#           guessing the format.
# Returns : the new stream object, or nothing (undef / empty list) when no
#           format can be determined or the driver module fails to load.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Concrete driver class: construct and initialise it directly.
    if ( $class =~ /Bio::FeatureIO::\S+/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    # An explicit -format wins; otherwise guess from the file name (or the
    # first command-line argument).  The guess is made exactly once: asking
    # again with the same file name cannot produce a different answer.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{-file} || $ARGV[0] );

    # Without a format there is no driver to load.  Say so explicitly
    # instead of trying to load the nonsensical module "Bio::FeatureIO::".
    if ( !$format ) {
        warn "$class: cannot determine the feature format; "
            . "supply -format or a -file name to guess it from\n";
        return;
    }

    $format = lc $format;    # normalize capitalization to lower case
    return unless $class->_load_format_module($format);
    return "Bio::FeatureIO::$format"->new(@args);
}
288 =head2 newFh
290 Title : newFh
291 Usage : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
292 Function: does a new() followed by an fh()
293 Example : $fh = Bio::FeatureIO->newFh(-file=>$filename,-format=>'Format')
294 $feature = <$fh>; # read a feature object
295 print $fh $feature; # write a feature object
296 Returns : filehandle tied to the Bio::FeatureIO::Fh class
297 Args :
299 See L<Bio::FeatureIO::Fh>
301 =cut
# Convenience constructor: builds a stream with new() and hands back the
# result of calling fh() on it.
#
# Args    : the same arguments new() accepts (passed through untouched)
# Returns : whatever fh() returns, or nothing when new() yields a false value
sub newFh {
    my ($class, @ctor_args) = @_;
    my $stream = $class->new(@ctor_args);
    return unless $stream;
    return $stream->fh;
}
309 =head2 fh
311 Title : fh
312 Usage : $obj->fh
313 Function:
314 Example : $fh = $obj->fh; # make a tied filehandle
315 $feature = <$fh>; # read a feature object
316 print $fh $feature; # write a feature object
317 Returns : filehandle tied to Bio::FeatureIO class
318 Args : none
320 =cut
# Creates a fresh anonymous glob, ties it to the invocant's class with the
# invocant itself as the tie argument, and returns the glob reference.
#
# Args    : none
# Returns : reference to the newly tied glob
sub fh {
    my ($self) = @_;
    my $tie_class = ref($self) || $self;
    my $glob      = Symbol::gensym();
    tie $$glob, $tie_class, $self;
    return $glob;
}
331 # _initialize is chained for all FeatureIO classes
# Per-object set-up run by new() for concrete driver classes.
#
# Stores the value of the -seq argument through seq() and then passes the
# complete argument list on to _initialize_io().
# (flush is initialized by the Root::IO init.)
#
# Args    : the named parameters given to new()
# Returns : whatever _initialize_io() returns
sub _initialize {
    my $self = shift;
    my %opts = @_;

    $self->seq( $opts{-seq} );

    # initialize the IO part
    return $self->_initialize_io(%opts);
}
343 =head2 next_feature
345 Title : next_feature
346 Usage : $feature = $stream->next_feature
347 Function: Reads the next feature object from the stream and returns it.
349 Certain driver modules may encounter entries in the stream
350 that are either misformatted or that use syntax not yet
351 understood by the driver. If such an incident is
352 recoverable, e.g., by dismissing a feature of a feature
353 table or some other non-mandatory part of an entry, the
354 driver will issue a warning. In the case of a
355 non-recoverable situation an exception will be thrown. Do
356 not assume that you can resume parsing the same stream
357 after catching the exception. Note that you can always turn
358 recoverable errors into exceptions by calling
359 $stream->verbose(2).
361 Returns : a Bio::SeqFeatureI feature object
362 Args : none
364 See L<Bio::Root::RootI>, L<Bio::SeqFeatureI>
366 =cut
# Placeholder reader for the generic class: always raises an exception via
# throw(), because only format-specific drivers know how to read features.
#
# Args    : none used
# Returns : does not return normally when throw() raises
sub next_feature {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::FeatureIO object.");
}
373 =head2 write_feature
375 Title : write_feature
376 Usage : $stream->write_feature($feature)
377 Function: writes the $feature object into the stream
378 Returns : 1 for success and 0 for error
379 Args : Bio::SeqFeature object
381 =cut
# Placeholder writer.
#
# On an object blessed directly into this package it raises a "cannot write
# to a generic object" exception; on an object of any other class (a driver
# that did not override this method) it calls throw_not_implemented().
#
# Args    : none used here
# Returns : does not return normally when the throw methods raise
sub write_feature {
    my $self = shift;

    return $self->throw("Sorry, you cannot write to a generic Bio::FeatureIO object.")
        if ref($self) eq __PACKAGE__;

    return $self->throw_not_implemented();
}
392 =head2 _load_format_module
394 Title : _load_format_module
395 Usage : *INTERNAL FeatureIO stuff*
396 Function: Loads up (like use) a module at run time on demand
397 Example :
398 Returns :
399 Args :
401 =cut
# Loads the driver module for a format at run time.
#
# The module name is "<class of invocant>::<format>" and loading is done by
# _load_module().  If loading raises an exception, a diagnostic is printed
# to STDERR and the exception is swallowed.
#
# Args    : $format - format name (used verbatim as the last name component)
# Returns : the value returned by _load_module(), or undef when it died
sub _load_format_module {
    my ($self, $format) = @_;
    my $class  = ref($self) || $self;
    my $module = $class . "::$format";
    my $ok;

    # Judge success by the eval's own result rather than by inspecting $@
    # afterwards: on older perls $@ can be reset while the stack unwinds,
    # which would make a failed load look like a success.
    my $completed = eval {
        $ok = $self->_load_module($module);
        1;
    };

    if ( !$completed ) {
        my $error = $@ || 'unknown error';    # capture before anything resets it
        print STDERR <<END;
$self: $format cannot be found
Exception $error
For more information about the FeatureIO system please see the FeatureIO docs.
This includes ways of checking for formats at compile time, not run time
END
    }

    return $ok;
}
424 =head2 seq
426 Title : seq
427 Usage : $obj->seq() OR $obj->seq($newSeq)
428 Example :
429 Returns : Bio::SeqI object
430 Args : newSeq (optional)
432 =cut
# Combined getter/setter for the value stored under the object's 'seq' key.
#
# Args    : optional new value; an undefined argument leaves the stored
#           value untouched
# Returns : the currently stored value
sub seq {
    my ($self, $new_seq) = @_;

    if ( defined $new_seq ) {
        $self->{'seq'} = $new_seq;
    }

    return $self->{'seq'};
}
442 =head2 _filehandle
444 Title : _filehandle
445 Usage : $obj->_filehandle($newval)
446 Function: This method is deprecated. Call _fh() instead.
447 Example :
448 Returns : value of _filehandle
449 Args : newvalue (optional)
452 =cut
# Deprecated alias: forwards every argument to _fh() and returns its result.
sub _filehandle {
    my $self = shift;
    return $self->_fh(@_);
}
459 =head2 _guess_format
461 Title : _guess_format
462 Usage : $obj->_guess_format($filename)
463 Function: guess format based on file suffix
464 Example :
465 Returns : guessed format of filename (lower case)
466 Args :
467 Notes : See "SUPPORTED FORMATS"
469 =cut
# Guesses a format name from a file name's suffix (case-insensitive).
#
# A lexical variable holds the file name so that the caller's $_ - and any
# list element it may be aliased to inside a for/map/grep - is never
# overwritten.
#
# Args    : $filename - name to inspect
# Returns : nothing (undef / empty list) when $filename is false;
#           otherwise a lower-case format name:
#             .gff, .gff3, .gtf -> 'gff'
#             .bed              -> 'bed'
#             .ptt              -> 'ptt'
#             anything else     -> 'gff' (the default)
sub _guess_format {
    my ($class, $filename) = @_;
    return unless $filename;

    return 'gff' if $filename =~ /\.gff3?$/i;
    return 'gff' if $filename =~ /\.gtf$/i;    # GTF files are mapped to the gff driver
    return 'bed' if $filename =~ /\.bed$/i;
    return 'ptt' if $filename =~ /\.ptt$/i;

    return 'gff';    # the default
}
# Destructor: invokes the object's close() method when the object goes away.
sub DESTROY {
    my ($self) = @_;
    return $self->close();
}
# Tie constructor: wraps the given stream object in a hash under the
# 'featio' key and blesses that hash into the requested class.
#
# Args    : $class  - class to bless into
#           $featio - the value to store (the stream object)
# Returns : the blessed wrapper hash
sub TIEHANDLE {
    my $class   = shift;
    my $featio  = shift;
    my %wrapper = ( 'featio' => $featio );
    return bless \%wrapper, $class;
}
# Tie read hook (<$fh>).
#
# In list context it keeps calling next_feature() on the wrapped stream
# until a false value comes back and returns everything collected; in any
# other context it returns the result of a single next_feature() call.
sub READLINE {
    my ($self) = @_;
    my $stream = $self->{'featio'};

    if (wantarray) {
        my @features;
        while ( my $feature = $stream->next_feature() ) {
            push @features, $feature;
        }
        return @features;
    }

    return $stream->next_feature();
}
# Tie write hook (print $fh ...): forwards everything being printed to
# write_feature() on the wrapped stream and returns its result.
sub PRINT {
    my $tied   = shift;
    my $stream = $tied->{'featio'};
    return $stream->write_feature(@_);
}