3 # BioPerl module for Bio::Structure::IO
5 # Copyright 2001, 2002 Kris Boulez
7 # You may distribute this module under the same terms as perl itself
10 # October 18, 1999 Largely rewritten by Lincoln Stein
11 # November 16, 2001 Copied Bio::SeqIO to Bio::Structure::IO and modified
12 # where needed. Factoring out common methods
13 # (to Bio::Root::IO) might be a good idea.
15 # POD documentation - main docs before the code
19 Bio::Structure::IO - Handler for Structure Formats
23 use Bio::Structure::IO;
25 $in = Bio::Structure::IO->new(-file => "inputfilename",
28 while ( my $struc = $in->next_structure() ) {
29 print "Structure ", $struc->id, " number of models: ",
30 scalar $struc->model,"\n";
35 Bio::Structure::IO is a handler module for the formats in the
36 Structure::IO set (e.g. L<Bio::Structure::IO::pdb>). It is the officially
37 sanctioned way of getting at the format objects, which most people
40 The Bio::Structure::IO system can be thought of like biological file
41 handles. They are attached to filehandles with smart formatting rules
42 (e.g. PDB format) and can either read or write structure objects
43 (Bio::Structure objects, or more correctly, Bio::Structure::StructureI
44 implementing objects, of which Bio::Structure is one such object). If
45 you want to know what to do with a Bio::Structure object, read
48 The idea is that you request a stream object for a particular format.
49 All the stream objects have a notion of an internal file that is read
50 from or written to. A particular Structure::IO object instance is
51 configured for either input or output. A specific example of a stream
52 object is the Bio::Structure::IO::pdb object.
54 Each stream object has functions
56 $stream->next_structure();
60 $stream->write_structure($struc);
64 $stream->type() # returns 'INPUT' or 'OUTPUT'
66 As an added bonus, you can recover a filehandle that is tied to the
67 Structure::IO object, allowing you to use the standard E<lt>E<gt>
68 and print operations to read and write structure objects:
70 use Bio::Structure::IO;
72 $stream = Bio::Structure::IO->newFh(-format => 'pdb'); # read from standard input
74 while ( $structure = <$stream> ) {
75 # do something with $structure
80 print $stream $structure; # when stream is in output mode
85 =head2 Bio::Structure::IO-E<gt>new()
87 $stream = Bio::Structure::IO->new(-file => 'filename', -format=>$format);
88 $stream = Bio::Structure::IO->new(-fh => \*FILEHANDLE, -format=>$format);
89 $stream = Bio::Structure::IO->new(-format => $format);
91 The new() class method constructs a new Bio::Structure::IO object. The
92 returned object can be used to retrieve or print Bio::Structure
93 objects. new() accepts the following parameters:
99 A file path to be opened for reading or writing. The usual Perl
102 'file' # open file for reading
103 '>file' # open file for writing
104 '>>file' # open file for appending
105 '+<file' # open file read/write
106 'command |' # open a pipe from the command
107 '| command' # open a pipe to the command
111 You may provide new() with a previously-opened filehandle. For
112 example, to read from STDIN:
114 $strucIO = Bio::Structure::IO->new(-fh => \*STDIN);
116 Note that you must pass filehandles as references to globs.
118 If neither a filehandle nor a filename is specified, then the module
119 will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
124 Specify the format of the file. Supported formats include:
126 pdb Protein Data Bank format
128 If no format is specified and a filename is given, then the module
129 will attempt to deduce it from the filename. If this is unsuccessful,
130 PDB format is assumed.
132 The format name is case insensitive. 'PDB', 'Pdb' and 'pdb' are
137 =head2 Bio::Structure::IO-E<gt>newFh()
139 $fh = Bio::Structure::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
140 $fh = Bio::Structure::IO->newFh(-format => $format);
143 This constructor behaves like new(), but returns a tied filehandle
144 rather than a Bio::Structure::IO object. You can read structures from this
145 object using the familiar E<lt>E<gt> operator, and write to it using
146 print(). The usual array and $_ semantics work. For example, you can
147 read all structure objects into an array like this:
151 Other operations, such as read(), sysread(), write(), close(), and printf()
154 =head1 OBJECT METHODS
156 See below for more detailed summaries. The main methods are:
158 =head2 $structure = $structIO-E<gt>next_structure()
160 Fetch the next structure from the stream.
162 =head2 $structIO-E<gt>write_structure($struc [,$another_struc,...])
164 Write the specified structure(s) to the stream.
166 =head2 TIEHANDLE(), READLINE(), PRINT()
168 These provide the tie interface. See L<perltie> for more details.
174 User feedback is an integral part of the evolution of this and other
175 Bioperl modules. Send your comments and suggestions preferably to one
176 of the Bioperl mailing lists. Your participation is much appreciated.
178 bioperl-l@bioperl.org - General discussion
179 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
181 =head2 Reporting Bugs
183 Report bugs to the Bioperl bug tracking system to help us keep track
184 of the bugs and their resolution.
185 Bug reports can be submitted via the web:
187 http://bugzilla.open-bio.org/
189 =head1 AUTHORS - Ewan Birney, Lincoln Stein, Kris Boulez
191 Email birney@ebi.ac.uk, lstein@cshl.org, kris.boulez@algonomics.com
196 The rest of the documentation details each of the object
197 methods. Internal methods are usually preceded with a _
201 # Let the code begin...
203 package Bio
::Structure
::IO
;
210 use base
qw(Bio::Root::Root Bio::Root::IO);
215 Usage : $stream = Bio::Structure::IO->new(-file => $filename, -format => 'Format')
216 Function: Returns a new Structure::IO stream
217 Returns : A Bio::Structure::IO handler initialised with the appropriate format
218 Args : -file => $filename
220 -fh => filehandle to attach to
227 my ($caller,@args) = @_;
228 my $class = ref($caller) || $caller;
230 # or do we want to call SUPER on an object if $caller is an
232 if( $class =~ /Bio::Structure::IO::(\S+)/ ) {
233 my ($self) = $class->SUPER::new
(@args);
234 $self->_initialize(@args);
239 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
240 my $format = $param{'-format'} ||
241 $class->_guess_format( $param{-file
} || $ARGV[0] ) ||
243 $format = "\L$format"; # normalize capitalization to lower case
245 # normalize capitalization
246 return unless( &_load_format_module
($format) );
247 return "Bio::Structure::IO::$format"->new(@args);
254 Usage : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
255 Function: does a new() followed by an fh()
256 Example : $fh = Bio::Structure::IO->newFh(-file=>$filename,-format=>'Format')
257 $structure = <$fh>; # read a structure object
258 print $fh $structure; # write a structure object
259 Returns : filehandle tied to the Bio::Structure::IO::Fh class
266 return unless my $self = $class->new(@_);
275 Example : $fh = $obj->fh; # make a tied filehandle
276 $structure = <$fh>; # read a structure object
277 print $fh $structure; # write a structure object
278 Returns : filehandle tied to the Bio::Structure::IO::Fh class
286 my $class = ref($self) || $self;
287 my $s = Symbol
::gensym
;
288 tie
$$s,$class,$self;
293 # _initialize is chained for all Structure::IO classes
296 my($self, @args) = @_;
298 # not really necessary unless we put more in RootI
299 $self->SUPER::_initialize
(@args);
301 # initialize the IO part
302 $self->_initialize_io(@args);
305 =head2 next_structure
307 Title : next_structure
308 Usage : $structure = $stream->next_structure
309 Function: Reads the next structure object from the stream and returns a
310 Bio::Structure::Entry object.
312 Certain driver modules may encounter entries in the stream that
313 are either misformatted or that use syntax not yet understood
314 by the driver. If such an incident is recoverable, e.g., by
315 dismissing a feature of a feature table or some other non-mandatory
316 part of an entry, the driver will issue a warning. In the case
317 of a non-recoverable situation an exception will be thrown.
318 Do not assume that you can resume parsing the same stream after
319 catching the exception. Note that you can always turn recoverable
320 errors into exceptions by calling $stream->verbose(2) (see
321 Bio::Root::RootI POD page).
322 Returns : a Bio::Structure::Entry object
328 my ($self, $struc) = @_;
329 $self->throw("Sorry, you cannot read from a generic Bio::Structure::IO object.");
332 # Do we want people to read out the sequence directly from a $structIO stream
334 ##=head2 next_primary_seq
336 ## Title : next_primary_seq
337 ## Usage : $seq = $stream->next_primary_seq
338 ## Function: Provides a primaryseq type of sequence object
339 ## Returns : A Bio::PrimarySeqI object
345 ##sub next_primary_seq {
348 ## # in this case, we default to next_seq. This is because
349 ## # Bio::Seq's are Bio::PrimarySeqI objects. However we
350 ## # expect certain subclasses to override this method to provide
351 ## # less parsing-heavy ways of retrieving the objects
353 ## return $self->next_seq();
356 =head2 write_structure
358 Title : write_structure
359 Usage : $stream->write_structure($structure)
360 Function: writes the $structure object into the stream
361 Returns : 1 for success and 0 for error
362 Args : Bio::Structure object
367 my ($self, $struc) = @_;
368 $self->throw("Sorry, you cannot write to a generic Bio::Structure::IO object.");
372 # Do we need this here?
377 ## Usage : $self->alphabet($newval)
378 ## Function: Set/get the molecule type for the Seq objects to be created.
379 ## Example : $seqio->alphabet('protein')
380 ## Returns : value of alphabet: 'dna', 'rna', or 'protein'
381 ## Args : newvalue (optional)
382 ## Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein'
387 ## my ($self, $value) = @_;
389 ## if ( defined $value) {
390 ## # instead of hard-coding the allowed values once more, we check by
391 ## # creating a dummy sequence object
393 ## my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
396 ## $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
398 ## $self->{'alphabet'} = "\L$value";
400 ## return $self->{'alphabet'};
403 =head2 _load_format_module
405 Title : _load_format_module
406 Usage : *INTERNAL Structure::IO stuff*
407 Function: Loads up (like use) a module at run time on demand
414 sub _load_format_module
{
416 my ($module, $load, $m);
418 $module = "_<Bio/Structure/IO/$format.pm";
419 $load = "Bio/Structure/IO/$format.pm";
421 return 1 if $main::{$module};
427 $load: $format cannot be found
429 For more information about the Structure::IO system please see the
430 Bio::Structure::IO docs. This includes ways of checking for formats at
431 compile time, not run time
439 =head2 _concatenate_lines
441 Title : _concatenate_lines
442 Usage : $s = _concatenate_lines($line, $continuation_line)
443 Function: Private. Concatenates two strings assuming that the second stems
444 from a continuation line of the first. Adds a space between both
445 unless the first ends with a dash.
447 Takes care of either arg being empty.
454 sub _concatenate_lines
{
455 my ($self, $s1, $s2) = @_;
456 $s1 .= " " if($s1 && ($s1 !~ /-$/) && $s2);
457 return ($s1 ?
$s1 : "") . ($s2 ?
$s2 : "");
463 Usage : $obj->_filehandle($newval)
464 Function: This method is deprecated. Call _fh() instead.
466 Returns : value of _filehandle
467 Args : newvalue (optional)
473 my ($self,@args) = @_;
474 return $self->_fh(@args);
479 Title : _guess_format
480 Usage : $obj->_guess_format($filename)
483 Returns : guessed format of filename (lower case)
490 return unless $_ = shift;
491 return 'fasta' if /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i;
492 return 'genbank' if /\.(gb|gbank|genbank)$/i;
493 return 'scf' if /\.scf$/i;
494 return 'pir' if /\.pir$/i;
495 return 'embl' if /\.(embl|ebl|emb|dat)$/i;
496 return 'raw' if /\.(txt)$/i;
497 return 'gcg' if /\.gcg$/i;
498 return 'ace' if /\.ace$/i;
499 return 'bsml' if /\.(bsm|bsml)$/i;
500 return 'pdb' if /\.(ent|pdb)$/i;
510 my ($class,$val) = @_;
511 return bless {'structio' => $val}, $class;
516 return $self->{'structio'}->next_seq() unless wantarray;
518 push @list, $obj while $obj = $self->{'structio'}->next_seq();
524 $self->{'structio'}->write_seq(@_);