3 # BioPerl module for Bio::ClusterIO.pm
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://anatomy.otago.ac.nz/meg
11 # You may distribute this module under the same terms as perl itself
15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
18 # April 17, 2002 - Initial implementation by Andrew Macgregor
19 # POD documentation - main docs before the code
23 Bio::ClusterIO - Handler for Cluster Formats
27 #NB: This example is unigene specific
31 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
32 '-format' => "unigene");
33 # note: we quote -format to keep older versions of perl from complaining.
35 while ( my $in = $stream->next_cluster() ) {
36 print $in->unigene_id() . "\n";
37 while ( my $sequence = $in->next_seq() ) {
38 print $sequence->accession_number() . "\n";
41 # Parsing errors are printed to STDERR.
45 The ClusterIO module works with the ClusterIO format module to read
46 various cluster formats such as NCBI UniGene.
51 =head2 Bio::ClusterIO-E<gt>new()
53 $str = Bio::ClusterIO->new(-file => 'filename',
56 The new() class method constructs a new Bio::ClusterIO object. The
57 returned object can be used to retrieve or print cluster
58 objects. new() accepts the following parameters:
64 A file path to be opened for reading.
68 Specify the format of the file. Supported formats include:
70 unigene *.data UniGene build files.
71 dbsnp *.xml dbSNP XML files
73 If no format is specified and a filename is given, then the module
74 will attempt to deduce it from the filename. If this is unsuccessful,
75 the main UniGene build format is assumed.
77 The format name is case insensitive. 'UNIGENE', 'UniGene' and
78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
84 See below for more detailed summaries. The main methods are:
86 =head2 $cluster = $str-E<gt>next_cluster()
88 Fetch the next cluster from the stream.
91 =head2 TIEHANDLE(), READLINE(), PRINT()
93 These I've left in here because they were in the SeqIO
94 module. Feedback appreciated. There they provide the tie interface.
95 See L<perltie> for more details.
101 User feedback is an integral part of the evolution of this
102 and other Bioperl modules. Send your comments and suggestions preferably
103 to one of the Bioperl mailing lists.
104 Your participation is much appreciated.
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
109 =head2 Reporting Bugs
111 Report bugs to the Bioperl bug tracking system to help us keep track
112 of the bugs and their resolution. Bug reports can be submitted via the
115 http://bugzilla.open-bio.org/
117 =head1 AUTHOR - Andrew Macgregor
119 Email andrew@anatomy.otago.ac.nz
123 The rest of the documentation details each of the object
124 methods. Internal methods are usually preceded with a _
129 # Let the code begin...
131 package Bio
::ClusterIO
;
136 use base
qw(Bio::Root::Root Bio::Root::IO);
143 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
144 Function: Returns a new cluster stream
145 Returns : A format-specific handler object (class Bio::ClusterIO::<format>) for the requested format
146 Args : -file => $filename
# Factory-style constructor body (the enclosing sub header lies outside the
# visible lines). Two paths:
#   * invoked on a class whose name matches Bio::ClusterIO::<something>:
#     build the object through SUPER::new and run _initialize on it with the
#     same arguments;
#   * otherwise: work out a format name, load the matching format module and
#     hand construction over to the class "Bio::ClusterIO::<format>".
# The invocant may be a class name or an object; ref() reduces an object to
# the name of its class.
155 my ($caller,@args) = @_;
156 my $class = ref($caller) || $caller;
158 # or do we want to call SUPER on an object if $caller is an
160 if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
161 my ($self) = $class->SUPER::new
(@args);
162 $self->_initialize(@args);
# %param is populated outside the visible lines (presumably from @args --
# TODO confirm). This adds a lower-cased copy of every key so that option
# names can be looked up whatever capitalisation the caller used.
167 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
# An explicit -format wins; otherwise the format is guessed from the -file
# argument, falling back to the first command-line argument ($ARGV[0]).
168 my $format = $param{'-format'} ||
169 $class->_guess_format( $param{-file
} || $ARGV[0] );
170 $format = "\L$format"; # normalize capitalization to lower case
# Bare return (undef / empty list) when the format module cannot be loaded;
# otherwise delegate to the format class's own new() with the original args.
172 return unless( $class->_load_format_module($format) );
173 return "Bio::ClusterIO::$format"->new(@args);
178 # _initialize is chained for all ClusterIO classes
# Shared initialisation step: forwards the constructor arguments unchanged to
# _initialize_io. That method is not defined in the visible lines (presumably
# supplied by the Bio::Root::IO base class -- TODO confirm).
181 my($self, @args) = @_;
182 # initialize the IO part
183 $self->_initialize_io(@args);
189 Usage : $cluster = $stream->next_cluster()
190 Function: Reads the next cluster object from the stream and returns it.
191 Returns : a L<Bio::ClusterI> compliant object
# Generic-class implementation: always calls throw() with a message stating
# that a generic Bio::ClusterIO object cannot be read from.
# NOTE(review): the second argument ($seq) is unpacked but not used in the
# visible lines.
198 my ($self, $seq) = @_;
199 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
202 =head2 cluster_factory
204 Title : cluster_factory
205 Usage : $obj->cluster_factory($newval)
206 Function: Get/set the object factory to use for creating the cluster
209 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
210 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
211 compliant object or undef, optional)
# Combined getter/setter. With an argument left in @_, store it under the
# 'cluster_factory' key and return it; the test is on the argument count, so
# an explicit undef is stored too. Without an argument, return the stored
# value. $self is assigned outside the visible lines.
219 return $self->{'cluster_factory'} = shift if @_;
220 return $self->{'cluster_factory'};
223 =head2 object_factory
225 Title : object_factory
226 Usage : $obj->object_factory($newval)
227 Function: This is an alias to cluster_factory with a more generic name.
229 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
230 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
231 compliant object or undef, optional)
# Alias: takes the invocant off the front of @_ and forwards whatever
# arguments remain to cluster_factory(), returning its result.
237 return shift->cluster_factory(@_);
240 =head2 _load_format_module
242 Title : _load_format_module
243 Usage : *INTERNAL ClusterIO stuff*
244 Function: Loads up (like use) a module at run time on demand
# Load the module that implements a given cluster format at run time.
#   $format - format name; it is appended to "Bio::ClusterIO::" to form the
#             name of the module to load.
# The load itself is delegated to _load_module (not defined in the visible
# lines; presumably inherited -- TODO confirm) and its result is kept in $ok.
# Only part of the body is visible here; the remaining text belongs to the
# diagnostic printed when loading fails and must stay byte-for-byte as is.
251 sub _load_format_module
{
252 my ($self,$format) = @_;
253 my $module = "Bio::ClusterIO::" . $format;
257 $ok = $self->_load_module($module);
261 $self: could not load $format - for more details on supported formats please see the ClusterIO docs
271 Title : _guess_format
272 Usage : $obj->_guess_format($filename)
273 Function: guess format based on file suffix
275 Returns : guessed format of filename (lower case)
277 Notes : formats that _guess_format() will guess include unigene and dbsnp
# Map a file name to a format name by its suffix, matched case-insensitively:
#   *.data -> 'unigene'
#   *.xml  -> 'dbsnp'
# The next argument is assigned to the global $_ (not localised in the visible
# lines) so the patterns below can match it implicitly; a missing or false
# name gives a bare return. No fallback for other suffixes is visible here.
283 return unless $_ = shift;
284 return 'unigene' if /\.(data)$/i;
285 return 'dbsnp' if /\.(xml)$/i;
294 # I need some direction on these!! The module works so I haven't fiddled with them!
# Tie constructor: wraps the supplied value in a new hash under the 'seqio'
# key and blesses that hash into $class.
297 my ($class,$val) = @_;
298 return bless {'seqio' => $val}, $class;
# Scalar context: return the result of a single next_seq() call on the
# wrapped object.
# NOTE(review): the wrapped object is read through next_seq(), whereas this
# module's documented reader method is next_cluster() -- confirm the wrapped
# object really provides next_seq().
303 return $self->{'seqio'}->next_seq() unless wantarray;
# List context: keep calling next_seq() until it returns a false value,
# collecting each result in @list.
305 push @list, $obj while $obj = $self->{'seqio'}->next_seq();
# Forward the current contents of @_ to write_seq() on the wrapped object.
311 $self->{'seqio'}->write_seq(@_);