3 # BioPerl module for Bio::ClusterIO.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
9 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
10 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
11 # http://anatomy.otago.ac.nz/meg
13 # You may distribute this module under the same terms as perl itself
17 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
20 # April 17, 2002 - Initial implementation by Andrew Macgregor
21 # POD documentation - main docs before the code
25 Bio::ClusterIO - Handler for Cluster Formats
29 #NB: This example is unigene specific
33 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
34 '-format' => "unigene");
35 # note: we quote -format to keep older perls from complaining.
37 while ( my $in = $stream->next_cluster() ) {
38 print $in->unigene_id() . "\n";
39 while ( my $sequence = $in->next_seq() ) {
40 print $sequence->accession_number() . "\n";
43 # Parsing errors are printed to STDERR.
47 The ClusterIO module works with the ClusterIO format module to read
48 various cluster formats such as NCBI UniGene.
53 =head2 Bio::ClusterIO-E<gt>new()
55 $str = Bio::ClusterIO->new(-file => 'filename',
58 The new() class method constructs a new Bio::ClusterIO object. The
59 returned object can be used to retrieve or print cluster
60 objects. new() accepts the following parameters:
66 A file path to be opened for reading.
70 Specify the format of the file. Supported formats include:
72 unigene *.data UniGene build files.
73 dbsnp *.xml dbSNP XML files
75 If no format is specified and a filename is given, then the module
76 will attempt to deduce it from the filename. If this is unsuccessful,
77 the main UniGene build format is assumed.
79 The format name is case insensitive. 'UNIGENE', 'UniGene' and
80 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
86 See below for more detailed summaries. The main methods are:
88 =head2 $cluster = $str-E<gt>next_cluster()
90 Fetch the next cluster from the stream.
93 =head2 TIEHANDLE(), READLINE(), PRINT()
95 These have been retained because they were in the SeqIO module,
96 where they provide the tie interface. Feedback appreciated.
97 See L<perltie> for more details.
103 User feedback is an integral part of the evolution of this
104 and other Bioperl modules. Send your comments and suggestions preferably
105 to one of the Bioperl mailing lists.
106 Your participation is much appreciated.
108 bioperl-l@bioperl.org - General discussion
109 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
113 Please direct usage questions or support issues to the mailing list:
115 I<bioperl-l@bioperl.org>
117 rather than to the module maintainer directly. Many experienced and
118 responsive experts will be able to look at the problem and quickly
119 address it. Please include a thorough description of the problem
120 with code and data examples if at all possible.
122 =head2 Reporting Bugs
124 Report bugs to the Bioperl bug tracking system to help us keep track
125 of the bugs and their resolution. Bug reports can be submitted via the
128 http://bugzilla.open-bio.org/
130 =head1 AUTHOR - Andrew Macgregor
132 Email andrew@anatomy.otago.ac.nz
136 The rest of the documentation details each of the object
137 methods. Internal methods are usually preceded by an underscore (_)
142 # Let the code begin...
144 package Bio
::ClusterIO
;
149 use base
qw(Bio::Root::Root Bio::Root::IO);
156 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
157 Function: Returns a new cluster stream
158 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
159 Args : -file => $filename
168 my ($caller,@args) = @_;
169 my $class = ref($caller) || $caller;
171 # or do we want to call SUPER on an object if $caller is an
173 if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
174 my ($self) = $class->SUPER::new
(@args);
175 $self->_initialize(@args);
180 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
181 my $format = $param{'-format'} ||
182 $class->_guess_format( $param{-file
} || $ARGV[0] );
183 $format = "\L$format"; # normalize capitalization to lower case
185 return unless( $class->_load_format_module($format) );
186 return "Bio::ClusterIO::$format"->new(@args);
191 # _initialize is chained for all ClusterIO classes
194 my($self, @args) = @_;
195 # initialize the IO part
196 $self->_initialize_io(@args);
202 Usage : $cluster = $stream->next_cluster()
203 Function: Reads the next cluster object from the stream and returns it.
204 Returns : a L<Bio::ClusterI> compliant object
211 my ($self, $seq) = @_;
212 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
215 =head2 cluster_factory
217 Title : cluster_factory
218 Usage : $obj->cluster_factory($newval)
219 Function: Get/set the object factory to use for creating the cluster
222 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
223 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
224 compliant object or undef, optional)
232 return $self->{'cluster_factory'} = shift if @_;
233 return $self->{'cluster_factory'};
236 =head2 object_factory
238 Title : object_factory
239 Usage : $obj->object_factory($newval)
240 Function: This is an alias to cluster_factory with a more generic name.
242 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
243 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
244 compliant object or undef, optional)
250 return shift->cluster_factory(@_);
253 =head2 _load_format_module
255 Title : _load_format_module
256 Usage : *INTERNAL ClusterIO stuff*
257 Function: Loads up (like use) a module at run time on demand
264 sub _load_format_module
{
265 my ($self,$format) = @_;
266 my $module = "Bio::ClusterIO::" . $format;
270 $ok = $self->_load_module($module);
274 $self: could not load $format - for more details on supported formats please see the ClusterIO docs
284 Title : _guess_format
285 Usage : $obj->_guess_format($filename)
286 Function: guess format based on file suffix
288 Returns : guessed format of filename (lower case)
290 Notes : formats that _guess_format() will guess include unigene and dbsnp
296 return unless $_ = shift;
297 return 'unigene' if /\.(data)$/i;
298 return 'dbsnp' if /\.(xml)$/i;
307 # I need some direction on these!! The module works so I haven't fiddled with them!
310 my ($class,$val) = @_;
311 return bless {'seqio' => $val}, $class;
316 return $self->{'seqio'}->next_seq() unless wantarray;
318 push @list, $obj while $obj = $self->{'seqio'}->next_seq();
324 $self->{'seqio'}->write_seq(@_);