add some significant milestones
[bioperl-live.git] / Bio / ClusterIO.pm
blobb6318174388f43d6a727c797499bc71f96d9369a
1 # $Id$
3 # BioPerl module for Bio::ClusterIO.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
9 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
10 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
11 # http://anatomy.otago.ac.nz/meg
13 # You may distribute this module under the same terms as perl itself
15 # _history
17 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
18 # by Andrew Macgregor
20 # April 17, 2002 - Initial implementation by Andrew Macgregor
21 # POD documentation - main docs before the code
23 =head1 NAME
25 Bio::ClusterIO - Handler for Cluster Formats
27 =head1 SYNOPSIS
29 #NB: This example is unigene specific
31 use Bio::ClusterIO;
33 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
34 '-format' => "unigene");
35 # note: we quote -format to keep older perl's from complaining.
37 while ( my $in = $stream->next_cluster() ) {
38 print $in->unigene_id() . "\n";
39 while ( my $sequence = $in->next_seq() ) {
40 print $sequence->accession_number() . "\n";
43 # Parsing errors are printed to STDERR.
45 =head1 DESCRIPTION
47 The ClusterIO module works with the ClusterIO format module to read
48 various cluster formats such as NCBI UniGene.
51 =head1 CONSTRUCTORS
53 =head2 Bio::ClusterIO-E<gt>new()
55 $str = Bio::ClusterIO->new(-file => 'filename',
56 -format=>$format);
58 The new() class method constructs a new Bio::ClusterIO object. The
59 returned object can be used to retrieve or print cluster
60 objects. new() accepts the following parameters:
62 =over 4
64 =item -file
66 A file path to be opened for reading.
68 =item -format
70 Specify the format of the file. Supported formats include:
72 unigene *.data UniGene build files.
73 dbsnp *.xml dbSNP XML files
75 If no format is specified and a filename is given, then the module
76 will attempt to deduce it from the filename. If this is unsuccessful,
77 the main UniGene build format is assumed.
79 The format name is case insensitive. 'UNIGENE', 'UniGene' and
80 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
82 =back
84 =head1 OBJECT METHODS
86 See below for more detailed summaries. The main methods are:
88 =head2 $cluster = $str-E<gt>next_cluster()
90 Fetch the next cluster from the stream.
93 =head2 TIEHANDLE(), READLINE(), PRINT()
95 These I've left in here because they were in the SeqIO
96 module. Feedback appreciated. There they provide the tie interface.
97 See L<perltie> for more details.
99 =head1 FEEDBACK
101 =head2 Mailing Lists
103 User feedback is an integral part of the evolution of this
104 and other Bioperl modules. Send your comments and suggestions preferably
105 to one of the Bioperl mailing lists.
106 Your participation is much appreciated.
108 bioperl-l@bioperl.org - General discussion
109 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
111 =head2 Support
113 Please direct usage questions or support issues to the mailing list:
115 I<bioperl-l@bioperl.org>
117 rather than to the module maintainer directly. Many experienced and
118 responsive experts will be able to look at the problem and quickly
119 address it. Please include a thorough description of the problem
120 with code and data examples if at all possible.
122 =head2 Reporting Bugs
124 Report bugs to the Bioperl bug tracking system to help us keep track
125 of the bugs and their resolution. Bug reports can be submitted via the
126 web:
128 http://bugzilla.open-bio.org/
130 =head1 AUTHOR - Andrew Macgregor
132 Email andrew@anatomy.otago.ac.nz
134 =head1 APPENDIX
136 The rest of the documentation details each of the object
137 methods. Internal methods are usually preceded with a _
139 =cut
142 # Let the code begin...
144 package Bio::ClusterIO;
146 use strict;
149 use base qw(Bio::Root::Root Bio::Root::IO);
153 =head2 new
155 Title : new
156 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
157 Function: Returns a new cluster stream
158 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
159 Args : -file => $filename
160 -format => format
162 =cut
165 my $entry = 0;
sub new {
    # Constructor / factory.
    #   Args    : -file   => path of the file to read
    #             -format => cluster format name (case insensitive)
    #   Returns : an object of class Bio::ClusterIO::<format>, or nothing
    #             (undef / empty list) when the format module cannot be loaded.
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Called on a format-specific subclass (Bio::ClusterIO::<format>):
    # build the object through the parent constructor and run the chained
    # initializer.
    if ( $class =~ /Bio::ClusterIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Called on the generic class: decide which format module to delegate to.
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    # Order of precedence: explicit -format, then a guess from the file
    # name, then the UniGene build format as the documented default.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{-file} || $ARGV[0] )
        || 'unigene';
    $format = "\L$format";    # normalize capitalization to lower case

    return unless $class->_load_format_module($format);
    return "Bio::ClusterIO::$format"->new(@args);
}
191 # _initialize is chained for all ClusterIO classes
sub _initialize {
    # Chained initializer: forwards every constructor argument to the
    # IO layer's _initialize_io() and returns whatever that call returns.
    my $self = shift;
    return $self->_initialize_io(@_);
}
199 =head2 next_cluster
201 Title : next_cluster
202 Usage : $cluster = $stream->next_cluster()
203 Function: Reads the next cluster object from the stream and returns it.
204 Returns : a L<Bio::ClusterI> compliant object
205 Args : none
208 =cut
sub next_cluster {
    # The generic class cannot parse anything itself; format-specific
    # subclasses are expected to provide the real implementation.
    my $self = shift;
    $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
}
215 =head2 cluster_factory
217 Title : cluster_factory
218 Usage : $obj->cluster_factory($newval)
219 Function: Get/set the object factory to use for creating the cluster
220 objects.
221 Example :
222 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
223 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
224 compliant object or undef, optional)
227 =cut
sub cluster_factory {
    # Combined getter/setter for the 'cluster_factory' slot.
    # Any argument (including an explicit undef) replaces the stored value;
    # the current value is returned either way.
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'cluster_factory'} = $new_value[0];
    }
    return $self->{'cluster_factory'};
}
236 =head2 object_factory
238 Title : object_factory
239 Usage : $obj->object_factory($newval)
240 Function: This is an alias to cluster_factory with a more generic name.
241 Example :
242 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
243 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
244 compliant object or undef, optional)
247 =cut
sub object_factory {
    # Generic-name alias: delegates straight to cluster_factory() with the
    # same arguments and returns its result.
    my ($self, @args) = @_;
    return $self->cluster_factory(@args);
}
253 =head2 _load_format_module
255 Title : _load_format_module
256 Usage : *INTERNAL ClusterIO stuff*
257 Function: Loads up (like use) a module at run time on demand
258 Example :
259 Returns :
260 Args :
262 =cut
sub _load_format_module {
    # Loads Bio::ClusterIO::<format> at run time via _load_module().
    # Returns whatever _load_module() returned; if loading threw an
    # exception, a diagnostic goes to STDERR and undef is returned.
    my ($self, $format) = @_;
    my $module = "Bio::ClusterIO::$format";

    my $ok = eval { $self->_load_module($module) };
    return $ok unless $@;

    # Loading failed: report it but let the caller decide what to do.
    print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the ClusterIO docs
Exception $@
END
    return $ok;
}
282 =head2 _guess_format
284 Title : _guess_format
285 Usage : $obj->_guess_format($filename)
286 Function: guess format based on file suffix
287 Example :
288 Returns : guessed format of filename (lower case)
289 Args :
290 Notes : formats that _guess_format() will guess include unigene and dbsnp
292 =cut
sub _guess_format {
    # Guess the cluster format from a file name's suffix.
    #   Args    : file name (optional)
    #   Returns : 'unigene' for *.data, 'dbsnp' for *.xml (case insensitive);
    #             nothing (undef / empty list) when no file name is given or
    #             the suffix is not recognised.
    # The file name is held in a lexical rather than assigned to the global
    # $_, so the caller's $_ is never clobbered.
    my ($class, $filename) = @_;

    return unless $filename;
    return 'unigene' if $filename =~ /\.(?:data)$/i;
    return 'dbsnp'   if $filename =~ /\.(?:xml)$/i;
    return;
}
sub DESTROY {
    # Destructor: invoke the object's close() method when it is destroyed.
    my ($self) = @_;
    $self->close();
}
307 # I need some direction on these!! The module works so I haven't fiddled with them!
sub TIEHANDLE {
    # tie() hook: wraps the supplied stream object in a new hash-based
    # object, stored under the 'seqio' key.
    my $class  = shift;
    my $stream = shift;

    my %state = ( 'seqio' => $stream );
    return bless \%state, $class;
}
sub READLINE {
    # tie() hook for <HANDLE>.
    #   Scalar/void context : returns the next object from the wrapped stream.
    #   List context        : drains the stream, returning every remaining
    #                         object (stops at the first false value).
    my ($self) = @_;

    if ( !wantarray ) {
        return $self->{'seqio'}->next_seq();
    }

    my @remaining;
    while ( my $item = $self->{'seqio'}->next_seq() ) {
        push @remaining, $item;
    }
    return @remaining;
}
322 sub PRINT {
323 my $self = shift;
324 $self->{'seqio'}->write_seq(@_);