[bug 2450]
[bioperl-live.git] / Bio / ClusterIO.pm
blob4f937f2377ed8a67403a306e1538b514615484f1
1 # $Id$
3 # BioPerl module for Bio::ClusterIO.pm
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://anatomy.otago.ac.nz/meg
11 # You may distribute this module under the same terms as perl itself
13 # _history
15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
16 # by Andrew Macgregor
18 # April 17, 2002 - Initial implementation by Andrew Macgregor
19 # POD documentation - main docs before the code
21 =head1 NAME
23 Bio::ClusterIO - Handler for Cluster Formats
25 =head1 SYNOPSIS
27 #NB: This example is unigene specific
29 use Bio::ClusterIO;
31 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
32 '-format' => "unigene");
33 # note: we quote -format to keep older perls from complaining.
35 while ( my $in = $stream->next_cluster() ) {
36 print $in->unigene_id() . "\n";
37 while ( my $sequence = $in->next_seq() ) {
38 print $sequence->accession_number() . "\n";
41 # Parsing errors are printed to STDERR.
43 =head1 DESCRIPTION
45 The ClusterIO module works with the ClusterIO format module to read
46 various cluster formats such as NCBI UniGene.
49 =head1 CONSTRUCTORS
51 =head2 Bio::ClusterIO-E<gt>new()
53 $str = Bio::ClusterIO->new(-file => 'filename',
54 -format=>$format);
56 The new() class method constructs a new Bio::ClusterIO object. The
57 returned object can be used to retrieve or print cluster
58 objects. new() accepts the following parameters:
60 =over 4
62 =item -file
64 A file path to be opened for reading.
66 =item -format
68 Specify the format of the file. Supported formats include:
70 unigene *.data UniGene build files.
71 dbsnp *.xml dbSNP XML files
73 If no format is specified and a filename is given, then the module
74 will attempt to deduce it from the filename. If this is unsuccessful,
75 the main UniGene build format is assumed.
77 The format name is case insensitive. 'UNIGENE', 'UniGene' and
78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
80 =back
82 =head1 OBJECT METHODS
84 See below for more detailed summaries. The main methods are:
86 =head2 $cluster = $str-E<gt>next_cluster()
88 Fetch the next cluster from the stream.
91 =head2 TIEHANDLE(), READLINE(), PRINT()
93 These I've left in here because they were in the SeqIO
94 module. Feedback appreciated. There they provide the tie interface.
95 See L<perltie> for more details.
97 =head1 FEEDBACK
99 =head2 Mailing Lists
101 User feedback is an integral part of the evolution of this
102 and other Bioperl modules. Send your comments and suggestions preferably
103 to one of the Bioperl mailing lists.
104 Your participation is much appreciated.
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
109 =head2 Reporting Bugs
111 Report bugs to the Bioperl bug tracking system to help us keep track
112 of the bugs and their resolution. Bug reports can be submitted via the
113 web:
115 http://bugzilla.open-bio.org/
117 =head1 AUTHOR - Andrew Macgregor
119 Email andrew@anatomy.otago.ac.nz
121 =head1 APPENDIX
123 The rest of the documentation details each of the object
124 methods. Internal methods are usually preceded with a _
126 =cut
129 # Let the code begin...
131 package Bio::ClusterIO;
133 use strict;
136 use base qw(Bio::Root::Root Bio::Root::IO);
=head2 new

 Title   : new
 Usage   : Bio::ClusterIO->new(-file => $filename, -format => 'format')
 Function: Returns a new cluster stream
 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format,
           or nothing if the format module could not be loaded
 Args    : -file   => $filename
           -format => format (case insensitive). When omitted, the format
                      is guessed from -file (or $ARGV[0]); if that fails,
                      'unigene' is assumed.

=cut

my $entry = 0;

sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::ClusterIO::(\S+)/ ) {
        # Invoked on a concrete format class: construct it through the
        # parent constructor and run the chained initialiser.
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }
    else {
        # Invoked on the generic class: decide on a format, load the
        # matching Bio::ClusterIO::<format> module and delegate to it.
        my %param = @args;
        @param{ map { lc $_ } keys %param } = values %param; # lowercase keys

        # Explicit format wins, then a guess from the file name, and
        # finally the documented default of the UniGene build format.
        my $format = $param{'-format'}
            || $class->_guess_format( $param{-file} || $ARGV[0] )
            || 'unigene';
        $format = "\L$format";    # normalize capitalization to lower case

        return unless( $class->_load_format_module($format) );
        return "Bio::ClusterIO::$format"->new(@args);
    }
}
# _initialize is chained for all ClusterIO classes.
# It hands the constructor arguments to _initialize_io() to set up the
# IO part of the object, and returns whatever that call returns.
sub _initialize {
    my ($self, @io_args) = @_;
    return $self->_initialize_io(@io_args);
}
=head2 next_cluster

 Title   : next_cluster
 Usage   : $cluster = $stream->next_cluster()
 Function: Reads the next cluster object from the stream and returns it.
           This generic implementation only throws; it is meant to be
           overridden by the format-specific classes.
 Returns : a L<Bio::ClusterI> compliant object
 Args    : none

=cut

sub next_cluster {
    my $self = shift;

    # The generic class has no parser of its own.
    $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
}
=head2 cluster_factory

 Title   : cluster_factory
 Usage   : $obj->cluster_factory($newval)
 Function: Get/set the object factory to use for creating the cluster
           objects.
 Example :
 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
 Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI>
           compliant object or undef, optional)

=cut

sub cluster_factory {
    my ($self, @new_value) = @_;

    # Any argument at all (including an explicit undef) counts as a set.
    if (@new_value) {
        $self->{'cluster_factory'} = $new_value[0];
    }
    return $self->{'cluster_factory'};
}
=head2 object_factory

 Title   : object_factory
 Usage   : $obj->object_factory($newval)
 Function: This is an alias to cluster_factory with a more generic name.
 Example :
 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
 Args    : on set, new value (a L<Bio::Factory::ObjectFactoryI>
           compliant object or undef, optional)

=cut

sub object_factory {
    my $self = shift;

    # Pure delegation: remaining arguments are passed through untouched.
    return $self->cluster_factory(@_);
}
=head2 _load_format_module

 Title   : _load_format_module
 Usage   : *INTERNAL ClusterIO stuff*
 Function: Loads up (like use) a module at run time on demand
 Example :
 Returns : the value returned by _load_module() for
           Bio::ClusterIO::<format>, or undef if loading raised an
           exception (the exception is reported on STDERR)
 Args    : the format name

=cut

sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::ClusterIO::$format";

    # Trap load failures so that an unknown format is reported on STDERR
    # and signalled through the return value rather than an exception.
    my $ok = eval { $self->_load_module($module) };
    if ( $@ ) {
        print STDERR
            "$self: could not load $format - for more details on supported formats please see the ClusterIO docs\n"
          . "Exception $@\n";
    }
    return $ok;
}
=head2 _guess_format

 Title   : _guess_format
 Usage   : $obj->_guess_format($filename)
 Function: guess format based on file suffix
 Example :
 Returns : guessed format of filename (lower case), or nothing (undef in
           scalar context) when no file name is given or the suffix is
           not recognised
 Args    : the file name
 Notes   : formats that _guess_format() will guess include unigene and dbsnp

=cut

sub _guess_format {
    my ($class, $filename) = @_;

    # Work on a lexical instead of assigning to the global $_: that would
    # clobber the caller's $_ and die when $_ is aliased to a constant.
    return unless $filename;

    return 'unigene' if $filename =~ /\.(?:data)$/i;
    return 'dbsnp'   if $filename =~ /\.(?:xml)$/i;

    # Unknown suffix: return explicitly so the caller gets a clean false
    # value and can apply its own default.
    return;
}
# Destructor: calls close() on the object being destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
# I need some direction on these!! The module works so I haven't fiddled with them!

# Tie constructor: wraps the given value in a new hash-based object under
# the 'seqio' key, which READLINE and PRINT then use as the stream.
sub TIEHANDLE {
    my ($class, $val) = @_;
    my %tied = ( 'seqio' => $val );
    return bless \%tied, $class;
}
# Tie read hook. In scalar context returns the next object from the wrapped
# stream; in list context drains the stream and returns every object read
# until next_seq() yields a false value.
# NOTE(review): this calls next_seq(), whereas the read method this module
# defines is next_cluster() - confirm which one the tied stream provides.
sub READLINE {
    my ($self) = @_;
    my $stream = $self->{'seqio'};

    return $stream->next_seq() unless wantarray;

    my @objects;
    while ( my $obj = $stream->next_seq() ) {
        push @objects, $obj;
    }
    return @objects;
}
# Tie write hook: forwards everything being printed to write_seq() on the
# wrapped stream and returns that call's result.
sub PRINT {
    my $stream = shift->{'seqio'};
    return $stream->write_seq(@_);
}