add skips and a warning re: NeXML v0.9 support
[bioperl-live.git] / Bio / ClusterIO.pm
blob118875df5f1dcabdc397c54a84f17dd4a220b782
2 # BioPerl module for Bio::ClusterIO.pm
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
8 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
9 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
10 # http://anatomy.otago.ac.nz/meg
12 # You may distribute this module under the same terms as perl itself
14 # _history
16 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
17 # by Andrew Macgregor
19 # April 17, 2002 - Initial implementation by Andrew Macgregor
20 # POD documentation - main docs before the code
22 =head1 NAME
24 Bio::ClusterIO - Handler for Cluster Formats
26 =head1 SYNOPSIS
28 #NB: This example is unigene specific
30 use Bio::ClusterIO;
32 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
33 '-format' => "unigene");
34 # note: we quote -format to keep older perl's from complaining.
36 while ( my $in = $stream->next_cluster() ) {
37 print $in->unigene_id() . "\n";
38 while ( my $sequence = $in->next_seq() ) {
39 print $sequence->accession_number() . "\n";
42 # Parsing errors are printed to STDERR.
44 =head1 DESCRIPTION
46 The ClusterIO module works with the ClusterIO format module to read
47 various cluster formats such as NCBI UniGene.
50 =head1 CONSTRUCTORS
52 =head2 Bio::ClusterIO-E<gt>new()
54 $str = Bio::ClusterIO->new(-file => 'filename',
55 -format=>$format);
57 The new() class method constructs a new Bio::ClusterIO object. The
58 returned object can be used to retrieve or print cluster
59 objects. new() accepts the following parameters:
61 =over 4
63 =item -file
65 A file path to be opened for reading.
67 =item -format
69 Specify the format of the file. Supported formats include:
71 unigene *.data UniGene build files.
72 dbsnp *.xml dbSNP XML files
74 If no format is specified and a filename is given, then the module
75 will attempt to deduce it from the filename. If this is unsuccessful,
76 the main UniGene build format is assumed.
78 The format name is case insensitive. 'UNIGENE', 'UniGene' and
79 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
81 =back
83 =head1 OBJECT METHODS
85 See below for more detailed summaries. The main methods are:
87 =head2 $cluster = $str-E<gt>next_cluster()
89 Fetch the next cluster from the stream.
92 =head2 TIEHANDLE(), READLINE(), PRINT()
94 These I've left in here because they were in the SeqIO
95 module. Feedback appreciated. There they provide the tie interface.
96 See L<perltie> for more details.
98 =head1 FEEDBACK
100 =head2 Mailing Lists
102 User feedback is an integral part of the evolution of this
103 and other Bioperl modules. Send your comments and suggestions preferably
104 to one of the Bioperl mailing lists.
105 Your participation is much appreciated.
107 bioperl-l@bioperl.org - General discussion
108 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
110 =head2 Support
112 Please direct usage questions or support issues to the mailing list:
114 I<bioperl-l@bioperl.org>
116 rather than to the module maintainer directly. Many experienced and
117 reponsive experts will be able look at the problem and quickly
118 address it. Please include a thorough description of the problem
119 with code and data examples if at all possible.
121 =head2 Reporting Bugs
123 Report bugs to the Bioperl bug tracking system to help us keep track
124 the bugs and their resolution. Bug reports can be submitted via the
125 web:
127 https://redmine.open-bio.org/projects/bioperl/
129 =head1 AUTHOR - Andrew Macgregor
131 Email andrew@anatomy.otago.ac.nz
133 =head1 APPENDIX
135 The rest of the documentation details each of the object
136 methods. Internal methods are usually preceded with a _
138 =cut
141 # Let the code begin...
143 package Bio::ClusterIO;
145 use strict;
148 use base qw(Bio::Root::Root Bio::Root::IO);
152 =head2 new
154 Title : new
155 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
156 Function: Returns a new cluster stream
157 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
158 Args : -file => $filename
159 -format => format
161 =cut
164 my $entry = 0;
166 sub new {
167 my ($caller,@args) = @_;
168 my $class = ref($caller) || $caller;
170 # or do we want to call SUPER on an object if $caller is an
171 # object?
172 if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
173 my ($self) = $class->SUPER::new(@args);
174 $self->_initialize(@args);
175 return $self;
176 } else {
178 my %param = @args;
179 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
180 my $format = $param{'-format'} ||
181 $class->_guess_format( $param{-file} || $ARGV[0] );
182 $format = "\L$format"; # normalize capitalization to lower case
184 return unless( $class->_load_format_module($format) );
185 return "Bio::ClusterIO::$format"->new(@args);
190 # _initialize is chained for all ClusterIO classes
192 sub _initialize {
193 my($self, @args) = @_;
194 # initialize the IO part
195 $self->_initialize_io(@args);
198 =head2 next_cluster
200 Title : next_cluster
201 Usage : $cluster = $stream->next_cluster()
202 Function: Reads the next cluster object from the stream and returns it.
203 Returns : a L<Bio::ClusterI> compliant object
204 Args : none
207 =cut
209 sub next_cluster {
210 my ($self, $seq) = @_;
211 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
214 =head2 cluster_factory
216 Title : cluster_factory
217 Usage : $obj->cluster_factory($newval)
218 Function: Get/set the object factory to use for creating the cluster
219 objects.
220 Example :
221 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
222 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
223 compliant object or undef, optional)
226 =cut
228 sub cluster_factory{
229 my $self = shift;
231 return $self->{'cluster_factory'} = shift if @_;
232 return $self->{'cluster_factory'};
235 =head2 object_factory
237 Title : object_factory
238 Usage : $obj->object_factory($newval)
239 Function: This is an alias to cluster_factory with a more generic name.
240 Example :
241 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
242 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
243 compliant object or undef, optional)
246 =cut
248 sub object_factory{
249 return shift->cluster_factory(@_);
252 =head2 _load_format_module
254 Title : _load_format_module
255 Usage : *INTERNAL ClusterIO stuff*
256 Function: Loads up (like use) a module at run time on demand
257 Example :
258 Returns :
259 Args :
261 =cut
263 sub _load_format_module {
264 my ($self,$format) = @_;
265 my $module = "Bio::ClusterIO::" . $format;
266 my $ok;
268 eval {
269 $ok = $self->_load_module($module);
271 if ( $@ ) {
272 print STDERR <<END;
273 $self: could not load $format - for more details on supported formats please see the ClusterIO docs
274 Exception $@
278 return $ok;
281 =head2 _guess_format
283 Title : _guess_format
284 Usage : $obj->_guess_format($filename)
285 Function: guess format based on file suffix
286 Example :
287 Returns : guessed format of filename (lower case)
288 Args :
289 Notes : formats that _filehandle() will guess include unigene and dbsnp
291 =cut
293 sub _guess_format {
294 my $class = shift;
295 return unless $_ = shift;
296 return 'unigene' if /\.(data)$/i;
297 return 'dbsnp' if /\.(xml)$/i;
300 sub DESTROY {
301 my $self = shift;
303 $self->close();
306 # I need some direction on these!! The module works so I haven't fiddled with them!
308 sub TIEHANDLE {
309 my ($class,$val) = @_;
310 return bless {'seqio' => $val}, $class;
313 sub READLINE {
314 my $self = shift;
315 return $self->{'seqio'}->next_seq() unless wantarray;
316 my (@list, $obj);
317 push @list, $obj while $obj = $self->{'seqio'}->next_seq();
318 return @list;
321 sub PRINT {
322 my $self = shift;
323 $self->{'seqio'}->write_seq(@_);