sync w/ main trunk
[bioperl-live.git] / Bio / Biblio / IO.pm
blob98b01be800a39f6ab1f0d76ee86b7a69769f23b0
1 # $Id$
3 # BioPerl module for Bio::Biblio::IO
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Martin Senger <senger@ebi.ac.uk>
8 # For copyright and disclaimer see below.
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::Biblio::IO - Handling the bibliographic references
16 =head1 SYNOPSIS
18 use Bio::Biblio::IO;
20 # getting citations from a file
21 $in = Bio::Biblio::IO->new ('-file' => 'myfile.xml' ,
22 '-format' => 'medlinexml');
23 # --- OR ---
25 # getting citations from a string
26 $in = Bio::Biblio::IO->new ('-data' => '<MedlineCitation>...</MedlineCitation>' ,
27 '-format' => 'medlinexml');
28 #--- OR ---
30 # getting citations from a string if IO::String is installed
31 use IO::String;
32 $in = Bio::Biblio::IO->new ('-fh' => IO::String->new ($citation),
33 '-format' => 'medlinexml');
35 $in = Bio::Biblio::IO->new(-fh => $io_handle , '-format' => 'medlinexml');
37 #--- OR ---
39 # getting citations from any IO handler
40 $in = Bio::Biblio::IO->new('-fh' => $io_handle ,
41 '-format' => 'medlinexml');
44 # now, having $in, we can read all citations
45 while ( my $citation = $in->next_bibref() ) {
46 &do_something_with_citation ($citation);
49 #--- OR ---
51 # again reading all citation but now a callback defined in your
52 # code is used (note that the reading starts already when new()
53 # is called)
54 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
55 '-file' => $testfile,
56 '-callback' => \&callback);
57 sub callback {
58 my $citation = shift;
59 print $citation->{'_identifier'} . "\n";
62 #Now, to actually get a citation in an XML format,
63 #use I<Bio::Biblio> module which returns an XML string:
65 use Bio::Biblio;
66 use Bio::Biblio::IO;
67 my $xml = Bio::Biblio->new->get_by_id ('12368254');
68 my $reader = Bio::Biblio::IO->new ('-data' => $xml,
69 '-format' => 'medlinexml');
71 while (my $citation = $reader->next_bibref()) {
72 #... do something here with $citation
75 #And, finally, the resulting citation can be received in different
76 #output formats:
78 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
79 '-result' => 'raw');
80 #--- OR ---
82 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
83 '-result' => 'medline2ref');
85 #--- OR ---
87 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
88 '-result' => 'pubmed2ref');
90 =head1 DESCRIPTION
92 Bio::Biblio::IO is a handler module for accessing bibliographic
93 citations. The citations can be in different formats - assuming that
94 there is a corresponding module knowing that format in Bio::Biblio::IO
95 directory (e.g. Bio::Biblio::IO::medlinexml). The format (and the
96 module name) is given by the argument I<-format>.
98 Once an instance of C<Bio::Biblio::IO> class is available, the
99 citations can be read by calling repeatedly method I<next_bibref>:
101 while (my $citation = $reader->next_bibref()) {
102 ... do something here with $citation
105 However, this may imply that all citations were already read into the
106 memory. If you expect a huge amount of citations to be read, you may
107 choose a I<callback> option. Your subroutine is specified in the
108 C<new()> method and is called every time a new citation is available
109 (see an example above in SYNOPSIS).
111 The citations returned by I<next_bibref> or given to your callback
112 routine can be of different formats depending on the argument
113 I<-result>. One result type is I<raw> and it is represented by a
114 simple, not blessed hash table:
116 $io = Bio::Biblio::IO->new('-result' => 'raw');
118 What other result formats are available depends on the module that
119 reads the citations in the first place. At the moment, the following
120 ones are available:
122 $io = Bio::Biblio::IO->new('-result' => 'medline2ref');
124 This is a default result format for reading citations by the
125 I<medlinexml> module. The C<medlinexml> module is again the default
126 one. Which means that you can almost omit arguments (you still need to
127 say where the citations come from):
129 $io = Bio::Biblio::IO->new('-file' => 'data/medline_data.xml');
131 Another result format available is for PUBMED citations (which is a
132 superset of the MEDLINE citations, with a few more tags):
134 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
135 '-result' => 'pubmed2ref',
136 '-data' => $citation);
138 Or, because C<pubmed2ref> is a default one for PUBMED citations, you can say just:
140 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
141 '-data' => $citation);
143 Both C<medline2ref> and C<pubmed2ref> results are objects defined in
144 the directory C<Bio::Biblio>.
146 =head1 SEE ALSO
148 =over 4
150 =item *
152 An example script I<examples/biblio.pl>. It has many options and its
153 own help. The relevant options to this IO module are I<-f>
154 (specifying what file to read) and I<-O> (specifying what result
155 format to achieve).
157 =item *
159 OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs
161 =item *
163 Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
165 =back
167 =head1 FEEDBACK
169 =head2 Mailing Lists
171 User feedback is an integral part of the evolution of this
172 and other Bioperl modules. Send your comments and suggestions preferably
173 to one of the Bioperl mailing lists.
174 Your participation is much appreciated.
176 bioperl-l@bioperl.org - General discussion
177 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
179 =head2 Support
181 Please direct usage questions or support issues to the mailing list:
183 L<bioperl-l@bioperl.org>
185 rather than to the module maintainer directly. Many experienced and
186 responsive experts will be able to look at the problem and quickly
187 address it. Please include a thorough description of the problem
188 with code and data examples if at all possible.
190 =head2 Reporting Bugs
192 Report bugs to the Bioperl bug tracking system to help us keep track of
193 the bugs and their resolution. Bug reports can be submitted via the
194 web:
196 http://bugzilla.open-bio.org/
198 =head1 AUTHOR
200 Martin Senger (senger@ebi.ac.uk)
202 =head1 COPYRIGHT
204 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
206 This module is free software; you can redistribute it and/or modify
207 it under the same terms as Perl itself.
209 =head1 DISCLAIMER
211 This software is provided "as is" without warranty of any kind.
213 =head1 APPENDIX
215 The rest of the documentation details each of the object
216 methods. Internal methods are preceded with a _
218 =cut
221 # Let the code begin...
223 package Bio::Biblio::IO;
225 use strict;
227 use Symbol;
229 use base qw(Bio::Root::Root Bio::Root::IO);
231 my $entry = 0;
# Constructor and factory for bibliographic citation readers.
#
# Usage   : my $in = Bio::Biblio::IO->new(-file   => 'myfile.xml',
#                                         -format => 'medlinexml');
# Returns : an object of the format-specific class
#           "Bio::Biblio::IO::<format>", or nothing when
#           _load_format_module() reports failure
# Args    : named arguments; '-format' (case-insensitive key and value)
#           selects the implementation module, and the complete argument
#           list is handed on unchanged to that module's constructor.
#           Without '-format' the format is guessed from '-file' (or, if
#           that is not given, from $ARGV[0]); the fallback is 'medlinexml'.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Object branch: $caller is an object, or an underlying
    # 'real-work-doing' class (e.g. Bio::Biblio::IO::medlinexml), so call
    # SUPER to create and bless the object and run the chained
    # initialization.
    if ( $class =~ /Bio::Biblio::IO::\S+/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Factory branch: reached only when somebody calls
    # Bio::Biblio::IO->new(...) directly.  It loads the 'real-work-doing'
    # module and calls new() again on that class (unless the loaded module
    # has its own new() method).
    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'medlinexml';
    $format = lc $format;    # normalize capitalization to lower case

    # load module with the real implementation - as defined in $format
    return unless _load_format_module($format);

    # this re-enters new(), this time through its object branch
    return "Bio::Biblio::IO::$format"->new(@args);
}
# Convenience constructor returning a tied handle instead of an object.
#
# Usage   : my $fh = Bio::Biblio::IO->newFh(-file => $file, -format => $format);
# Returns : whatever fh() returns for the freshly created reader, or
#           nothing when new() does not return a true value
# Args    : the same arguments as new(); they are passed through unchanged
sub newFh {
    my ($class, @args) = @_;

    my $self = $class->new(@args)
        or return;

    return $self->fh;
}
# Wraps this reader in a tied handle.
#
# Usage   : my $fh = $reader->fh;
# Returns : a reference to an anonymous glob (from Symbol::gensym) tied to
#           the invocant's class, with the invocant passed as the tie
#           argument
# Args    : none
sub fh {
    my $self  = shift;
    my $class = ref($self) || $self;

    my $glob = Symbol::gensym;

    # Dereferencing the glob reference hands the glob itself to tie(), so
    # the class's TIEHANDLE method is the one that gets invoked.
    tie $$glob, $class, $self;

    return $glob;
}
281 # _initialize is chained for all Bio::Biblio::IO classes
# Chained initializer, invoked by new() on every freshly created object.
#
# Usage   : $self->_initialize(@args)
# Returns : the value returned by _initialize_io()
# Args    : the constructor arguments, forwarded unchanged
sub _initialize {
    my ($self, @args) = @_;

    # initialize the IO part
    return $self->_initialize_io(@args);
}
289 =head2 next_bibref
291 Usage : $citation = stream->next_bibref
292 Function: Reads the next citation object from the stream and returns it.
293 Returns : a Bio::Biblio::Ref citation object, or something else
294 (depending on the '-result' argument given in the 'new()'
295 method).
296 Args : none
298 =cut
# Placeholder implementation: this generic class cannot read citations
# itself, so calling next_bibref() on it always raises an exception through
# the object's throw() method.
sub next_bibref {
    my $self = shift;
    $self->throw("Sorry, you cannot read from a generic Bio::Biblio::IO object.");
}
305 # -----------------------------------------------------------------------------
307 =head2 _load_format_module
309 Usage : $class->_load_format_module ($format)
310 Returns : 1 on success, undef on failure
311 Args : 'format' should contain the last part of the
312 name of a module that does the real implementation
314 It does (in run-time) a similar thing as
316 require Bio::Biblio::IO::$format
318 It throws an exception if it fails to find and load the module
319 (for example, because of the compilation errors in the module).
321 =cut
# Loads, at run time, the module Bio::Biblio::IO::<format>.
#
# Usage   : _load_format_module($format)      (plain function, not a method)
# Returns : 1 on success; on failure an exception is raised through
#           Bio::Root::Root->throw (a bare return follows in case throw
#           ever returns)
# Args    : the last component of the implementation module's name,
#           e.g. 'medlinexml'
sub _load_format_module {
    my ($format) = @_;

    # The format name is interpolated into a path given to require(), so
    # accept only a plain identifier: no path separators, no '..'.
    unless ( defined $format && $format =~ /\A\w+\z/ ) {
        my $shown = defined $format ? $format : '(undef)';
        Bio::Root::Root->throw("Invalid Bio::Biblio::IO format name '$shown'");
        return;
    }

    my $load = "Bio/Biblio/IO/$format.pm";

    # %INC is where require() records the files it has already loaded.
    return 1 if $INC{$load};

    my $loaded = eval { require $load; 1 };
    unless ($loaded) {
        my $error = $@;    # copy at once, before anything can reset $@
        Bio::Root::Root->throw(<<END);
$load: $format cannot be found or loaded
Exception $error
For more information about the Biblio system please see the Bio::Biblio::IO docs.
END
        return;
    }

    return 1;
}
346 =head2 _guess_format
348 Usage : $class->_guess_format ($filename)
349 Returns : string with a guessed format of the input data (e.g. 'medlinexml')
350 Args : a file name whose extension can help to guess its format
352 It makes an expert guess what kind of data are in the given file
353 (but be prepared for $filename to be empty).
355 =cut
# Guesses the citation format from a file name's extension.
#
# Returns 'medlinexml' for names ending in '.xml' or '.medlinexml'
# (case-insensitive); returns nothing for any other name and for a false
# or missing file name.
sub _guess_format {
    my ($class, $filename) = @_;

    # A lexical is used on purpose: assigning to the global $_ would clobber
    # the caller's value, and dies outright when the caller's $_ is aliased
    # to a read-only value (e.g. inside "for ('literal') { ... }").
    return unless $filename;

    return 'medlinexml' if $filename =~ /\.(?:xml|medlinexml)$/i;
    return;
}
# Destructor: closes the object through its close() method.
sub DESTROY {
    my $self = shift;

    # A destructor can run at any moment (e.g. while an exception is being
    # propagated), so keep close() from clobbering the global status
    # variables seen by the interrupted code.
    local ( $., $@, $!, $^E, $? );

    $self->close();
    return;
}
# tie() hook: builds the object that sits behind a tied handle.
#
# Args    : the class name and the value given to tie()
# Returns : a hash blessed into $class that keeps the value under the key
#           'biblio'
sub TIEHANDLE {
    my ($class, $val) = @_;

    my %state = ( 'biblio' => $val );
    return bless \%state, $class;
}
# Tied-handle hook behind <$fh>.
#
# Scalar context : returns the result of one next_bibref() call on the
#                  wrapped object.
# List context   : keeps calling next_bibref() until it returns a false
#                  value and returns everything collected so far.
sub READLINE {
    my $self   = shift;
    my $reader = $self->{'biblio'};

    return $reader->next_bibref() unless wantarray;

    my @list;
    while ( my $obj = $reader->next_bibref() ) {
        push @list, $obj;
    }
    return @list;
}