[bug 3148] switch default to "expasy" until we can work out REST service interface
[bioperl-live.git] / Bio / Biblio / IO.pm
blob6cd8f7ca29e166c16e42054af1a8806d6e692082
2 # BioPerl module for Bio::Biblio::IO
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Martin Senger <senger@ebi.ac.uk>
7 # For copyright and disclaimer see below.
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Biblio::IO - Handling the bibliographic references
15 =head1 SYNOPSIS
17 use Bio::Biblio::IO;
19 # getting citations from a file
20 $in = Bio::Biblio::IO->new ('-file' => 'myfile.xml' ,
21 '-format' => 'medlinexml');
22 # --- OR ---
24 # getting citations from a string
25 $in = Bio::Biblio::IO->new ('-data' => '<MedlineCitation>...</MedlineCitation>' ,
26 '-format' => 'medlinexml');
27 #--- OR ---
29 # getting citations from a string if IO::String is installed
30 use IO::String;
31 $in = Bio::Biblio::IO->new ('-fh' => IO::String->new ($citation),
32 '-format' => 'medlinexml');
34 $in = Bio::Biblio::IO->new(-fh => $io_handle , '-format' => 'medlinexml');
36 #--- OR ---
38 # getting citations from any IO handler
39 $in = Bio::Biblio::IO->new('-fh' => $io_handle ,
40 '-format' => 'medlinexml');
43 # now, having $in, we can read all citations
44 while ( my $citation = $in->next_bibref() ) {
45 &do_something_with_citation ($citation);
46 }
48 #--- OR ---
50 # again reading all citation but now a callback defined in your
51 # code is used (note that the reading starts already when new()
52 # is called)
53 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
54 '-file' => $testfile,
55 '-callback' => \&callback);
56 sub callback {
57 my $citation = shift;
58 print $citation->{'_identifier'} . "\n";
59 }
61 #Now, to actually get a citation in an XML format,
62 #use I<Bio::Biblio> module which returns an XML string:
64 use Bio::Biblio;
65 use Bio::Biblio::IO;
66 my $xml = Bio::Biblio->new->get_by_id ('12368254');
67 my $reader = Bio::Biblio::IO->new ('-data' => $xml,
68 '-format' => 'medlinexml');
70 while (my $citation = $reader->next_bibref()) {
71 #... do something here with $citation
72 }
74 #And, finally, the resulting citation can be received in different
75 #output formats:
77 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
78 '-result' => 'raw');
79 #--- OR ---
81 $io = Bio::Biblio::IO->new('-format' => 'medlinexml',
82 '-result' => 'medline2ref');
84 #--- OR ---
86 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
87 '-result' => 'pubmed2ref');
89 =head1 DESCRIPTION
91 Bio::Biblio::IO is a handler module for accessing bibliographic
92 citations. The citations can be in different formats - assuming that
93 there is a corresponding module knowing that format in Bio::Biblio::IO
94 directory (e.g. Bio::Biblio::IO::medlinexml). The format (and the
95 module name) is given by the argument I<-format>.
97 Once an instance of C<Bio::Biblio::IO> class is available, the
98 citations can be read by calling repeatedly method I<next_bibref>:
100 while (my $citation = $reader->next_bibref()) {
101 ... do something here with $citation
102 }
104 However, this may imply that all citations were already read into the
105 memory. If you expect a huge amount of citations to be read, you may
106 choose a I<callback> option. Your subroutine is specified in the
107 C<new()> method and is called every time a new citation is available
108 (see an example above in SYNOPSIS).
110 The citations returned by I<next_bibref> or given to your callback
111 routine can be of different formats depending on the argument
112 I<-result>. One result type is I<raw> and it is represented by a
113 simple, not blessed hash table:
115 $io = Bio::Biblio::IO->new('-result' => 'raw');
117 What other result formats are available depends on the module that
118 reads the citations in the first place. At the moment, the following
119 ones are available:
121 $io = Bio::Biblio::IO->new('-result' => 'medline2ref');
123 This is a default result format for reading citations by the
124 I<medlinexml> module. The C<medlinexml> module is again the default
125 one. Which means that you can almost omit arguments (you still need to
126 say where the citations come from):
128 $io = Bio::Biblio::IO->new('-file' => 'data/medline_data.xml');
130 Another result format available is for PUBMED citations (which is a
131 super-set of the MEDLINE citations having a few more tags):
133 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
134 '-result' => 'pubmed2ref',
135 '-data' => $citation);
137 Or, because C<pubmed2ref> is a default one for PUBMED citations, you can say just:
139 $io = Bio::Biblio::IO->new('-format' => 'pubmedxml',
140 '-data' => $citation);
142 Both C<medline2ref> and C<pubmed2ref> results are objects defined in
143 the directory C<Bio::Biblio>.
145 =head1 SEE ALSO
147 =over 4
149 =item *
151 An example script I<examples/biblio.pl>. It has many options and its
152 own help. The relevant options to this IO module are I<-f>
153 (specifying what file to read) and I<-O> (specifying what result
154 format to achieve).
156 =item *
158 OpenBQS home page: http://www.ebi.ac.uk/~senger/openbqs
160 =item *
162 Comments to the Perl client: http://www.ebi.ac.uk/~senger/openbqs/Client_perl.html
164 =back
166 =head1 FEEDBACK
168 =head2 Mailing Lists
170 User feedback is an integral part of the evolution of this
171 and other Bioperl modules. Send your comments and suggestions preferably
172 to one of the Bioperl mailing lists.
173 Your participation is much appreciated.
175 bioperl-l@bioperl.org - General discussion
176 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
178 =head2 Support
180 Please direct usage questions or support issues to the mailing list:
182 I<bioperl-l@bioperl.org>
184 rather than to the module maintainer directly. Many experienced and
185 responsive experts will be able to look at the problem and quickly
186 address it. Please include a thorough description of the problem
187 with code and data examples if at all possible.
189 =head2 Reporting Bugs
191 Report bugs to the Bioperl bug tracking system to help us keep track
192 of the bugs and their resolution. Bug reports can be submitted via the
193 web:
195 http://bugzilla.open-bio.org/
197 =head1 AUTHOR
199 Martin Senger (senger@ebi.ac.uk)
201 =head1 COPYRIGHT
203 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
205 This module is free software; you can redistribute it and/or modify
206 it under the same terms as Perl itself.
208 =head1 DISCLAIMER
210 This software is provided "as is" without warranty of any kind.
212 =head1 APPENDIX
214 The rest of the documentation details each of the object
215 methods. Internal methods are preceded with a _
217 =cut
220 # Let the code begin...
222 package Bio::Biblio::IO;
224 use strict;
226 use Symbol;
228 use base qw(Bio::Root::Root Bio::Root::IO);
230 my $entry = 0;
# Factory constructor for citation readers.
#
# When invoked on a class (or an instance of a class) whose name matches
# Bio::Biblio::IO::<something>, it builds the object through the parent
# constructor and then runs _initialize() on it.
#
# Otherwise it acts as a factory: it picks a format -- the '-format'
# argument, else a guess based on '-file' or $ARGV[0], else
# 'medlinexml' -- loads the module Bio::Biblio::IO::<format> and
# delegates to that class's new() with the original arguments.
#
# Args    : named arguments; argument names are matched in lower case
#           here, and the list is handed on unchanged to the format class
# Returns : whatever the format class constructor returns; nothing if
#           the module loader returns false
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # Object branch: we are already in a 'real-work-doing' class, so
    # create and bless through SUPER and initialize the result.
    if ($class =~ /Bio::Biblio::IO::(\S+)/) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    # Factory branch: reached when somebody calls
    # Bio::Biblio::IO->new(...) directly.
    my %param = @args;
    my @lowercased = map { lc $_ } keys %param;
    @param{@lowercased} = values %param;    # add lower-cased copies of all keys

    my $format = $param{'-format'};
    $format ||= $class->_guess_format($param{-file} || $ARGV[0]);
    $format ||= 'medlinexml';
    $format = lc $format;                   # format names are lower case

    # Load the module holding the real implementation for this format.
    _load_format_module($format) or return;

    # This re-enters new(), this time taking the object branch above
    # (unless the loaded module defines its own new()).
    my $implementation = "Bio::Biblio::IO::$format";
    return $implementation->new(@args);
}
# Convenience constructor: builds a reader with new() and returns the
# result of calling fh() on it.
#
# Args    : the same arguments as new()
# Returns : the value of fh() on the new reader; nothing when new()
#           gives back a false value
sub newFh {
    my ($class, @args) = @_;

    my $self = $class->new(@args);
    return unless $self;

    return $self->fh;
}
# Returns a fresh anonymous glob (from Symbol::gensym) that is tied to
# this reader's class, with the reader itself passed as the argument
# to tie.
#
# Returns : the tied glob reference
sub fh {
    my ($self) = @_;

    # Works for both an instance and a plain class name.
    my $tie_class = ref($self) || $self;

    my $glob = Symbol::gensym();
    tie ${$glob}, $tie_class, $self;

    return $glob;
}
280 # _initialize is chained for all Bio::Biblio::IO classes
# Initializes a newly created reader by handing all constructor
# arguments to _initialize_io(), the IO part of the object.
#
# Args    : the arguments given to new()
# Returns : whatever _initialize_io() returns
sub _initialize {
    my ($self, @io_args) = @_;

    return $self->_initialize_io(@io_args);
}
288 =head2 next_bibref
290 Usage : $citation = stream->next_bibref
291 Function: Reads the next citation object from the stream and returns it.
292 Returns : a Bio::Biblio::Ref citation object, or something else
293 (depending on the '-result' argument given in the 'new()'
294 method).
295 Args : none
297 =cut
# Placeholder in the generic class: reading is only implemented by the
# format-specific classes, so calling it here reports an error through
# throw().
sub next_bibref {
    my $self = shift;

    return $self->throw("Sorry, you cannot read from a generic Bio::Biblio::IO object.");
}
304 # -----------------------------------------------------------------------------
306 =head2 _load_format_module
308 Usage : $class->_load_format_module ($format)
309 Returns : 1 on success, undef on failure
310 Args : 'format' should contain the last part of the
311 name of the module that does the real implementation
313 It does (in run-time) a similar thing as
315 require Bio::Biblio::IO::$format
317 It throws an exception if it fails to find and load the module
318 (for example, because of the compilation errors in the module).
320 =cut
# Loads, at run time, the module Bio::Biblio::IO::<format>.
#
# Args    : $format - the last part of the module name (e.g. 'medlinexml');
#           it must consist only of letters, digits and underscores
# Returns : 1 when the module is (or already was) loaded
# Throws  : through Bio::Root::Root->throw when the format name is not a
#           plain identifier, or when the module cannot be found or loaded
sub _load_format_module {
    my ($format) = @_;

    # The name is interpolated into a path handed to require(), so accept
    # only identifier characters. This keeps values such as '../../x'
    # from loading a file outside Bio/Biblio/IO/.
    unless (defined $format && $format =~ /\A[A-Za-z0-9_]+\z/) {
        my $shown = defined $format ? "'$format'" : 'undef';
        Bio::Root::Root->throw(
            "Invalid format name $shown: only letters, digits and underscores are allowed");
        return;    # only reached if throw() returns
    }

    my $load   = "Bio/Biblio/IO/$format.pm";
    my $module = "_<$load";

    # Shortcut kept from the original code: skip the require when main's
    # symbol table already has an entry named after the file.
    # NOTE(review): presumably the entry perl records for a compiled
    # file -- confirm.
    return 1 if $main::{$module};

    eval { require $load; };
    if ($@) {
        Bio::Root::Root->throw(<<END);
$load: $format cannot be found or loaded
Exception $@
For more information about the Biblio system please see the Bio::Biblio::IO docs.
END
        return;    # only reached if throw() returns
    }

    return 1;
}
345 =head2 _guess_format
347 Usage : $class->_guess_format ($filename)
348 Returns : string with a guessed format of the input data (e.g. 'medlinexml')
349 Args : a file name whose extension can help to guess its format
351 It makes an expert guess what kind of data are in the given file
352 (but be prepared that $filename may be empty).
354 =cut
# Guesses the citation format from a file name.
#
# Args    : $filename - a file name; may be undefined or empty
# Returns : 'medlinexml' when the name ends in '.xml' or '.medlinexml'
#           (case-insensitive); nothing otherwise
#
# The file name is kept in a lexical variable. Assigning it to the global
# $_ would overwrite the caller's $_, and would die with "Modification of
# a read-only value attempted" when the caller is iterating over constants.
sub _guess_format {
    my ($class, $filename) = @_;

    return unless $filename;
    return 'medlinexml' if $filename =~ /\.(xml|medlinexml)$/i;
    return;
}
# Destructor: calls close() on the object that is being destroyed.
# NOTE(review): close() is not defined in this part of the file;
# presumably it is inherited from a base class -- confirm.
sub DESTROY {
    my ($self) = @_;

    $self->close;
}
# tie() hook for filehandles: wraps the given value in a new hash,
# stored under the key 'biblio', and blesses that hash into $class.
#
# Args    : $class - class to bless into
#           $val   - the value to wrap; READLINE calls next_bibref() on it
# Returns : the blessed wrapper hash
sub TIEHANDLE {
    my $class  = shift;
    my $reader = shift;

    my %wrapper = ('biblio' => $reader);
    return bless \%wrapper, $class;
}
# tie() hook for <$fh>: reads citations from the wrapped object stored
# under 'biblio'.
#
# Returns : in scalar (or void) context, the result of a single
#           next_bibref() call; in list context, every citation up to
#           (not including) the first false value from next_bibref()
sub READLINE {
    my ($tied) = @_;
    my $stream = $tied->{'biblio'};

    if (wantarray) {
        my @citations;
        while (my $citation = $stream->next_bibref()) {
            push @citations, $citation;
        }
        return @citations;
    }

    return $stream->next_bibref();
}