sync w/ main trunk
[bioperl-live.git] / Bio / DB / Biblio / biofetch.pm
blob92a1cc14e81b5092575b50b3a3323f7f94b24b50
1 # $Id$
3 # BioPerl module Bio::DB::Biblio::biofetch.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # For copyright and disclaimer see below.
10 # POD documentation - main docs before the code
12 =head1 NAME
14 Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic
15 citation retrieval
17 =head1 SYNOPSIS
19 Do not use this object directly, only access it through the
20 I<Bio::Biblio> module:
22 use Bio::Biblio;
23 my $biblio = Bio::Biblio->new(-access => 'biofetch');
24 my $ref = $biblio->get_by_id('20063307'));
26 my $ids = ['20063307', '98276153'];
27 my $refio = $biblio->get_all($ids);
28 while ($ref = $refio->next_bibref) {
29 print $ref->identifier, "\n";
32 =head1 DESCRIPTION
34 This class uses BioFetch protocol based service to retrieve Medline
35 references by their ID.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 L<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via the
63 web:
65 http://bugzilla.open-bio.org/
67 =head1 AUTHOR
69 Heikki Lehvaslaiho (heikki-at-bioperl-dot-org)
71 =head1 COPYRIGHT
73 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
75 This module is free software; you can redistribute it and/or modify
76 it under the same terms as Perl itself.
78 =head1 DISCLAIMER
80 This software is provided "as is" without warranty of any kind.
82 =head1 BUGS AND LIMITATIONS
84 =over 1
86 =item *
88 Only method get_by_id() is supported.
90 =back
92 =head1 APPENDIX
94 The main documentation details are to be found in
95 L<Bio::DB::BiblioI>.
97 Here is the rest of the object methods. Internal methods are preceded
98 with an underscore _.
100 =cut
103 # Let the code begin...
106 package Bio::DB::Biblio::biofetch;
107 use vars qw(%HOSTS %FORMATMAP $DEFAULTFORMAT $DEFAULTRETRIEVAL_TYPE
108 $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
109 use strict;
111 use Bio::Biblio::IO;
113 use base qw(Bio::DB::DBFetch Bio::Biblio);
115 BEGIN {
117 # you can add your own here theoretically.
118 %HOSTS = (
119 'dbfetch' => {
120 baseurl => 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
121 hosts => {
122 'ebi' => 'www.ebi.ac.uk'
126 %FORMATMAP = ( 'default' => 'medlinexml'
128 $DEFAULTFORMAT = 'medlinexml';
130 $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
131 $DEFAULTRETRIEVAL_TYPE = 'tempfile';
134 sub new {
135 my ($class, @args ) = @_;
136 my $self = $class->SUPER::new(@args);
138 $self->{ '_hosts' } = {};
139 $self->{ '_formatmap' } = {};
141 $self->hosts(\%HOSTS);
142 $self->formatmap(\%FORMATMAP);
143 $self->retrieval_type($DEFAULTRETRIEVAL_TYPE);
144 $self->{'_default_format'} = $DEFAULTFORMAT;
146 return $self;
149 =head2 get_by_id
151 Title : get_by_id
152 Usage : $entry = $db->get__by_id('20063307')
153 Function: Gets a Bio::Biblio::RefI object by its name
154 Returns : a Bio::Biblio::Medline object
155 Args : the id (as a string) of the reference
157 =cut
159 sub get_by_id {
160 my ($self,$id) = @_;
161 my $io = $self->get_Stream_by_id([$id]);
162 $self->throw("id does not exist") if( !defined $io ) ;
163 return $io->next_bibref();
167 =head2 get_all
169 Title : get_all
170 Usage : $seq = $db->get_all($ref);
171 Function: Retrieves reference objects from the server 'en masse',
172 rather than one at a time. For large numbers of sequences,
173 this is far superior than get_by_id().
174 Example :
175 Returns : a stream of Bio::Biblio::Medline objects
176 Args : $ref : either an array reference, a filename, or a filehandle
177 from which to get the list of unique ids/accession numbers.
179 =cut
181 sub get_all {
182 my ($self, $ids) = @_;
183 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
186 =head2 get_seq_stream
188 Title : get_seq_stream
189 Usage : my $seqio = $self->get_seq_stream(%qualifiers)
190 Function: builds a url and queries a web db
191 Returns : a Bio::SeqIO stream capable of producing sequence
192 Args : %qualifiers = a hash qualifiers that the implementing class
193 will process to make a url suitable for web querying
195 =cut
197 sub get_seq_stream {
198 my ($self, %qualifiers) = @_;
199 my ($rformat, $ioformat) = $self->request_format();
200 my $seen = 0;
201 foreach my $key ( keys %qualifiers ) {
202 if( $key =~ /format/i ) {
203 $rformat = $qualifiers{$key};
204 $seen = 1;
207 $qualifiers{'-format'} = $rformat if( !$seen);
208 ($rformat, $ioformat) = $self->request_format($rformat);
210 my $request = $self->get_request(%qualifiers);
211 my ($stream,$resp);
212 if ( $self->retrieval_type =~ /temp/i ) {
213 my $dir = $self->io()->tempdir( CLEANUP => 1);
214 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
215 close $fh;
216 my ($resp) = $self->_request($request, $tmpfile);
217 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
218 $self->throw("WebDBSeqI Error - check query sequences!\n");
220 $self->postprocess_data('type' => 'file',
221 'location' => $tmpfile);
222 # this may get reset when requesting batch mode
223 ($rformat,$ioformat) = $self->request_format();
224 if ( $self->verbose > 0 ) {
225 open(my $ERR, "<", $tmpfile);
226 while(<$ERR>) { $self->debug($_);}
228 $stream = Bio::Biblio::IO->new('-format' => $ioformat,
229 '-file' => $tmpfile);
230 } elsif ( $self->retrieval_type =~ /io_string/i ) {
231 my ($resp) = $self->_request($request);
232 my $content = $resp->content_ref;
233 $self->debug( "content is $$content\n");
234 if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) {
235 $self->throw("WebDBSeqI Error - check query sequences!\n");
237 ($rformat,$ioformat) = $self->request_format();
238 $self->postprocess_data('type'=> 'string',
239 'location' => $content);
240 $stream = Bio::Biblio::IO->new('-format' => $ioformat,
241 # '-data' => "<tag>". $$content. "</tag>");
242 '-data' => $$content
244 } else {
245 $self->throw("retrieval type " . $self->retrieval_type .
246 " unsupported\n");
248 return $stream;
252 =head2 postprocess_data
254 Title : postprocess_data
255 Usage : $self->postprocess_data ( 'type' => 'string',
256 'location' => \$datastr);
257 Function: process downloaded data before loading into a Bio::SeqIO
258 Returns : void
259 Args : hash with two keys - 'type' can be 'string' or 'file'
260 - 'location' either file location or string
261 reference containing data
263 =cut
265 # the default method, works for genbank/genpept, other classes should
266 # override it with their own method.
268 sub postprocess_data {
269 my ($self, %args) = @_;
270 my ($data, $TMP);
271 my $type = uc $args{'type'};
272 my $location = $args{'location'};
273 if( !defined $type || $type eq '' || !defined $location) {
274 return;
275 } elsif( $type eq 'STRING' ) {
276 $data = $$location;
277 } elsif ( $type eq 'FILE' ) {
278 open($TMP, "<", $location) or $self->throw("could not open file $location");
279 my @in = <$TMP>;
280 $data = join("", @in);
283 if( $type eq 'FILE' ) {
284 open($TMP, ">", $location) or $self->throw("could overwrite file $location");
285 print $TMP $data;
286 } elsif ( $type eq 'STRING' ) {
287 ${$args{'location'}} = $data;
290 $self->debug("format is ". $self->request_format(). " data is $data\n");
293 =head2 VERSION and Revision
295 Usage : print $Bio::DB::Biblio::biofetch::VERSION;
296 print $Bio::DB::Biblio::biofetch::Revision;
298 =cut
300 =head2 Defaults
302 Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;
304 =cut
307 __END__