3 # BioPerl module Bio::DB::Biblio::biofetch.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # For copyright and disclaimer see below.
10 # POD documentation - main docs before the code
14 Bio::DB::Biblio::biofetch - A BioFetch-based access to a bibliographic
19 Do not use this object directly, only access it through the
20 I<Bio::Biblio> module:
23 my $biblio = Bio::Biblio->new(-access => 'biofetch');
24 my $ref = $biblio->get_by_id('20063307'));
26 my $ids = ['20063307', '98276153'];
27 my $refio = $biblio->get_all($ids);
28 while ($ref = $refio->next_bibref) {
29 print $ref->identifier, "\n";
34 This class uses BioFetch protocol based service to retrieve Medline
35 references by their ID.
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 Please direct usage questions or support issues to the mailing list:
52 L<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via the
65 http://bugzilla.open-bio.org/
69 Heikki Lehvaslaiho (heikki-at-bioperl-dot-org)
73 Copyright (c) 2002 European Bioinformatics Institute. All Rights Reserved.
75 This module is free software; you can redistribute it and/or modify
76 it under the same terms as Perl itself.
80 This software is provided "as is" without warranty of any kind.
82 =head1 BUGS AND LIMITATIONS
88 Only method get_by_id() is supported.
94 The main documentation details are to be found in
97 Here is the rest of the object methods. Internal methods are preceded
103 # Let the code begin...
106 package Bio
::DB
::Biblio
::biofetch
;
107 use vars
qw(%HOSTS %FORMATMAP $DEFAULTFORMAT $DEFAULTRETRIEVAL_TYPE
108 $DEFAULT_SERVICE $DEFAULT_NAMESPACE);
113 use base qw(Bio::DB::DBFetch Bio::Biblio);
117 # you can add your own here theoretically.
120 baseurl
=> 'http://%s/cgi-bin/dbfetch?db=medline&style=raw',
122 'ebi' => 'www.ebi.ac.uk'
126 %FORMATMAP = ( 'default' => 'medlinexml'
128 $DEFAULTFORMAT = 'medlinexml';
130 $DEFAULT_SERVICE = 'http://www.ebi.ac.uk/cgi-bin/dbfetch';
131 $DEFAULTRETRIEVAL_TYPE = 'tempfile';
135 my ($class, @args ) = @_;
136 my $self = $class->SUPER::new
(@args);
138 $self->{ '_hosts' } = {};
139 $self->{ '_formatmap' } = {};
141 $self->hosts(\
%HOSTS);
142 $self->formatmap(\
%FORMATMAP);
143 $self->retrieval_type($DEFAULTRETRIEVAL_TYPE);
144 $self->{'_default_format'} = $DEFAULTFORMAT;
152 Usage : $entry = $db->get__by_id('20063307')
153 Function: Gets a Bio::Biblio::RefI object by its name
154 Returns : a Bio::Biblio::Medline object
155 Args : the id (as a string) of the reference
161 my $io = $self->get_Stream_by_id([$id]);
162 $self->throw("id does not exist") if( !defined $io ) ;
163 return $io->next_bibref();
170 Usage : $seq = $db->get_all($ref);
171 Function: Retrieves reference objects from the server 'en masse',
172 rather than one at a time. For large numbers of sequences,
173 this is far superior than get_by_id().
175 Returns : a stream of Bio::Biblio::Medline objects
176 Args : $ref : either an array reference, a filename, or a filehandle
177 from which to get the list of unique ids/accession numbers.
182 my ($self, $ids) = @_;
183 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
186 =head2 get_seq_stream
188 Title : get_seq_stream
189 Usage : my $seqio = $self->get_seq_stream(%qualifiers)
190 Function: builds a url and queries a web db
191 Returns : a Bio::SeqIO stream capable of producing sequence
192 Args : %qualifiers = a hash qualifiers that the implementing class
193 will process to make a url suitable for web querying
198 my ($self, %qualifiers) = @_;
199 my ($rformat, $ioformat) = $self->request_format();
201 foreach my $key ( keys %qualifiers ) {
202 if( $key =~ /format/i ) {
203 $rformat = $qualifiers{$key};
207 $qualifiers{'-format'} = $rformat if( !$seen);
208 ($rformat, $ioformat) = $self->request_format($rformat);
210 my $request = $self->get_request(%qualifiers);
212 if ( $self->retrieval_type =~ /temp/i ) {
213 my $dir = $self->io()->tempdir( CLEANUP
=> 1);
214 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR
=> $dir );
216 my ($resp) = $self->_request($request, $tmpfile);
217 if( ! -e
$tmpfile || -z
$tmpfile || ! $resp->is_success() ) {
218 $self->throw("WebDBSeqI Error - check query sequences!\n");
220 $self->postprocess_data('type' => 'file',
221 'location' => $tmpfile);
222 # this may get reset when requesting batch mode
223 ($rformat,$ioformat) = $self->request_format();
224 if ( $self->verbose > 0 ) {
225 open(my $ERR, "<", $tmpfile);
226 while(<$ERR>) { $self->debug($_);}
228 $stream = Bio
::Biblio
::IO
->new('-format' => $ioformat,
229 '-file' => $tmpfile);
230 } elsif ( $self->retrieval_type =~ /io_string/i ) {
231 my ($resp) = $self->_request($request);
232 my $content = $resp->content_ref;
233 $self->debug( "content is $$content\n");
234 if( ! $resp->is_success() || length(${$resp->content_ref()}) == 0 ) {
235 $self->throw("WebDBSeqI Error - check query sequences!\n");
237 ($rformat,$ioformat) = $self->request_format();
238 $self->postprocess_data('type'=> 'string',
239 'location' => $content);
240 $stream = Bio
::Biblio
::IO
->new('-format' => $ioformat,
241 # '-data' => "<tag>". $$content. "</tag>");
245 $self->throw("retrieval type " . $self->retrieval_type .
252 =head2 postprocess_data
254 Title : postprocess_data
255 Usage : $self->postprocess_data ( 'type' => 'string',
256 'location' => \$datastr);
257 Function: process downloaded data before loading into a Bio::SeqIO
259 Args : hash with two keys - 'type' can be 'string' or 'file'
260 - 'location' either file location or string
261 reference containing data
265 # the default method, works for genbank/genpept, other classes should
266 # override it with their own method.
268 sub postprocess_data
{
269 my ($self, %args) = @_;
271 my $type = uc $args{'type'};
272 my $location = $args{'location'};
273 if( !defined $type || $type eq '' || !defined $location) {
275 } elsif( $type eq 'STRING' ) {
277 } elsif ( $type eq 'FILE' ) {
278 open($TMP, "<", $location) or $self->throw("could not open file $location");
280 $data = join("", @in);
283 if( $type eq 'FILE' ) {
284 open($TMP, ">", $location) or $self->throw("could overwrite file $location");
286 } elsif ( $type eq 'STRING' ) {
287 ${$args{'location'}} = $data;
290 $self->debug("format is ". $self->request_format(). " data is $data\n");
293 =head2 VERSION and Revision
295 Usage : print $Bio::DB::Biblio::biofetch::VERSION;
296 print $Bio::DB::Biblio::biofetch::Revision;
302 Usage : print $Bio::DB::Biblio::biofetch::DEFAULT_SERVICE;