Bump RC to 2; will tag, bag, and ship tomorrow after tests
[bioperl-live.git] / Bio / DB / DBFetch.pm
blob06474a324765f4943ad1197f363f75de2b9d816c
1 # $Id$
3 # BioPerl module for Bio::DB::DBFetch
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
17 =head1 SYNOPSIS
19 #do not use this module directly
21 =head1 DESCRIPTION
23 Allows the dynamic retrieval of entries from databases using the
24 dbfetch script at EBI:
25 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
27 In order to make changes transparent we have host type (currently only
28 ebi) and location (defaults to ebi) separated out. This allows later
29 additions of more servers in different geographical locations.
31 This is a superclass which is called by instantiable subclasses with
32 correct parameters.
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to one
40 of the Bioperl mailing lists. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 =head2 Reporting Bugs
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 the bugs and their resolution. Bug reports can be submitted via the
49 web:
51 http://bugzilla.open-bio.org/
53 =head1 AUTHOR - Heikki Lehvaslaiho
55 Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
57 =head1 APPENDIX
59 The rest of the documentation details each of the object
60 methods. Internal methods are usually preceded with a _
62 =cut
64 # Let the code begin...
66 package Bio::DB::DBFetch;
67 use strict;
68 use vars qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
69 $DEFAULTSERVERTYPE);
71 $MODVERSION = '0.1';
72 use HTTP::Request::Common;
74 use base qw(Bio::DB::WebDBSeqI);
76 # the new way to make modules a little more lightweight
78 BEGIN {
79 # global vars
80 $DEFAULTSERVERTYPE = 'dbfetch';
81 $DEFAULTLOCATION = 'ebi';
85 =head1 Routines from Bio::DB::WebDBSeqI
87 =head2 get_request
89 Title : get_request
90 Usage : my $url = $self->get_request
91 Function: returns a HTTP::Request object
92 Returns :
93 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
95 =cut
97 sub get_request {
98 my ($self, @qualifiers) = @_;
99 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
100 @qualifiers);
102 $self->throw("Must specify a value for UIDs to fetch")
103 unless defined $uids;
104 my $tmp;
105 my $format_string = '';
106 $format ||= $self->default_format;
107 ($format, $tmp) = $self->request_format($format);
108 $format_string = "&format=$format";
109 my $url = $self->location_url();
110 my $uid;
111 if( ref($uids) =~ /ARRAY/i ) {
112 $uid = join (',', @$uids);
113 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
114 if scalar @$uids >50;
115 } else {
116 $uid = $uids;
119 return GET $url. $format_string. '&id='. $uid;
123 =head2 postprocess_data
125 Title : postprocess_data
126 Usage : $self->postprocess_data ( 'type' => 'string',
127 'location' => \$datastr);
128 Function: process downloaded data before loading into a Bio::SeqIO
129 Returns : void
130 Args : hash with two keys - 'type' can be 'string' or 'file'
131 - 'location' either file location or string
132 reference containing data
134 =cut
136 # remove occasional blank lines at top of web output
137 sub postprocess_data {
138 my ($self, %args) = @_;
139 if ($args{type} eq 'string') {
140 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
142 elsif ($args{type} eq 'file') {
143 my $F;
144 open $F,"<", $args{location} or $self->throw("Cannot open $args{location}: $!");
145 my @data = <$F>;
146 for (@data) {
147 last unless /^\s+$/;
148 shift @data;
150 open $F,">", $args{location} or $self->throw("Cannot write to $args{location}: $!");
151 print $F @data;
152 close $F;
156 =head2 default_format
158 Title : default_format
159 Usage : my $format = $self->default_format
160 Function: Returns default sequence format for this module
161 Returns : string
162 Args : none
164 =cut
166 sub default_format {
167 my ($self) = @_;
168 return $self->{'_default_format'};
171 =head1 Bio::DB::DBFetch specific routines
173 =head2 get_Stream_by_id
175 Title : get_Stream_by_id
176 Usage : $seq = $db->get_Stream_by_id($ref);
177 Function: Retrieves Seq objects from the server 'en masse', rather than one
178 at a time. For large numbers of sequences, this is far superior
179 than get_Stream_by_[id/acc]().
180 Example :
181 Returns : a Bio::SeqIO stream object
182 Args : $ref : either an array reference, a filename, or a filehandle
183 from which to get the list of unique ids/accession numbers.
185 NOTE: for backward compatibility, this method is also called
186 get_Stream_by_batch.
188 =cut
190 sub get_Stream_by_id {
191 my ($self, $ids) = @_;
192 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
195 =head2 get_Seq_by_version
197 Title : get_Seq_by_version
198 Usage : $seq = $db->get_Seq_by_version('X77802.1');
199 Function: Gets a Bio::Seq object by accession number
200 Returns : A Bio::Seq object
201 Args : version number (as a string)
202 Throws : "version does not exist" exception
204 =cut
206 sub get_Seq_by_version {
207 my ($self,$seqid) = @_;
208 my $seqio = $self->get_Stream_by_acc([$seqid]);
209 $self->throw("version does not exist") if( !defined $seqio );
210 return $seqio->next_seq();
213 =head2 request_format
215 Title : request_format
216 Usage : my ($req_format, $ioformat) = $self->request_format;
217 $self->request_format("genbank");
218 $self->request_format("fasta");
219 Function: Get/Set sequence format retrieval. The get-form will normally not
220 be used outside of this and derived modules.
221 Returns : Array of two strings, the first representing the format for
222 retrieval, and the second specifying the corresponding SeqIO format.
223 Args : $format = sequence format
225 =cut
227 sub request_format {
228 my ($self, $value) = @_;
229 if( defined $value ) {
230 $value = lc $value;
231 $self->{'_format'} = $value;
232 return ($value, $value);
234 $value = $self->{'_format'};
235 if( $value and defined $self->formatmap->{$value} ) {
236 return ($value, $self->formatmap->{$value});
237 } else {
238 # Try to fall back to a default.
239 return ($self->default_format, $self->default_format );
244 =head2 servertype
246 Title : servertype
247 Usage : my $servertype = $self->servertype
248 $self->servertype($servertype);
249 Function: Get/Set server type
250 Returns : string
251 Args : server type string [optional]
253 =cut
255 sub servertype {
256 my ($self, $servertype) = @_;
257 if( defined $servertype && $servertype ne '') {
258 $self->throw("You gave an invalid server type ($servertype)".
259 " - available types are ".
260 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
261 $self->{'_servertype'} = $servertype;
263 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
264 return $self->{'_servertype'};
267 =head2 hostlocation
269 Title : hostlocation
270 Usage : my $location = $self->hostlocation()
271 $self->hostlocation($location)
272 Function: Set/Get Hostlocation
273 Returns : string representing hostlocation
274 Args : string specifying hostlocation [optional]
276 =cut
278 sub hostlocation {
279 my ($self, $location ) = @_;
280 $location = lc $location;
281 my $servertype = $self->servertype;
282 $self->throw("Must have a valid servertype defined not $servertype")
283 unless defined $servertype;
284 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
285 if( defined $location && $location ne '' ) {
286 if( ! $hosts{$location} ) {
287 $self->throw("Must specify a known host, not $location,".
288 " possible values (".
289 join(",", sort keys %hosts ). ")");
291 $self->{'_hostlocation'} = $location;
293 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
294 return $self->{'_hostlocation'};
297 =head2 location_url
299 Title : location
300 Usage : my $url = $self->location_url()
301 Function: Get host url
302 Returns : string representing url
303 Args : none
305 =cut
307 sub location_url {
308 my ($self) = @_;
309 my $servertype = $self->servertype();
310 my $location = $self->hostlocation();
311 if( ! defined $location || !defined $servertype ) {
312 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
314 return sprintf($self->hosts->{$servertype}->{'baseurl'},
315 $self->hosts->{$servertype}->{'hosts'}->{$location});
318 =head1 Bio::DB::DBFetch routines
320 These methods allow subclasses to pass parameters.
322 =head2 hosts
324 Title : hosts
325 Usage :
326 Function: get/set for host hash
327 Returns :
328 Args : optional hash
330 =cut
332 sub hosts {
333 my ($self, $value) = @_;
334 if (defined $value) {
335 $self->{'_hosts'} = $value;
337 unless (exists $self->{'_hosts'}) {
338 return ('');
339 } else {
340 return $self->{'_hosts'};
344 =head2 formatmap
346 Title : formatmap
347 Usage :
348 Function: get/set for format hash
349 Returns :
350 Args : optional hash
352 =cut
354 sub formatmap {
355 my ($self, $value) = @_;
356 if (defined $value) {
357 $self->{'_formatmap'} = $value;
359 unless (exists $self->{'_formatmap'}) {
360 return ('');
361 } else {
362 return $self->{'_formatmap'};
368 __END__