Add tests for memory leaks and weaken for Issue #81
[bioperl-live.git] / Bio / DB / DBFetch.pm
blob8b0d41e653573a868e81b916f4e0f8f9a335fc80
2 # BioPerl module for Bio::DB::DBFetch
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
18 =head1 SYNOPSIS
20 #do not use this module directly
22 =head1 DESCRIPTION
24 Allows the dynamic retrieval of entries from databases using the
25 dbfetch script at EBI:
26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
28 In order to make changes transparent we have host type (currently only
29 ebi) and location (defaults to ebi) separated out. This allows later
30 additions of more servers in different geographical locations.
32 This is a superclass which is called by instantiable subclasses with
33 correct parameters.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 the bugs and their resolution. Bug reports can be submitted via the
61 web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Heikki Lehvaslaiho
67 Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
69 =head1 APPENDIX
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
74 =cut
76 # Let the code begin...
78 package Bio::DB::DBFetch;
79 use strict;
80 use vars qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
81 $DEFAULTSERVERTYPE);
83 $MODVERSION = '0.1';
84 use HTTP::Request::Common;
86 use base qw(Bio::DB::WebDBSeqI);
88 # the new way to make modules a little more lightweight
90 BEGIN {
91 # global vars
92 $DEFAULTSERVERTYPE = 'dbfetch';
93 $DEFAULTLOCATION = 'ebi';
97 =head1 Routines from Bio::DB::WebDBSeqI
99 =head2 get_request
101 Title : get_request
102 Usage : my $url = $self->get_request
103 Function: returns a HTTP::Request object
104 Returns :
105 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
107 =cut
109 sub get_request {
110 my ($self, @qualifiers) = @_;
111 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
112 @qualifiers);
114 $self->throw("Must specify a value for UIDs to fetch")
115 unless defined $uids;
116 my $tmp;
117 my $format_string = '';
118 $format ||= $self->default_format;
119 ($format, $tmp) = $self->request_format($format);
120 $format_string = "&format=$format";
121 my $url = $self->location_url();
122 my $uid;
123 if( ref($uids) =~ /ARRAY/i ) {
124 $uid = join (',', @$uids);
125 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
126 if scalar @$uids >50;
127 } else {
128 $uid = $uids;
131 return GET $url. $format_string. '&id='. $uid;
135 =head2 postprocess_data
137 Title : postprocess_data
138 Usage : $self->postprocess_data ( 'type' => 'string',
139 'location' => \$datastr);
140 Function: process downloaded data before loading into a Bio::SeqIO
141 Returns : void
142 Args : hash with two keys - 'type' can be 'string' or 'file'
143 - 'location' either file location or string
144 reference containing data
146 =cut
148 # remove occasional blank lines at top of web output
149 sub postprocess_data {
150 my ($self, %args) = @_;
151 if ($args{type} eq 'string') {
152 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
154 elsif ($args{type} eq 'file') {
155 my $F;
156 open $F,"<", $args{location} or $self->throw("Cannot open $args{location}: $!");
157 my @data = <$F>;
158 for (@data) {
159 last unless /^\s+$/;
160 shift @data;
162 open $F,">", $args{location} or $self->throw("Cannot write to $args{location}: $!");
163 print $F @data;
164 close $F;
168 =head2 default_format
170 Title : default_format
171 Usage : my $format = $self->default_format
172 Function: Returns default sequence format for this module
173 Returns : string
174 Args : none
176 =cut
178 sub default_format {
179 my ($self) = @_;
180 return $self->{'_default_format'};
183 =head1 Bio::DB::DBFetch specific routines
185 =head2 get_Stream_by_id
187 Title : get_Stream_by_id
188 Usage : $seq = $db->get_Stream_by_id($ref);
189 Function: Retrieves Seq objects from the server 'en masse', rather than one
190 at a time. For large numbers of sequences, this is far superior
191 than get_Stream_by_[id/acc]().
192 Example :
193 Returns : a Bio::SeqIO stream object
194 Args : $ref : either an array reference, a filename, or a filehandle
195 from which to get the list of unique ids/accession numbers.
197 NOTE: for backward compatibility, this method is also called
198 get_Stream_by_batch.
200 =cut
202 sub get_Stream_by_id {
203 my ($self, $ids) = @_;
204 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
207 =head2 get_Seq_by_version
209 Title : get_Seq_by_version
210 Usage : $seq = $db->get_Seq_by_version('X77802.1');
211 Function: Gets a Bio::Seq object by accession number
212 Returns : A Bio::Seq object
213 Args : version number (as a string)
214 Throws : "version does not exist" exception
216 =cut
218 sub get_Seq_by_version {
219 my ($self,$seqid) = @_;
220 my $seqio = $self->get_Stream_by_acc([$seqid]);
221 $self->throw("version does not exist") if( !defined $seqio );
222 return $seqio->next_seq();
225 =head2 request_format
227 Title : request_format
228 Usage : my ($req_format, $ioformat) = $self->request_format;
229 $self->request_format("genbank");
230 $self->request_format("fasta");
231 Function: Get/Set sequence format retrieval. The get-form will normally not
232 be used outside of this and derived modules.
233 Returns : Array of two strings, the first representing the format for
234 retrieval, and the second specifying the corresponding SeqIO format.
235 Args : $format = sequence format
237 =cut
239 sub request_format {
240 my ($self, $value) = @_;
241 if( defined $value ) {
242 $value = lc $value;
243 $self->{'_format'} = $value;
244 return ($value, $value);
246 $value = $self->{'_format'};
247 if( $value and defined $self->formatmap->{$value} ) {
248 return ($value, $self->formatmap->{$value});
249 } else {
250 # Try to fall back to a default.
251 return ($self->default_format, $self->default_format );
256 =head2 servertype
258 Title : servertype
259 Usage : my $servertype = $self->servertype
260 $self->servertype($servertype);
261 Function: Get/Set server type
262 Returns : string
263 Args : server type string [optional]
265 =cut
267 sub servertype {
268 my ($self, $servertype) = @_;
269 if( defined $servertype && $servertype ne '') {
270 $self->throw("You gave an invalid server type ($servertype)".
271 " - available types are ".
272 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
273 $self->{'_servertype'} = $servertype;
275 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
276 return $self->{'_servertype'};
279 =head2 hostlocation
281 Title : hostlocation
282 Usage : my $location = $self->hostlocation()
283 $self->hostlocation($location)
284 Function: Set/Get Hostlocation
285 Returns : string representing hostlocation
286 Args : string specifying hostlocation [optional]
288 =cut
290 sub hostlocation {
291 my ($self, $location ) = @_;
292 my $servertype = $self->servertype;
293 $self->throw("Must have a valid servertype defined not $servertype")
294 unless defined $servertype;
295 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
296 if( defined $location && $location ne '' ) {
297 $location = lc $location;
298 if( ! $hosts{$location} ) {
299 $self->throw("Must specify a known host, not $location,".
300 " possible values (".
301 join(",", sort keys %hosts ). ")");
303 $self->{'_hostlocation'} = $location;
305 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
306 return $self->{'_hostlocation'};
309 =head2 location_url
311 Title : location
312 Usage : my $url = $self->location_url()
313 Function: Get host url
314 Returns : string representing url
315 Args : none
317 =cut
319 sub location_url {
320 my ($self) = @_;
321 my $servertype = $self->servertype();
322 my $location = $self->hostlocation();
323 if( ! defined $location || !defined $servertype ) {
324 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
326 return sprintf($self->hosts->{$servertype}->{'baseurl'},
327 $self->hosts->{$servertype}->{'hosts'}->{$location});
330 =head1 Bio::DB::DBFetch routines
332 These methods allow subclasses to pass parameters.
334 =head2 hosts
336 Title : hosts
337 Usage :
338 Function: get/set for host hash
339 Returns :
340 Args : optional hash
342 =cut
344 sub hosts {
345 my ($self, $value) = @_;
346 if (defined $value) {
347 $self->{'_hosts'} = $value;
349 unless (exists $self->{'_hosts'}) {
350 return ('');
351 } else {
352 return $self->{'_hosts'};
356 =head2 formatmap
358 Title : formatmap
359 Usage :
360 Function: get/set for format hash
361 Returns :
362 Args : optional hash
364 =cut
366 sub formatmap {
367 my ($self, $value) = @_;
368 if (defined $value) {
369 $self->{'_formatmap'} = $value;
371 unless (exists $self->{'_formatmap'}) {
372 return ('');
373 } else {
374 return $self->{'_formatmap'};
380 __END__