3 # BioPerl module for Bio::DB::DBFetch
5 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
19 #do not use this module directly
23 Allows the dynamic retrieval of entries from databases using the
24 dbfetch script at EBI:
25 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
27 In order to make changes transparent we have host type (currently only
28 ebi) and location (defaults to ebi) separated out. This allows later
29 additions of more servers in different geographical locations.
31 This is a superclass which is called by instantiable subclasses with
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to one
40 of the Bioperl mailing lists. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 the bugs and their resolution. Bug reports can be submitted via the
51 http://bugzilla.open-bio.org/
53 =head1 AUTHOR - Heikki Lehvaslaiho
55 Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
59 The rest of the documentation details each of the object
60 methods. Internal methods are usually preceded with a _
64 # Let the code begin...
66 package Bio
::DB
::DBFetch
;
68 use vars
qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
72 use HTTP::Request::Common;
74 use base qw(Bio::DB::WebDBSeqI);
76 # the new way to make modules a little more lightweight
80 $DEFAULTSERVERTYPE = 'dbfetch';
81 $DEFAULTLOCATION = 'ebi';
85 =head1 Routines from Bio::DB::WebDBSeqI
90 Usage : my $url = $self->get_request
91 Function: returns a HTTP::Request object
93 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
98 my ($self, @qualifiers) = @_;
99 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
102 $self->throw("Must specify a value for UIDs to fetch")
103 unless defined $uids;
105 my $format_string = '';
106 $format ||= $self->default_format;
107 ($format, $tmp) = $self->request_format($format);
108 $format_string = "&format=$format";
109 my $url = $self->location_url();
111 if( ref($uids) =~ /ARRAY/i ) {
112 $uid = join (',', @
$uids);
113 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
114 if scalar @
$uids >50;
119 return GET
$url. $format_string. '&id='. $uid;
123 =head2 postprocess_data
125 Title : postprocess_data
126 Usage : $self->postprocess_data ( 'type' => 'string',
127 'location' => \$datastr);
128 Function: process downloaded data before loading into a Bio::SeqIO
130 Args : hash with two keys - 'type' can be 'string' or 'file'
131 - 'location' either file location or string
132 reference containing data
136 # remove occasional blank lines at top of web output
137 sub postprocess_data
{
138 my ($self, %args) = @_;
139 if ($args{type
} eq 'string') {
140 ${$args{location
}} =~ s/^\s+//; # get rid of leading whitespace
142 elsif ($args{type
} eq 'file') {
144 open $F,"<", $args{location
} or $self->throw("Cannot open $args{location}: $!");
150 open $F,">", $args{location
} or $self->throw("Cannot write to $args{location}: $!");
156 =head2 default_format
158 Title : default_format
159 Usage : my $format = $self->default_format
160 Function: Returns default sequence format for this module
168 return $self->{'_default_format'};
171 =head1 Bio::DB::DBFetch specific routines
173 =head2 get_Stream_by_id
175 Title : get_Stream_by_id
176 Usage : $seq = $db->get_Stream_by_id($ref);
177 Function: Retrieves Seq objects from the server 'en masse', rather than one
178 at a time. For large numbers of sequences, this is far superior
179 than get_Stream_by_[id/acc]().
181 Returns : a Bio::SeqIO stream object
182 Args : $ref : either an array reference, a filename, or a filehandle
183 from which to get the list of unique ids/accession numbers.
185 NOTE: for backward compatibility, this method is also called
190 sub get_Stream_by_id
{
191 my ($self, $ids) = @_;
192 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
195 =head2 get_Seq_by_version
197 Title : get_Seq_by_version
198 Usage : $seq = $db->get_Seq_by_version('X77802.1');
199 Function: Gets a Bio::Seq object by accession number
200 Returns : A Bio::Seq object
201 Args : version number (as a string)
202 Throws : "version does not exist" exception
206 sub get_Seq_by_version
{
207 my ($self,$seqid) = @_;
208 my $seqio = $self->get_Stream_by_acc([$seqid]);
209 $self->throw("version does not exist") if( !defined $seqio );
210 return $seqio->next_seq();
213 =head2 request_format
215 Title : request_format
216 Usage : my ($req_format, $ioformat) = $self->request_format;
217 $self->request_format("genbank");
218 $self->request_format("fasta");
219 Function: Get/Set sequence format retrieval. The get-form will normally not
220 be used outside of this and derived modules.
221 Returns : Array of two strings, the first representing the format for
222 retrieval, and the second specifying the corresponding SeqIO format.
223 Args : $format = sequence format
228 my ($self, $value) = @_;
229 if( defined $value ) {
231 $self->{'_format'} = $value;
232 return ($value, $value);
234 $value = $self->{'_format'};
235 if( $value and defined $self->formatmap->{$value} ) {
236 return ($value, $self->formatmap->{$value});
238 # Try to fall back to a default.
239 return ($self->default_format, $self->default_format );
247 Usage : my $servertype = $self->servertype
248 $self->servertype($servertype);
249 Function: Get/Set server type
251 Args : server type string [optional]
256 my ($self, $servertype) = @_;
257 if( defined $servertype && $servertype ne '') {
258 $self->throw("You gave an invalid server type ($servertype)".
259 " - available types are ".
260 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
261 $self->{'_servertype'} = $servertype;
263 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
264 return $self->{'_servertype'};
270 Usage : my $location = $self->hostlocation()
271 $self->hostlocation($location)
272 Function: Set/Get Hostlocation
273 Returns : string representing hostlocation
274 Args : string specifying hostlocation [optional]
279 my ($self, $location ) = @_;
280 $location = lc $location;
281 my $servertype = $self->servertype;
282 $self->throw("Must have a valid servertype defined not $servertype")
283 unless defined $servertype;
284 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
285 if( defined $location && $location ne '' ) {
286 if( ! $hosts{$location} ) {
287 $self->throw("Must specify a known host, not $location,".
288 " possible values (".
289 join(",", sort keys %hosts ). ")");
291 $self->{'_hostlocation'} = $location;
293 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
294 return $self->{'_hostlocation'};
300 Usage : my $url = $self->location_url()
301 Function: Get host url
302 Returns : string representing url
309 my $servertype = $self->servertype();
310 my $location = $self->hostlocation();
311 if( ! defined $location || !defined $servertype ) {
312 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
314 return sprintf($self->hosts->{$servertype}->{'baseurl'},
315 $self->hosts->{$servertype}->{'hosts'}->{$location});
318 =head1 Bio::DB::DBFetch routines
320 These methods allow subclasses to pass parameters.
326 Function: get/set for host hash
333 my ($self, $value) = @_;
334 if (defined $value) {
335 $self->{'_hosts'} = $value;
337 unless (exists $self->{'_hosts'}) {
340 return $self->{'_hosts'};
348 Function: get/set for format hash
355 my ($self, $value) = @_;
356 if (defined $value) {
357 $self->{'_formatmap'} = $value;
359 unless (exists $self->{'_formatmap'}) {
362 return $self->{'_formatmap'};