Updates:
[bioperl-live.git] / Bio / Tools / EUtilities / EUtilParameters.pm
blob36fbd7cd6b53980591328f8889b508e1a2b80ecb
1 # $Id: EUtilParameters.pm 15052 2008-12-01 08:47:39Z heikki $
3 # BioPerl module for Bio::Tools::EUtilities::EUtilParameters
5 # Cared for by Chris Fields <cjfields at uiuc dot edu>
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::Tools::EUtilities::EUtilParameters - Manipulation of NCBI eutil-based parameters for
16 remote database requests.
18 =head1 SYNOPSIS
20 # Bio::Tools::EUtilities::EUtilParameters implements Bio::ParameterBaseI
22 my @params = (-eutil => 'efetch',
23 db => 'nucleotide',
24 id => \@ids,
25 email => 'me@foo.bar',
26 retmode => 'xml');
28 my $p = Bio::Tools::EUtilities::EUtilParameters->new(@params);
30 if ($p->parameters_changed) {
31 # ...
32 } # state information
34 $p->set_parameters(@extra_params); # set new NCBI parameters, leaves others preset
36 $p->reset_parameters(@new_params); # reset NCBI parameters to original state
38 $p->to_string(); # get a URI-encoded string representation of the URL address
40 $p->to_request(); # get an HTTP::Request object (to pass on to LWP::UserAgent)
42 =head1 DESCRIPTION
44 Bio::Tools::EUtilities::EUtilParameters is-a Bio::ParameterBaseI implementation that allows
45 simple manipulation of NCBI eutil parameters for CGI-based queries. SOAP-based
46 methods may be added in the future.
48 For simplicity parameters do not require dashes when passed and do not need URI
49 encoding (spaces are converted to '+', symbols encoded, etc). Also, the
50 following extra parameters can be passed to the new() constructor or via
51 set_parameters() or reset_parameters():
53 eutil - the eutil to be used. The default is 'efetch' if not set.
54 correspondence - Flag for how IDs are treated. Default is undef (none).
55 history - a Bio::Tools::EUtilities::HistoryI object. Default is undef (none).
57 At this point minimal checking is done for potential errors in parameter
58 passing, though these should be easily added in the future when necessary.
60 =head1 TODO
62 Possibly integrate SOAP-compliant methods. SOAP::Lite may be undergoing an
63 complete rewrite so I'm hesitant about adding this in immediately.
65 =head1 FEEDBACK
67 =head2 Mailing Lists
69 User feedback is an integral part of the
70 evolution of this and other Bioperl modules. Send
71 your comments and suggestions preferably to one
72 of the Bioperl mailing lists. Your participation
73 is much appreciated.
75 bioperl-l@lists.open-bio.org - General discussion
76 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
78 =head2 Reporting Bugs
80 Report bugs to the Bioperl bug tracking system to
81 help us keep track the bugs and their resolution.
82 Bug reports can be submitted via the web.
84 http://bugzilla.open-bio.org/
86 =head1 AUTHOR
88 Email cjfields at uiuc dot edu
90 =head1 APPENDIX
92 The rest of the documentation details each of the
93 object methods. Internal methods are usually
94 preceded with a _
96 =cut
98 # Let the code begin...
100 package Bio::Tools::EUtilities::EUtilParameters;
101 use strict;
102 use warnings;
104 use base qw(Bio::Root::Root Bio::ParameterBaseI);
105 use URI;
106 use HTTP::Request;
107 use Bio::Root::IO;
109 # eutils only has one hostbase URL
111 # mode : GET or POST (HTTP::Request)
112 # location : CGI location
113 # params : allowed parameters for that eutil
114 my %MODE = (
115 'einfo' => {
116 'mode' => 'GET',
117 'location' => 'einfo.fcgi',
118 'params' => [qw(db tool email)],
120 'epost' => {
121 'mode' => 'POST',
122 'location' => 'epost.fcgi',
123 'params' => [qw(db retmode id tool email WebEnv query_key)],
125 'efetch' => {
126 'mode' => 'GET',
127 'location' => 'efetch.fcgi',
128 'params' => [qw(db retmode id retmax retstart rettype strand seq_start
129 seq_stop complexity report tool email WebEnv query_key)],
131 'esearch' => {
132 'mode' => 'GET',
133 'location' => 'esearch.fcgi',
134 'params' => [qw(db retmode usehistory term field reldate mindate
135 maxdate datetype retmax retstart rettype sort tool email
136 WebEnv query_key)],
138 'esummary' => {
139 'mode' => 'GET',
140 'location' => 'esummary.fcgi',
141 'params' => [qw(db retmode id retmax retstart rettype tool email
142 WebEnv query_key)],
144 'elink' => {
145 'mode' => 'GET',
146 'location' => 'elink.fcgi',
147 'params' => [qw(db retmode id reldate mindate maxdate datetype term
148 dbfrom holding cmd version tool email linkname WebEnv
149 query_key)],
151 'egquery' => {
152 'mode' => 'GET',
153 'location' => 'egquery.fcgi',
154 'params' => [qw(term retmode tool email)],
156 'espell' => {
157 'mode' => 'GET',
158 'location' => 'espell.fcgi',
159 'params' => [qw(db retmode term tool email )],
163 my @PARAMS;
165 # generate getter/setters (will move this into individual ones at some point)
167 BEGIN {
168 @PARAMS = qw(db id email retmode rettype usehistory term field tool
169 reldate mindate maxdate datetype retstart retmax sort seq_start seq_stop
170 strand complexity report dbfrom cmd holding version linkname WebEnv
171 query_key);
172 for my $method (@PARAMS) {
173 eval <<END;
174 sub $method {
175 my (\$self, \$val) = \@_;
176 if (defined \$val) {
177 if ((!defined \$self->{'_$method'}) ||
178 (defined \$self->{'_$method'} && \$self->{'_$method'} ne \$val)) {
179 \$self->{'_statechange'} = 1;
180 \$self->{'_$method'} = \$val;
183 return \$self->{'_$method'};
189 sub new {
190 my ($class, @args) = @_;
191 my $self = $class->SUPER::new(@args);
192 my ($retmode) = $self->_rearrange(["RETMODE"],@args);
193 $self->_set_from_args(\@args,
194 -methods => [@PARAMS, qw(eutil history correspondence id_file)]);
195 $self->eutil() || $self->eutil('efetch');
196 $self->tool() || $self->tool('bioperl');
197 # set default retmode if not explicitly set
198 $self->set_default_retmode if (!$retmode);
199 $self->{'_statechange'} = 1;
200 return $self;
203 =head1 Bio::ParameterBaseI implemented methods
205 =head2 set_parameters
207 Title : set_parameters
208 Usage : $pobj->set_parameters(@params);
209 Function: sets the NCBI parameters listed in the hash or array
210 Returns : None
211 Args : [optional] hash or array of parameter/values.
212 Note : This sets any parameter passed but leaves previously set data alone.
213 In addition to regular eutil-specific parameters, you can set the
214 following:
216 -eutil - the eUtil to be used (default 'efetch')
217 -history - pass a HistoryI-implementing object, which
218 sets the WebEnv, query_key, and possibly db and linkname
219 (the latter two only for LinkSets)
220 -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
221 IDs are to be added together for elink request where
222 ID correspondence might be needed
223 (default 0)
225 =cut
227 sub set_parameters {
228 my ($self, @args) = @_;
229 # allow automated resetting; must check to ensure that retmode isn't explicitly passed
230 my ($newmode,$file) = $self->_rearrange([qw(RETMODE ID_FILE)],@args);
231 $self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
232 # set default retmode if not explicitly passed
233 $self->set_default_retmode unless $newmode;
234 $file && $self->id_file($file);
235 return;
238 =head2 reset_parameters
240 Title : reset_parameters
241 Usage : resets values
242 Function: resets parameters to either undef or value in passed hash
243 Returns : none
244 Args : [optional] hash of parameter-value pairs
245 Note : This sets any parameter passed, but resets all others (deletes them).
246 In addition to regular eutil-specific parameters, you can set the
247 following:
249 -eutil - the eUtil to be used (default 'efetch')
250 -history - pass a HistoryI-implementing object, which
251 sets the WebEnv, query_key, and possibly db and linkname
252 (the latter two only for LinkSets)
253 -correspondence - Boolean flag, set to TRUE or FALSE; indicates how
254 IDs are to be added together for elink request where
255 ID correspondence might be needed
256 (default 0)
258 =cut
260 sub reset_parameters {
261 my ($self, @args) = @_;
262 # is there a better way of doing this? probably, but this works...
263 my ($retmode,$file) = $self->_rearrange([qw(RETMODE ID_FILE)],@args);
264 map { defined $self->{"_$_"} && undef $self->{"_$_"} } (@PARAMS, qw(eutil correspondence history_cache request_cache));
265 $self->_set_from_args(\@args, -methods => [@PARAMS, qw(eutil correspondence history)]);
266 $self->eutil() || $self->eutil('efetch');
267 $self->set_default_retmode unless $retmode;
268 $file && $self->id_file($file);
269 $self->{'_statechange'} = 1;
272 =head2 parameters_changed
274 Title : parameters_changed
275 Usage : if ($pobj->parameters_changed) {...}
276 Function: Returns TRUE if parameters have changed
277 Returns : Boolean (0 or 1)
278 Args : [optional] Boolean
280 =cut
282 sub parameters_changed {
283 my ($self) = @_;
284 $self->{'_statechange'};
287 =head2 available_parameters
289 Title : available_parameters
290 Usage : @params = $pobj->available_parameters()
291 Function: Returns a list of the available parameters
292 Returns : Array of available parameters (no values)
293 Args : [optional] A string with the eutil name (for returning eutil-specific
294 parameters)
296 =cut
298 sub available_parameters {
299 my ($self, $type) = @_;
300 $type ||= 'all';
301 if ($type eq 'all') {
302 return @PARAMS;
303 } else {
304 $self->throw("$type parameters not supported") if !exists $MODE{$type};
305 return @{$MODE{$type}->{params}};
309 =head2 get_parameters
311 Title : get_parameters
312 Usage : @params = $pobj->get_parameters;
313 %params = $pobj->get_parameters;
314 Function: Returns list of key/value pairs, parameter => value
315 Returns : Flattened list of key-value pairs. All key-value pairs returned,
316 though subsets can be returned based on the '-type' parameter. Data
317 originally set as an array ref are returned based on whether the
318 '-join_id' flag is set (default is the same array ref).
319 Args : -type : the eutil name (Default: returns all). Use of '-list'
320 supercedes this
321 -list : array ref of specific parameters
322 -join_ids : Boolean; join IDs based on correspondence (Default: no join)
324 =cut
326 sub get_parameters {
327 my ($self, @args) = @_;
328 my ($type, $list, $join) = $self->_rearrange([qw(TYPE LIST JOIN_IDS)], @args);
329 $self->throw("Parameter list not an array ref") if $list && ref $list ne 'ARRAY';
330 $type ||= '';
331 my @final = $list ? grep {$self->can($_)} @{$list} : $self->available_parameters($type);
332 my @p;
333 for my $param (@final) {
334 if ($param eq 'id' && $self->id && $join) {
335 my $id = $self->id;
336 if ($self->correspondence && $self->eutil eq 'elink') {
337 for my $id_group (@{ $id }) {
338 if (ref($id_group) eq 'ARRAY') {
339 push @p, ('id' => join(q(,), @{ $id_group }));
341 elsif (!ref($id_group)) {
342 push @p, ('id' => $id_group);
344 else {
345 $self->throw("Unknown ID type: $id_group");
348 } else {
349 push @p, ref $id eq 'ARRAY' ?
350 ($param => join(',', @{ $id })):
351 ($param => $id);
354 elsif ($param eq 'db' && $self->db && $join) {
355 my $db = $self->db;
356 push @p, (ref $db eq 'ARRAY') ?
357 ($param => join(',', @{ $db })) :
358 ($param => $db) ;
360 else {
361 push @p, ($param => $self->{"_$param"}) if defined $self->{"_$param"};
364 return @p;
367 =head1 Implementation-specific to_* methods
369 =head2 to_string
371 Title : to_string
372 Usage : $string = $pobj->to_string;
373 Function: Returns string (URL only in this case)
374 Returns : String (URL only for now)
375 Args : [optional] 'all'; build URI::http using all parameters
376 Default : Builds based on allowed parameters (presence of history data
377 or eutil type in %MODE).
378 Note : Changes state of object. Absolute string
380 =cut
382 sub to_string {
383 my ($self, @args) = @_;
384 # calling to_uri changes the state
385 if ($self->parameters_changed || !defined $self->{'_string_cache'}) {
386 my $string = $self->to_request(@args)->uri->as_string;
387 $self->{'_statechange'} = 0;
388 $self->{'_string_cache'} = $string;
390 return $self->{'_string_cache'};
393 =head2 to_request
395 Title : to_request
396 Usage : $uri = $pobj->to_request;
397 Function: Returns HTTP::Request object
398 Returns : HTTP::Request
399 Args : [optional] 'all'; builds request using all parameters
400 Default : Builds based on allowed parameters (presence of history data
401 or eutil type in %MODE).
402 Note : Changes state of object (to boolean FALSE). Used for CGI-based GET/POST
404 =cut
406 sub to_request {
407 my ($self, $type) = @_;
408 if ($self->parameters_changed || !defined $self->{'_request_cache'}) {
409 my $eutil = $self->eutil;
410 $self->throw("No eutil set") if !$eutil;
411 #set default retmode
412 $type ||= $eutil;
413 my ($location, $mode) = ($MODE{$eutil}->{location}, $MODE{$eutil}->{mode});
414 my $request;
415 my $uri = URI->new($self->url_base_address . $location);
416 if ($mode eq 'GET') {
417 $uri->query_form($self->get_parameters(-type => $type, -join_ids => 1) );
418 $request = HTTP::Request->new($mode => $uri);
419 $self->{'_request_cache'} = $request;
420 } elsif ($mode eq 'POST') {
421 $request = HTTP::Request->new($mode => $uri->as_string);
422 $uri->query_form($self->get_parameters(-type => $type, -join_ids => 1) );
423 $request->content_type('application/x-www-form-urlencoded');
424 $request->content($uri->query);
425 $self->{'_request_cache'} = $request;
426 } else {
427 $self->throw("Unrecognized request mode: $mode");
429 $self->{'_statechange'} = 0;
430 $self->{'_request_cache'} = $request;
432 return $self->{'_request_cache'};
435 =head1 Implementation specific-methods
437 =head2 eutil
439 Title : eutil
440 Usage : $p->eutil('efetch')
441 Function: gets/sets the eutil for this set of parameters
442 Returns : string (eutil)
443 Args : [optional] string (eutil)
444 Throws : '$eutil not supported' if eutil not present
445 Note : This does not reset retmode to the default if called directly.
447 =cut
449 sub eutil {
450 my ($self, $eutil) = @_;
451 if ($eutil) {
452 $self->throw("$eutil not supported") if !exists $MODE{$eutil};
453 if (!defined $self->{'_eutil'} || ($self->{'_eutil'} && $self->{'_eutil'} ne $eutil)) {
454 $self->{'_eutil'} = $eutil;
455 $self->{'_statechange'} = 1;
458 return $self->{'_eutil'};
461 =head2 history
463 Title : history
464 Usage : $p->history($history);
465 Function: gets/sets the history object to be used for these parameters
466 Returns : Bio::Tools::EUtilities::HistoryI (if set)
467 Args : [optional] Bio::Tools::EUtilities::HistoryI
468 Throws : Passed something other than a Bio::Tools::EUtilities::HistoryI
469 Note : This overrides WebEnv() and query_key() settings when set. This
470 caches the last history object passed and returns like a Get/Set
472 =cut
474 sub history {
475 my ($self, $history) = @_;
476 if ($history) {
477 $self->throw('Not a Bio::Tools::EUtilities::HistoryI object!') if
478 !$history->isa('Bio::Tools::EUtilities::HistoryI');
479 my ($webenv, $qkey) = $history->history;
480 $self->WebEnv($webenv);
481 $self->query_key($qkey);
482 $self->{'_statechange'} = 1;
483 $self->{'_history_cache'} = $history;
485 return $self->{'_history_cache'};
488 =head2 correspondence
490 Title : correspondence
491 Usage : $p->correspondence(1);
492 Function: Sets flag for posting IDs for one-to-one correspondence
493 Returns : Boolean
494 Args : [optional] boolean value
496 =cut
498 sub correspondence {
499 my ($self, $corr) = @_;
500 if (defined $corr) {
501 $self->{'_correspondence'} = $corr;
502 $self->{'_statechange'} = 1;
504 return $self->{'_correspondence'};
507 =head2 id_file
509 Title : id_file
510 Usage : $p->id_file('<foo');
511 Function: convenience method; passes in file containing a list of IDs for
512 searches (one per line), sets id() to list
513 Returns : none
514 Args : either string indicating file to use, a file handle, or an IO::Handle
515 object
516 Note : use of this overrides concurrent use of the '-id' parameter when both
517 are passed. The filename is not retained, merely parsed for IDs.
519 =cut
521 sub id_file {
522 my ($self, $file) = @_;
523 if ($file) {
524 # do this in a way that allows file, fh, IO::Handle
525 my $io = $self->_io;
526 $io->_initialize_io(-input => $file);
527 my @ids;
528 while (my $line = $io->_readline) {
529 chomp $line;
530 push @ids, $line;
532 $self->_io->close;
533 $self->id(\@ids);
537 =head2 url_base_address
539 Title : url_base_address
540 Usage : $address = $p->url_base_address();
541 Function: Get URL base address
542 Returns : String
543 Args : None in this implementation; the URL is fixed
545 =cut
548 my $HOSTBASE = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
550 sub url_base_address {
551 my ($self, $address) = @_;
552 return $HOSTBASE;
556 =head2 set_default_retmode
558 Title : set_default_retmode
559 Usage : $p->set_default_retmode();
560 Function: sets retmode to default value specified by the eutil() and the value
561 in %NCBI_DATABASE (for efetch only) if called
562 Returns : none
563 Args : none
565 =cut
568 # default retmode if one is not supplied
569 my %NCBI_DATABASE = (
570 'protein' => 'text',
571 'nucleotide' => 'text',
572 'nuccore' => 'text',
573 'nucgss' => 'text',
574 'nucest' => 'text',
575 'structure' => 'text',
576 'genome' => 'text',
577 'gene' => 'asn1',
578 'journals' => 'text',
581 sub set_default_retmode {
582 my $self = shift;
583 if ($self->eutil eq 'efetch') {
584 my $db = $self->db || return; # assume retmode will be set along with db
585 my $mode = exists $NCBI_DATABASE{$db} ? $NCBI_DATABASE{$db} : 'xml';
586 $self->retmode($mode);
587 } else {
588 $self->retmode('xml');
593 sub _io {
594 my $self = shift;
595 if (!defined $self->{'_io'}) {
596 $self->{'_io'} = Bio::Root::IO->new();
598 return $self->{'_io'};