3 # BioPerl module for Bio::Tools::EUtilities
5 # Cared for by Chris Fields
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::Tools::EUtilities - NCBI eutil XML parsers
21 my $parser = Bio::Tools::EUtilities->new(
26 # or HTTP::Response object...
28 my $parser = Bio::Tools::EUtilities->new(
30 -response => $response
33 # esearch, esummary, elink
35 @ids = $parser->get_ids(); # returns array or array ref of IDs
39 $term = $parser->get_term(); # returns array or array ref of IDs
43 $db = $parser->get_database(); # returns database
45 # Query-related methods (esearch, egquery, espell data)
46 # eutil data centered on use of search terms
48 my $ct = $parser->get_count; # uses optional database for egquery count
49 my $translation = $parser->get_count;
51 my $corrected = $parser->get_corrected_query; # espell
53 while (my $gquery = $parser->next_GlobalQuery) {
54 # iterates through egquery data
57 # Info-related methods (einfo data)
58 # database-related information
60 my $desc = $parser->get_description;
61 my $update = $parser->get_last_update;
62 my $nm = $parser->get_menu_name;
63 my $ct = $parser->get_record_count;
65 while (my $field = $parser->next_FieldInfo) {
68 while (my $field = $parser->next_LinkInfo) {
72 # History methods (epost data, some data returned from elink)
73 # data which enables one to retrieve and query against user-stored
74 # information on the NCBI server
76 while (my $cookie = $parser->next_History) {
80 my @hists = $parser->get_Histories;
82 # Bio::Tools::EUtilities::Summary (esummary data)
83 # information on a specific database record
85 # retrieve nested docsum data
86 while (my $docsum = $parser->next_DocSum) {
87 print "ID:",$docsum->get_ids,"\n";
88 while (my $item = $docsum->next_Item) {
90 while (my $listitem = $docsum->next_ListItem) {
92 while (my $listitem = $docsum->next_Structure) {
99 # retrieve flattened item list per DocSum
100 while (my $docsum = $parser->next_DocSum) {
101 my @items = $docsum->get_all_DocSum_Items;
106 Parses NCBI eutils XML output for retrieving IDs and other information. Part of
107 the BioPerl EUtilities system.
109 This is a general parser for eutils XML; data from efetch is NOT parsed (this
110 requires separate format-dependent parsers). All other XML for eutils is
111 parsed. These modules can be used independently of Bio::DB::EUtilities
112 and Bio::Tools::EUtilities::EUtilParameters; if used in this way, only data present in the XML
113 will be parsed out (other bits are retrieved from a passed in
114 Bio::Tools::EUtilities::EUtilParameters instance used while querying the database)
118 This module is largely complete. However there are a few holes which will
119 eventually be filled in. TranslationSets from esearch are not currently parsed,
126 User feedback is an integral part of the
127 evolution of this and other Bioperl modules. Send
128 your comments and suggestions preferably to one
129 of the Bioperl mailing lists. Your participation
132 bioperl-l@lists.open-bio.org - General discussion
133 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
135 =head2 Reporting Bugs
137 Report bugs to the Bioperl bug tracking system to
138 help us keep track the bugs and their resolution.
139 Bug reports can be submitted via the web.
141 http://bugzilla.open-bio.org/
145 Email cjfields at uiuc dot edu
149 The rest of the documentation details each of the
150 object methods. Internal methods are usually
155 # Let the code begin...
157 package Bio
::Tools
::EUtilities
;
161 use base
qw(Bio::Root::IO Bio::Tools::EUtilities::EUtilDataI);
164 =head2 Constructor methods
171 Usage : my $parser = Bio::Tools::EUtilities->new(-file => 'my.xml',
172 -eutil => 'esearch');
173 Function : create Bio::Tools::EUtilities instance
174 Returns : new Bio::Tools::EUtilities instance
175 Args : -file/-fh - File or filehandle
176 -eutil - eutil parser to use (supports all but efetch)
177 -response - HTTP::Response object (optional)
184 'esearch' => 'Query',
185 'egquery' => 'Query',
190 'esummary' => 'Summary',
194 my($caller,@args) = @_;
195 my $class = ref $caller || $caller;
196 if ($class =~ m{Bio::Tools::EUtilities::(\S+)}) {
197 my ($self) = $class->SUPER::new
(@args);
198 $self->_initialize(@args);
202 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
203 my $eutil = $param{'-eutil'} || $class->throw("Need eutil to make instance");
204 return unless( $class->_load_eutil_module( $DATA_MODULE{$eutil}) );
205 return "Bio::Tools::EUtilities::$DATA_MODULE{$eutil}"->new(-datatype
=> lc $DATA_MODULE{$eutil},
212 my ($self, @args) = @_;
213 my ($response, $pobj, $type, $eutil, $cache, $lazy) =
214 $self->_rearrange([qw(RESPONSE
222 $self->datatype($type);
223 $self->eutil($eutil);
224 # lazy parsing only implemented for elink and esummary (where returned data
225 # can be quite long). Also, no point to parsing lazily when the data is
226 # already in memory in an HTTP::Response object, so turn it off and chunk
227 # the Response object after parsing.
228 $response && $self->response($response);
229 $pobj && $self->parameter_base($pobj);
230 $self->cache_response($cache);
231 $lazy = 0 if ($response) || ($eutil ne 'elink' && $eutil ne 'esummary');
232 # setting parser to 'lazy' mode is permanent (can't reset later)
233 $self->{'_lazy'} = $lazy;
234 $self->{'_parsed'} = 0;
239 =head1 Bio::Tools::EUtilities methods
241 =head2 cache_response
243 Title : cache_response
244 Usage : $parser->cache_response(1)
245 Function : sets flag to cache response object (off by default)
246 Returns : value eval'ing to TRUE or FALSE
247 Args : value eval'ing to TRUE or FALSE
248 Note : must be set prior to any parsing run
253 my ($self, $cache) = @_;
254 if (defined $cache) {
255 $self->{'_cache_response'} = ($cache) ?
1 : 0;
257 return $self->{'_cache_response'};
263 Usage : my $response = $parser->response;
264 Function : Get/Set HTTP::Response object
265 Returns : HTTP::Response
266 Args : HTTP::Response
267 Note : to prevent object from destruction set cache_response() to TRUE
272 my ($self, $response) = @_;
274 $self->throw('Not an HTTP::Response object') unless (ref $response && $response->isa('HTTP::Response'));
275 $self->{'_response'} = $response;
277 return $self->{'_response'};
280 =head2 parameter_base
282 Title : parameter_base
283 Usage : my $response = $parser->parameter_base;
284 Function : Get/Set Bio::ParameterBaseI object (should be Bio::Tools::EUtilities::EUtilParameters)
285 Returns : Bio::Tools::EUtilities::EUtilParameters || undef
286 Args : (optional) Bio::Tools::EUtilities::EUtilParameters
287 Note : If this object is present, it may be used as a last resort for
288 some data values if parsed XML does not contain said values (for
289 instance, database, term, IDs, etc).
294 my ($self, $pb) = @_;
296 $self->throw('Not an Bio::ParameterBaseI object') unless (ref $pb && $pb->isa('Bio::ParameterBaseI'));
297 $self->warn('Not an Bio::Tools::EUtilities::EUtilParameters object; may experience some turbulence...') unless (ref $pb && $pb->isa('Bio::Tools::EUtilities::EUtilParameters'));
298 $self->{'_parameter_base'} = $pb;
300 return $self->{'_parameter_base'};
306 Usage : if ($parser->data_parsed) {...}
307 Function : returns TRUE if data has been parsed
308 Returns : value eval'ing to TRUE or FALSE
309 Args : none (set within parser)
310 Note : mainly internal method (set in case user wants to check
311 whether parser is exhausted).
316 return shift->{'_parsed'};
322 Usage : if ($parser->is_lazy) {...}
323 Function : returns TRUE if parser is set to lazy parsing mode
324 (only affects elink/esummary)
327 Note : Permanently set in constructor. Still highly experimental.
328 Don't stare directly at happy fun ball...
333 return shift->{'_lazy'};
339 Usage : $parser->parse_data
340 Function : direct call to parse data; normally implicitly called
348 'esummary' => [qw(DocSum Item)],
351 'einfo' => [qw(Field Link)],
352 'elink' => [qw(LinkSet LinkSetDb LinkSetDbHistory IdUrlSet
353 Id IdLinkSet ObjUrl Link LinkInfo)],
354 'espell' => [qw(Original Replaced)],
355 'esearch' => [qw(Id ErrorList WarningList)],
360 my $eutil = $self->eutil;
361 my $xs = XML
::Simple
->new();
362 my $response = $self->response ?
$self->response :
363 $self->_fh ?
$self->_fh :
364 $self->throw('No response or stream specified');
365 my $simple = ($eutil eq 'espell') ?
366 $xs->XMLin($self->_fix_espell($response), forcearray
=> $EUTIL_DATA{$eutil}) :
367 ($response && $response->isa("HTTP::Response")) ?
368 $xs->XMLin($response->content, forcearray
=> $EUTIL_DATA{$eutil}) :
369 $xs->XMLin($response, forcearray
=> $EUTIL_DATA{$eutil});
371 if ($simple->{ERROR
}) {
372 my $error = $simple->{ERROR
};
373 $self->throw("NCBI $eutil fatal error: ".$error) unless ref $error;
375 if ($simple->{InvalidIdList
}) {
376 $self->warn("NCBI $eutil error: Invalid ID List".$simple->{InvalidIdList
});
379 if ($simple->{ErrorList
} || $simple->{WarningList
}) {
380 my @errorlist = @
{ $simple->{ErrorList
} } if $simple->{ErrorList
};
381 my @warninglist = @
{ $simple->{WarningList
} } if $simple->{WarningList
};
383 for my $error (@errorlist) {
384 my $messages = join("\n\t",map {"$_ [".$error->{$_}.']'}
385 grep {!ref $error->{$_}} keys %$error);
386 $err_warn .= "Error : $messages";
388 for my $warn (@warninglist) {
389 my $messages = join("\n\t",map {"$_ [".$warn->{$_}.']'}
390 grep {!ref $warn->{$_}} keys %$warn);
391 $err_warn .= "Warnings : $messages";
394 $self->warn("NCBI $eutil Errors/Warnings:\n".$err_warn)
395 # don't return as some data may still be useful
397 delete $self->{'_response'} unless $self->cache_response;
398 $self->{'_parsed'} = 1;
399 $self->_add_data($simple);
402 # implemented only for elink/esummary, still experimental
406 my $eutil = $self->eutil;
407 my $tag = $eutil eq 'elink' ?
'LinkSet' :
408 $eutil eq 'esummary' ?
'DocSum' :
409 $self->throw("Only eutil elink/esummary use parse_chunk()");
410 my $xs = XML
::Simple
->new();
411 if ($self->response) {
412 $self->throw("Lazy parsing not implemented for HTTP::Response data yet");
413 delete $self->{'_response'} if !$self->cache_response && $self->data_parsed;
414 } else { # has to be a file/filehandle
416 my ($chunk, $seendoc, $line);
418 while ($line = <$fh>) {
419 next unless $seendoc || $line =~ m{^<$tag>};
422 last if $line =~ m{^</$tag>};
424 if (!defined $line) {
425 $self->{'_parsed'} = 1;
429 $xs->XMLin($chunk, forcearray
=> $EUTIL_DATA{$eutil}, KeepRoot
=> 1)
439 Usage : $foo->to_string()
440 Function : converts current object to string
442 Args : (optional) simple data for text formatting
443 Note : Implemented in plugins
449 $self->parse_data if ($self->can('parse_data') && !$self->data_parsed);
450 return sprintf("%-20s:%s\n\n", 'EUtil', $self->eutil);
456 Usage : $info->print_all();
457 $info->print_all(-fh => $fh, -cb => $coderef);
458 Function : prints (dumps) all data in parser. Unless a coderef is supplied,
459 this just dumps the parser-specific to_string method to either a
463 -file : file to print to
464 -fh : filehandle to print to (cannot be used concurrently with file)
465 -cb : coderef to use in place of default print method. This is
466 passed in the parser object
467 -wrap : number of columns to wrap default text output to (def = 80)
468 Notes : only applicable for einfo. If -file or -fh are not defined,
474 my ($self, @args) = @_;
475 $self->_print_handler(@args);
478 =head1 Bio::Tools::EUtilities::EUtilDataI methods
483 Usage : $eutil->$foo->eutil
484 Function : Get/Set eutil
486 Args : string (eutil)
487 Throws : on invalid eutil
494 Usage : $type = $foo->datatype;
495 Function : Get/Set data object type
501 =head1 Methods useful for multiple eutils
506 Usage : my @ids = $parser->get_ids
507 Function : returns array of requested IDs (see Notes for more specifics)
509 Args : [conditional] not required except when running elink queries against
510 multiple databases. In case of the latter, the database name is
511 optional but recommended when retrieving IDs as the ID list will
512 be globbed together. In such cases, if a db name isn't provided a
513 warning is issued as a reminder.
514 Notes : esearch : returned ID list
515 elink : returned ID list (see Args above for caveats)
516 all others : from parameter_base->id or undef
521 my ($self, $request) = @_;
522 my $eutil = $self->eutil;
523 if ($self->is_lazy) {
524 $self->warn('get_ids() not implemented when using lazy mode');
527 $self->parse_data unless $self->data_parsed;
528 if ($eutil eq 'esearch') {
529 return $self->{'_id'} ? @
{ $self->{'_id'} } : ();
530 } elsif ($eutil eq 'elink') {
533 if (ref $request eq 'CODE') {
534 push @ids, map {$_->get_ids }
535 grep { $request->($_) } $self->get_LinkSets;
539 grep {grep { $_ eq $request } @
{$_->[1]}}
540 map {[[$_->get_ids], [$_->get_databases]]} $self->get_LinkSets;
543 $self->warn('Multiple database present, IDs will be globbed together')
544 if $self->get_linked_databases > 1;
545 push @ids, map {$_->get_ids } $self->get_LinkSets;
548 } elsif ($eutil eq 'esummary') {
549 unless (exists $self->{'_id'}) {
550 push @
{$self->{'_id'}}, map {$_->get_id } $self->get_DocSums;
552 return @
{$self->{'_id'}};
553 } elsif (my $pb = $self->parameter_base) {
555 return $ids ? @
{$ids} : ();
564 Usage : my $db = $info->get_database;
565 Function : returns single database name (eutil-compatible). This is the
566 queried database. For most eutils this is straightforward. For
567 elinks (which have 'db' and 'dbfrom') this is db/dbto, for egquery,
568 it is the first db in the list (you probably want get_databases
572 Notes : egquery : first db in the query (you probably want get_databases)
573 einfo : the queried database
574 espell : the queried database
575 all others : from parameter_base->db or undef
580 return ($_[0]->get_databases)[0];
583 =head2 get_db (alias for get_database)
588 return shift->get_database;
593 Title : get_databases
594 Usage : my @dbs = $parser->get_databases
595 Function : returns list of databases
596 Returns : array of strings
598 Notes : This is guaranteed to return a list of databases. For a single
599 database use the convenience method get_db/get_database
601 egquery : list of all databases in the query
602 einfo : the queried database, or the available databases
603 espell : the queried database
604 elink : collected from each LinkSet
605 all others : from parameter_base->db or undef
610 my ($self, $db) = @_;
611 $self->parse_data unless $self->data_parsed;
612 my $eutil = $self->eutil;
614 if ($eutil eq 'einfo' || $eutil eq 'espell') {
615 @dbs = $self->{'_dbname'} ||
616 $self->{'_database'} ||
617 $self->get_available_databases;
618 } elsif ($eutil eq 'egquery') {
619 @dbs = map {$_->get_database} ($self->get_GlobalQueries);
620 } elsif ($eutil eq 'elink') {
623 @dbs = sort grep {!$tmp{$_}++}
624 map {($_->get_databases)} $self->get_LinkSets;
625 } elsif ($self->parameter_base) {
626 if ($self->parameter_base->eutil eq 'elink') {
627 @dbs = $self->parameter_base->dbfrom;
629 @dbs = $self->parameter_base->db;
635 =head2 get_dbs (alias for get_databases)
640 return shift->get_databases;
646 Usage : while (my $hist=$parser->next_History) {...}
647 Function : returns next HistoryI (if present).
648 Returns : Bio::Tools::EUtilities::HistoryI (Cookie or LinkSet)
650 Note : esearch, epost, and elink are all capable of returning data which
651 indicates search results (in the form of UIDs) is stored on the
652 remote server. Access to this data is wrapped up in simple interface
653 (HistoryI), which is implemented in two classes:
654 Bio::DB::EUtilities::History (the simplest) and
655 Bio::DB::EUtilities::LinkSet. In general, calls to epost and esearch
656 will only return a single HistoryI object (formerly known as a
657 Cookie), but calls to elink can generate many depending on the
658 number of IDs, the correspondence, etc. Hence this iterator, which
659 allows one to retrieve said data one piece at a time.
665 $self->parse_data unless $self->data_parsed;
666 $self->{'_histories_it'} = $self->generate_iterator('histories')
667 if (!exists $self->{'_histories_it'});
668 my $hist = $self->{'_histories_it'}->();
671 =head2 next_cookie (alias for next_History)
676 return shift->next_History;
681 Title : get_Histories
682 Usage : my @hists = $parser->get_Histories
683 Function : returns list of HistoryI objects.
684 Returns : list of Bio::Tools::EUtilities::HistoryI (History or LinkSet)
691 $self->parse_data unless $self->data_parsed;
692 ref $self->{'_histories'} ?
return @
{ $self->{'_histories'} } : return ();
695 =head1 Query-related methods
700 Usage : my $ct = $parser->get_count
701 Function : returns the count (hits for a search)
703 Args : [CONDITIONAL] string with database name - used to retrieve
704 count from specific database when using egquery
705 Notes : egquery : count for specified database (specified above)
706 esearch : count for last search
712 my ($self, $db) = @_;
713 $self->parse_data unless $self->data_parsed;
715 if ($self->datatype eq 'multidbquery') {
717 $self->warn('Must specify database to get count from');
720 my ($gq) = grep {$_->get_database eq $db} $self->get_GlobalQueries;
721 $gq && return $gq->get_count;
722 $self->warn("Unknown database $db");
725 return $self->{'_count'};
732 Usage : $st = $qd->get_term;
733 Function : retrieve the term for the global search
736 Notes : egquery : search term
738 esearch : from parameter_base->term or undef
744 my ($self, @args) = @_;
745 $self->parse_data unless $self->data_parsed;
746 $self->{'_term'} ?
$self->{'_term'} :
747 $self->{'_query'} ?
$self->{'_query'} :
748 $self->parameter_base ?
$self->parameter_base->term :
752 =head2 get_translation_from
754 Title : get_translation_from
755 Usage : $string = $qd->get_translation_from();
756 Function: portion of the original query replaced with translated_to()
759 Note : only applicable for esearch
763 sub get_translation_from
{
765 $self->parse_data unless $self->data_parsed;
766 return $self->{'_translation'}->{'From'};
769 =head2 get_translation_to
771 Title : get_translation_to
772 Usage : $string = $qd->get_translation_to();
773 Function: replaced string used in place of the original query term in translation_from()
776 Note : only applicable for esearch
780 sub get_translation_to
{
782 $self->parse_data unless $self->data_parsed;
783 return $self->{'_translation'}->{'To'};
789 Usage : $start = $qd->get_retstart();
790 Function : retstart setting for the query (either set or NCBI default)
793 Notes : esearch : retstart
795 all others : from parameter_base->retstart or undef
801 $self->parse_data unless $self->data_parsed;
802 return $self->{'_retstart'};
808 Usage : $max = $qd->get_retmax();
809 Function : retmax setting for the query (either set or NCBI default)
812 Notes : esearch : retmax
814 all others : from parameter_base->retmax or undef
820 $self->parse_data unless $self->data_parsed;
821 return $self->{'_retmax'};
824 =head2 get_query_translation
826 Title : get_query_translation
827 Usage : $string = $qd->get_query_translation();
828 Function: returns the translated query used for the search (if any)
831 Notes : only applicable for esearch. This is the actual term used for
836 sub get_query_translation
{
838 $self->parse_data unless $self->data_parsed;
839 return $self->{'_querytranslation'};
842 =head2 get_corrected_query
844 Title : get_corrected_query
845 Usage : my $cor = $eutil->get_corrected_query;
846 Function : retrieves the corrected query when using espell
849 Notes : only applicable for espell.
853 sub get_corrected_query
{
855 $self->parse_data unless $self->data_parsed;
856 return $self->{'_correctedquery'};
859 =head2 get_replaced_terms
861 Title : get_replaced_terms
862 Usage : my $term = $eutil->get_replaced_term
863 Function : returns array of strings replaced in the query
866 Notes : only applicable for espell
870 sub get_replaced_terms
{
872 $self->parse_data unless $self->data_parsed;
873 if ($self->{'_spelledquery'} && $self->{'_spelledquery'}->{Replaced
}) {
874 ref $self->{'_spelledquery'}->{Replaced
} ?
875 return @
{ $self->{'_spelledquery'}->{Replaced
} } : return ();
879 =head2 next_GlobalQuery
881 Title : next_GlobalQuery
882 Usage : while (my $query = $eutil->next_GlobalQuery) {...}
883 Function : iterates through the queries returned from an egquery search
884 Returns : GlobalQuery object
886 Notes : only applicable for egquery
890 sub next_GlobalQuery
{
892 $self->parse_data unless $self->data_parsed;
893 $self->{'_globalqueries_it'} = $self->generate_iterator('globalqueries')
894 if (!exists $self->{'_globalqueries_it'});
895 $self->{'_globalqueries_it'}->();
898 =head2 get_GlobalQueries
900 Title : get_GlobalQueries
901 Usage : @queries = $eutil->get_GlobalQueries
902 Function : returns list of GlobalQuery objects
903 Returns : array of GlobalQuery objects
905 Notes : only applicable for egquery
909 sub get_GlobalQueries
{
911 $self->parse_data unless $self->data_parsed;
912 ref $self->{'_globalqueries'} ?
return @
{ $self->{'_globalqueries'} } : return ();
915 =head2 print_GlobalQueries
917 Title : print_GlobalQueries
918 Usage : $docsum->print_GlobalQueries();
919 $docsum->print_GlobalQueries(-fh => $fh, -callback => $coderef);
920 Function : prints item data for all global queries. The default printing
921 method is each item per DocSum is printed with relevant values if
922 present in a simple table using Text::Wrap.
925 -file : file to print to
926 -fh : filehandle to print to (cannot be used concurrently with file)
927 -cb : coderef to use in place of default print method. This is passed
928 in a GlobalQuery object;
929 -wrap : number of columns to wrap default text output to (def = 80)
930 Notes : only applicable for esummary. If -file or -fh are not defined,
935 sub print_GlobalQueries
{
936 my ($self, @args) = @_;
937 $self->_print_handler(@args, -type
=> 'GlobalQuery');
940 =head1 Summary-related methods
945 Usage : while (my $ds = $esum->next_DocSum) {...}
946 Function : iterate through DocSum instances
947 Returns : single Bio::Tools::EUtilities::Summary::DocSum
949 Notes : only applicable for esummary
955 if(!$self->data_parsed && !$self->is_lazy) {
958 $self->{'_docsums_it'} = $self->generate_iterator('docsums')
959 if (!exists $self->{'_docsums_it'});
960 $self->{'_docsums_it'}->();
966 Usage : my @docsums = $esum->get_DocSums
967 Function : retrieve a list of DocSum instances
968 Returns : array of Bio::Tools::EUtilities::Summary::DocSum
970 Notes : only applicable for esummary
976 if ($self->is_lazy) {
977 $self->warn('get_DocSums() not implemented when using lazy mode');
980 $self->parse_data unless $self->data_parsed;
981 return ref $self->{'_docsums'} ? @
{ $self->{'_docsums'} } : return ();
986 Title : print_DocSums
987 Usage : $docsum->print_DocSums();
988 $docsum->print_DocSums(-fh => $fh, -cb => $coderef);
989 Function : prints item data for all docsums. The default data is generated
990 via DocSum::to_string
993 -file : file to print to
994 -fh : filehandle to print to (cannot be used concurrently with file)
995 -cb : coderef to use in place of default print method. This is passed
997 -wrap : number of columns to wrap default text output to (def = 80)
998 Notes : only applicable for esummary. If -file or -fh are not defined,
1004 my ($self, @args) = @_;
1005 $self->_print_handler(@args, -type
=> 'DocSum');
1008 =head1 Info-related methods
1010 =head2 get_available_databases
1012 Title : get_available_databases
1013 Usage : my @dbs = $info->get_available_databases
1014 Function : returns list of available eutil-compatible database names
1015 Returns : Array of strings
1017 Notes : only applicable for einfo.
1021 sub get_available_databases
{
1023 $self->parse_data unless $self->data_parsed;
1024 ($self->{'_available_databases'}) ?
1025 return @
{($self->{'_available_databases'})} :
1029 =head2 get_record_count
1031 Title : get_record_count
1032 Usage : my $ct = $eutil->get_record_count;
1033 Function : returns database record count
1036 Notes : only applicable for einfo.
1040 sub get_record_count
{
1042 $self->parse_data unless $self->data_parsed;
1043 return $self->{'_count'}
1046 =head2 get_last_update
1048 Title : get_last_update
1049 Usage : my $time = $info->get_last_update;
1050 Function : returns string containing time/date stamp for last database update
1053 Notes : only applicable for einfo.
1057 sub get_last_update
{
1059 $self->parse_data unless $self->data_parsed;
1060 return $self->{'_lastupdate'}
1063 =head2 get_menu_name
1065 Title : get_menu_name
1066 Usage : my $nm = $info->get_menu_name;
1067 Function : returns string of database menu name
1070 Notes : only applicable for einfo.
1076 $self->parse_data unless $self->data_parsed;
1077 exists $self->{'_menuname'} ?
return $self->{'_menuname'} :
1078 exists $self->{'_menu'} ?
return $self->{'_menu'} :
1082 =head2 get_description
1084 Title : get_description
1085 Usage : my $desc = $info->get_description;
1086 Function : returns database description
1089 Notes : only applicable for einfo.
1093 sub get_description
{
1095 $self->parse_data unless $self->data_parsed;
1096 return $self->{'_description'};
1099 =head2 next_FieldInfo
1101 Title : next_FieldInfo
1102 Usage : while (my $field = $info->next_FieldInfo) {...}
1103 Function : iterate through FieldInfo objects
1104 Returns : Field object
1106 Notes : only applicable for einfo. Uses callback() for filtering if defined
1111 sub next_FieldInfo
{
1113 $self->parse_data unless $self->data_parsed;
1114 $self->{'_fieldinfo_it'} = $self->generate_iterator('fieldinfo')
1115 if (!exists $self->{'_fieldinfo_it'});
1116 $self->{'_fieldinfo_it'}->();
1119 =head2 get_FieldInfo
1121 Title : get_FieldInfo
1122 Usage : my @fields = $info->get_FieldInfo;
1123 Function : returns list of FieldInfo objects
1124 Returns : array (FieldInfo objects)
1126 Notes : only applicable for einfo.
1132 $self->parse_data unless $self->data_parsed;
1133 return ref $self->{'_fieldinfo'} ? @
{ $self->{'_fieldinfo'} } : return ();
1136 *get_FieldInfos
= \
&get_FieldInfo
;
1138 =head2 next_LinkInfo
1140 Title : next_LinkInfo
1141 Usage : while (my $link = $info->next_LinkInfo) {...}
1142 Function : iterate through LinkInfo objects
1143 Returns : LinkInfo object
1145 Notes : only applicable for einfo. Uses callback() for filtering if defined
1152 $self->parse_data unless $self->data_parsed;
1153 $self->{'_linkinfo_it'} = $self->generate_iterator('linkinfo')
1154 if (!exists $self->{'_linkinfo_it'});
1155 $self->{'_linkinfo_it'}->();
1160 Title : get_LinkInfo
1161 Usage : my @links = $info->get_LinkInfo;
1162 Function : returns list of LinkInfo objects
1163 Returns : array (LinkInfo objects)
1165 Notes : only applicable for einfo.
1171 $self->parse_data unless $self->data_parsed;
1172 return ref $self->{'_linkinfo'} ? @
{ $self->{'_linkinfo'} } : return ();
1175 *get_LinkInfos
= \
&get_LinkInfo
;
1177 =head2 print_FieldInfo
1179 Title : print_FieldInfo
1180 Usage : $info->print_FieldInfo();
1181 $info->print_FieldInfo(-fh => $fh, -cb => $coderef);
1182 Function : prints link data for each FieldInfo object. The default is generated
1183 via FieldInfo::to_string
1186 -file : file to print to
1187 -fh : filehandle to print to (cannot be used concurrently with file)
1188 -cb : coderef to use in place of default print method. This is
1189 passed in a FieldInfo object
1190 -wrap : number of columns to wrap default text output to (def = 80)
1191 Notes : only applicable for einfo. If -file or -fh are not defined,
1196 sub print_FieldInfo
{
1197 my ($self, @args) = @_;
1198 $self->_print_handler(@args, -type
=> 'FieldInfo');
1201 =head2 print_LinkInfo
1203 Title : print_LinkInfo
1204 Usage : $info->print_LinkInfo();
1205 $info->print_LinkInfo(-fh => $fh, -cb => $coderef);
1206 Function : prints link data for each LinkInfo object. The default is generated
1207 via LinkInfo::to_string
1210 -file : file to print to
1211 -fh : filehandle to print to (cannot be used concurrently with file)
1212 -cb : coderef to use in place of default print method. This is passed
1213 in a LinkInfo object
1214 -wrap : number of columns to wrap default text output to (def = 80)
1215 Notes : only applicable for einfo. If -file or -fh are not defined,
1220 sub print_LinkInfo
{
1221 my ($self, @args) = @_;
1222 $self->_print_handler(@args, -type
=> 'LinkInfo');
1225 =head1 Bio::Tools::EUtilities::Link-related methods
1229 Title : next_LinkSet
1230 Usage : while (my $ls = $eutil->next_LinkSet {...}
1231 Function : iterate through LinkSet objects
1232 Returns : LinkSet object
1234 Notes : only applicable for elink. Uses callback() for filtering if defined
1241 #$self->parse_data unless $self->data_parsed;
1242 if(!$self->data_parsed && !$self->is_lazy) {
1245 $self->{'_linksets_it'} = $self->generate_iterator('linksets')
1246 if (!exists $self->{'_linksets_it'});
1247 $self->{'_linksets_it'}->();
1252 Title : get_LinkSets
1253 Usage : my @links = $info->get_LinkSets;
1254 Function : returns list of LinkSets objects
1255 Returns : array (LinkSet objects)
1257 Notes : only applicable for elink.
1261 # add support for retrieval of data if lazy parsing is enacted
1265 if ($self->is_lazy) {
1266 $self->warn('get_LinkSets() not implemented when using lazy mode');
1269 $self->parse_data unless $self->data_parsed;
1270 return ref $self->{'_linksets'} ? @
{ $self->{'_linksets'} } : return ();
1273 =head2 print_LinkSets
1275 Title : print_LinkSets
1276 Usage : $info->print_LinkSets();
1277 $info->print_LinkSets(-fh => $fh, -cb => $coderef);
1278 Function : prints link data for each LinkSet object. The default is generated
1279 via LinkSet::to_string
1282 -file : file to print to
1283 -fh : filehandle to print to (cannot be used concurrently with file)
1284 -cb : coderef to use in place of default print method. This is passed
1286 -wrap : number of columns to wrap default text output to (def = 80)
1287 Notes : only applicable for einfo. If -file or -fh are not defined,
1292 sub print_LinkSets
{
1293 my ($self, @args) = @_;
1294 $self->_print_handler(@args, -type
=> 'LinkSet');
1297 =head2 get_linked_databases
1299 Title : get_linked_databases
1300 Usage : my @dbs = $eutil->get_linked_databases
1301 Function : returns list of databases linked to in linksets
1302 Returns : array of databases
1304 Notes : only applicable for elink. Now defers to get_databases.
1308 sub get_linked_databases
{
1310 return $self->get_databases if $self->eutil eq 'elink';
1314 =head1 Iterator- and callback-related methods
1319 my %VALID_ITERATORS = (
1320 'globalqueries' => 'globalqueries',
1321 'fieldinfo' => 'fieldinfo',
1322 'fieldinfos' => 'fieldinfo',
1323 'linkinfo' => 'linkinfo',
1324 'linkinfos' => 'linkinfo',
1325 'linksets' => 'linksets',
1326 'docsums' => 'docsums',
1327 'histories' => 'histories'
1333 Usage : $esum->rewind()
1334 $esum->rewind('recursive')
1335 Function : retrieve a list of DocSum instances
1336 Returns : array of Bio::Tools::EUtilities::Summary::DocSum
1337 Args : [optional] Scalar; string ('all') to reset all iterators, or string
1338 describing the specific main object iterator to reset. The following
1339 are recognized (case-insensitive):
1341 'all' - rewind all objects and also recursively resets nested object interators
1342 (such as LinkSets and DocSums).
1343 'globalqueries' - GlobalQuery objects
1344 'fieldinfo' or 'fieldinfos' - FieldInfo objects
1345 'linkinfo' or 'linkinfos' - LinkInfo objects in this layer
1346 'linksets' - LinkSet objects
1347 'docsums' - DocSum objects
1348 'histories' - HistoryI objects (Cookies, LinkSets)
1353 my ($self, $arg) = ($_[0], lc $_[1]);
1354 my $eutil = $self->eutil;
1355 if ($self->is_lazy) {
1356 $self->warn('rewind() not implemented yet when running in lazy mode');
1360 if (exists $VALID_ITERATORS{$arg}) {
1361 delete $self->{'_'.$arg.'_it'};
1362 } elsif ($arg eq 'all') {
1363 for my $it (values %VALID_ITERATORS){
1364 delete $self->{'_'.$it.'_it'} if
1365 exists $self->{'_'.$it.'_it'};
1366 map {$_->rewind('all')} $self->get_LinkSets;
1367 map {$_->rewind('all')} $self->get_DocSums;
1372 =head2 generate_iterator
1374 Title : generate_iterator
1375 Usage : my $coderef = $esum->generate_iterator('linkinfo')
1376 Function : generates an iterator (code reference) which iterates through
1377 the relevant object indicated by the args
1378 Returns : code reference
1379 Args : [REQUIRED] Scalar; string describing the specific object to iterate.
1380 The following are currently recognized (case-insensitive):
1383 'fieldinfo' or 'fieldinfos' (the latter sounds clumsy, but I alias it JIC)
1384 'linkinfo' or 'linkinfos' (the latter sounds clumsy, but I alias it JIC)
1389 Note : This function generates a simple coderef that one can use
1390 independently of the various next_* functions (in fact, the next_*
1391 functions use lazily created iterators generated via this method,
1392 while rewind() merely deletes them so they can be regenerated on the
1395 A callback specified using callback() will be used to filter objects
1396 for any generated iterator. This behaviour is implemented for both
1397 normal and lazy iterator types and is the default. If you don't want
1398 this, make sure to reset any previously set callbacks via
1399 reset_callback() (which just deletes the code ref). Note that setting
1400 callback() also changes the behavior of the next_* functions as the
1401 iterators are generated here (as described above); this is a feature
1404 'Lazy' iterators are considered an experimental feature and may be
1405 modified in the future. A 'lazy' iterator, which loops through and
1406 returns objects as they are created (instead of creating all data
1407 instances up front, then iterating through) is returned if the
1408 parser is set to 'lazy' mode. This mode is only present for elink
1409 and esummary output as they are the two formats parsed which can
1410 generate potentially thousands of individual objects (note efetch
1411 isn't parsed, so isn't counted). Use of rewind() with these
1412 iterators is not supported for the time being as we can't guarantee
1413 you can rewind(), as this depends on whether the data source is
1414 seek()able and thus 'rewindable'. We will add rewind() support at a
1415 later time which will work for 'seekable' data or possibly cached
1416 objects via Storable or BDB.
1420 sub generate_iterator
{
1421 my ($self, $obj) = @_;
1423 $self->throw('Must provide object type to iterate');
1424 } elsif (!exists $VALID_ITERATORS{$obj}) {
1425 $self->throw("Unknown object type [$obj]");
1427 my $cb = $self->callback;
1428 if ($self->is_lazy) {
1429 my $type = $self->eutil eq 'esummary' ?
'_docsums' : '_linksets';
1430 $self->{$type} = [];
1432 if (!@
{$self->{$type}}) {
1433 $self->parse_chunk; # fill the queue
1435 while (my $obj = shift @
{$self->{$type}}) {
1437 ($cb->($obj)) ?
return $obj : next;
1445 my $loc = '_'.$VALID_ITERATORS{$obj};
1446 my $index = $#{$self->{$loc}};
1449 while ($current <= $index) {
1451 if (my $d = $cb->($self->{$loc}->[$current])) {
1452 return $self->{$loc}->[$current++] }
1458 return $self->{$loc}->[$current++]
1471 Usage : $parser->callback(sub {$_[0]->get_database eq 'protein'});
1472 Function : Get/set callback code ref used to filter returned data objects
1473 Returns : code ref if previously set
1474 Args : single argument:
1475 code ref - evaluates a passed object and returns true or false value
1477 'reset' - string, resets the iterator.
1478 returns upon any other args
1483 my ($self, $cb) = @_;
1485 delete $self->{'_cb'} if ($cb eq 'reset');
1486 return if ref $cb ne 'CODE';
1487 $self->{'_cb'} = $cb;
1489 return $self->{'_cb'};
1492 # Object printing methods
1495 my $DEF_HANDLER = sub {
1497 return $obj->to_string."\n";
1509 sub _print_handler
{
1511 my ($file, $fh, $cb, $wrap, $type, $all) = $self->_rearrange([qw(FILE FH CB WRAP TYPE ALL)], @_);
1514 # default formatting delegates to_string
1516 $self->throw("Type $type not registered with print handler, exiting...")
1517 if !exists($HANDLER{$type});
1520 $self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
1524 $self->throw("Have defined both file and filehandle; only use one!") if $file && $fh;
1525 my $io = ($file) ? Bio
::Root
::IO
->new(-input
=> $file, -flush
=> 1) :
1526 Bio
::Root
::IO
->new(-flush
=> 1); # defaults to STDOUT
1528 if ($type eq 'all') {
1529 my $string = $cb->($self);
1530 $io->_print($string) if $string;
1533 my $it = "next_$type";
1534 $self->throw("Unknown iterator method $it") unless $self->can($it);
1535 while (my $obj = $self->$it) {
1536 my $string = $cb->($obj);
1537 $io->_print($string) if $string;
1547 return shift->{'_seekable'}
1550 # fixes odd bad XML issue espell data (still present 6-24-07)
1553 my ($self, $response) = @_;
1555 my $type = ref($response);
1556 if ($type eq 'GLOB') {
1557 $temp .= $_ for <$response>;
1558 } elsif ($type eq 'HTTP::Response') {
1559 $temp = $response->content;
1561 $self->throw("Unrecognized ref type $type");
1563 if ($temp =~ m{^<html>}) {
1564 $self->throw("NCBI espell nonrecoverable error: HTML content returned")
1566 $temp =~ s{<ERROR>(.*?)<ERROR>}{<ERROR>$1</ERROR>};
1570 sub _load_eutil_module
{
1571 my ($self, $class) = @_;
1573 my $module = "Bio::Tools::EUtilities::" . $class;
1576 $ok = $self->_load_module($module);
1580 $self: data module $module cannot be found
1582 For more information about the EUtilities system please see the EUtilities docs.