3 # BioPerl module for Bio::Tools::EUtilities::Link::LinkSet
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chris Fields
9 # Copyright Chris Fields
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 # Part of the EUtilities BioPerl package
19 Bio::Tools::EUtilities::Link::LinkSet - class for EUtils LinkSets
33 User feedback is an integral part of the
34 evolution of this and other Bioperl modules. Send
35 your comments and suggestions preferably to one
36 of the Bioperl mailing lists. Your participation
39 bioperl-l@lists.open-bio.org - General discussion
40 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to
56 help us keep track the bugs and their resolution.
57 Bug reports can be submitted via the web.
59 http://bugzilla.open-bio.org/
63 Email cjfields at bioperl dot org
67 The rest of the documentation details each of the
68 object methods. Internal methods are usually
73 # Let the code begin...
75 package Bio
::Tools
::EUtilities
::Link
::LinkSet
;
79 use base
qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
80 use Bio
::Tools
::EUtilities
::Link
::UrlLink
;
81 use Bio
::Tools
::EUtilities
::Info
::LinkInfo
;
84 my ($class,@args) = @_;
85 my $self = $class->SUPER::new
(@args);
86 my ($type) = $self->_rearrange([qw(DATATYPE)],@args);
88 $self->eutil('elink');
89 $self->datatype($type);
96 Usage : my @ids = $linkset->get_ids
97 Function : returns list of retrieved IDs
98 Returns : array of IDs
100 Notes : Cmd Description
101 acheck same as get_submitted_ids
102 lcheck same as get_submitted_ids
103 ncheck same as get_submitted_ids
104 prlinks same as get_submitted_ids
105 llinks same as get_submitted_ids
106 llinkslib same as get_submitted_ids
107 neighbor linked IDs for database in get_database
108 neighbor_history linked IDs for database in get_database
114 unless ($self->{'_sorted_id'}) {
115 @
{$self->{'_sorted_id'}} =
117 $self->{'_id'}->{$a}->[0] <=>
118 $self->{'_id'}->{$b}->[0]
119 } keys %{$self->{'_id'}};
121 return @
{$self->{'_sorted_id'}};
127 Usage : my $db = $info->get_database;
128 Function : returns single database name (eutil-compatible). This is the
129 queried database. For elinks (which have 'db' and 'dbfrom')
130 this is equivalent to db/dbto (use get_dbfrom() to for the latter).
131 Note that this only returns the first db; in some cases this may
132 not be what you want (when multiple dbs are queried, for instance)
135 Notes : with all elink cmd arguments
140 return ($_[0]->get_databases)[0];
143 =head2 get_db (alias for get_database)
148 return shift->get_database;
151 =head2 get_dbto (alias for get_database)
156 return shift->get_database;
161 Title : get_databases
162 Usage : my $string = $linkset->get_databases;
163 Function : retrieve databases referred to for this linkset
164 these may be present as a single database or embedded in
165 Returns : array of strings
173 my @dbs = sort map {$_->get_database}
174 grep {!$tmp{$_->get_database}++} ($self->get_LinkInfo);
175 unshift @dbs, $self->{'_dbto'} if $self->{'_dbto'} && !$tmp{$self->{'_dbto'}}++;
179 =head2 get_dbs (alias for get_databases)
184 return shift->get_databases;
190 Usage : my $string = $linkset->get_dbfrom;
191 Function : retrieve originating database for this linkset
197 sub get_dbfrom
{ return shift->{'_dbfrom'} }
199 =head2 get_link_names
201 Title : get_link_names
202 Usage : my $string = $linkset->get_link_names;
203 Function : retrieve eutil-compatible link names
204 Returns : array of strings
206 Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
207 a link name). Also, some LinkSets define a single link name. This
208 returns an array with all unique linknames globbed both sources, if
217 if ($self->{'_linkname'}) {
218 push @lns, $self->{'_linkname'};
219 $tmps{$self->{'_linkname'}}++;
221 push @lns, map {$_->get_link_name} $self->get_LinkInfo;
227 Title : get_link_name
228 Usage : my $string = $linkset->get_link_name;
229 Function : retrieve eutil-compatible link name
230 Returns : single link name
236 return ($_[0]->get_link_names)[0];
239 =head2 get_submitted_ids
241 Title : get_submitted_ids
242 Usage : my $string = $linkset->get_submitted_ids;
243 Function : retrieve original ID list
249 sub get_submitted_ids
{
251 my $datatype = $self->datatype;
252 if ($datatype eq 'idcheck' || $datatype eq 'urllink') {
253 return $self->get_ids;
254 } elsif ($self->{'_submitted_ids'}) {
255 return @
{$self->{'_submitted_ids'}};
264 Usage : if (my $linkset->has_scores) {...}
265 Function : returns TRUE if score data is present
273 return exists $self->{'_has_scores'} ?
1 : 0;
279 Usage : %scores = $linkset->get_scores;
280 Function : returns flattened list or hash ref containing ID => score pairs
281 Returns : hash or hash ref (based on list or scalar context)
288 # do we want to cache this or generate only when needed? Likely won't be
289 # called more than once...
290 return unless $self->has_scores;
291 my %scores = map {$_ => $self->{'_id'}->{$_}->[1]} keys %{$self->{'_id'}};
295 =head2 get_score_by_id
297 Title : get_score_by_id
298 Usage : $score = $linkset->get_score_by_id($id);
299 Function : returns the score for a particular primary ID
301 Args : [REQUIRED] Primary ID for the score lookup
305 sub get_score_by_id
{
306 my ($self, $id) = @_;
307 ($id && exists $self->{'_id'}->{$id}) ?
return $self->{'_id'}->{$id}->[1] :
314 Usage : if ($linkset->has_linkout) {...}
315 Function : returns TRUE if the single ID present in this linkset has a linkout
318 Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
319 cmd=acheck for databases with name 'LinkOut'
325 if (exists $self->{'_haslinkout'}) {
326 return $self->{'_haslinkout'} eq 'Y' ?
1 : 0;
328 return (grep {$_ eq 'LinkOut'} $self->get_databases) ?
1 : 0;
335 Usage : if ($linkset->has_neighbor) {...}
336 Function : returns TRUE if the single ID present in this linkset has a neighbor
340 Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
341 other checks performed at this time
347 if (exists $self->{'_hasneighbor'}) {
348 return $self->{'_hasneighbor'} eq 'Y' ?
1 : 0;
357 Usage : while (my $url = $linkset->next_UrlLink) {...}
358 Function : iterate through UrlLink objects
359 Returns : Bio::Tools::EUtilities::Link::UrlLink
366 unless ($self->{"_urllinks_it"}) {
367 my @ul = $self->get_UrlLinks;
368 $self->{"_urllinks_it"} = sub {return shift @ul}
370 $self->{'_urllinks_it'}->();
376 Usage : my @urls = $linkset->get_UrlLinks
377 Function : returns all UrlLink objects
378 Returns : list of Bio::Tools::EUtilities::Link::UrlLink
385 return ref $self->{'_urllinks'} ? @
{ $self->{'_urllinks'} } : return;
390 Title : next_LinkInfo
391 Usage : while (my $info = $linkset->next_LinkInfo) {...}
392 Function : iterate through LinkInfo objects
393 Returns : Bio::Tools::EUtilities::Link::LinkInfo
400 unless ($self->{"_linkinfo_it"}) {
401 my @li = $self->get_LinkInfo;
402 $self->{"_linkinfo_it"} = sub {return shift @li}
404 $self->{'_linkinfo_it'}->();
410 Usage : my @links = $linkset->get_LinkInfo
411 Function : returns all LinkInfo objects
412 Returns : list of Bio::Tools::EUtilities::Link::LinkInfo
419 return ref $self->{'_linkinfo'} ? @
{ $self->{'_linkinfo'} } : return ();
425 Usage : $info->rewind() # rewinds all (default)
426 $info->rewind('links') # rewinds only links
427 Function : 'rewinds' (resets) specified interators (all if no arg)
429 Args : [OPTIONAL] String:
430 'all' - all iterators (default)
431 'linkinfo' or 'linkinfos' - LinkInfo objects only
432 'urllinks' - UrlLink objects only
437 my %VALID_DATA = ('linkinfo' => 'linkinfo',
438 'linkinfos' => 'linkinfo',
439 'urllinks' => 'urllinks');
442 my ($self, $arg) = @_;
444 if (exists $VALID_DATA{$arg}) {
445 delete $self->{'_'.$arg.'_it'};
446 } elsif ($arg eq 'all') {
447 delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
452 # private methods and handlers
456 'IdList' => \
&_add_submitted_ids
,
457 'Id' => \
&_add_retrieved_ids
,
458 'LinkInfo' => \
&_add_linkinfo
,
459 'Link' => \
&_add_retrieved_ids
,
460 'ObjUrl' => \
&_add_objurls
,
464 my ($self, $data) = @_;
465 for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
466 next if !exists $data->{$key};
467 my $handler = $DATA_HANDLER{$key};
468 $self->$handler($data);
469 delete $data->{$key};
472 if ($self->datatype eq 'idcheck' && exists $data->{content
}) {
473 %{$self->{'_id'} } = ($data->{content
} => [1]);
474 delete $data->{content
}
476 map {$self->{'_'.lc $_} = $data->{$_}} keys %$data;
481 sub _add_submitted_ids
{
482 my ($self, $data) = @_;
483 if (exists $data->{IdList
}->{Id
}) {
484 @
{$self->{'_submitted_ids'}} = @
{$data->{IdList
}->{Id
}} ;
488 sub _add_retrieved_ids
{
489 my ($self, $data) = @_;
490 # map all IDs to deal with possible scores
491 # ID => {'count' = POSITION, 'score' => SCORE}
492 if (exists $data->{Link
}) {
494 for my $link (@
{$data->{Link
}}) {
495 if (exists $link->{Score
}) {
496 $self->{'_has_scores'}++;
497 $self->{'_id'}->{$link->{Id
}->[0]} = [ $ct++,$link->{Score
}];
499 $self->{'_id'}->{$link->{Id
}->[0]} = [ $ct++ ];
503 elsif (exists $data->{Id
}) { # urls
504 %{$self->{'_id'} } = ($data->{Id
}->[0] => [1]);
509 my ($self, $data) = @_;
510 for my $urldata (@
{$data->{ObjUrl
}}) {
511 $urldata->{dbfrom
} = $data->{DbFrom
} if exists $data->{DbFrom
};
512 my $obj = Bio
::Tools
::EUtilities
::Link
::UrlLink
->new(-eutil
=> 'elink',
513 -datatype
=> 'urldata',
514 -verbose
=> $self->verbose
516 $obj->_add_data($urldata);
517 push @
{$self->{'_urllinks'}}, $obj;
522 my ($self, $data) = @_;
523 for my $linkinfo (@
{$data->{LinkInfo
}}) {
524 $linkinfo->{dbfrom
} = $data->{DbFrom
} if exists $data->{DbFrom
};
525 my $obj = Bio
::Tools
::EUtilities
::Info
::LinkInfo
->new(-eutil
=> 'elink',
526 -datatype
=> 'linkinfo',
527 -verbose
=> $self->verbose
529 $obj->_add_data($linkinfo);
530 push @
{$self->{'_linkinfo'}}, $obj;
537 Usage : $foo->to_string()
538 Function : converts current object to string
540 Args : (optional) simple data for text formatting
541 Note : Used generally for debugging and for various print methods
547 my $level = shift || 0;
548 my $pad = 20 - $level;
550 my %tags = (1 => ['get_databases' => 'DB'],
551 2 => ['get_ids' => 'ID'],
552 3 => ['get_link_names' => 'Link Names'],
553 5 => ['get_submitted_ids' => 'Submitted IDs'],
554 6 => ['has_scores' => 'Scores?'],
555 7 => ['has_linkout' => 'LinkOut?'],
556 8 => ['has_neighbor' => 'DB Neighbors?'],
557 9 => ['get_webenv' => 'WebEnv'],
558 10 => ['get_query_key' => 'Key'],
561 for my $tag (sort {$a <=> $b} keys %tags) {
562 my ($m, $nm) = ($tags{$tag}->[0], $tags{$tag}->[1]);
563 # using this awkward little construct to deal with both lists and scalars
564 my @content = grep {defined $_} $self->$m();
565 next unless @content;
566 $string .= sprintf("%-*s%-*s%s\n",
569 $self->_text_wrap(':',
571 join(', ',@content)));
573 while (my $li = $self->next_LinkInfo) {
574 $string .= $li->to_string(4);
576 while (my $ui = $self->next_UrlLink) {
577 $string .= $ui->to_string(4);
579 if ($self->has_scores) {
580 $string .= "Scores:\n";
581 my %scores = $self->get_scores;
582 $string .= sprintf("%-*s%-*s%s\n",
584 $pad - 4, 'ID', 'Score'
586 for my $id ($self->get_ids) {
587 $string .= sprintf("%-*s%-*s%s\n",
589 $pad - 4, $id, $scores{$id}