3 # BioPerl module for Bio::Tools::EUtilities::Link::LinkSet
5 # Cared for by Chris Fields
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 # Part of the EUtilities BioPerl package
17 Bio::Tools::EUtilities::Link::LinkSet
31 User feedback is an integral part of the
32 evolution of this and other Bioperl modules. Send
33 your comments and suggestions preferably to one
34 of the Bioperl mailing lists. Your participation
37 bioperl-l@lists.open-bio.org - General discussion
38 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
42 Report bugs to the Bioperl bug tracking system to
43 help us keep track the bugs and their resolution.
44 Bug reports can be submitted via the web.
46 http://bugzilla.open-bio.org/
50 Email cjfields at uiuc dot edu
54 The rest of the documentation details each of the
55 object methods. Internal methods are usually
60 # Let the code begin...
62 package Bio
::Tools
::EUtilities
::Link
::LinkSet
;
66 use base
qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
67 use Bio
::Tools
::EUtilities
::Link
::UrlLink
;
68 use Bio
::Tools
::EUtilities
::Info
::LinkInfo
;
71 my ($class,@args) = @_;
72 my $self = $class->SUPER::new
(@args);
73 my ($type) = $self->_rearrange([qw(DATATYPE)],@args);
75 $self->eutil('elink');
76 $self->datatype($type);
83 Usage : my @ids = $linkset->get_ids
84 Function : returns list of retrieved IDs
85 Returns : array of IDs
87 Notes : Cmd Description
88 acheck same as get_submitted_ids
89 lcheck same as get_submitted_ids
90 ncheck same as get_submitted_ids
91 prlinks same as get_submitted_ids
92 llinks same as get_submitted_ids
93 llinkslib same as get_submitted_ids
94 neighbor linked IDs for database in get_database
95 neighbor_history linked IDs for database in get_database
101 unless ($self->{'_sorted_id'}) {
102 @
{$self->{'_sorted_id'}} =
104 $self->{'_id'}->{$a}->[0] <=>
105 $self->{'_id'}->{$b}->[0]
106 } keys %{$self->{'_id'}};
108 return @
{$self->{'_sorted_id'}};
114 Usage : my $db = $info->get_database;
115 Function : returns single database name (eutil-compatible). This is the
116 queried database. For elinks (which have 'db' and 'dbfrom')
117 this is equivalent to db/dbto (use get_dbfrom() to for the latter).
118 Note that this only returns the first db; in some cases this may
119 not be what you want (when multiple dbs are queried, for instance)
122 Notes : with all elink cmd arguments
127 return ($_[0]->get_databases)[0];
130 =head2 get_db (alias for get_database)
135 return shift->get_database;
138 =head2 get_dbto (alias for get_database)
143 return shift->get_database;
148 Title : get_databases
149 Usage : my $string = $linkset->get_databases;
150 Function : retrieve databases referred to for this linkset
151 these may be present as a single database or embedded in
152 Returns : array of strings
160 my @dbs = sort map {$_->get_database}
161 grep {!$tmp{$_->get_database}++} ($self->get_LinkInfo);
162 unshift @dbs, $self->{'_dbto'} if $self->{'_dbto'} && !$tmp{$self->{'_dbto'}}++;
166 =head2 get_dbs (alias for get_databases)
171 return shift->get_databases;
177 Usage : my $string = $linkset->get_dbfrom;
178 Function : retrieve originating database for this linkset
184 sub get_dbfrom
{ return shift->{'_dbfrom'} }
186 =head2 get_link_names
188 Title : get_link_names
189 Usage : my $string = $linkset->get_link_names;
190 Function : retrieve eutil-compatible link names
191 Returns : array of strings
193 Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
194 a link name). Also, some LinkSets define a single link name. This
195 returns an array with all unique linknames globbed both sources, if
204 if ($self->{'_linkname'}) {
205 push @lns, $self->{'_linkname'};
206 $tmps{$self->{'_linkname'}}++;
208 push @lns, map {$_->get_link_name} $self->get_LinkInfo;
214 Title : get_link_name
215 Usage : my $string = $linkset->get_link_name;
216 Function : retrieve eutil-compatible link name
217 Returns : single link name
223 return ($_[0]->get_linknames)[0];
226 =head2 get_submitted_ids
228 Title : get_submitted_ids
229 Usage : my $string = $linkset->get_submitted_ids;
230 Function : retrieve original ID list
236 sub get_submitted_ids
{
238 my $datatype = $self->datatype;
239 if ($datatype eq 'idcheck' || $datatype eq 'urllink') {
240 return $self->get_ids;
241 } elsif ($self->{'_submitted_ids'}) {
242 return @
{$self->{'_submitted_ids'}};
251 Usage : if (my $linkset->has_scores) {...}
252 Function : returns TRUE if score data is present
260 return exists $self->{'_has_scores'} ?
1 : 0;
266 Usage : %scores = $linkset->get_scores;
267 Function : returns flattened list or hash ref containing ID => score pairs
268 Returns : hash or hash ref (based on list or scalar context)
275 # do we want to cache this or generate only when needed? Likely won't be
276 # called more than once...
277 return unless $self->has_scores;
278 my %scores = map {$_ => $self->{'_id'}->{$_}->[1]} keys %{$self->{'_id'}};
282 =head2 get_score_by_id
284 Title : get_score_by_id
285 Usage : $score = $linkset->get_score_by_id($id);
286 Function : returns the score for a particular primary ID
288 Args : [REQUIRED] Primary ID for the score lookup
292 sub get_score_by_id
{
293 my ($self, $id) = @_;
294 ($id && exists $self->{'_id'}->{$id}) ?
return $self->{'_id'}->{$id}->[1] :
301 Usage : if ($linkset->has_linkout) {...}
302 Function : returns TRUE if the single ID present in this linkset has a linkout
305 Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
306 cmd=acheck for databases with name 'LinkOut'
312 if (exists $self->{'_haslinkout'}) {
313 return $self->{'_haslinkout'} eq 'Y' ?
1 : 0;
315 return (grep {$_ eq 'LinkOut'} $self->get_databases) ?
1 : 0;
322 Usage : if ($linkset->has_neighbor) {...}
323 Function : returns TRUE if the single ID present in this linkset has a neighbor
327 Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
328 other checks performed at this time
334 if (exists $self->{'_hasneighbor'}) {
335 return $self->{'_hasneighbor'} eq 'Y' ?
1 : 0;
344 Usage : while (my $url = $linkset->next_UrlLink) {...}
345 Function : iterate through UrlLink objects
346 Returns : Bio::Tools::EUtilities::Link::UrlLink
353 unless ($self->{"_urllinks_it"}) {
354 my @ul = $self->get_UrlLinks;
355 $self->{"_urllinks_it"} = sub {return shift @ul}
357 $self->{'_urllinks_it'}->();
363 Usage : my @urls = $linkset->get_UrlLinks
364 Function : returns all UrlLink objects
365 Returns : list of Bio::Tools::EUtilities::Link::UrlLink
372 return ref $self->{'_urllinks'} ? @
{ $self->{'_urllinks'} } : return;
377 Title : next_LinkInfo
378 Usage : while (my $info = $linkset->next_LinkInfo) {...}
379 Function : iterate through LinkInfo objects
380 Returns : Bio::Tools::EUtilities::Link::LinkInfo
387 unless ($self->{"_linkinfo_it"}) {
388 my @li = $self->get_LinkInfo;
389 $self->{"_linkinfo_it"} = sub {return shift @li}
391 $self->{'_linkinfo_it'}->();
397 Usage : my @links = $linkset->get_LinkInfo
398 Function : returns all LinkInfo objects
399 Returns : list of Bio::Tools::EUtilities::Link::LinkInfo
406 return ref $self->{'_linkinfo'} ? @
{ $self->{'_linkinfo'} } : return ();
412 Usage : $info->rewind() # rewinds all (default)
413 $info->rewind('links') # rewinds only links
414 Function : 'rewinds' (resets) specified interators (all if no arg)
416 Args : [OPTIONAL] String:
417 'all' - all iterators (default)
418 'linkinfo' or 'linkinfos' - LinkInfo objects only
419 'urllinks' - UrlLink objects only
424 my %VALID_DATA = ('linkinfo' => 'linkinfo',
425 'linkinfos' => 'linkinfo',
426 'urllinks' => 'urllinks');
429 my ($self, $arg) = @_;
431 if (exists $VALID_DATA{$arg}) {
432 delete $self->{'_'.$arg.'_it'};
433 } elsif ($arg eq 'all') {
434 delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
439 # private methods and handlers
443 'IdList' => \
&_add_submitted_ids
,
444 'Id' => \
&_add_retrieved_ids
,
445 'LinkInfo' => \
&_add_linkinfo
,
446 'Link' => \
&_add_retrieved_ids
,
447 'ObjUrl' => \
&_add_objurls
,
451 my ($self, $data) = @_;
452 for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
453 next if !exists $data->{$key};
454 my $handler = $DATA_HANDLER{$key};
455 $self->$handler($data);
456 delete $data->{$key};
459 if ($self->datatype eq 'idcheck' && exists $data->{content
}) {
460 %{$self->{'_id'} } = ($data->{content
} => [1]);
461 delete $data->{content
}
463 map {$self->{'_'.lc $_} = $data->{$_}} keys %$data;
468 sub _add_submitted_ids
{
469 my ($self, $data) = @_;
470 @
{$self->{'_submitted_ids'}} = @
{$data->{IdList
}->{Id
}} ;
473 sub _add_retrieved_ids
{
474 my ($self, $data) = @_;
475 # map all IDs to deal with possible scores
476 # ID => {'count' = POSITION, 'score' => SCORE}
477 if (exists $data->{Link
}) {
479 for my $link (@
{$data->{Link
}}) {
480 if (exists $link->{Score
}) {
481 $self->{'_has_scores'}++;
482 $self->{'_id'}->{$link->{Id
}->[0]} = [ $ct++,$link->{Score
}];
484 $self->{'_id'}->{$link->{Id
}->[0]} = [ $ct++ ];
488 elsif (exists $data->{Id
}) { # urls
489 %{$self->{'_id'} } = ($data->{Id
}->[0] => [1]);
494 my ($self, $data) = @_;
495 for my $urldata (@
{$data->{ObjUrl
}}) {
496 $urldata->{dbfrom
} = $data->{DbFrom
} if exists $data->{DbFrom
};
497 my $obj = Bio
::Tools
::EUtilities
::Link
::UrlLink
->new(-eutil
=> 'elink',
498 -datatype
=> 'urldata',
499 -verbose
=> $self->verbose
501 $obj->_add_data($urldata);
502 push @
{$self->{'_urllinks'}}, $obj;
507 my ($self, $data) = @_;
508 for my $linkinfo (@
{$data->{LinkInfo
}}) {
509 $linkinfo->{dbfrom
} = $data->{DbFrom
} if exists $data->{DbFrom
};
510 my $obj = Bio
::Tools
::EUtilities
::Info
::LinkInfo
->new(-eutil
=> 'elink',
511 -datatype
=> 'linkinfo',
512 -verbose
=> $self->verbose
514 $obj->_add_data($linkinfo);
515 push @
{$self->{'_linkinfo'}}, $obj;
522 Usage : $foo->to_string()
523 Function : converts current object to string
525 Args : (optional) simple data for text formatting
526 Note : Used generally for debugging and for various print methods
532 my $level = shift || 0;
533 my $pad = 20 - $level;
535 my %tags = (1 => ['get_databases' => 'DB'],
536 2 => ['get_ids' => 'ID'],
537 3 => ['get_link_names' => 'Link Names'],
538 5 => ['get_submitted_ids' => 'Submitted IDs'],
539 6 => ['has_scores' => 'Scores?'],
540 7 => ['has_linkout' => 'LinkOut?'],
541 8 => ['has_neighbor' => 'DB Neighbors?'],
542 9 => ['get_webenv' => 'WebEnv'],
543 10 => ['get_query_key' => 'Key'],
546 for my $tag (sort {$a <=> $b} keys %tags) {
547 my ($m, $nm) = ($tags{$tag}->[0], $tags{$tag}->[1]);
548 # using this awkward little construct to deal with both lists and scalars
549 my @content = grep {defined $_} $self->$m();
550 next unless @content;
551 $string .= sprintf("%-*s%-*s%s\n",
554 $self->_text_wrap(':',
556 join(', ',@content)));
558 while (my $li = $self->next_LinkInfo) {
559 $string .= $li->to_string(4);
561 while (my $ui = $self->next_UrlLink) {
562 $string .= $ui->to_string(4);
564 if ($self->has_scores) {
565 $string .= "Scores:\n";
566 my %scores = $self->get_scores;
567 $string .= sprintf("%-*s%-*s%s\n",
569 $pad - 4, 'ID', 'Score'
571 for my $id ($self->get_ids) {
572 $string .= sprintf("%-*s%-*s%s\n",
574 $pad - 4, $id, $scores{$id}