scores
[bioperl-live.git] / Bio / Tools / EUtilities / Link / LinkSet.pm
blobad84c476f2a7eba2369b3da5f29d6da0c27add75
1 # $Id$
3 # BioPerl module for Bio::Tools::EUtilities::Link::LinkSet
5 # Cared for by Chris Fields
7 # Copyright Chris Fields
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 # Part of the EUtilities BioPerl package
15 =head1 NAME
17 Bio::Tools::EUtilities::Link::LinkSet
19 =head1 SYNOPSIS
21 # ...
23 =head1 DESCRIPTION
25 # ...
27 =head1 FEEDBACK
29 =head2 Mailing Lists
31 User feedback is an integral part of the
32 evolution of this and other Bioperl modules. Send
33 your comments and suggestions preferably to one
34 of the Bioperl mailing lists. Your participation
35 is much appreciated.
37 bioperl-l@lists.open-bio.org - General discussion
38 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
40 =head2 Reporting Bugs
42 Report bugs to the Bioperl bug tracking system to
43 help us keep track the bugs and their resolution.
44 Bug reports can be submitted via the web.
46 http://bugzilla.open-bio.org/
48 =head1 AUTHOR
50 Email cjfields at uiuc dot edu
52 =head1 APPENDIX
54 The rest of the documentation details each of the
55 object methods. Internal methods are usually
56 preceded with a _
58 =cut
60 # Let the code begin...
62 package Bio::Tools::EUtilities::Link::LinkSet;
63 use strict;
64 use warnings;
66 use base qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
67 use Bio::Tools::EUtilities::Link::UrlLink;
68 use Bio::Tools::EUtilities::Info::LinkInfo;
70 sub new {
71 my ($class,@args) = @_;
72 my $self = $class->SUPER::new(@args);
73 my ($type) = $self->_rearrange([qw(DATATYPE)],@args);
74 $type ||= 'linkset';
75 $self->eutil('elink');
76 $self->datatype($type);
77 return $self;
80 =head2 get_ids
82 Title : get_ids
83 Usage : my @ids = $linkset->get_ids
84 Function : returns list of retrieved IDs
85 Returns : array of IDs
86 Args : none
87 Notes : Cmd Description
88 acheck same as get_submitted_ids
89 lcheck same as get_submitted_ids
90 ncheck same as get_submitted_ids
91 prlinks same as get_submitted_ids
92 llinks same as get_submitted_ids
93 llinkslib same as get_submitted_ids
94 neighbor linked IDs for database in get_database
95 neighbor_history linked IDs for database in get_database
97 =cut
99 sub get_ids {
100 my $self = shift;
101 unless ($self->{'_sorted_id'}) {
102 @{$self->{'_sorted_id'}} =
103 sort {
104 $self->{'_id'}->{$a}->[0] <=>
105 $self->{'_id'}->{$b}->[0]
106 } keys %{$self->{'_id'}};
108 return @{$self->{'_sorted_id'}};
111 =head2 get_database
113 Title : get_database
114 Usage : my $db = $info->get_database;
115 Function : returns single database name (eutil-compatible). This is the
116 queried database. For elinks (which have 'db' and 'dbfrom')
117 this is equivalent to db/dbto (use get_dbfrom() to for the latter).
118 Note that this only returns the first db; in some cases this may
119 not be what you want (when multiple dbs are queried, for instance)
120 Returns : string
121 Args : none
122 Notes : with all elink cmd arguments
124 =cut
126 sub get_database {
127 return ($_[0]->get_databases)[0];
130 =head2 get_db (alias for get_database)
132 =cut
134 sub get_db {
135 return shift->get_database;
138 =head2 get_dbto (alias for get_database)
140 =cut
142 sub get_dbto {
143 return shift->get_database;
146 =head2 get_databases
148 Title : get_databases
149 Usage : my $string = $linkset->get_databases;
150 Function : retrieve databases referred to for this linkset
151 these may be present as a single database or embedded in
152 Returns : array of strings
153 Args : none
155 =cut
157 sub get_databases {
158 my $self = shift;
159 my %tmp;
160 my @dbs = sort map {$_->get_database}
161 grep {!$tmp{$_->get_database}++} ($self->get_LinkInfo);
162 unshift @dbs, $self->{'_dbto'} if $self->{'_dbto'} && !$tmp{$self->{'_dbto'}}++;
163 return @dbs;
166 =head2 get_dbs (alias for get_databases)
168 =cut
170 sub get_dbs {
171 return shift->get_databases;
174 =head2 get_dbfrom
176 Title : get_dbfrom
177 Usage : my $string = $linkset->get_dbfrom;
178 Function : retrieve originating database for this linkset
179 Returns : string
180 Args : none
182 =cut
184 sub get_dbfrom { return shift->{'_dbfrom'} }
186 =head2 get_link_names
188 Title : get_link_names
189 Usage : my $string = $linkset->get_link_names;
190 Function : retrieve eutil-compatible link names
191 Returns : array of strings
192 Args : none
193 Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
194 a link name). Also, some LinkSets define a single link name. This
195 returns an array with all unique linknames globbed both sources, if
196 present and defined
198 =cut
200 sub get_link_names {
201 my ($self) = shift;
202 my %tmps;
203 my @lns;
204 if ($self->{'_linkname'}) {
205 push @lns, $self->{'_linkname'};
206 $tmps{$self->{'_linkname'}}++;
208 push @lns, map {$_->get_link_name} $self->get_LinkInfo;
209 return @lns;
212 =head2 get_link_name
214 Title : get_link_name
215 Usage : my $string = $linkset->get_link_name;
216 Function : retrieve eutil-compatible link name
217 Returns : single link name
218 Args : none
220 =cut
222 sub get_link_name {
223 return ($_[0]->get_linknames)[0];
226 =head2 get_submitted_ids
228 Title : get_submitted_ids
229 Usage : my $string = $linkset->get_submitted_ids;
230 Function : retrieve original ID list
231 Returns : string
232 Args : none
234 =cut
236 sub get_submitted_ids {
237 my $self = shift;
238 my $datatype = $self->datatype;
239 if ($datatype eq 'idcheck' || $datatype eq 'urllink') {
240 return $self->get_ids;
241 } elsif ($self->{'_submitted_ids'}) {
242 return @{$self->{'_submitted_ids'}};
243 } else {
244 return ();
248 =head2 has_scores
250 Title : has_scores
251 Usage : if (my $linkset->has_scores) {...}
252 Function : returns TRUE if score data is present
253 Returns : Boolean
254 Args : none
256 =cut
258 sub has_scores {
259 my $self = shift;
260 return exists $self->{'_has_scores'} ? 1 : 0;
263 =head2 get_scores
265 Title : get_scores
266 Usage : %scores = $linkset->get_scores;
267 Function : returns flattened list or hash ref containing ID => score pairs
268 Returns : hash or hash ref (based on list or scalar context)
269 Args : none
271 =cut
273 sub get_scores {
274 my $self = shift;
275 # do we want to cache this or generate only when needed? Likely won't be
276 # called more than once...
277 return unless $self->has_scores;
278 my %scores = map {$_ => $self->{'_id'}->{$_}->[1]} keys %{$self->{'_id'}};
279 return %scores;
282 =head2 get_score_by_id
284 Title : get_score_by_id
285 Usage : $score = $linkset->get_score_by_id($id);
286 Function : returns the score for a particular primary ID
287 Returns : integer
288 Args : [REQUIRED] Primary ID for the score lookup
290 =cut
292 sub get_score_by_id {
293 my ($self, $id) = @_;
294 ($id && exists $self->{'_id'}->{$id}) ? return $self->{'_id'}->{$id}->[1] :
295 return;
298 =head2 has_linkout
300 Title : has_linkout
301 Usage : if ($linkset->has_linkout) {...}
302 Function : returns TRUE if the single ID present in this linkset has a linkout
303 Returns : boolean
304 Args : none
305 Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
306 cmd=acheck for databases with name 'LinkOut'
308 =cut
310 sub has_linkout {
311 my $self = shift;
312 if (exists $self->{'_haslinkout'}) {
313 return $self->{'_haslinkout'} eq 'Y' ? 1 : 0;
314 } else {
315 return (grep {$_ eq 'LinkOut'} $self->get_databases) ? 1 : 0;
319 =head2 has_neighbor
321 Title : has_neighbor
322 Usage : if ($linkset->has_neighbor) {...}
323 Function : returns TRUE if the single ID present in this linkset has a neighbor
324 in the same database
325 Returns : boolean
326 Args : none
327 Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
328 other checks performed at this time
330 =cut
332 sub has_neighbor {
333 my $self = shift;
334 if (exists $self->{'_hasneighbor'}) {
335 return $self->{'_hasneighbor'} eq 'Y' ? 1 : 0;
336 } else {
337 return 0;
341 =head2 next_UrlLink
343 Title : next_UrlLink
344 Usage : while (my $url = $linkset->next_UrlLink) {...}
345 Function : iterate through UrlLink objects
346 Returns : Bio::Tools::EUtilities::Link::UrlLink
347 Args :
349 =cut
351 sub next_UrlLink {
352 my $self = shift;
353 unless ($self->{"_urllinks_it"}) {
354 my @ul = $self->get_UrlLinks;
355 $self->{"_urllinks_it"} = sub {return shift @ul}
357 $self->{'_urllinks_it'}->();
360 =head2 get_UrlLinks
362 Title : get_UrlLinks
363 Usage : my @urls = $linkset->get_UrlLinks
364 Function : returns all UrlLink objects
365 Returns : list of Bio::Tools::EUtilities::Link::UrlLink
366 Args :
368 =cut
370 sub get_UrlLinks {
371 my $self = shift;
372 return ref $self->{'_urllinks'} ? @{ $self->{'_urllinks'} } : return;
375 =head2 next_LinkInfo
377 Title : next_LinkInfo
378 Usage : while (my $info = $linkset->next_LinkInfo) {...}
379 Function : iterate through LinkInfo objects
380 Returns : Bio::Tools::EUtilities::Link::LinkInfo
381 Args :
383 =cut
385 sub next_LinkInfo {
386 my $self = shift;
387 unless ($self->{"_linkinfo_it"}) {
388 my @li = $self->get_LinkInfo;
389 $self->{"_linkinfo_it"} = sub {return shift @li}
391 $self->{'_linkinfo_it'}->();
394 =head2 get_LinkInfo
396 Title : get_LinkInfo
397 Usage : my @links = $linkset->get_LinkInfo
398 Function : returns all LinkInfo objects
399 Returns : list of Bio::Tools::EUtilities::Link::LinkInfo
400 Args :
402 =cut
404 sub get_LinkInfo {
405 my $self = shift;
406 return ref $self->{'_linkinfo'} ? @{ $self->{'_linkinfo'} } : return ();
409 =head2 rewind
411 Title : rewind
412 Usage : $info->rewind() # rewinds all (default)
413 $info->rewind('links') # rewinds only links
414 Function : 'rewinds' (resets) specified interators (all if no arg)
415 Returns : none
416 Args : [OPTIONAL] String:
417 'all' - all iterators (default)
418 'linkinfo' or 'linkinfos' - LinkInfo objects only
419 'urllinks' - UrlLink objects only
421 =cut
424 my %VALID_DATA = ('linkinfo' => 'linkinfo',
425 'linkinfos' => 'linkinfo',
426 'urllinks' => 'urllinks');
428 sub rewind {
429 my ($self, $arg) = @_;
430 $arg ||= 'all';
431 if (exists $VALID_DATA{$arg}) {
432 delete $self->{'_'.$arg.'_it'};
433 } elsif ($arg eq 'all') {
434 delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
439 # private methods and handlers
442 my %DATA_HANDLER = (
443 'IdList' => \&_add_submitted_ids,
444 'Id' => \&_add_retrieved_ids,
445 'LinkInfo' => \&_add_linkinfo,
446 'Link' => \&_add_retrieved_ids,
447 'ObjUrl' => \&_add_objurls,
450 sub _add_data {
451 my ($self, $data) = @_;
452 for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
453 next if !exists $data->{$key};
454 my $handler = $DATA_HANDLER{$key};
455 $self->$handler($data);
456 delete $data->{$key};
458 # map the rest
459 if ($self->datatype eq 'idcheck' && exists $data->{content}) {
460 %{$self->{'_id'} } = ($data->{content} => [1]);
461 delete $data->{content}
463 map {$self->{'_'.lc $_} = $data->{$_}} keys %$data;
468 sub _add_submitted_ids {
469 my ($self, $data) = @_;
470 @{$self->{'_submitted_ids'}} = @{$data->{IdList}->{Id}} ;
473 sub _add_retrieved_ids {
474 my ($self, $data) = @_;
475 # map all IDs to deal with possible scores
476 # ID => {'count' = POSITION, 'score' => SCORE}
477 if (exists $data->{Link}) {
478 my $ct = 0;
479 for my $link (@{$data->{Link}}) {
480 if (exists $link->{Score}) {
481 $self->{'_has_scores'}++;
482 $self->{'_id'}->{$link->{Id}->[0]} = [ $ct++,$link->{Score}];
483 } else {
484 $self->{'_id'}->{$link->{Id}->[0]} = [ $ct++ ];
488 elsif (exists $data->{Id}) { # urls
489 %{$self->{'_id'} } = ($data->{Id}->[0] => [1]);
493 sub _add_objurls {
494 my ($self, $data) = @_;
495 for my $urldata (@{$data->{ObjUrl}}) {
496 $urldata->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
497 my $obj = Bio::Tools::EUtilities::Link::UrlLink->new(-eutil => 'elink',
498 -datatype => 'urldata',
499 -verbose => $self->verbose
501 $obj->_add_data($urldata);
502 push @{$self->{'_urllinks'}}, $obj;
506 sub _add_linkinfo {
507 my ($self, $data) = @_;
508 for my $linkinfo (@{$data->{LinkInfo}}) {
509 $linkinfo->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
510 my $obj = Bio::Tools::EUtilities::Info::LinkInfo->new(-eutil => 'elink',
511 -datatype => 'linkinfo',
512 -verbose => $self->verbose
514 $obj->_add_data($linkinfo);
515 push @{$self->{'_linkinfo'}}, $obj;
519 =head2 to_string
521 Title : to_string
522 Usage : $foo->to_string()
523 Function : converts current object to string
524 Returns : none
525 Args : (optional) simple data for text formatting
526 Note : Used generally for debugging and for various print methods
528 =cut
530 sub to_string {
531 my $self = shift;
532 my $level = shift || 0;
533 my $pad = 20 - $level;
534 # order method name
535 my %tags = (1 => ['get_databases' => 'DB'],
536 2 => ['get_ids' => 'ID'],
537 3 => ['get_link_names' => 'Link Names'],
538 5 => ['get_submitted_ids' => 'Submitted IDs'],
539 6 => ['has_scores' => 'Scores?'],
540 7 => ['has_linkout' => 'LinkOut?'],
541 8 => ['has_neighbor' => 'DB Neighbors?'],
542 9 => ['get_webenv' => 'WebEnv'],
543 10 => ['get_query_key' => 'Key'],
545 my $string;
546 for my $tag (sort {$a <=> $b} keys %tags) {
547 my ($m, $nm) = ($tags{$tag}->[0], $tags{$tag}->[1]);
548 # using this awkward little construct to deal with both lists and scalars
549 my @content = grep {defined $_} $self->$m();
550 next unless @content;
551 $string .= sprintf("%-*s%-*s%s\n",
552 $level, '',
553 $pad, $nm,
554 $self->_text_wrap(':',
555 ' ' x ($pad).':',
556 join(', ',@content)));
558 while (my $li = $self->next_LinkInfo) {
559 $string .= $li->to_string(4);
561 while (my $ui = $self->next_UrlLink) {
562 $string .= $ui->to_string(4);
564 if ($self->has_scores) {
565 $string .= "Scores:\n";
566 my %scores = $self->get_scores;
567 $string .= sprintf("%-*s%-*s%s\n",
568 $level + 4, '',
569 $pad - 4, 'ID', 'Score'
571 for my $id ($self->get_ids) {
572 $string .= sprintf("%-*s%-*s%s\n",
573 $level + 4, '',
574 $pad - 4, $id, $scores{$id}
578 $string .= "\n";
579 return $string;