tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Tools / EUtilities / Link / LinkSet.pm
blob885e3ac1d512de1021209003e2a309bd1d3116c8
1 # $Id$
3 # BioPerl module for Bio::Tools::EUtilities::Link::LinkSet
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chris Fields
9 # Copyright Chris Fields
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 # Part of the EUtilities BioPerl package
17 =head1 NAME
19 Bio::Tools::EUtilities::Link::LinkSet - class for EUtils LinkSets
21 =head1 SYNOPSIS
23 # ...
25 =head1 DESCRIPTION
27 # ...
29 =head1 FEEDBACK
31 =head2 Mailing Lists
33 User feedback is an integral part of the
34 evolution of this and other Bioperl modules. Send
35 your comments and suggestions preferably to one
36 of the Bioperl mailing lists. Your participation
37 is much appreciated.
39 bioperl-l@lists.open-bio.org - General discussion
40 http://www.bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to
56 help us keep track the bugs and their resolution.
57 Bug reports can be submitted via the web.
59 http://bugzilla.open-bio.org/
61 =head1 AUTHOR
63 Email cjfields at bioperl dot org
65 =head1 APPENDIX
67 The rest of the documentation details each of the
68 object methods. Internal methods are usually
69 preceded with a _
71 =cut
73 # Let the code begin...
75 package Bio::Tools::EUtilities::Link::LinkSet;
76 use strict;
77 use warnings;
79 use base qw(Bio::Root::Root Bio::Tools::EUtilities::HistoryI);
80 use Bio::Tools::EUtilities::Link::UrlLink;
81 use Bio::Tools::EUtilities::Info::LinkInfo;
83 sub new {
84 my ($class,@args) = @_;
85 my $self = $class->SUPER::new(@args);
86 my ($type) = $self->_rearrange([qw(DATATYPE)],@args);
87 $type ||= 'linkset';
88 $self->eutil('elink');
89 $self->datatype($type);
90 return $self;
93 =head2 get_ids
95 Title : get_ids
96 Usage : my @ids = $linkset->get_ids
97 Function : returns list of retrieved IDs
98 Returns : array of IDs
99 Args : none
100 Notes : Cmd Description
101 acheck same as get_submitted_ids
102 lcheck same as get_submitted_ids
103 ncheck same as get_submitted_ids
104 prlinks same as get_submitted_ids
105 llinks same as get_submitted_ids
106 llinkslib same as get_submitted_ids
107 neighbor linked IDs for database in get_database
108 neighbor_history linked IDs for database in get_database
110 =cut
112 sub get_ids {
113 my $self = shift;
114 unless ($self->{'_sorted_id'}) {
115 @{$self->{'_sorted_id'}} =
116 sort {
117 $self->{'_id'}->{$a}->[0] <=>
118 $self->{'_id'}->{$b}->[0]
119 } keys %{$self->{'_id'}};
121 return @{$self->{'_sorted_id'}};
124 =head2 get_database
126 Title : get_database
127 Usage : my $db = $info->get_database;
128 Function : returns single database name (eutil-compatible). This is the
129 queried database. For elinks (which have 'db' and 'dbfrom')
130 this is equivalent to db/dbto (use get_dbfrom() to for the latter).
131 Note that this only returns the first db; in some cases this may
132 not be what you want (when multiple dbs are queried, for instance)
133 Returns : string
134 Args : none
135 Notes : with all elink cmd arguments
137 =cut
139 sub get_database {
140 return ($_[0]->get_databases)[0];
143 =head2 get_db (alias for get_database)
145 =cut
147 sub get_db {
148 return shift->get_database;
151 =head2 get_dbto (alias for get_database)
153 =cut
155 sub get_dbto {
156 return shift->get_database;
159 =head2 get_databases
161 Title : get_databases
162 Usage : my $string = $linkset->get_databases;
163 Function : retrieve databases referred to for this linkset
164 these may be present as a single database or embedded in
165 Returns : array of strings
166 Args : none
168 =cut
170 sub get_databases {
171 my $self = shift;
172 my %tmp;
173 my @dbs = sort map {$_->get_database}
174 grep {!$tmp{$_->get_database}++} ($self->get_LinkInfo);
175 unshift @dbs, $self->{'_dbto'} if $self->{'_dbto'} && !$tmp{$self->{'_dbto'}}++;
176 return @dbs;
179 =head2 get_dbs (alias for get_databases)
181 =cut
183 sub get_dbs {
184 return shift->get_databases;
187 =head2 get_dbfrom
189 Title : get_dbfrom
190 Usage : my $string = $linkset->get_dbfrom;
191 Function : retrieve originating database for this linkset
192 Returns : string
193 Args : none
195 =cut
197 sub get_dbfrom { return shift->{'_dbfrom'} }
199 =head2 get_link_names
201 Title : get_link_names
202 Usage : my $string = $linkset->get_link_names;
203 Function : retrieve eutil-compatible link names
204 Returns : array of strings
205 Args : none
206 Notes : Each LinkSet can hold multiple LinkInfo objects (each containing
207 a link name). Also, some LinkSets define a single link name. This
208 returns an array with all unique linknames globbed both sources, if
209 present and defined
211 =cut
213 sub get_link_names {
214 my ($self) = shift;
215 my %tmps;
216 my @lns;
217 if ($self->{'_linkname'}) {
218 push @lns, $self->{'_linkname'};
219 $tmps{$self->{'_linkname'}}++;
221 push @lns, map {$_->get_link_name} $self->get_LinkInfo;
222 return @lns;
225 =head2 get_link_name
227 Title : get_link_name
228 Usage : my $string = $linkset->get_link_name;
229 Function : retrieve eutil-compatible link name
230 Returns : single link name
231 Args : none
233 =cut
235 sub get_link_name {
236 return ($_[0]->get_link_names)[0];
239 =head2 get_submitted_ids
241 Title : get_submitted_ids
242 Usage : my $string = $linkset->get_submitted_ids;
243 Function : retrieve original ID list
244 Returns : string
245 Args : none
247 =cut
249 sub get_submitted_ids {
250 my $self = shift;
251 my $datatype = $self->datatype;
252 if ($datatype eq 'idcheck' || $datatype eq 'urllink') {
253 return $self->get_ids;
254 } elsif ($self->{'_submitted_ids'}) {
255 return @{$self->{'_submitted_ids'}};
256 } else {
257 return ();
261 =head2 has_scores
263 Title : has_scores
264 Usage : if (my $linkset->has_scores) {...}
265 Function : returns TRUE if score data is present
266 Returns : Boolean
267 Args : none
269 =cut
271 sub has_scores {
272 my $self = shift;
273 return exists $self->{'_has_scores'} ? 1 : 0;
276 =head2 get_scores
278 Title : get_scores
279 Usage : %scores = $linkset->get_scores;
280 Function : returns flattened list or hash ref containing ID => score pairs
281 Returns : hash or hash ref (based on list or scalar context)
282 Args : none
284 =cut
286 sub get_scores {
287 my $self = shift;
288 # do we want to cache this or generate only when needed? Likely won't be
289 # called more than once...
290 return unless $self->has_scores;
291 my %scores = map {$_ => $self->{'_id'}->{$_}->[1]} keys %{$self->{'_id'}};
292 return %scores;
295 =head2 get_score_by_id
297 Title : get_score_by_id
298 Usage : $score = $linkset->get_score_by_id($id);
299 Function : returns the score for a particular primary ID
300 Returns : integer
301 Args : [REQUIRED] Primary ID for the score lookup
303 =cut
305 sub get_score_by_id {
306 my ($self, $id) = @_;
307 ($id && exists $self->{'_id'}->{$id}) ? return $self->{'_id'}->{$id}->[1] :
308 return;
311 =head2 has_linkout
313 Title : has_linkout
314 Usage : if ($linkset->has_linkout) {...}
315 Function : returns TRUE if the single ID present in this linkset has a linkout
316 Returns : boolean
317 Args : none
318 Notes : this checks cmd=lcheck (boolean for a linkout) and also backchecks
319 cmd=acheck for databases with name 'LinkOut'
321 =cut
323 sub has_linkout {
324 my $self = shift;
325 if (exists $self->{'_haslinkout'}) {
326 return $self->{'_haslinkout'} eq 'Y' ? 1 : 0;
327 } else {
328 return (grep {$_ eq 'LinkOut'} $self->get_databases) ? 1 : 0;
332 =head2 has_neighbor
334 Title : has_neighbor
335 Usage : if ($linkset->has_neighbor) {...}
336 Function : returns TRUE if the single ID present in this linkset has a neighbor
337 in the same database
338 Returns : boolean
339 Args : none
340 Notes : this checks cmd=ncheck (boolean for a neighbor in same database); no
341 other checks performed at this time
343 =cut
345 sub has_neighbor {
346 my $self = shift;
347 if (exists $self->{'_hasneighbor'}) {
348 return $self->{'_hasneighbor'} eq 'Y' ? 1 : 0;
349 } else {
350 return 0;
354 =head2 next_UrlLink
356 Title : next_UrlLink
357 Usage : while (my $url = $linkset->next_UrlLink) {...}
358 Function : iterate through UrlLink objects
359 Returns : Bio::Tools::EUtilities::Link::UrlLink
360 Args :
362 =cut
364 sub next_UrlLink {
365 my $self = shift;
366 unless ($self->{"_urllinks_it"}) {
367 my @ul = $self->get_UrlLinks;
368 $self->{"_urllinks_it"} = sub {return shift @ul}
370 $self->{'_urllinks_it'}->();
373 =head2 get_UrlLinks
375 Title : get_UrlLinks
376 Usage : my @urls = $linkset->get_UrlLinks
377 Function : returns all UrlLink objects
378 Returns : list of Bio::Tools::EUtilities::Link::UrlLink
379 Args :
381 =cut
383 sub get_UrlLinks {
384 my $self = shift;
385 return ref $self->{'_urllinks'} ? @{ $self->{'_urllinks'} } : return;
388 =head2 next_LinkInfo
390 Title : next_LinkInfo
391 Usage : while (my $info = $linkset->next_LinkInfo) {...}
392 Function : iterate through LinkInfo objects
393 Returns : Bio::Tools::EUtilities::Link::LinkInfo
394 Args :
396 =cut
398 sub next_LinkInfo {
399 my $self = shift;
400 unless ($self->{"_linkinfo_it"}) {
401 my @li = $self->get_LinkInfo;
402 $self->{"_linkinfo_it"} = sub {return shift @li}
404 $self->{'_linkinfo_it'}->();
407 =head2 get_LinkInfo
409 Title : get_LinkInfo
410 Usage : my @links = $linkset->get_LinkInfo
411 Function : returns all LinkInfo objects
412 Returns : list of Bio::Tools::EUtilities::Link::LinkInfo
413 Args :
415 =cut
417 sub get_LinkInfo {
418 my $self = shift;
419 return ref $self->{'_linkinfo'} ? @{ $self->{'_linkinfo'} } : return ();
422 =head2 rewind
424 Title : rewind
425 Usage : $info->rewind() # rewinds all (default)
426 $info->rewind('links') # rewinds only links
427 Function : 'rewinds' (resets) specified interators (all if no arg)
428 Returns : none
429 Args : [OPTIONAL] String:
430 'all' - all iterators (default)
431 'linkinfo' or 'linkinfos' - LinkInfo objects only
432 'urllinks' - UrlLink objects only
434 =cut
437 my %VALID_DATA = ('linkinfo' => 'linkinfo',
438 'linkinfos' => 'linkinfo',
439 'urllinks' => 'urllinks');
441 sub rewind {
442 my ($self, $arg) = @_;
443 $arg ||= 'all';
444 if (exists $VALID_DATA{$arg}) {
445 delete $self->{'_'.$arg.'_it'};
446 } elsif ($arg eq 'all') {
447 delete $self->{'_'.$_.'_it'} for values %VALID_DATA;
452 # private methods and handlers
455 my %DATA_HANDLER = (
456 'IdList' => \&_add_submitted_ids,
457 'Id' => \&_add_retrieved_ids,
458 'LinkInfo' => \&_add_linkinfo,
459 'Link' => \&_add_retrieved_ids,
460 'ObjUrl' => \&_add_objurls,
463 sub _add_data {
464 my ($self, $data) = @_;
465 for my $key (qw(IdList Link Id ObjUrl LinkInfo)) {
466 next if !exists $data->{$key};
467 my $handler = $DATA_HANDLER{$key};
468 $self->$handler($data);
469 delete $data->{$key};
471 # map the rest
472 if ($self->datatype eq 'idcheck' && exists $data->{content}) {
473 %{$self->{'_id'} } = ($data->{content} => [1]);
474 delete $data->{content}
476 map {$self->{'_'.lc $_} = $data->{$_}} keys %$data;
481 sub _add_submitted_ids {
482 my ($self, $data) = @_;
483 if (exists $data->{IdList}->{Id}) {
484 @{$self->{'_submitted_ids'}} = @{$data->{IdList}->{Id}} ;
488 sub _add_retrieved_ids {
489 my ($self, $data) = @_;
490 # map all IDs to deal with possible scores
491 # ID => {'count' = POSITION, 'score' => SCORE}
492 if (exists $data->{Link}) {
493 my $ct = 0;
494 for my $link (@{$data->{Link}}) {
495 if (exists $link->{Score}) {
496 $self->{'_has_scores'}++;
497 $self->{'_id'}->{$link->{Id}->[0]} = [ $ct++,$link->{Score}];
498 } else {
499 $self->{'_id'}->{$link->{Id}->[0]} = [ $ct++ ];
503 elsif (exists $data->{Id}) { # urls
504 %{$self->{'_id'} } = ($data->{Id}->[0] => [1]);
508 sub _add_objurls {
509 my ($self, $data) = @_;
510 for my $urldata (@{$data->{ObjUrl}}) {
511 $urldata->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
512 my $obj = Bio::Tools::EUtilities::Link::UrlLink->new(-eutil => 'elink',
513 -datatype => 'urldata',
514 -verbose => $self->verbose
516 $obj->_add_data($urldata);
517 push @{$self->{'_urllinks'}}, $obj;
521 sub _add_linkinfo {
522 my ($self, $data) = @_;
523 for my $linkinfo (@{$data->{LinkInfo}}) {
524 $linkinfo->{dbfrom} = $data->{DbFrom} if exists $data->{DbFrom};
525 my $obj = Bio::Tools::EUtilities::Info::LinkInfo->new(-eutil => 'elink',
526 -datatype => 'linkinfo',
527 -verbose => $self->verbose
529 $obj->_add_data($linkinfo);
530 push @{$self->{'_linkinfo'}}, $obj;
534 =head2 to_string
536 Title : to_string
537 Usage : $foo->to_string()
538 Function : converts current object to string
539 Returns : none
540 Args : (optional) simple data for text formatting
541 Note : Used generally for debugging and for various print methods
543 =cut
545 sub to_string {
546 my $self = shift;
547 my $level = shift || 0;
548 my $pad = 20 - $level;
549 # order method name
550 my %tags = (1 => ['get_databases' => 'DB'],
551 2 => ['get_ids' => 'ID'],
552 3 => ['get_link_names' => 'Link Names'],
553 5 => ['get_submitted_ids' => 'Submitted IDs'],
554 6 => ['has_scores' => 'Scores?'],
555 7 => ['has_linkout' => 'LinkOut?'],
556 8 => ['has_neighbor' => 'DB Neighbors?'],
557 9 => ['get_webenv' => 'WebEnv'],
558 10 => ['get_query_key' => 'Key'],
560 my $string;
561 for my $tag (sort {$a <=> $b} keys %tags) {
562 my ($m, $nm) = ($tags{$tag}->[0], $tags{$tag}->[1]);
563 # using this awkward little construct to deal with both lists and scalars
564 my @content = grep {defined $_} $self->$m();
565 next unless @content;
566 $string .= sprintf("%-*s%-*s%s\n",
567 $level, '',
568 $pad, $nm,
569 $self->_text_wrap(':',
570 ' ' x ($pad).':',
571 join(', ',@content)));
573 while (my $li = $self->next_LinkInfo) {
574 $string .= $li->to_string(4);
576 while (my $ui = $self->next_UrlLink) {
577 $string .= $ui->to_string(4);
579 if ($self->has_scores) {
580 $string .= "Scores:\n";
581 my %scores = $self->get_scores;
582 $string .= sprintf("%-*s%-*s%s\n",
583 $level + 4, '',
584 $pad - 4, 'ID', 'Score'
586 for my $id ($self->get_ids) {
587 $string .= sprintf("%-*s%-*s%s\n",
588 $level + 4, '',
589 $pad - 4, $id, $scores{$id}
593 $string .= "\n";
594 return $string;