tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / DB / HIV / HIVQueryHelper.pm
blob67922d03345ef551c16d7fecc5bd0c27d0c089b6
1 # $Id: HIVQueryHelper.pm 231 2008-12-11 14:32:00Z maj $
3 # BioPerl module for Bio::DB::HIV::HIVQueryHelper
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mark A. Jensen <maj@fortinbras.us>
9 # Copyright Mark A. Jensen
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::DB::HIV::HIVQueryHelper - Routines and packages used by Bio::DB::HIV and
18 Bio::DB::Query::HIVQuery
20 =head1 SYNOPSIS
22 Used in Bio::DB::Query::HIVQuery. No need to use directly.
24 =head1 DESCRIPTION
26 C<Bio::DB::HIV::HIVQueryHelper> contains a number of packages for use
27 by L<Bio::DB::Query::HIVQuery>. Package C<HIVSchema> parses the
28 C<lanl-schema.xml> file, and allows access to it in the context of the
29 relational database it represents (see APPENDIX for excruciating
30 detail). Packages C<QRY>, C<R>, and C<Q> together create the query
31 string parser that enables NCBI-like queries to be understood by
32 C<Bio::DB::Query::HIVQuery>. They provide objects and operators to
33 perform and simplify logical expressions involving C<AND>, C<OR>, and
34 C<()> and return hash structures that can be handled by
35 C<Bio::DB::Query::HIVQuery> routines.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via
63 the web:
65 http://bugzilla.open-bio.org/
67 =head1 AUTHOR - Mark A. Jensen
69 Email maj@fortinbras.us
71 =head1 CONTRIBUTORS
73 Mark A. Jensen
75 =head1 APPENDIX
77 The rest of the documentation details each of the contained packages.
78 Internal methods are usually preceded with a _
80 =cut
82 # Let the code begin...
84 package Bio::DB::HIV::HIVQueryHelper;
85 use strict;
86 use Bio::Root::Root;
88 # globals
89 BEGIN {
90 #exceptions
91 @Bio::QueryStringSyntax::Exception::ISA = qw( Bio::Root::Exception);
96 =head2 HIVSchema - objects/methods to manipulate a version of the LANL HIV DB schema
98 =head3 HIVSchema SYNOPSIS
100 $schema = new HIVSchema( 'lanl-schema.xml' );
101 @tables = $schema->tables;
102 @validFields = $schema->fields;
103 @validAliases = $schema->aliases;
104 @query_aliases_for_coreceptor = $schema->aliases( 'SEQ_SAMple.SSAM_second_receptor' );
105 $pk_for_SequenceEntry = $schema->primarykey('SequenceEntry'); # returns 'SequenceEntry.SE_id'
106 $fk_for_SEQ_SAMple_to_SequenceEntry =
107 $schema->foreignkey('SEQ_SAMple', 'SequenceEntry'); # returns 'SEQ_SAMple.SSAM_SE_id'
109 $table = $schema->tablepart('SEQ_SAMple.SSAM_badseq'); # returns 'SEQ_SAMple'
110 $column = $schema->columnpart('SEQ_SAMple.SSAM_badseq'); # returns 'SSAM_badseq'
112 =head3 HIVSchema DESCRIPTION
114 HIVSchema methods are used in L<Bio::DB::Query::HIVQuery> for table,
115 column, primary/foreign key manipulations based on the observed Los
116 Alamos HIV Sequence Database (LANL DB) naming conventions for their
117 CGI parameters. The schema is contained in an XML file
118 (C<lanl-schema.xml>) which is read into an HIVSchema object, in turn a
119 property of the HIVQuery object. HIVSchema methods are used to build
120 correct cgi queries in a way that attempts to preserve the context of
121 the relational database the query parameters represent.
123 =cut
125 package HIVSchema;
126 # objects/methods to manipulate a version of the LANL HIV DB schema
127 # stored in XML
128 use XML::Simple;
129 use Bio::Root::Root;
130 use strict;
132 ### constructor
134 =head3 HIVSchema CONSTRUCTOR
136 =head4 HIVSchema::new
138 Title : new
139 Usage : $schema = new HIVSchema( "lanl-schema.xml ");
140 Function:
141 Example :
142 Returns : an HIVSchema object
143 Args : XML filename
145 =cut
147 sub new {
148 my $class = shift;
149 my @args = @_;
150 my $self = {};
151 if ($args[0]) {
152 $self->{schema_ref} = loadHIVSchema($args[0]);
154 bless($self, $class);
155 return $self;
158 ### object methods
160 =head3 HIVSchema INSTANCE METHODS
162 =head4 HIVSchema tables
164 Title : tables
165 Usage : $schema->tables()
166 Function: get all table names in schema
167 Example :
168 Returns : array of table names
169 Args : none
171 =cut
173 sub tables {
174 # return array of all tables in schema
175 local $_;
176 my $self = shift;
177 my $sref = $self->{schema_ref};
178 Bio::Root::Root->throw("schema not initialized") unless $sref;
179 my @k = grep(/\./, keys %$sref);
180 my %ret;
181 foreach (@k) {
182 s/\..*$//;
183 $ret{$_}++;
185 @k = sort keys %ret;
186 return @k;
189 =head4 HIVSchema columns
191 Title : columns
192 Usage : $schema->columns( [$tablename] );
193 Function: return array of columns for specified table, or all columns in
194 schema, if called w/o args
195 Example :
196 Returns :
197 Args : tablename or fieldname string
199 =cut
201 sub columns {
202 # return array of columns for specified table
203 # all columns in schema, if called w/o args
204 local $_;
205 my $self = shift;
206 my ($tbl) = @_;
207 my $sref = $self->{schema_ref};
208 Bio::Root::Root->throw("schema not initialized") unless $sref;
209 # trim column name
210 $tbl =~ s/\..*$//;
211 # check if table exists
212 return () unless grep(/^$tbl$/i, $self->tables);
213 my @k = sort keys %$sref;
214 @k = grep (/^$tbl\./i, @k);
215 foreach (@k) {
216 s/^$tbl\.//;
218 return @k;
221 =head4 HIVSchema fields
223 Title : fields
224 Usage : $schema->fields();
225 Function: return array of all fields in schema, in format "table.column"
226 Example :
227 Returns : array of all fields
228 Args : none
230 =cut
232 sub fields {
233 # return array of all fields (Table.Column format) in schema
234 my $self = shift;
235 my $sref = $self->{schema_ref};
236 Bio::Root::Root->throw("schema not initialized") unless $sref;
237 my @k = sort keys %{$sref};
238 return @k;
241 =head4 HIVSchema options
243 Title : options
244 Usage : $schema->options(@fieldnames)
245 Function: get array of options (i.e., valid match data strings) available
246 to specified field
247 Example :
248 Returns : array of match data strings
249 Args : [array of] fieldname string[s] in "table.column" format
251 =cut
253 sub options {
254 # return array of options available to specified field
255 my $self = shift;
256 my ($sfield) = @_;
257 my $sref = $self->{schema_ref};
258 Bio::Root::Root->throw("schema not initialized") unless $sref;
259 return $$sref{$sfield}{option} ? @{$$sref{$sfield}{option}} : ();
262 =head4 HIVSchema aliases
264 Title : aliases
265 Usage : $schema->aliases(@fieldnames)
266 Function: get array of aliases to specified field[s]
267 Example :
268 Returns : array of valid query aliases for fields as spec'd in XML file
269 Args : [an array of] fieldname[s] in "table.column" format
271 =cut
273 sub aliases {
274 # return array of aliases to specified field
275 my $self = shift;
276 my ($sfield) = @_;
277 my $sref = $self->{schema_ref};
278 my @ret;
279 Bio::Root::Root->throw("schema not initialized") unless $sref;
280 if ($sfield) {
281 return $$sref{$sfield}{alias} ? @{$$sref{$sfield}{alias}} : ();
283 else { # all valid aliases
284 map {push @ret, @{$$sref{$_}{alias}} if $$sref{$_}{alias}} $self->fields;
285 return @ret;
289 =head4 HIVSchema ankh
291 Title : ankh (annotation key hash)
292 Usage : $schema->ankh(@fieldnames)
293 Function: return a hash translating fields to annotation keys for the
294 spec'd fields.
295 (Annotation keys are used for parsing the tab-delimited response
296 to Bio::DB::Query::HIVQuery::_do_lanl_request.)
297 Example :
298 Returns : hash ref
299 Args : [an array of] fieldname[s] in "table.column" format
301 =cut
303 sub ankh {
304 # return hash translating sfields to annotation keys for specified sfield(s)
305 my $self = shift;
306 my %ret = ();
307 my @sfields = @_;
308 my $sref = $self->{schema_ref};
309 Bio::Root::Root->throw("schema not initialized") unless $sref;
310 foreach (@sfields) {
311 next unless $$sref{$_}{ankey};
312 $ret{$_} = {'ankey'=>$$sref{$_}{ankey},'antype'=>$$sref{$_}{antype}};
314 return %ret;
317 =head4 HIVSchema tablepart
319 Title : tablepart (alias: tbl)
320 Usage : $schema->tbl(@fieldnames)
321 Function: return the portion of the fieldname[s] that refer to the
322 db table
323 Example : $schema->tbl('SequenceEntry.SE_id'); # returns 'SequenceEntry'
324 Returns : table name as string
325 Args : [an array of] fieldname[s] in "table.column" format
327 =cut
329 sub tablepart {
330 # return the 'Table' part of the specified field(s)
331 my $self = shift;
332 my @sfields = @_;
333 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
334 my ($squish,@ret, %ret);
335 if ($sfields[0] eq '-s') {
336 # squish : remove duplicates from the returned array
337 $squish=1;
338 shift @sfields;
340 foreach (@sfields) {
341 push @ret, /^(.*)\./;
343 if ($squish) {
344 # arg order is clobbered
345 @ret{@ret} = undef;
346 @ret = keys %ret;
348 return (wantarray ? @ret : $ret[0]);
351 sub tbl {
352 # tablepart alias
353 shift->tablepart(@_);
356 =head4 HIVSchema columnpart
358 Title : columnpart (alias: col)
359 Usage : $schema->col(@fieldnames)
360 Function: return the portion of the fieldname[s] that refer to the
361 db column
362 Example : $schema->col('SequenceEntry.SE_id'); # returns 'SE_id'
363 Returns : column name as string
364 Args : [an array of] fieldname[s] in "table.column" format
366 =cut
368 sub columnpart {
369 # return the 'Column' part of the specified field(s)
370 my $self = shift;
371 my @sfields = @_;
372 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
373 my @ret;
374 foreach (@sfields) {
375 push @ret, /\.(.*)$/;
377 return (wantarray ? @ret : $ret[0]);
380 sub col {
381 # columnpart alias
382 shift->columnpart(@_);
385 =head4 HIVSchema primarykey
387 Title : primarykey [alias: pk]
388 Usage : $schema->pk(@tablenames);
389 Function: return the primary key of the specified table[s], as judged by
390 the syntax of the table's[s'] fieldnames
391 Example : $schema->pk('SequenceEntry') # returns 'SequenceEntry.SE_id'
392 Returns : primary key fieldname[s] in "table.column" format, or null if
393 no pk exists
394 Args : [an array of] table name[s] (fieldnames are ok, table part used)
396 =cut
398 sub primarykey {
399 # return the primary key (in Table.Column format) of specified table(s)
400 my $self = shift;
401 my @tbl = @_;
402 my @ret;
403 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
404 foreach my $tbl (@tbl) {
405 # trim column name
406 $tbl =~ s/\..*$//;
407 grep(/^$tbl$/i, $self->tables) ?
408 push(@ret, grep(/\.[0-9a-zA-Z]+_id/, grep(/$tbl/i,$self->fields))) :
409 push(@ret, "");
411 return (wantarray ? @ret : $ret[0]);
414 sub pk {
415 # primarykey alias
416 shift->primarykey(@_);
419 =head4 HIVSchema foreignkey
421 Title : foreignkey [alias: fk]
422 Usage : $schema->fk($intable [, $totable])
423 Function: return foreign key fieldname in table $intable referring to
424 table $totable, or all foreign keys in $intable if $totable
425 unspec'd
426 Example : $schema->fk('AUthor', 'SequenceEntry'); # returns 'AUthor_AU_SE_id'
427 Returns : foreign key fieldname[s] in "table.column" format
428 Args : tablename [, optional foreign table name] (fieldnames are ok,
429 table part used)
431 =cut
433 sub foreignkey {
434 # return foreign key in in-table ($intbl) to to-table ($totbl)
435 # or all foreign keys in in-table if to-table not specified
436 # keys returned in Table.Column format
437 my $self = shift;
438 my ($intbl, $totbl) = @_;
439 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
440 # trim col names
441 $intbl =~ s/\..*$//;
442 $totbl =~ s/\..*$// if $totbl;
443 # check if in-table exists
444 return () unless grep( /^$intbl/i, $self->tables);
445 my @ret = grep( /$intbl\.(?:[0-9a-zA-Z]+_){2,}id/i, $self->fields);
446 if ($totbl) {
447 my $tpk = $self->primarykey($totbl);
448 return (wantarray ? () : "") unless grep( /^$totbl/i, $self->tables) && $tpk;
449 ($tpk) = ($tpk =~ /\.(.*)$/);
450 @ret = grep( /$tpk$/, @ret);
451 return (wantarray ? @ret : $ret[0]);
453 else {
454 # return all foreign keys in in-table
455 return @ret;
459 sub fk {
460 # foreignkey alias
461 shift->foreignkey(@_);
464 =head4 HIVSchema foreigntable
466 Title : foreigntable [alias ftbl]
467 Usage : $schema->ftbl( @foreign_key_fieldnames );
468 Function: return tablename of table that foreign keys points to
469 Example : $schema->ftbl( 'AUthor.AU_SE_id' ); # returns 'SequenceEntry'
470 Returns : tablename
471 Args : [an array of] fieldname[s] in "table.column" format
473 =cut
475 sub foreigntable {
476 # return table name that foreign key(s) point(s) to
477 my $self = shift;
478 my @fk = @_;
479 my @ret;
480 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
481 foreach (@fk) {
482 my ($mnem, $fmnem) = /\.([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_.*$/;
483 next unless $mnem && $fmnem;
484 # lookup based on Table.Column format of fields
485 my $sf = [grep( /^[0-9a-zA-Z]+\.$fmnem\_/, $self->fields )]->[0];
486 next unless $sf;
487 ($sf) = ($sf =~ /^([0-9a-zA-Z]+)\./);
488 push @ret, $sf;
490 return (wantarray ? @ret : $ret[0]);
493 sub ftbl {
494 # foreigntable alias
495 shift->foreigntable(@_);
498 =head4 HIVSchema find_join
500 Title : find_join
501 Usage : $sch->find_join('Table1', 'Table2')
502 Function: Retrieves a set of foreign and primary keys (in table.column
503 format) that represents a join path from Table1 to Table2
504 Example :
505 Returns : an array of keys (as table.column strings) -or- an empty
506 array if Table1 == Table2 -or- undef if no path exists
507 Args : two table names as strings
509 =cut
511 sub find_join {
512 my $self = shift;
513 my ($tgt, $tbl) = @_;
514 my ($stack, $revstack, $found, $revcut) = ([],[], 0, 4);
515 $self->_find_join_guts($tgt, $tbl, $stack, \$found);
516 if ($found) {
517 if (@$stack > $revcut) {
518 # reverse order of tables, see if a shorter path emerges
519 $found = 0;
520 $self->_find_join_guts($tgt, $tbl, $revstack, \$found, 1);
521 return (@$stack <= @$revstack ? @$stack : @$revstack);
523 return @$stack;
525 else {
526 return undef;
530 =head4 HIVSchema _find_join_guts
532 Title : _find_join_guts
533 Usage : $sch->_find_join_guts($table1, $table2, $stackref, \$found, $reverse)
534 (call with $stackref = [], $found=0)
535 Function: recursive guts of find_join
536 Example :
537 Returns : if a path is found, $found==1 and @$stackref contains the keys
538 in table.column format representing the path; if a path is not
539 found, $found == 0 and @$stackref contains garbage
540 Args : $table1, $table2 : table names as strings
541 $stackref : an arrayref to an empty array
542 \$found : a scalar ref to the value 0
543 $rev : if $rev==1, the arrays of table names will be reversed;
544 this can give a shorter path if cycles exist in the
545 schema graph
547 =cut
549 sub _find_join_guts {
550 my $self = shift;
551 my ($tbl, $tgt, $stack, $found, $rev) = @_;
552 return () if $tbl eq $tgt;
553 my $k = $self->pk($tbl);
554 if ($k) {
555 # all fks pointing to pk
556 my @fk2pk = map {
557 $self->fk($_, $k) || ()
558 } ($rev ? reverse $self->tables : $self->tables);
559 # skip keys already on stack
560 if (@$stack) {
561 (@$stack == 1) && do {
562 @fk2pk = grep (!/$$stack[0]/, @fk2pk);
564 (@$stack > 1 ) && do {
565 @fk2pk = map { my $f=$_; grep(/$f/, @$stack) ? () : $f } @fk2pk;
568 foreach my $f2p (@fk2pk) { # tables with fks pointing to pk
569 push @$stack, $f2p;
570 if ($self->tbl($f2p) eq $tgt) { # this fk's table is the target
571 # found it
572 $$found = 1;
573 return;
575 else {
576 #keep looking
577 $self->_find_join_guts($self->tbl($f2p), $tgt, $stack, $found, $rev);
578 return if $$found;
582 # all fks in $tbl
583 my @fks = ($rev ? reverse $self->fk($tbl) : $self->fk($tbl));
584 #skip keys already on stack
585 if (@$stack) {
586 (@$stack == 1) && do {
587 @fks = grep(!/$$stack[0]/, @fks);
589 (@$stack > 1) && do {
590 @fks = map { my $f=$_; grep(/$f/, @$stack) ? () : $f } @fks;
593 # all fks in table
594 if (@fks) {
595 for my $f (@fks) {
596 push @$stack, $f;
597 if ($self->ftbl($f) eq $tgt) { #found it
598 $$found = 1;
599 return;
601 else {
602 $self->_find_join_guts($self->ftbl($f), $tgt, $stack, $found, $rev);
603 $$found ? return : pop @$stack;
607 else {
608 pop @$stack;
609 return;
613 =head4 HIVSchema loadSchema
615 Title : loadHIVSchema [alias: loadSchema]
616 Usage : $schema->loadSchema( $XMLfilename )
617 Function: read (LANL DB) schema spec from XML
618 Example : $schema->loadSchema('lanl-schema.xml');
619 Returns : hashref to schema data
620 Keys are fieldnames in "table.column" format.
621 Each value is a hashref with the following properties:
622 {name} : HIVWEB 'table.column' format fieldname,
623 can be used directly in the cgi query
624 {aliases} : ref to array containing valid aliases/shortcuts for
625 {name}; can be used in routines creating the HTML query
626 {options} : ref to array containing valid matchdata for this field
627 can be used directly in the HTML query
628 {ankey} : contains the annotation key for this field used with
629 Bioperl annotation objects
630 {..attr..}: ..value_of_attr.. for this field (app-specific metadata)
631 Args :
633 =cut
635 sub loadHIVSchema {
636 my $fn = shift;
637 Bio::Root::Root->throw("loadHIVSchema: schema file not found") unless -e $fn;
638 my $q = XML::Simple->new(ContentKey=>'name',NormalizeSpace=>2,ForceArray=>1);
639 my %ret;
640 my $ref = $q->XMLin($fn);
641 my @sf = keys %{$$ref{sfield}};
642 foreach (@sf) {
643 my $h = $$ref{sfield}{$_};
644 $ret{$_} = $h;
645 foreach my $ptr ($$h{option}, $$h{alias}) {
646 if ($ptr) {
647 # kludge for XMLin: appears to convert to arrays, if there
648 # exists a tag without content, but to convert to hashes
649 # with content as key, if all tags possess content
650 if (ref($ptr) eq 'HASH') {
651 my @k = keys %{$ptr};
652 if (grep /desc/, keys %{$ptr->{$k[0]}}) {
653 # slurp the desc's
654 $$h{desc} = [ map { $$ptr{$_}->{desc} } @k ];
656 # now overwrite with keys (descs in same order...)
657 $ptr = [@k];
659 elsif (ref($ptr) eq 'ARRAY') {
660 $ptr = [map { ref eq 'HASH' ? $_->{name} : $_ } @{$ptr}]
662 else {
663 1; # stub : doh!
667 for my $ptr ($$h{ankey}) {
668 # flatten
669 my $ank = [keys %{$ptr}]->[0];
670 if (!defined $ank) {
671 delete $$h{ankey};
673 else {
674 $h->{antype} = $ptr->{$ank}{antype};
675 $ptr = $ank;
679 return \%ret;
682 sub loadSchema {
683 my $self = shift;
684 $self->{schema_ref} = loadHIVSchema(shift);
687 # below, dangerous
689 =head4 HIVSchema _sfieldh
691 Title : _sfieldh
692 Usage : $schema->_sfieldh($fieldname)
693 Function: get hashref to the specified field hash
694 Example :
695 Returns : hashref
696 Args : fieldname in "table.column" format
698 =cut
700 sub _sfieldh {
701 # return reference to the specified field hash
702 my $self = shift;
703 my ($sfield) = @_;
704 return ${$self->{schema_ref}}{$sfield};
709 =head2 Class QRY - a query algebra for HIVQuery
711 =head3 QRY SYNOPSIS
713 $Q = new QRY(
714 new R(
715 new Q('coreceptor', 'CXCR4'),
716 new Q('country', 'ZA')
719 QRY::Eq(QRY::And($Q, $Q), $Q); # returns 1
720 QRY::Eq(QRY::Or($Q, $Q), $Q); # returns 1
721 $Q2 = $Q1->clone;
722 $Q2 = new QRY(
723 new R(
724 new Q( 'coreceptor', 'CCR5' ),
725 new Q( 'country', 'ZA')
728 (QRY::And($Q, $Q2))->isnull; # returns 1
729 $Q3 = QRY::Or($Q, $Q2);
730 print $Q3->A; # prints '(CCR5 CXCR4)[coreceptor] (ZA)[country]'
732 =head3 QRY DESCRIPTION
734 The QRY package provides a query parser for
735 L<Bio::DB::Query::HIVQuery>. Currently, the parser supports AND, OR,
736 and () operations. The structure of the LANL cgi makes it tricky to
737 perform NOTs, though this could be implemented if the desire were
738 great.
740 Two class methods do the work. C<QRY::_parse_q> does a first-pass
741 parse of the query string. C<QRY::_make_q> interprets the parse tree
742 as returned by C<QRY::_parse_q> and produces an array of hash
743 structures that can be used directly by C<Bio::DB::Query::HIVQuery>
744 query execution methods. Validation of query fields and options is
745 performed at the C<Bio::DB::Query::HIVQuery> level, not here.
747 C<QRY> objects are collections of C<R> (or request) objects, which are
748 in turn collections of C<Q> (or atomic query) objects. C<Q> objects
749 represent a query on a single field, with match data options C<OR>ed
750 together, e.g. C<(A B)[subtype]>. C<R> objects collect C<Q> objects
751 that could be processed in a single HTTP request; i.e., a set of
752 atomic queries each having different fields C<AND>ed together, such as
754 (A B)[subtype] AND ('CCR5')[coreceptor] AND (US CA)[country]
756 The C<QRY> object collects C<R>s that cannot be reduced (through
757 logical operations) to a single HTTP request, e.g.
759 ((C)[subtype] AND (SI)[phenotype]) OR ( (D)[subtype] AND (NSI)[phenotype] ),
761 which cannot be got in one go through the current LANL cgi
762 implementation (as far as I can tell). The parser will simplify
763 something like
765 ((C)[subtype] AND (SI)[phenotype]) OR ((C)[subtype] AND (NSI)[phenotype])
767 to the single request
769 (C)[subtype] AND (NSI SI)[phenotype]
771 however.
773 The operators C<&> and C<|> are overloaded to C<QRY::And> and
774 C<QRY::Or>, to get Perl precedence and grouping for free. C<bool> is
775 overloaded to get symbolic tests such as C<if ($QRY) {stuff}>. C<==>
776 is overloaded with C<QRY::Eq> for convenience. No overloading is done
777 for C<R> or C<Q>.
779 =cut
781 # a query algebra for HIVQuery
783 # Each Q object is an 'atomic' query, written as (data)[field]
784 # (a b ...)[X] equals (a)[X] | (b)[X] | ...
785 # Each R object represents a single HTTP request to the db
786 # contains an array of Q (atomic) objects (q1, q2, ...)
787 # the R object is interpreted as q1 & q2 & ...
788 # Each QRY object represents a series of HTTP requests to the db
789 # contains an array of R (request) objects (R1, R2, ...)
790 # the QRY object is interpreted as R1 | R2 | ...
792 # & and | operations are specified for each type
794 package QRY;
795 use strict;
796 $QRY::NULL = new QRY();
799 use overload
800 "|" => \&Or,
801 "&" => \&And,
802 "bool" => \&Bool,
803 "==" => \&Eq;
806 # query language emulator
807 # supports only AND and OR, any groupings
809 # syntax rules:
810 # query atom: bareword [field] OR (bareword ...) [field]
811 # only single bareword allowed between []
812 # annotation fields in {} (only bareword lists allowed between {})
813 # () can group query atoms joined by operators (AND or OR)
814 # () containing only barewords MUST be followed by a field descriptor [field]
815 # empty [] not allowed
816 # query atoms joined with AND by default
817 # barewords are associated (ORed within) the next field descriptor in the line
819 # follow the parse tree, creating new QRY objects as needed in @q, and
820 # construct a logical expression using & and | symbols.
821 # These are overloaded for doing ands and ors on QRY objects;
822 # to get the final QRY object, eval the resulting expression $q_expr.
823 # QRY object will be translated into (possibly multiple) hashes
824 # conforming to HIVQuery parameter requirements.
826 =head4 QRY _make_q
828 Title : _make_q
829 Usage : QRY::_make_q($parsetree)
830 Function: creates hash structures suitable for HIVQuery from parse tree
831 returned by QRY::_parse_q
832 Example :
833 Returns : array of hashrefs of query specs
834 Args : a hashref
836 =cut
838 sub _make_q {
839 my $ptree = shift;
840 my ($q_expr, @q, @an, $query, @dbq);
841 _make_q_guts($ptree, \$q_expr, \@q, \@an);
842 $query = eval $q_expr;
843 throw Bio::Root::Root(-class=>'Bio::Root::Exception',
844 -text=>$@,
845 -value=>$q_expr) if $@;
846 return {} if $query->isnull;
847 foreach my $rq ($query->requests) {
848 my $h = {'query'=>{}};
849 foreach ($rq->atoms) {
850 my @d = split(/\s+/, $_->dta);
851 foreach my $d (@d) {
852 $d =~ s/[+]/ /g; ###! _ to [+]
853 $d =~ s/'//g;
855 $h->{'query'}{$_->fld} = (@d == 1) ? $d[0] : [@d];
857 $h->{'annot'} = [@an] if @an;
858 push @dbq, $h;
860 return @dbq;
863 =head4 QRY _make_q_guts
865 Title : _make_q_guts (Internal class method)
866 Usage : _make_q_guts($ptree, $q_expr, $qarry, $anarry)
867 Function: traverses the parse tree returned from QRY::_parse_q, checking
868 syntax and creating HIVQuery-compliant query structures
869 Example :
870 Returns :
871 Args : $parse_tree (hashref), $query_expression (scalar string ref),
872 $query_array (array ref : stack for returning query structures),
873 $annotation_array (array ref : stack for returning annotation
874 fields)
876 =cut
878 sub _make_q_guts {
879 my ($ptree, $q_expr, $qarry, $anarry) = @_;
880 my (@words, $o);
881 eval { # catch
882 foreach (@{$ptree->{cont}}) {
883 m{^AND$} && do {
884 $$q_expr .= "&";
885 next;
887 m{^OR$} && do {
888 $$q_expr .= "|";
889 next;
891 m{^HASH} && do {
892 for my $dl ($_->{delim}) {
893 ($dl =~ m{\(}) && do {
894 if (grep /^HASH/, @{$_->{cont}}) {
895 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
896 $$q_expr .= "(";
897 _make_q_guts($_,$q_expr,$qarry,$anarry);
898 $$q_expr .= ")";
900 else {
901 my @c;
902 my $c = join(' ',@{$_->{cont}});
903 $c =~ s/,/ /g;
904 Bio::Root::Root->throw("query syntax error: unmatched ['\"]") if (@c = ($c =~ /(['"])/g)) % 2;
905 @c = split(/\s*(['"])\s*/, $c);
906 do {
907 $c = shift @c;
908 if ($c =~ m{['"]}) {
909 $c = join('', ($c, shift @c, shift @c));
910 $c =~ s/\s+/+/g; ###! _ to +
911 push @words, $c;
913 else {
914 push @words, split(/\s+/,$c);
916 } while @c;
918 last;
920 ($dl =~ m{\[}) && do {
921 Bio::Root::Root->throw("syntax error: empty field descriptor") unless @{$_->{cont}};
922 Bio::Root::Root->throw("syntax error: more than one field descriptor in square brackets") unless @{$_->{cont}} == 1;
924 push @{$qarry}, new QRY( new R( new Q( $_->{cont}->[0], @words)));
925 # add default operation if nec
926 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
927 $$q_expr .= "\$q[".$#$qarry."]";
928 @words = ();
929 last;
931 ($dl =~ m{\{}) && do {
932 foreach my $an (@{$_->{cont}}) {
933 ($an =~ /^HASH/) && do {
934 if ($an->{delim} eq '[') {
935 push @$anarry, @{$an->{cont}};
937 else {
938 Bio::Root::Root->throw("query syntax error: only field descriptors (with or without square brackets) allowed in annotation spec");
940 next;
942 do { #else
943 push @$anarry, $an;
944 next;
947 last;
949 do {
950 1; #else stub
953 next;
955 do { # else, bareword
956 if ($o) {
957 $words[-1] .= "+$_"; ####! _ to +
959 else {
960 push @words, $_;
962 m/['"]/ && ($o = !$o);
964 } # @{ptree->{cont}}
965 Bio::Root::Root->throw("query syntax error: no search fields specified")
966 unless $$q_expr =~ /q\[[0-9]+\]/;
968 $@ ?
969 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
970 -text=>$@,
971 -value=>$$q_expr)
972 : return 1;
975 =head4 QRY _parse_q
977 Title : _parse_q
978 Usage : QRY::_parse_q($query_string)
979 Function: perform first pass parse of a query string with some syntax
980 checking, return a parse tree suitable for QRY::_make_q
981 Example : QRY::_parse_q(" to[be] OR (not to)[be] ");
982 Returns : hashref
983 Args : query string
985 =cut
987 # parse qry string into a branching tree structure
988 # each branch tagged by the opening delimiter ( key 'delim' )
989 # content (tokens and subbranch hashes) placed in l2r order in
990 # @{p->{cont}}
991 sub _parse_q {
992 local $_;
993 my $qstr = shift;
994 my $illegal = qr/[^a-zA-Z0-9-_<>=,\.\(\[\{\}\]\)\s'"]/;
995 my $pdlm = qr/[\{\[\(\)\]\}]/;
996 my %md = ('('=>')', '['=>']','{'=>'}');
997 my @tok = grep !/^\s*$/, split /($pdlm)/, $qstr;
998 return {} unless @tok;
999 my @pstack = ();
1000 my @dstack = ();
1001 my ($ptree, $p);
1003 eval { #catch
1004 Bio::Root::Root->throw("query syntax error: illegal character") if $qstr =~ /$illegal/;
1006 $ptree = $p = {'delim'=>'*'};
1007 foreach (@tok) {
1008 #trim whsp
1009 s/^\s+//;
1010 s/\s+$//;
1011 m{[\(\[\{]} && do {
1012 my $new = {'delim'=>$_};
1013 $p->{cont} = [] unless $p->{cont};
1014 push @{$p->{cont}}, $new;
1015 push @pstack, $p;
1016 push @dstack, $_;
1017 $p = $new;
1018 next;
1020 m{[\)\]\}]} && do {
1021 my $d = pop @dstack;
1022 if ($md{$d} eq $_) {
1023 $p = pop @pstack;
1024 Bio::Root::Root->throw("query syntax error: unmatched \"$_\"") unless $p;
1026 else {
1027 Bio::Root::Root->throw("query syntax error: saw \"$_\" before matching \"$md{$d}\"");
1029 next;
1031 do { # else
1032 $p->{cont} = [] unless $p->{cont};
1033 push @{$p->{cont}}, split(/\s+/);
1037 $@ ?
1038 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
1039 -text=>$@,
1040 -value=>"")
1041 : return $ptree;
1044 ## QRY constructor
1046 =head3 QRY CONSTRUCTOR
1048 =head4 QRY Constructor
1050 Title : QRY constructor
1051 Usage : $QRY = new QRY()
1052 Function:
1053 Example :
1054 Returns :
1055 Args : array of R objects, optional
1057 =cut
1059 sub new {
1060 my $class = shift;
1061 my @args = @_;
1062 my $self = {};
1063 $self->{requests} = [];
1064 bless($self, $class);
1065 $self->put_requests(@args) if @args;
1066 return $self;
1069 ## QRY instance methods
1071 =head3 QRY INSTANCE METHODS
1073 =head4 QRY requests
1075 Title : requests
1076 Usage : $QRY->requests
1077 Function: get/set array of requests comprising this QRY object
1078 Example :
1079 Returns :
1080 Args : array of class R objects
1082 =cut
1084 sub requests {
1085 my $self = shift;
1086 $self->put_requests(@_) if @_;
1087 return @{$self->{'requests'}};
1090 =head4 QRY put_requests
1092 Title : put_requests
1093 Usage : $QRY->put_request(@R)
1094 Function: add object of class R to $QRY
1095 Example :
1096 Returns :
1097 Args : [an array of] of class R object[s]
1099 =cut
1101 sub put_requests {
1102 my $self = shift;
1103 my @args = @_;
1104 foreach (@args) {
1105 Bio::Root::Root->throw('requires type R (request)') unless ref && $_->isa('R');
1106 push @{$self->{requests}}, $_;
1108 return @args;
1111 =head4 QRY isnull
1113 Title : isnull
1114 Usage : $QRY->isnull
1115 Function: test if QRY object is null
1116 Example :
1117 Returns : 1 if null, 0 otherwise
1118 Args :
1120 =cut
1122 sub isnull {
1123 my $self = shift;
1124 return ($self->requests) ? 0 : 1;
1127 =head4 QRY A
1129 Title : A
1130 Usage : print $QRY->A
1131 Function: get a string representation of QRY object
1132 Example :
1133 Returns : string scalar
1134 Args :
1136 =cut
1138 sub A {
1139 my $self = shift;
1140 return join( "\n", map {$_->A} $self->requests );
1143 =head4 QRY len
1145 Title : len
1146 Usage : $QRY->len
1147 Function: get number of class R objects contained by QRY object
1148 Example :
1149 Returns : scalar
1150 Args :
1152 =cut
1154 sub len {
1155 my $self = shift;
1156 return scalar @{$self->{'requests'}};
1159 =head4 QRY clone
1161 Title : clone
1162 Usage : $QRY2 = $QRY1->clone;
1163 Function: create and return a clone of the object
1164 Example :
1165 Returns : object of class QRY
1166 Args :
1168 =cut
1170 sub clone {
1171 local $_;
1172 my $self = shift;
1173 my $ret = new QRY();
1174 foreach ($self->requests) {
1175 $ret->put_requests($_->clone);
1177 return $ret;
1180 ## QRY class methods
1182 =head3 QRY CLASS METHODS
1184 =head4 QRY Or
1186 Title : Or
1187 Usage : $QRY3 = QRY::Or($QRY1, $QRY2)
1188 Function: logical OR for QRY objects
1189 Example :
1190 Returns : a QRY object
1191 Args : two class QRY objects
1193 =cut
1195 sub Or {
1196 local $_;
1197 my ($q, $r, $rev_f) = @_;
1198 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1199 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1200 if ($q->isnull) {
1201 return $r->clone;
1203 elsif ($r->isnull) {
1204 return $q->clone;
1206 do {my $qq = $q; $q=$r; $r=$qq} if ($q->len > $r->len);
1207 my @rq_r = $r->requests;
1208 my @rq_q = $q->requests;
1209 my (@cand_rq, @ret_rq);
1210 # search for simplifications
1211 my @now = @rq_q;
1212 my @nxt =();
1213 foreach (@rq_r) {
1214 my $found = 0;
1215 while (my $rq = pop @now) {
1216 my @result = R::Or($rq, $_);
1217 if (@result==1) {
1218 push @cand_rq, $result[0]->clone;
1219 $found = 1;
1220 last;
1222 else {
1223 push @nxt, $rq;
1226 push @cand_rq, $_->clone unless ($found);
1227 # @now becomes unexamined @rq_q's plus failed @rq_q's
1228 @now = (@now, @nxt);
1230 push @cand_rq, map {$_->clone} @now; # add all failed @rq_q's
1231 # squeeze out redundant requests
1232 while (my $rq = pop @cand_rq) {
1233 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1235 return new QRY( @ret_rq );
1238 =head4 QRY And
1240 Title : And
1241 Usage : $QRY3 = QRY::And($QRY1, $QRY2)
1242 Function: logical AND for QRY objects
1243 Example :
1244 Returns : a QRY object
1245 Args : two class QRY objects
1247 =cut
1249 sub And {
1250 my ($q, $r, $rev_f) = @_;
1251 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1252 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1253 return ($QRY::NULL) if ($q->isnull || $r->isnull);
1254 my (@cand_rq, @ret_rq);
1255 foreach my $rq_r ($r->requests) {
1256 foreach my $rq_q ($q->requests) {
1257 my ($rq) = R::And($rq_r, $rq_q);
1258 push @cand_rq, $rq unless $rq->isnull;
1261 return $QRY::NULL unless @cand_rq;
1262 # squeeze out redundant requests
1263 while (my $rq = pop @cand_rq) {
1264 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1266 return new QRY( @ret_rq );
1269 =head4 QRY Bool
1271 Title : Bool
1272 Usage : QRY::Bool($QRY1)
1273 Function: allows symbolic testing of QRY object when bool overloaded
1274 Example : do {stuff} if $QRY1 *same as* do {stuff} if !$QRY1->isnull
1275 Returns :
1276 Args : a class QRY object
1278 =cut
1280 sub Bool {
1281 my $q = shift;
1282 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1283 return $q->isnull ? 0 : 1;
1286 =head4 QRY Eq
1288 Title : Eq
1289 Usage : QRY::Eq($QRY1, $QRY2)
1290 Function: test if R objects in two QRY objects are the same
1291 (irrespective of order)
1292 Example :
1293 Returns : 1 if equal, 0 otherwise
1294 Args : two class QRY objects
1296 =cut
1298 sub Eq {
1299 my ($q, $r, $rev_f) = @_;
1300 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1301 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1302 return 0 unless $q->len == $r->len;
1303 foreach my $rq_q ($q->requests) {
1304 my $found = 0;
1305 foreach my $rq_r ($r->requests) {
1306 if (R::Eq($rq_q,$rq_r)) {
1307 $found = 1;
1308 last;
1311 return 0 unless $found;
1313 return 1;
1318 =head2 Class R - request objects for QRY algebra
1320 =head3 R SYNOPSIS
1322 $R = new R( $q1, $q2 );
1323 $R->put_atoms($q3);
1324 $R->del_atoms('coreceptor', 'phenotype');
1325 return $R->clone;
1326 $R1 = new R( new Q('subtype', 'B') );
1327 $R2 = new R( new Q('subtype', 'B C'),
1328 new Q('country', 'US') );
1329 R::Eq( (R::And($R1, $R2))[0],
1330 new R( new Q('subtype', 'B' ),
1331 new Q('country', 'US') )); # returns 1
1332 QRY::Eq( new QRY(R::Or($R1, $R2)), new QRY($R1, $R2) ); # returns 1
1333 R::In( (R::And($R1, $R2))[0], $R1 ); # returns 1
1335 =head3 R DESCRIPTION
1337 Class R objects contain a list of atomic queries (class Q
1338 objects). Each class R object represents a single HTTP request to the
1339 LANL DB. When converted to a DB query, the class Q objects contained
1340 by an R object are effectively C<AND>ed.
1342 =cut
1344 package R;
1345 use strict;
1346 $R::NULL = new R();
1349 ## R constructor
1351 =head3 R CONSTRUCTOR
1353 =head4 R constructor
1355 Title : R constructor
1356 Usage : $R = new R()
1357 Function: create a new R (request) object
1358 Example :
1359 Returns : class R (request) object
1360 Args : optional, array of class Q objects
1362 =cut
1364 sub new {
1365 my $class = shift;
1366 my @args = @_;
1367 my $self = {};
1368 $self->{atoms} = {};
1369 bless($self, $class);
1370 $self->put_atoms(@args) if @args;
1371 return $self;
1374 ## R instance methods
1376 =head3 R INSTANCE METHODS
1378 =head4 R len
1380 Title : len
1381 Usage : $R->len
1382 Function: get number of class Q objects contained in R object
1383 Example :
1384 Returns : scalar
1385 Args :
1387 =cut
1389 sub len {
1390 my $self = shift;
1391 return scalar @{[keys %{$self->{'atoms'}}]};
1394 =head4 R atoms
1396 Title : atoms
1397 Usage : $R->atoms( [optional $field])
1398 Function: get array of class Q (atomic query) objects in class R object
1399 Example : $R->atoms(); $R->atoms('coreceptor')
1400 Returns : array of class Q objects (all Qs or those corresponding to $field
1401 if present)
1402 Args : optional, scalar string
1404 =cut
1406 sub atoms {
1407 local $_;
1408 # returns an array of atoms
1409 # no arg: all atoms;
1410 # args: atoms with specified fields
1411 my $self = shift;
1412 my @flds = (@_ ? @_ : keys %{$self->{'atoms'}});
1413 return wantarray ? map { $self->{'atoms'}->{$_} } @flds : $self->{'atoms'}->{$flds[0]};
1416 =head4 R fields
1418 Title : fields
1419 Usage : $R->fields
1420 Function: get array of fields of all Q objects contained in $R
1421 Example :
1422 Returns : array of scalars
1423 Args :
1425 =cut
1427 sub fields {
1428 my $self = shift;
1429 return keys %{$self->{'atoms'}};
1432 =head4 R put_atoms
1434 Title : put_atoms
1435 Usage : $R->put_atoms( @q )
1436 Function: AND an atomic query (class Q object) to the class R object's list
1437 Example :
1438 Returns : void
1439 Args : an [array of] class Q object[s]
1441 =cut
1443 sub put_atoms {
1444 # AND this atom to the request
1445 local $_;
1446 my $self = shift;
1447 my @args = @_;
1448 foreach (@args) {
1449 Bio::Root::Root->throw('requires type Q (atom)') unless ref && $_->isa('Q');
1450 if ($self->atoms($_->fld)) {
1451 my $a = Q::qand( $self->atoms($_->fld), $_ );
1452 if ($a->isnull) {
1453 delete $self->{'atoms'}->{$_->fld};
1455 else {
1456 $self->{atoms}->{$_->fld} = $a->clone;
1459 else {
1460 $self->{atoms}->{$_->fld} = $_->clone;
1463 return;
1466 =head4 R del_atoms
1468 Title : del_atoms
1469 Usage : $R->del_atoms( @qfields )
1470 Function: removes class Q objects from R object's list according to the
1471 field names given in arguments
1472 Example :
1473 Returns : the class Q objects deleted
1474 Args : scalar array of field names
1476 =cut
1478 sub del_atoms {
1479 # remove atoms by field from request
1480 local $_;
1481 my $self = shift;
1482 my @args = @_;
1483 return () unless @args;
1484 my @ret;
1485 foreach (@args) {
1486 push @ret, delete $self->{'atoms'}->{$_};
1488 return @ret;
1491 =head4 R isnull
1493 Title : isnull
1494 Usage : $R->isnull
1495 Function: test if class R object is null
1496 Example :
1497 Returns : 1 if null, 0 otherwise
1498 Args :
1500 =cut
1502 sub isnull {
1503 my $self = shift;
1504 return ($self->len) ? 0 : 1;
1507 =head4 R A
1509 Title : A
1510 Usage : print $R->A
1511 Function: get a string representation of class R object
1512 Example :
1513 Returns : string scalar
1514 Args :
1516 =cut
1518 sub A {
1519 my $self = shift;
1520 my @a = sort {$a->fld cmp $b->fld} $self->atoms;
1521 return join(" ", map {$_->A} @a);
1524 =head4 R clone
1526 Title : clone
1527 Usage : $R2 = $R1->clone;
1528 Function: create and return a clone of the object
1529 Example :
1530 Returns : object of class R
1531 Args :
1533 =cut
1535 sub clone {
1536 local $_;
1537 my $self = shift;
1538 my $ret = new R();
1539 foreach ($self->atoms) {
1540 $ret->put_atoms($_->clone);
1542 return $ret;
1545 ## R class methods
1547 =head3 R CLASS METHODS
1549 =head4 R In
1551 Title : In
1552 Usage : R::In($R1, $R2)
1553 Function: tests whether the query represented by $R1 would return a subset
1554 of items returned by the query represented by $R2
1555 Example : print "R2 gets those and more" if R::In($R1, $R2);
1556 Returns : 1 if R1 is subset of R2, 0 otherwise
1557 Args : two class R objects
1559 =cut
1561 sub In {
1562 local $_;
1563 my ($s, $t) = @_;
1564 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1565 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1566 return 1 if ($s->isnull);
1567 # common fields
1568 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,$s->fields} $t->fields;
1569 return 0 unless @cf==$t->len;
1570 foreach (@cf) {
1571 my @sd = split(/\s+/, $s->atoms($_)->dta);
1572 my @td = split(/\s+/, $t->atoms($_)->dta);
1573 my @cd = grep {defined} map {my $d=$_; grep /^$d$/, @td} @sd;
1574 return 0 unless @cd==@sd;
1576 return 1;
1579 =head4 R And
1581 Title : And
1582 Usage : @Rresult = R::And($R1, $R2)
1583 Function: logical AND for R objects
1584 Example :
1585 Returns : an array containing class R objects
1586 Args : two class R objects
1588 =cut
1590 sub And {
1591 local $_;
1592 my ($s, $t) = @_;
1593 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1594 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1595 return ($R::NULL) if ($s->isnull || $t->isnull);
1597 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1598 # $t has at least as many fields defined than $s ($t is more restrictive)
1600 # common fields
1601 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1602 my $ret = new R();
1603 my $v = $t->clone;
1604 $v->del_atoms(@cf);
1605 my $u = $s->clone;
1606 $u->del_atoms(@cf);
1608 # And the atoms with identical fields
1610 foreach (@cf) {
1611 my ($a) = Q::qand($s->atoms($_), $t->atoms($_));
1612 if ($a->isnull) {
1613 return $R::NULL;
1615 else {
1616 $ret->put_atoms($a);
1619 # put the private atoms
1620 $ret->put_atoms($u->atoms, $v->atoms);
1621 return ($ret);
1625 =head4 R Or
1627 Title : Or
1628 Usage : @Rresult = R::Or($R1, $R2)
1629 Function: logical OR for R objects
1630 Example :
1631 Returns : an array containing class R objects
1632 Args : two class R objects
1634 =cut
1636 sub Or {
1637 local $_;
1638 my ($s, $t) = @_;
1639 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1640 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1641 if ($s->isnull) {
1642 return $t->clone;
1644 elsif ($t->isnull) {
1645 return $s->clone;
1647 return $s->clone if (R::In($t, $s));
1648 return $t->clone if (R::In($s, $t));
1650 # try simplifying
1651 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1652 # common fields
1653 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1655 if ($t->len == @cf) {
1656 # all atoms equal within fields but one? If yes, simplify...
1657 my @df = grep {!Q::qeq($s->atoms($_), $t->atoms($_))} @cf;
1658 if (@df == 1) {
1659 my ($a) = Q::qor($s->atoms($df[0]), $t->atoms($df[0]));
1660 my $ret = $s->clone;
1661 $ret->del_atoms($df[0]);
1662 $ret->put_atoms($a);
1663 return ($ret);
1667 # neither request contains the other, and the requests cannot be
1668 # simplified; reflect back (clones of) the input...
1669 return ($s->clone, $t->clone);
1673 =head4 R Eq
1675 Title : Eq
1676 Usage : R::Eq($R1, $R2)
1677 Function: test if class Q objects in two R objects are the same
1678 (irrespective of order)
1679 Example :
1680 Returns : 1 if equal, 0 otherwise
1681 Args : two class R objects
1683 =cut
1685 sub Eq {
1686 local $_;
1687 my ($s, $t) = @_;
1688 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1689 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1690 my @sf = $s->fields;
1691 my @tf = $t->fields;
1692 return 0 unless @sf==@tf;
1693 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,@sf} @tf;
1694 return 0 unless @cf==@tf;
1695 foreach (@cf) {
1696 return 0 unless Q::qeq($s->atoms($_), $t->atoms($_));
1698 return 1;
1702 =head2 Class Q - atomic query objects for QRY algebra
1704 =head3 Q SYNOPSIS
1706 $q = new Q('coreceptor', 'CXCR4 CCR5');
1707 $u = new Q('coreceptor', 'CXCR4');
1708 $q->fld; # returns 'coreceptor'
1709 $q->dta; # returns 'CXCR4 CCR5'
1710 print $q->A; # prints '(CXCR4 CCR5)[coreceptor]
1711 Q::qeq($q, $u); # returns 0
1712 Q::qeq( Q::qor($q, $q), $q ); # returns 1
1713 Q::qin($u, $q) # returns 1
1714 Q::qeq(Q::qand($u, $q), $u ); # returns 1
1716 =head3 Q DESCRIPTION
1718 Class Q objects represent atomic queries, that can be described by a
1719 single LANL cgi parameter=value pair. Class R objects (requests) are
1720 built from class Qs. The logical operations at the higher levels
1721 (C<QRY, R>) ultimately depend on the lower level operations on Qs:
1722 C<qeq, qin, qand, qor>.
1724 =cut
1726 package Q;
1727 use strict;
1728 $Q::NULL = new Q();
1730 ## Q constructor
1732 =head3 Q CONSTRUCTOR
1734 =head4 Q constructor
1736 Title : Q constructor
1737 Usage : $q = new Q($field, $data)
1738 Function: create a new Q (atomic query) object
1739 Example :
1740 Returns : class Q object
1741 Args : optional $field, $data strings
1743 =cut
1745 sub new {
1746 local $_;
1747 my ($class,@args) = @_;
1748 my $self={};
1749 foreach (@args) { s/^\s+//; s/\s+$//; }
1750 my ($fld, @dta) = @args;
1751 $self->{fld}=$fld;
1752 $self->{dta}=join(" ", @dta);
1753 bless($self, $class);
1754 return $self;
1757 ## Q instance methods
1759 =head3 Q INSTANCE METHODS
1761 =head4 Q isnull
1763 Title : isnull
1764 Usage : $q->isnull
1765 Function: test if class Q object is null
1766 Example :
1767 Returns : 1 if null, 0 otherwise
1768 Args :
1770 =cut
1772 sub isnull {
1773 my $self = shift;
1774 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1775 return 1 unless (($self->fld && length($self->fld)) || ($self->dta && length($self->dta)));
1776 return 0;
1779 =head4 Q fld
1781 Title : fld
1782 Usage : $q->fld($field)
1783 Function: get/set fld (field name) property
1784 Example :
1785 Returns : scalar
1786 Args : scalar
1788 =cut
1790 sub fld {
1791 my $self = shift;
1792 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1793 my $f = shift;
1794 if ($f) {
1795 $f =~ s/^\s+//;
1796 $f =~ s/\s+$//;
1797 return $self->{fld}=$f;
1799 return $self->{fld};
1803 =head4 Q dta
1805 Title : dta
1806 Usage : $q->dta($data)
1807 Function: get/set dta (whsp-separated data string) property
1808 Example :
1809 Returns : scalar
1810 Args : scalar
1812 =cut
1814 sub dta {
1815 my $self = shift;
1816 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1817 my $d = join(" ", @_);
1818 if ($d) {
1819 $d =~ s/^\s+//;
1820 $d =~ s/\s+$//;
1821 return $self->{dta} = $d;
1823 return $self->{dta};
1826 =head4 Q A
1828 Title : A
1829 Usage : print $q->A
1830 Function: get a string representation of class Q object
1831 Example :
1832 Returns : string scalar
1833 Args :
1835 =cut
1837 sub A {
1838 my $self = shift;
1839 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1840 my @a = split(/\s+/, $self->dta);
1842 return "(".join(' ', sort {$a cmp $b} @a).")[".$self->fld."]";
1845 =head4 Q clone
1847 Title : clone
1848 Usage : $q2 = $q1->clone;
1849 Function: create and return a clone of the object
1850 Example :
1851 Returns : object of class Q
1852 Args :
1854 =cut
1856 sub clone {
1857 my $self = shift;
1858 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1859 my $ret = new Q ($self->fld, $self->dta);
1860 return $ret;
1863 ### Q class methods
1865 =head3 Q CLASS METHODS
1867 =head4 Q qin
1869 Title : qin
1870 Usage : Q::qin($q1, $q2)
1871 Function: tests whether the query represented by $q1 would return a subset
1872 of items returned by the query represented by $q2
1873 Example : print "q2 gets those and more" if Q::qin($q1, $q2);
1874 Returns : 1 if q1 is subset of q2, 0 otherwise
1875 Args : two class Q objects
1877 =cut
1879 sub qin {
1880 my ($a, $b) = @_;
1881 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1882 return 0 unless $a->fld eq $b->fld;
1883 return Q::qeq( $b, Q::qor($a, $b) );
1886 =head4 Q qeq
1888 Title : qeq
1889 Usage : Q::qeq($q1, $q2)
1890 Function: test if fld and dta properties in two class Q objects are the same
1891 (irrespective of order)
1892 Example :
1893 Returns : 1 if equal, 0 otherwise
1894 Args : two class Q objects
1896 =cut
1898 sub qeq {
1899 local $_;
1900 my ($a, $b) = @_;
1901 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1902 return 0 unless $a->fld eq $b->fld;
1903 my @ad = unique(split(/\s+/,$a->dta));
1904 my @bd = unique(split(/\s+/,$b->dta));
1905 return 0 unless @ad==@bd;
1906 my @cd = grep {defined} map {my $f = $_; grep /^$f$/, @ad} @bd;
1907 return @cd == @bd;
1910 =head4 Q qor
1912 Title : qor
1913 Usage : @qresult = Q::qor($q1, $q2)
1914 Function: logical OR for Q objects
1915 Example :
1916 Returns : an array of class Q objects
1917 Args : two class Q objects
1919 =cut
1921 sub qor {
1922 local $_;
1923 my @a = @_;
1924 foreach (@a) {
1925 Bio::Root::Root->throw("requires type Q (atom)") unless ref && $_->isa('Q');
1927 my @ret;
1928 my (%f, @f);
1929 @a = grep {!$_->isnull} @a;
1930 return ($Q::NULL) unless @a > 0;
1931 # list of unique flds
1932 @f = unique(map {$_->fld} @a);
1933 foreach my $f (@f) {
1934 my @fobjs = grep {$_->fld eq $f} @a;
1935 my @d = unique(map {split(/\s/, $_->dta)} @fobjs );
1936 my $r = new Q($f, @d);
1937 push @ret, $r;
1939 return @ret;
1942 =head4 Q qand
1944 Title : qand
1945 Usage : @qresult = Q::And($q1, $q2)
1946 Function: logical AND for R objects
1947 Example :
1948 Returns : an array of class Q objects
1949 Args : two class Q objects
1951 =cut
1953 sub qand {
1954 local $_;
1955 my ($a, $b) = @_;
1956 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1957 my @ret;
1958 if (ref $a eq 'ARRAY') {
1959 foreach my $ea (@$a) {
1960 push @ret, qand( $ea, $b );
1962 return qor(@ret); # simplify
1964 elsif (ref $b eq 'ARRAY') {
1965 foreach my $eb (@$b) {
1966 push @ret, qand( $a, $eb);
1969 return qor(@ret); # simplify
1971 else {
1972 return ($Q::NULL) if ($a->isnull || $b->isnull);
1973 if ($a->fld eq $b->fld) {
1974 # find intersection of data
1975 my (%ad, @ad, @bd);
1976 @ad = split(/\s+/, $a->dta);
1977 @ad{@ad} = (1) x @ad;
1978 @bd = split(/\s+/, $b->dta);
1979 foreach (@bd) {
1980 $ad{$_}++;
1982 my $r = new Q($a->fld,
1983 grep {$_}
1984 map {$ad{$_} == 2 ? $_ : undef} keys %ad);
1985 return (length($r->dta) > 0) ? ($r) : ($Q::NULL);
1987 else {
1988 return ($a, $b);
1993 =head3 Q INTERNALS
1995 =head4 Q unique
1997 Title : unique
1998 Usage : @ua = unique(@a)
1999 Function: return contents of @a with duplicates removed
2000 Example :
2001 Returns :
2002 Args : an array
2004 =cut
2006 sub unique {
2007 my @a = @_;
2008 my %a;
2009 @a{@a} = undef;
2010 return keys %a;
2015 =head2 Additional tools for Bio::AnnotationCollectionI
2017 =head3 Bio::AnnotationCollectionI SYNOPSIS (additional methods)
2019 $seq->annotation->put_value('patient_id', 1401)
2020 $seq->annotation->get_value('patient_ids') # returns 1401
2021 $seq->annotation->put_value('patient_group', 'MassGenH')
2022 $seq->annotation->put_value(['clinical', 'cd4count'], 503);
2023 $seq->annotation->put_value(['clinical', 'virus_load'], 150805);
2024 foreach ( qw( cd4count virus_load ) ) {
2025 $blood_readings{$_} = $seq->annonation->get_value(['clinical', $_]);
2028 =head3 Bio::AnnotationCollectionI DESCRIPTION (additional methods)
2030 C<get_value()> and C<put_value> allow easy creation of and access to an
2031 annotation collection tree with nodes of L<Bio::Annotation::SimpleValue>. These
2032 methods obiviate direct accession of the SimpleValue objects.
2034 =cut
2036 package Bio::AnnotationCollectionI;
2037 use strict;
2038 use Bio::Annotation::SimpleValue;
2040 =head2 get_value
2042 Title : get_value
2043 Usage : $ac->get_value($tagname) -or-
2044 $ac->get_value( $tag_level1, $tag_level2,... )
2045 Function: access the annotation value assocated with the given tags
2046 Example :
2047 Returns : a scalar
2048 Args : an array of tagnames that descend into the annotation tree
2050 =cut
2052 sub get_value {
2053 local $_;
2054 my $self = shift;
2055 my @args = @_;
2056 my @h;
2057 return "" unless @_;
2058 while ($_ = shift @args) {
2059 @h = $self->get_Annotations($_);
2060 if (ref($h[0]->{value})) {
2061 $self = $h[0]->{value}; # must be another Bio::AnnotationCollectionI
2063 else {
2064 last;
2067 return $h[0] && $h[0]->{value} ; # now the last value.
2070 =head2 put_value
2072 Title : put_value
2073 Usage : $ac->put_value($tagname, $value) -or-
2074 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
2075 $ac->put_value( [$tag_level1, $tag_level2, ...] )
2076 Function: create a node in an annotation tree, and assign a scalar value to it
2077 if a value is specified
2078 Example :
2079 Returns : scalar or a Bio::AnnotationCollection object
2080 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
2081 -VALUE=>$value) -or-
2082 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
2083 Note : If intervening nodes do not exist, put_value creates them, replacing
2084 existing nodes. So if $ac->put_value('x', 10) was done, then later,
2085 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed,
2086 and $ac->get_value('x') will now return the annotation collection
2087 with tagname 'y'.
2089 =cut
2091 sub put_value {
2092 local $_;
2093 my $self = shift;
2094 my @args = @_;
2095 my ($keys, $value) = $self->_rearrange([qw( KEYS VALUE )], @args);
2096 my (@keys, $lastkey);
2097 # $value ||= new Bio::Annotation::Collection;
2098 @keys = (ref($keys) eq 'ARRAY') ? @$keys : ($keys);
2099 $lastkey = pop @keys;
2100 foreach (@keys) {
2101 my $a = $self->get_value($_);
2102 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
2103 $self = $a;
2105 else {
2106 # replace an old value
2107 $self->remove_Annotations($_) if $a;
2108 my $ac = new Bio::Annotation::Collection;
2109 $self->add_Annotation(new Bio::Annotation::SimpleValue(
2110 -tagname => $_,
2111 -value => $ac
2114 $self = $ac;
2117 if ($self->get_value($lastkey)) {
2118 # replace existing value
2119 ($self->get_Annotations($lastkey))[0]->{value} = $value;
2121 else {
2122 $self->add_Annotation(new Bio::Annotation::SimpleValue(
2123 -tagname=>$lastkey,
2124 -value=>$value
2127 return $value;
2130 =head2 get_keys
2132 Title : get_keys
2133 Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...)
2134 Function: Get an array of tagnames underneath the named tag nodes
2135 Example : # prints the values of the members of Category 1...
2136 print map { $ac->get_value($_) } $ac->get_keys('Category 1') ;
2137 Returns : array of tagnames or empty list if the arguments represent a leaf
2138 Args : [array of] tagname[s]
2140 =cut
2142 sub get_keys {
2143 my $self = shift;
2144 my @keys = @_;
2145 foreach (@keys) {
2146 my $a = $self->get_value($_);
2147 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
2148 $self = $a;
2150 else {
2151 return ();
2154 return $self->get_all_annotation_keys();