1 # $Id: HIVQueryHelper.pm 231 2008-12-11 14:32:00Z maj $
3 # BioPerl module for Bio::DB::HIV::HIVQueryHelper
5 # Cared for by Mark A. Jensen <maj@fortinbras.us>
7 # Copyright Mark A. Jensen
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::DB::HIV::HIVQueryHelper - Routines and packages used by Bio::DB::HIV and
16 Bio::DB::Query::HIVQuery
20 Used in Bio::DB::Query::HIVQuery. No need to use directly.
24 C<Bio::DB::HIV::HIVQueryHelper> contains a number of packages for use
25 by L<Bio::DB::Query::HIVQuery>. Package C<HIVSchema> parses the
26 C<lanl-schema.xml> file, and allows access to it in the context of the
27 relational database it represents (see APPENDIX for excruciating
28 detail). Packages C<QRY>, C<R>, and C<Q> together create the query
29 string parser that enables NCBI-like queries to be understood by
30 C<Bio::DB::Query::HIVQuery>. They provide objects and operators to
31 perform and simplify logical expressions involving C<AND>, C<OR>, and
32 C<()> and return hash structures that can be handled by
33 C<Bio::DB::Query::HIVQuery> routines.
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 of the bugs and their resolution. Bug reports can be submitted via
52 http://bugzilla.open-bio.org/
54 =head1 AUTHOR - Mark A. Jensen
56 Email maj@fortinbras.us
62 The rest of the documentation details each of the contained packages.
63 Internal methods are usually preceded with a _
67 # Let the code begin...
69 package Bio
::DB
::HIV
::HIVQueryHelper
;
76 @Bio::QueryStringSyntax
::Exception
::ISA
= qw( Bio::Root::Exception);
81 =head2 HIVSchema - objects/methods to manipulate a version of the LANL HIV DB schema
85 $schema = new HIVSchema( 'lanl-schema.xml' );
86 @tables = $schema->tables;
87 @validFields = $schema->fields;
88 @validAliases = $schema->aliases;
89 @query_aliases_for_coreceptor = $schema->aliases( 'SEQ_SAMple.SSAM_second_receptor' );
90 $pk_for_SequenceEntry = $schema->primarykey('SequenceEntry'); # returns 'SequenceEntry.SE_id'
91 $fk_for_SEQ_SAMple_to_SequenceEntry =
92 $schema->foreignkey('SEQ_SAMple', 'SequenceEntry'); # returns 'SEQ_SAMple.SSAM_SE_id'
94 $table = $schema->tablepart('SEQ_SAMple.SSAM_badseq'); # returns 'SEQ_SAMple'
95 $column = $schema->columnpart('SEQ_SAMple.SSAM_badseq'); # returns 'SSAM_badseq'
99 HIVSchema methods are used in L<Bio::DB::Query::HIVQuery> for table,
100 column, primary/foreign key manipulations based on the observed Los
101 Alamos HIV Sequence Database (LANL DB) naming conventions for their
102 CGI parameters. The schema is contained in an XML file
103 (C<lanl-schema.xml>) which is read into an HIVSchema object, in turn a
104 property of the HIVQuery object. HIVSchema methods are used to build
105 correct cgi queries in a way that attempts to preserve the context of
106 the relational database the query parameters represent.
111 # objects/methods to manipulate a version of the LANL HIV DB schema
124 Usage : $schema = new HIVSchema( "lanl-schema.xml ");
127 Returns : an HIVSchema object
137 $self->{schema_ref
} = loadHIVSchema
($args[0]);
139 bless($self, $class);
145 =head3 INSTANCE METHODS
150 Usage : $schema->tables()
151 Function: get all table names in schema
153 Returns : array of table names
159 # return array of all tables in schema
162 my $sref = $self->{schema_ref
};
163 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
164 my @k = grep(/\./, keys %$sref);
177 Usage : $schema->columns( [$tablename] );
178 Function: return array of columns for specified table, or all columns in
179 schema, if called w/o args
182 Args : tablename or fieldname string
187 # return array of columns for specified table
188 # all columns in schema, if called w/o args
192 my $sref = $self->{schema_ref
};
193 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
196 # check if table exists
197 return () unless grep(/^$tbl$/i, $self->tables);
198 my @k = sort keys %$sref;
199 @k = grep (/^$tbl\./i, @k);
209 Usage : $schema->fields();
210 Function: return array of all fields in schema, in format "table.column"
212 Returns : array of all fields
218 # return array of all fields (Table.Column format) in schema
220 my $sref = $self->{schema_ref
};
221 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
222 my @k = sort keys %{$sref};
229 Usage : $schema->options(@fieldnames)
230 Function: get array of options (i.e., valid match data strings) available
233 Returns : array of match data strings
234 Args : [array of] fieldname string[s] in "table.column" format
239 # return array of options available to specified field
242 my $sref = $self->{schema_ref
};
243 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
244 return $$sref{$sfield}{option
} ? @
{$$sref{$sfield}{option
}} : ();
250 Usage : $schema->aliases(@fieldnames)
251 Function: get array of aliases to specified field[s]
253 Returns : array of valid query aliases for fields as spec'd in XML file
254 Args : [an array of] fieldname[s] in "table.column" format
259 # return array of aliases to specified field
262 my $sref = $self->{schema_ref
};
264 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
266 return $$sref{$sfield}{alias
} ? @
{$$sref{$sfield}{alias
}} : ();
268 else { # all valid aliases
269 map {push @ret, @
{$$sref{$_}{alias
}} if $$sref{$_}{alias
}} $self->fields;
276 Title : ankh (annotation key hash)
277 Usage : $schema->ankh(@fieldnames)
278 Function: return a has translating fields to annotation keys for the
280 (Annotation keys are used for parsing the tab-delimited response
281 to Bio::DB::Query::HIVQuery::_do_lanl_request.)
284 Args : [an array of] fieldname[s] in "table.column" format
289 # return hash translating sfields to annotation keys for specified sfield(s)
293 my $sref = $self->{schema_ref
};
294 Bio
::Root
::Root
->throw("schema not initialized") unless $sref;
296 next unless $$sref{$_}{ankey
};
297 $ret{$_} = {'ankey'=>$$sref{$_}{ankey
},'antype'=>$$sref{$_}{antype
}};
304 Title : tablepart (alias: tbl)
305 Usage : $schema->tbl(@fieldnames)
306 Function: return the portion of the fieldname[s] that refer to the
308 Example : $schema->tbl('SequenceEntry.SE_id'); # returns 'SequenceEntry'
309 Returns : table name as string
310 Args : [an array of] fieldname[s] in "table.column" format
315 # return the 'Table' part of the specified field(s)
318 Bio
::Root
::Root
->throw("schema not initialized") unless $self->{schema_ref
};
319 my ($squish,@ret, %ret);
320 if ($sfields[0] eq '-s') {
321 # squish : remove duplicates from the returned array
326 push @ret, /^(.*)\./;
329 # arg order is clobbered
333 return (wantarray ?
@ret : $ret[0]);
338 shift->tablepart(@_);
343 Title : columnpart (alias: col)
344 Usage : $schema->col(@fieldnames)
345 Function: return the portion of the fieldname[s] that refer to the
347 Example : $schema->col('SequenceEntry.SE_id'); # returns 'SE_id'
348 Returns : column name as string
349 Args : [an array of] fieldname[s] in "table.column" format
354 # return the 'Column' part of the specified field(s)
357 Bio
::Root
::Root
->throw("schema not initialized") unless $self->{schema_ref
};
360 push @ret, /\.(.*)$/;
362 return (wantarray ?
@ret : $ret[0]);
367 shift->columnpart(@_);
372 Title : primarykey [alias: pk]
373 Usage : $schema->pk(@tablenames);
374 Function: return the primary key of the specified table[s], as judged by
375 the syntax of the table's[s'] fieldnames
376 Example : $schema->pk('SequenceEntry') # returns 'SequenceEntry.SE_id'
377 Returns : primary key fieldname[s] in "table.column" format, or null if
379 Args : [an array of] table name[s] (fieldnames are ok, table part used)
384 # return the primary key (in Table.Column format) of specified table(s)
388 Bio
::Root
::Root
->throw("schema not initialized") unless $self->{schema_ref
};
389 foreach my $tbl (@tbl) {
392 grep(/^$tbl$/i, $self->tables) ?
393 push(@ret, grep(/\.[0-9a-zA-Z]+_id/, grep(/$tbl/i,$self->fields))) :
396 return (wantarray ?
@ret : $ret[0]);
401 shift->primarykey(@_);
406 Title : foreignkey [alias: fk]
407 Usage : $schema->fk($intable [, $totable])
408 Function: return foreign key fieldname in table $intable referring to
409 table $totable, or all foreign keys in $intable if $totable
411 Example : $schema->fk('AUthor', 'SequenceEntry'); # returns 'AUthor_AU_SE_id'
412 Returns : foreign key fieldname[s] in "table.column" format
413 Args : tablename [, optional foreign table name] (fieldnames are ok,
419 # return foreign key in in-table ($intbl) to to-table ($totbl)
420 # or all foreign keys in in-table if to-table not specified
421 # keys returned in Table.Column format
423 my ($intbl, $totbl) = @_;
424 Bio
::Root
::Root
->throw("schema not initialized") unless $self->{schema_ref
};
427 $totbl =~ s/\..*$// if $totbl;
428 # check if in-table exists
429 return () unless grep( /^$intbl/i, $self->tables);
430 my @ret = grep( /$intbl\.(?:[0-9a-zA-Z]+_){2,}id/i, $self->fields);
432 my $tpk = $self->primarykey($totbl);
433 return (wantarray ?
() : "") unless grep( /^$totbl/i, $self->tables) && $tpk;
434 ($tpk) = ($tpk =~ /\.(.*)$/);
435 @ret = grep( /$tpk$/, @ret);
436 return (wantarray ?
@ret : $ret[0]);
439 # return all foreign keys in in-table
446 shift->foreignkey(@_);
451 Title : foreigntable [alias ftbl]
452 Usage : $schema->ftbl( @foreign_key_fieldnames );
453 Function: return tablename of table that foreign keys points to
454 Example : $schema->ftbl( 'AUthor.AU_SE_id' ); # returns 'SequenceEntry'
456 Args : [an array of] fieldname[s] in "table.column" format
461 # return table name that foreign key(s) point(s) to
465 Bio
::Root
::Root
->throw("schema not initialized") unless $self->{schema_ref
};
467 my ($mnem, $fmnem) = /\.([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_.*$/;
468 next unless $mnem && $fmnem;
469 # lookup based on Table.Column format of fields
470 my $sf = [grep( /^[0-9a-zA-Z]+\.$fmnem\_/, $self->fields )]->[0];
472 ($sf) = ($sf =~ /^([0-9a-zA-Z]+)\./);
475 return (wantarray ?
@ret : $ret[0]);
480 shift->foreigntable(@_);
485 Title : loadHIVSchema [alias: loadSchema]
486 Usage : $schema->loadSchema( $XMLfilename )
487 Function: read (LANL DB) schema spec from XML
488 Example : $schema->loadSchema('lanl-schema.xml');
489 Returns : hashref to schema data
490 Keys are fieldnames in "table.column" format.
491 Each value is a hashref with the following properties:
492 {name} : HIVWEB 'table.column' format fieldname,
493 can be used directly in the cgi query
494 {aliases} : ref to array containing valid aliases/shortcuts for
495 {name}; can be used in routines creating the HTML query
496 {options} : ref to array containing valid matchdata for this field
497 can be used directly in the HTML query
498 {ankey} : contains the annotation key for this field used with
499 Bioperl annotation objects
500 {..attr..}: ..value_of_attr.. for this field (app-specific metadata)
507 Bio
::Root
::Root
->throw("loadHIVSchema: schema file not found") unless -e
$fn;
508 my $q = XML
::Simple
->new(ContentKey
=>'name',NormalizeSpace
=>2,ForceArray
=>1);
510 my $ref = $q->XMLin($fn);
511 my @sf = keys %{$$ref{sfield
}};
513 my $h = $$ref{sfield
}{$_};
515 foreach my $ptr ($$h{option
}, $$h{alias
}) {
517 # kludge for XMLin: appears to convert to arrays, if there
518 # exists a tag without content, but to convert to hashes
519 # with content as key, if all tags possess content
520 if (ref($ptr) eq 'HASH') {
521 $ptr = [keys %{$ptr}];
523 elsif (ref($ptr) eq 'ARRAY') {
524 $ptr = [map { ref eq 'HASH' ?
$_->{name
} : $_ } @
{$ptr}]
531 for my $ptr ($$h{ankey
}) {
533 my $ank = [keys %{$ptr}]->[0];
538 $h->{antype
} = $ptr->{$ank}{antype
};
548 $self->{schema_ref
} = loadHIVSchema
(shift);
556 Usage : $schema->_sfieldh($fieldname)
557 Function: get hashref to the specified field hash
560 Args : fieldname in "table.column" format
565 # return reference to the specified field hash
568 return ${$self->{schema_ref
}}{$sfield};
573 =head2 Class QRY - a query algebra for HIVQuery
579 new Q('coreceptor', 'CXCR4'),
580 new Q('country', 'ZA')
583 QRY::Eq(QRY::And($Q, $Q), $Q); # returns 1
584 QRY::Eq(QRY::Or($Q, $Q), $Q); # returns 1
588 new Q( 'coreceptor', 'CCR5' ),
589 new Q( 'country', 'ZA')
592 (QRY::And($Q, $Q2))->isnull; # returns 1
593 $Q3 = QRY::Or($Q, $Q2);
594 print $Q3->A; # prints '(CCR5 CXCR4)[coreceptor] (ZA)[country]'
598 The QRY package provides a query parser for
599 L<Bio::DB::Query::HIVQuery>. Currently, the parser supports AND, OR,
600 and () operations. The structure of the LANL cgi makes it tricky to
601 perform NOTs, though this could be implemented if the desire were
604 Two class methods do the work. C<QRY::_parse_q> does a first-pass
605 parse of the query string. C<QRY::_make_q> interprets the parse tree
606 as returned by C<QRY::_parse_q> and produces an array of hash
607 structures that can be used directly by C<Bio::DB::Query::HIVQuery>
608 query execution methods. Validation of query fields and options is
609 performed at the C<Bio::DB::Query::HIVQuery> level, not here.
611 C<QRY> objects are collections of C<R> (or request) objects, which are
612 in turn collections of C<Q> (or atomic query) objects. C<Q> objects
613 represent a query on a single field, with match data options C<OR>ed
614 together, e.g. C<(A B)[subtype]>. C<R> objects collect C<Q> objects
615 that could be processed in a single HTTP request; i.e., a set of
616 atomic queries each having different fields C<AND>ed together, such as
618 (A B)[subtype] AND ('CCR5')[coreceptor] AND (US CA)[country]
620 The C<QRY> object collects C<R>s that cannot be reduced (through
621 logical operations) to a single HTTP request, e.g.
623 ((C)[subtype] AND (SI)[phenotype]) OR ( (D)[subtype] AND (NSI)[phenotype] ),
625 which cannot be got in one go through the current LANL cgi
626 implementation (as far as I can tell). The parser will simplify
629 ((C)[subtype] AND (SI)[phenotype]) OR ((C)[subtype] AND (NSI)[phenotype])
631 to the single request
633 (C)[subtype] AND (NSI SI)[phenotype]
637 The operators C<&> and C<|> are overloaded to C<QRY::And> and
638 C<QRY::Or>, to get Perl precedence and grouping for free. C<bool> is
639 overloaded to get symbolic tests such as C<if ($QRY) {stuff}>. C<==>
640 is overloaded with C<QRY::Eq> for convenience. No overloading is done
645 # a query algebra for HIVQuery
647 # Each Q object is an 'atomic' query, written as (data)[field]
648 # (a b ...)[X] equals (a)[X] | (b)[X] | ...
649 # Each R object represents a single HTTP request to the db
650 # contains an array of Q (atomic) objects (q1, q2, ...)
651 # the R object is interpreted as q1 & q2 & ...
652 # Each QRY object represents a series of HTTP requests to the db
653 # contains an array of R (request) objects (R1, R2, ...)
654 # the QRY object is interpreted as R1 | R2 | ...
656 # & and | operations are specified for each type
660 $QRY::NULL
= new QRY
();
670 # query language emulator
671 # supports only AND and OR, any groupings
674 # query atom: bareword [field] OR (bareword ...) [field]
675 # only single bareword allowed between []
676 # annotation fields in {} (only bareword lists allowed between {})
677 # () can group query atoms joined by operators (AND or OR)
678 # () containing only barewords MUST be followed by a field descriptor [field]
679 # empty [] not allowed
680 # query atoms joined with AND by default
681 # barewords are associated (ORed within) the next field descriptor in the line
683 # follow the parse tree, creating new QRY objects as needed in @q, and
684 # construct a logical expression using & and | symbols.
685 # These are overloaded for doing ands and ors on QRY objects;
686 # to get the final QRY object, eval the resulting expression $q_expr.
687 # QRY object will be translated into (possibly multiple) hashes
688 # conforming to HIVQuery parameter requirements.
695 Usage : QRY::_make_q($parsetree)
696 Function: creates hash structures suitable for HIVQuery from parse tree
697 returned by QRY::_parse_q
699 Returns : array of hashrefs of query specs
706 my ($q_expr, @q, @an, $query, @dbq);
707 _make_q_guts
($ptree, \
$q_expr, \
@q, \
@an);
708 $query = eval $q_expr;
709 throw Bio
::Root
::Root
(-class=>'Bio::Root::Exception',
711 -value
=>$q_expr) if $@
;
712 return {} if $query->isnull;
713 foreach my $rq ($query->requests) {
714 my $h = {'query'=>{}};
715 foreach ($rq->atoms) {
716 my @d = split(/\s+/, $_->dta);
721 $h->{'query'}{$_->fld} = (@d == 1) ?
$d[0] : [@d];
723 $h->{'annot'} = [@an] if @an;
731 Title : _make_q_guts (Internal class method)
732 Usage : _make_q_guts($ptree, $q_expr, $qarry, $anarry)
733 Function: traverses the parse tree returned from QRY::_parse_q, checking
734 syntax and creating HIVQuery-compliant query structures
737 Args : $parse_tree (hashref), $query_expression (scalar string ref),
738 $query_array (array ref : stack for returning query structures),
739 $annotation_array (array ref : stack for returning annotation
745 my ($ptree, $q_expr, $qarry, $anarry) = @_;
748 foreach (@
{$ptree->{cont
}}) {
758 for my $dl ($_->{delim
}) {
759 ($dl =~ m{\(}) && do {
760 if (grep /^HASH/, @
{$_->{cont
}}) {
761 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
763 _make_q_guts
($_,$q_expr,$qarry,$anarry);
768 my $c = join(' ',@
{$_->{cont
}});
770 Bio
::Root
::Root
->throw("query syntax error: unmatched ['\"]") if (@c = ($c =~ /(['"])/g)) % 2;
771 @c = split(/\s*(['"])\s*/, $c);
775 $c = join('', ($c, shift @c, shift @c));
780 push @words, split(/\s+/,$c);
786 ($dl =~ m{\[}) && do {
787 Bio
::Root
::Root
->throw("syntax error: empty field descriptor") unless @
{$_->{cont
}};
788 Bio
::Root
::Root
->throw("syntax error: more than one field descriptor in square brackets") unless @
{$_->{cont
}} == 1;
790 push @
{$qarry}, new QRY
( new R
( new Q
( $_->{cont
}->[0], @words)));
791 # add default operation if nec
792 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
793 $$q_expr .= "\$q[".$#$qarry."]";
797 ($dl =~ m{\{}) && do {
798 foreach my $an (@
{$_->{cont
}}) {
799 ($an =~ /^HASH/) && do {
800 if ($an->{delim
} eq '[') {
801 push @
$anarry, @
{$an->{cont
}};
804 Bio
::Root
::Root
->throw("query syntax error: only field descriptors (with or without square brackets) allowed in annotation spec");
821 do { # else, bareword
828 m/['"]/ && ($o = !$o);
831 Bio
::Root
::Root
->throw("query syntax error: no search fields specified")
832 unless $$q_expr =~ /q\[[0-9]+\]/;
835 throw Bio
::Root
::Root
(-class=>'Bio::QueryStringSyntax::Exception',
844 Usage : QRY::_parse_q($query_string)
845 Function: perform first pass parse of a query string with some syntax
846 checking, return a parse tree suitable for QRY::_make_q
847 Example : QRY::_parse_q(" to[be] OR (not to)[be] ");
853 # parse qry string into a branching tree structure
854 # each branch tagged by the opening delimiter ( key 'delim' )
855 # content (tokens and subbranch hashes) placed in l2r order in
860 my $illegal = qr/[^a-zA-Z0-9-_,\.\(\[\{\}\]\)\s'"]/;
861 my $pdlm = qr/[\{\[\(\)\]\}]/;
862 my %md = ('('=>')', '['=>']','{'=>'}');
863 my @tok = grep !/^\s*$/, split /($pdlm)/, $qstr;
864 return {} unless @tok;
870 Bio
::Root
::Root
->throw("query syntax error: illegal character") if $qstr =~ /$illegal/;
872 $ptree = $p = {'delim'=>'*'};
878 my $new = {'delim'=>$_};
879 $p->{cont
} = [] unless $p->{cont
};
880 push @
{$p->{cont
}}, $new;
890 Bio
::Root
::Root
->throw("query syntax error: unmatched \"$_\"") unless $p;
893 Bio
::Root
::Root
->throw("query syntax error: saw \"$_\" before matching \"$md{$d}\"");
898 $p->{cont
} = [] unless $p->{cont
};
899 push @
{$p->{cont
}}, split(/\s+/);
904 throw Bio
::Root
::Root
(-class=>'Bio::QueryStringSyntax::Exception',
914 =head4 QRY Constructor
916 Title : QRY constructor
917 Usage : $QRY = new QRY()
921 Args : array of R objects, optional
929 $self->{requests
} = [];
930 bless($self, $class);
931 $self->put_requests(@args) if @args;
935 ## QRY instance methods
937 =head3 INSTANCE METHODS
942 Usage : $QRY->requests
943 Function: get/set array of requests comprising this QRY object
946 Args : array of class R objects
952 $self->put_requests(@_) if @_;
953 return @
{$self->{'requests'}};
959 Usage : $QRY->put_request(@R)
960 Function: add object of class R to $QRY
963 Args : [an array of] of class R object[s]
971 Bio
::Root
::Root
->throw('requires type R (request)') unless ref && $_->isa('R');
972 push @
{$self->{requests
}}, $_;
981 Function: test if QRY object is null
983 Returns : 1 if null, 0 otherwise
990 return ($self->requests) ?
0 : 1;
996 Usage : print $QRY->A
997 Function: get a string representation of QRY object
999 Returns : string scalar
1006 return join( "\n", map {$_->A} $self->requests );
1013 Function: get number of class R objects contained by QRY object
1022 return scalar @
{$self->{'requests'}};
1028 Usage : $QRY2 = $QRY1->clone;
1029 Function: create and return a clone of the object
1031 Returns : object of class QRY
1039 my $ret = new QRY
();
1040 foreach ($self->requests) {
1041 $ret->put_requests($_->clone);
1046 ## QRY class methods
1048 =head3 CLASS METHODS
1053 Usage : $QRY3 = QRY::Or($QRY1, $QRY2)
1054 Function: logical OR for QRY objects
1056 Returns : a QRY object
1057 Args : two class QRY objects
1063 my ($q, $r, $rev_f) = @_;
1064 Bio
::Root
::Root
->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1065 Bio
::Root
::Root
->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1069 elsif ($r->isnull) {
1072 do {my $qq = $q; $q=$r; $r=$qq} if ($q->len > $r->len);
1073 my @rq_r = $r->requests;
1074 my @rq_q = $q->requests;
1075 my (@cand_rq, @ret_rq);
1076 # search for simplifications
1081 while (my $rq = pop @now) {
1082 my @result = R
::Or
($rq, $_);
1084 push @cand_rq, $result[0]->clone;
1092 push @cand_rq, $_->clone unless ($found);
1093 # @now becomes unexamined @rq_q's plus failed @rq_q's
1094 @now = (@now, @nxt);
1096 push @cand_rq, map {$_->clone} @now; # add all failed @rq_q's
1097 # squeeze out redundant requests
1098 while (my $rq = pop @cand_rq) {
1099 push @ret_rq, $rq unless @cand_rq && grep {R
::Eq
($rq, $_)} @cand_rq;
1101 return new QRY
( @ret_rq );
1107 Usage : $QRY3 = QRY::And($QRY1, $QRY2)
1108 Function: logical AND for QRY objects
1110 Returns : a QRY object
1111 Args : two class QRY objects
1116 my ($q, $r, $rev_f) = @_;
1117 Bio
::Root
::Root
->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1118 Bio
::Root
::Root
->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1119 return ($QRY::NULL
) if ($q->isnull || $r->isnull);
1120 my (@cand_rq, @ret_rq);
1121 foreach my $rq_r ($r->requests) {
1122 foreach my $rq_q ($q->requests) {
1123 my ($rq) = R
::And
($rq_r, $rq_q);
1124 push @cand_rq, $rq unless $rq->isnull;
1127 return $QRY::NULL
unless @cand_rq;
1128 # squeeze out redundant requests
1129 while (my $rq = pop @cand_rq) {
1130 push @ret_rq, $rq unless @cand_rq && grep {R
::Eq
($rq, $_)} @cand_rq;
1132 return new QRY
( @ret_rq );
1138 Usage : QRY::Bool($QRY1)
1139 Function: allows symbolic testing of QRY object when bool overloaded
1140 Example : do {stuff} if $QRY1 *same as* do {stuff} if !$QRY1->isnull
1142 Args : a class QRY object
1148 Bio
::Root
::Root
->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1149 return $q->isnull ?
0 : 1;
1155 Usage : QRY::Eq($QRY1, $QRY2)
1156 Function: test if R objects in two QRY objects are the same
1157 (irrespective of order)
1159 Returns : 1 if equal, 0 otherwise
1160 Args : two class QRY objects
1165 my ($q, $r, $rev_f) = @_;
1166 Bio
::Root
::Root
->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1167 Bio
::Root
::Root
->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1168 return 0 unless $q->len == $r->len;
1169 foreach my $rq_q ($q->requests) {
1171 foreach my $rq_r ($r->requests) {
1172 if (R
::Eq
($rq_q,$rq_r)) {
1177 return 0 unless $found;
1184 =head2 Class R - request objects for QRY algebra
1188 $R = new R( $q1, $q2 );
1190 $R->del_atoms('coreceptor', 'phenotype');
1192 $R1 = new R( new Q('subtype', 'B') );
1193 $R2 = new R( new Q('subtype', 'B C'),
1194 new Q('country', 'US') );
1195 R::Eq( (R::And($R1, $R2))[0],
1196 new R( new Q('subtype', 'B' ),
1197 new Q('country', 'US') )); # returns 1
1198 QRY::Eq( new QRY(R::Or($R1, $R2)), new QRY($R1, $R2) ); # returns 1
1199 R::In( (R::And($R1, $R2))[0], $R1 ); # returns 1
1203 Class R objects contain a list of atomic queries (class Q
1204 objects). Each class R object represents a single HTTP request to the
1205 LANL DB. When converted to a DB query, the class Q objects contained
1206 by an R object are effectively C<AND>ed.
1219 =head4 R constructor
1221 Title : R constructor
1222 Usage : $R = new R()
1223 Function: create a new R (request) object
1225 Returns : class R (request) object
1226 Args : optional, array of class Q objects
1234 $self->{atoms
} = {};
1235 bless($self, $class);
1236 $self->put_atoms(@args) if @args;
1240 ## R instance methods
1242 =head3 INSTANCE METHODS
1248 Function: get number of class Q objects contained in R object
1257 return scalar @
{[keys %{$self->{'atoms'}}]};
1263 Usage : $R->atoms( [optional $field])
1264 Function: get array of class Q (atomic query) objects in class R object
1265 Example : $R->atoms(); $R->atoms('coreceptor')
1266 Returns : array of class Q objects (all Qs or those corresponding to $field
1268 Args : optional, scalar string
1274 # returns an array of atoms
1275 # no arg: all atoms;
1276 # args: atoms with specified fields
1278 my @flds = (@_ ?
@_ : keys %{$self->{'atoms'}});
1279 return wantarray ?
map { $self->{'atoms'}->{$_} } @flds : $self->{'atoms'}->{$flds[0]};
1286 Function: get array of fields of all Q objects contained in $R
1288 Returns : array of scalars
1295 return keys %{$self->{'atoms'}};
1301 Usage : $R->put_atoms( @q )
1302 Function: AND an atomic query (class Q object) to the class R object's list
1305 Args : an [array of] class Q object[s]
1310 # AND this atom to the request
1315 Bio
::Root
::Root
->throw('requires type Q (atom)') unless ref && $_->isa('Q');
1316 if ($self->atoms($_->fld)) {
1317 my $a = Q
::qand
( $self->atoms($_->fld), $_ );
1319 delete $self->{'atoms'}->{$_->fld};
1322 $self->{atoms
}->{$_->fld} = $a->clone;
1326 $self->{atoms
}->{$_->fld} = $_->clone;
1335 Usage : $R->del_atoms( @qfields )
1336 Function: removes class Q objects from R object's list according to the
1337 field names given in arguments
1339 Returns : the class Q objects deleted
1340 Args : scalar array of field names
1345 # remove atoms by field from request
1349 return () unless @args;
1352 push @ret, delete $self->{'atoms'}->{$_};
1361 Function: test if class R object is null
1363 Returns : 1 if null, 0 otherwise
1370 return ($self->len) ?
0 : 1;
1377 Function: get a string representation of class R object
1379 Returns : string scalar
1386 my @a = sort {$a->fld cmp $b->fld} $self->atoms;
1387 return join(" ", map {$_->A} @a);
1393 Usage : $R2 = $R1->clone;
1394 Function: create and return a clone of the object
1396 Returns : object of class R
1405 foreach ($self->atoms) {
1406 $ret->put_atoms($_->clone);
1413 =head3 CLASS METHODS
1418 Usage : R::In($R1, $R2)
1419 Function: tests whether the query represented by $R1 would return a subset
1420 of items returned by the query represented by $R2
1421 Example : print "R2 gets those and more" if R::In($R1, $R2);
1422 Returns : 1 if R1 is subset of R2, 0 otherwise
1423 Args : two class R objects
1430 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1431 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1432 return 1 if ($s->isnull);
1434 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,$s->fields} $t->fields;
1435 return 0 unless @cf==$t->len;
1437 my @sd = split(/\s+/, $s->atoms($_)->dta);
1438 my @td = split(/\s+/, $t->atoms($_)->dta);
1439 my @cd = grep {defined} map {my $d=$_; grep /^$d$/, @td} @sd;
1440 return 0 unless @cd==@sd;
1448 Usage : @Rresult = R::And($R1, $R2)
1449 Function: logical AND for R objects
1451 Returns : an array containing class R objects
1452 Args : two class R objects
1459 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1460 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1461 return ($R::NULL
) if ($s->isnull || $t->isnull);
1463 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1464 # $t has at least as many fields defined than $s ($t is more restrictive)
1467 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1474 # And the atoms with identical fields
1477 my ($a) = Q
::qand
($s->atoms($_), $t->atoms($_));
1482 $ret->put_atoms($a);
1485 # put the private atoms
1486 $ret->put_atoms($u->atoms, $v->atoms);
1494 Usage : @Rresult = R::Or($R1, $R2)
1495 Function: logical OR for R objects
1497 Returns : an array containing class R objects
1498 Args : two class R objects
1505 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1506 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1510 elsif ($t->isnull) {
1513 return $s->clone if (R
::In
($t, $s));
1514 return $t->clone if (R
::In
($s, $t));
1517 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1519 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1521 if ($t->len == @cf) {
1522 # all atoms equal within fields but one? If yes, simplify...
1523 my @df = grep {!Q
::qeq
($s->atoms($_), $t->atoms($_))} @cf;
1525 my ($a) = Q
::qor
($s->atoms($df[0]), $t->atoms($df[0]));
1526 my $ret = $s->clone;
1527 $ret->del_atoms($df[0]);
1528 $ret->put_atoms($a);
1533 # neither request contains the other, and the requests cannot be
1534 # simplified; reflect back (clones of) the input...
1535 return ($s->clone, $t->clone);
1542 Usage : R::Eq($R1, $R2)
1543 Function: test if class Q objects in two R objects are the same
1544 (irrespective of order)
1546 Returns : 1 if equal, 0 otherwise
1547 Args : two class R objects
1554 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1555 Bio
::Root
::Root
->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1556 my @sf = $s->fields;
1557 my @tf = $t->fields;
1558 return 0 unless @sf==@tf;
1559 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,@sf} @tf;
1560 return 0 unless @cf==@tf;
1562 return 0 unless Q
::qeq
($s->atoms($_), $t->atoms($_));
1568 =head2 Class Q - atomic query objects for QRY algebra
1572 $q = new Q('coreceptor', 'CXCR4 CCR5');
1573 $u = new Q('coreceptor', 'CXCR4');
1574 $q->fld; # returns 'coreceptor'
1575 $q->dta; # returns 'CXCR4 CCR5'
1576 print $q->A; # prints '(CXCR4 CCR5)[coreceptor]
1577 Q::qeq($q, $u); # returns 0
1578 Q::qeq( Q::qor($q, $q), $q ); # returns 1
1579 Q::qin($u, $q) # returns 1
1580 Q::qeq(Q::qand($u, $q), $u ); # returns 1
1584 Class Q objects represent atomic queries, that can be described by a
1585 single LANL cgi parameter=value pair. Class R objects (requests) are
1586 built from class Qs. The logical operations at the higher levels
1587 (C<QRY, R>) ultimately depend on the lower level operations on Qs:
1588 C<qeq, qin, qand, qor>.
1600 =head4 Q constructor
1602 Title : Q constructor
1603 Usage : $q = new Q($field, $data)
1604 Function: create a new Q (atomic query) object
1606 Returns : class Q object
1607 Args : optional $field, $data strings
1613 my ($class,@args) = @_;
1615 foreach (@args) { s/^\s+//; s/\s+$//; }
1616 my ($fld, @dta) = @args;
1618 $self->{dta
}=join(" ", @dta);
1619 bless($self, $class);
1623 ## Q instance methods
1625 =head3 INSTANCE METHODS
1631 Function: test if class Q object is null
1633 Returns : 1 if null, 0 otherwise
1640 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1641 return 1 unless (($self->fld && length($self->fld)) || ($self->dta && length($self->dta)));
1648 Usage : $q->fld($field)
1649 Function: get/set fld (field name) property
1658 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1663 return $self->{fld
}=$f;
1665 return $self->{fld
};
1672 Usage : $q->dta($data)
1673 Function: get/set dta (whsp-separated data string) property
1682 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1683 my $d = join(" ", @_);
1687 return $self->{dta
} = $d;
1689 return $self->{dta
};
1696 Function: get a string representation of class Q object
1698 Returns : string scalar
1705 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1706 my @a = split(/\s+/, $self->dta);
1708 return "(".join(' ', sort {$a cmp $b} @a).")[".$self->fld."]";
1714 Usage : $q2 = $q1->clone;
1715 Function: create and return a clone of the object
1717 Returns : object of class Q
1724 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1725 my $ret = new Q
($self->fld, $self->dta);
1731 =head3 CLASS METHODS
1736 Usage : Q::qin($q1, $q2)
1737 Function: tests whether the query represented by $q1 would return a subset
1738 of items returned by the query represented by $q2
1739 Example : print "q2 gets those and more" if Q::qin($q1, $q2);
1740 Returns : 1 if q1 is subset of q2, 0 otherwise
1741 Args : two class Q objects
1747 Bio
::Root
::Root
->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1748 return 0 unless $a->fld eq $b->fld;
1749 return Q
::qeq
( $b, Q
::qor
($a, $b) );
1755 Usage : Q::qeq($q1, $q2)
1756 Function: test if fld and dta properties in two class Q objects are the same
1757 (irrespective of order)
1759 Returns : 1 if equal, 0 otherwise
1760 Args : two class Q objects
1767 Bio
::Root
::Root
->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1768 return 0 unless $a->fld eq $b->fld;
1769 my @ad = unique
(split(/\s+/,$a->dta));
1770 my @bd = unique
(split(/\s+/,$b->dta));
1771 return 0 unless @ad==@bd;
1772 my @cd = grep {defined} map {my $f = $_; grep /^$f$/, @ad} @bd;
1779 Usage : @qresult = Q::qor($q1, $q2)
1780 Function: logical OR for Q objects
1782 Returns : an array of class Q objects
1783 Args : two class Q objects
1791 Bio
::Root
::Root
->throw("requires type Q (atom)") unless ref && $_->isa('Q');
1795 @a = grep {!$_->isnull} @a;
1796 return ($Q::NULL
) unless @a > 0;
1797 # list of unique flds
1798 @f = unique
(map {$_->fld} @a);
1799 foreach my $f (@f) {
1800 my @fobjs = grep {$_->fld eq $f} @a;
1801 my @d = unique
(map {split(/\s/, $_->dta)} @fobjs );
1802 my $r = new Q
($f, @d);
1811 Usage : @qresult = Q::And($q1, $q2)
1812 Function: logical AND for R objects
1814 Returns : an array of class Q objects
1815 Args : two class Q objects
1822 Bio
::Root
::Root
->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1824 if (ref $a eq 'ARRAY') {
1825 foreach my $ea (@
$a) {
1826 push @ret, qand
( $ea, $b );
1828 return qor
(@ret); # simplify
1830 elsif (ref $b eq 'ARRAY') {
1831 foreach my $eb (@
$b) {
1832 push @ret, qand
( $a, $eb);
1835 return qor
(@ret); # simplify
1838 return ($Q::NULL
) if ($a->isnull || $b->isnull);
1839 if ($a->fld eq $b->fld) {
1840 # find intersection of data
1842 @ad = split(/\s+/, $a->dta);
1843 @ad{@ad} = (1) x
@ad;
1844 @bd = split(/\s+/, $b->dta);
1848 my $r = new Q
($a->fld,
1850 map {$ad{$_} == 2 ?
$_ : undef} keys %ad);
1851 return (length($r->dta) > 0) ?
($r) : ($Q::NULL
);
1864 Usage : @ua = unique(@a)
1865 Function: return contents of @a with duplicates removed
1881 =head2 Additional tools for Bio::AnnotationCollectionI
1885 $seq->annotation->put_value('patient_id', 1401)
1886 $seq->annotation->get_value('patient_ids') # returns 1401
1887 $seq->annotation->put_value('patient_group', 'MassGenH')
1888 $seq->annotation->put_value(['clinical', 'cd4count'], 503);
1889 $seq->annotation->put_value(['clinical', 'virus_load'], 150805);
1890 foreach ( qw( cd4count virus_load ) ) {
1891 $blood_readings{$_} = $seq->annonation->get_value(['clinical', $_]);
1896 C<get_value()> and C<put_value> allow easy creation of and access to an annotation collection tree with nodes of L<Bio::Annotation::SimpleValue>. These methods obiviate direct accession of the SimpleValue objects.
1900 package Bio
::AnnotationCollectionI
;
1902 use Bio
::Annotation
::SimpleValue
;
1907 Usage : $ac->get_value($tagname) -or-
1908 $ac->get_value( $tag_level1, $tag_level2,... )
1909 Function: access the annotation value assocated with the given tags
1912 Args : an array of tagnames that descend into the annotation tree
1921 return "" unless @_;
1922 while ($_ = shift @args) {
1923 @h = $self->get_Annotations($_);
1924 if (ref($h[0]->{value
})) {
1925 $self = $h[0]->{value
}; # must be another Bio::AnnotationCollectionI
1931 return $h[0] && $h[0]->{value
} ; # now the last value.
1937 Usage : $ac->put_value($tagname, $value) -or-
1938 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
1939 $ac->put_value( [$tag_level1, $tag_level2, ...] )
1940 Function: create a node in an annotation tree, and assign a scalar value to it
1941 if a value is specified
1943 Returns : scalar or a Bio::AnnotationCollection object
1944 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
1945 -VALUE=>$value) -or-
1946 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
1947 Note : If intervening nodes do not exist, put_value creates them, replacing
1948 existing nodes. So if $ac->put_value('x', 10) was done, then later,
1949 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed, and $ac->get_value('x') will now return the annotation collection
1958 my ($keys, $value) = $self->_rearrange([qw( KEYS VALUE )], @args);
1959 my (@keys, $lastkey);
1960 $value ||= new Bio
::Annotation
::Collection
;
1961 @keys = (ref($keys) eq 'ARRAY') ? @
$keys : ($keys);
1962 $lastkey = pop @keys;
1964 my $a = $self->get_value($_);
1965 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
1969 # replace an old value
1970 $self->remove_Annotations($_) if $a;
1971 my $ac = new Bio
::Annotation
::Collection
;
1972 $self->add_Annotation(new Bio
::Annotation
::SimpleValue
(
1980 if ($self->get_value($lastkey)) {
1981 # replace existing value
1982 ($self->get_Annotations($lastkey))[0]->{value
} = $value;
1985 $self->add_Annotation(new Bio
::Annotation
::SimpleValue
(