[bug 2637]
[bioperl-live.git] / Bio / DB / HIV / HIVQueryHelper.pm
blob78dd6d662e0a389bdc4161f802dc473e243ec7bf
1 # $Id: HIVQueryHelper.pm 231 2008-12-11 14:32:00Z maj $
3 # BioPerl module for HIVQueryHelper
5 # Cared for by Mark A. Jensen <maj@fortinbras.us>
7 # Copyright Mark A. Jensen
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 HIVQueryHelper - Routines and packages used by Bio::DB::HIV and Bio::DB::Query::HIVQuery
17 =head1 SYNOPSIS
19 Used in Bio::DB::Query::HIVQuery. No need to use directly.
21 =head1 DESCRIPTION
23 C<Bio::DB::HIV::HIVQueryHelper> contains a number of packages for use
24 by L<Bio::DB::Query::HIVQuery>. Package C<HIVSchema> parses the
25 C<lanl-schema.xml> file, and allows access to it in the context of the
26 relational database it represents (see APPENDIX for excruciating
27 detail). Packages C<QRY>, C<R>, and C<Q> together create the query
28 string parser that enables NCBI-like queries to be understood by
29 C<Bio::DB::Query::HIVQuery>. They provide objects and operators to
30 perform and simplify logical expressions involving C<AND>, C<OR>, and
31 C<()> and return hash structures that can be handled by
32 C<Bio::DB::Query::HIVQuery> routines.
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 =head2 Reporting Bugs
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 of the bugs and their resolution. Bug reports can be submitted via
49 the web:
51 http://bugzilla.open-bio.org/
53 =head1 AUTHOR - Mark A. Jensen
55 Email maj@fortinbras.us
57 =head1 CONTRIBUTORS
59 =head1 APPENDIX
61 The rest of the documentation details each of the contained packages.
62 Internal methods are usually preceded with a _
64 =cut
66 # Let the code begin...
69 package HIVQueryHelper;
70 use strict;
71 use Bio::Root::Root;
73 # globals
74 BEGIN {
75 #exceptions
76 @Bio::QueryStringSyntax::Exception::ISA = qw( Bio::Root::Exception);
81 =head2 HIVSchema - objects/methods to manipulate a version of the LANL HIV DB schema
83 =head3 SYNOPSIS
85 $schema = new HIVSchema( 'lanl-schema.xml' );
86 @tables = $schema->tables;
87 @validFields = $schema->fields;
88 @validAliases = $schema->aliases;
89 @query_aliases_for_coreceptor = $schema->aliases( 'SEQ_SAMple.SSAM_second_receptor' );
90 $pk_for_SequenceEntry = $schema->primarykey('SequenceEntry'); # returns 'SequenceEntry.SE_id'
91 $fk_for_SEQ_SAMple_to_SequenceEntry =
92 $schema->foreignkey('SEQ_SAMple', 'SequenceEntry'); # returns 'SEQ_SAMple.SSAM_SE_id'
94 $table = $schema->tablepart('SEQ_SAMple.SSAM_badseq'); # returns 'SEQ_SAMple'
95 $column = $schema->columnpart('SEQ_SAMple.SSAM_badseq'); # returns 'SSAM_badseq'
97 =head3 DESCRIPTION
99 HIVSchema methods are used in L<Bio::DB::Query::HIVQuery> for table,
100 column, primary/foreign key manipulations based on the observed Los
101 Alamos HIV Sequence Database (LANL DB) naming conventions for their
102 CGI parameters. The schema is contained in an XML file
103 (C<lanl-schema.xml>) which is read into an HIVSchema object, in turn a
104 property of the HIVQuery object. HIVSchema methods are used to build
105 correct cgi queries in a way that attempts to preserve the context of
106 the relational database the query parameters represent.
108 =cut
110 package HIVSchema;
111 # objects/methods to manipulate a version of the LANL HIV DB schema
112 # stored in XML
113 use XML::Simple;
114 use Bio::Root::Root;
115 use strict;
117 ### constructor
119 =head3 CONSTRUCTOR
121 =head4 new
123 Title : new
124 Usage : $schema = new HIVSchema( "lanl-schema.xml ");
125 Function:
126 Example :
127 Returns : an HIVSchema object
128 Args : XML filename
130 =cut
132 sub new {
133 my $class = shift;
134 my @args = @_;
135 my $self = {};
136 if ($args[0]) {
137 $self->{schema_ref} = loadHIVSchema($args[0]);
139 bless($self, $class);
140 return $self;
143 ### object methods
145 =head3 INSTANCE METHODS
147 =head4 tables
149 Title : tables
150 Usage : $schema->tables()
151 Function: get all table names in schema
152 Example :
153 Returns : array of table names
154 Args : none
156 =cut
158 sub tables {
159 # return array of all tables in schema
160 local $_;
161 my $self = shift;
162 my $sref = $self->{schema_ref};
163 Bio::Root::Root->throw("schema not initialized") unless $sref;
164 my @k = grep(/\./, keys %$sref);
165 my %ret;
166 foreach (@k) {
167 s/\..*$//;
168 $ret{$_}++;
170 @k = sort keys %ret;
171 return @k;
174 =head4 columns
176 Title : columns
177 Usage : $schema->columns( [$tablename] );
178 Function: return array of columns for specified table, or all columns in
179 schema, if called w/o args
180 Example :
181 Returns :
182 Args : tablename or fieldname string
184 =cut
186 sub columns {
187 # return array of columns for specified table
188 # all columns in schema, if called w/o args
189 local $_;
190 my $self = shift;
191 my ($tbl) = @_;
192 my $sref = $self->{schema_ref};
193 Bio::Root::Root->throw("schema not initialized") unless $sref;
194 # trim column name
195 $tbl =~ s/\..*$//;
196 # check if table exists
197 return () unless grep(/^$tbl$/i, $self->tables);
198 my @k = sort keys %$sref;
199 @k = grep (/^$tbl\./i, @k);
200 foreach (@k) {
201 s/^$tbl\.//;
203 return @k;
206 =head4 fields
208 Title : fields
209 Usage : $schema->fields();
210 Function: return array of all fields in schema, in format "table.column"
211 Example :
212 Returns : array of all fields
213 Args : none
215 =cut
217 sub fields {
218 # return array of all fields (Table.Column format) in schema
219 my $self = shift;
220 my $sref = $self->{schema_ref};
221 Bio::Root::Root->throw("schema not initialized") unless $sref;
222 my @k = sort keys %{$sref};
223 return @k;
226 =head4 options
228 Title : options
229 Usage : $schema->options(@fieldnames)
230 Function: get array of options (i.e., valid match data strings) available
231 to specified field
232 Example :
233 Returns : array of match data strings
234 Args : [array of] fieldname string[s] in "table.column" format
236 =cut
238 sub options {
239 # return array of options available to specified field
240 my $self = shift;
241 my ($sfield) = @_;
242 my $sref = $self->{schema_ref};
243 Bio::Root::Root->throw("schema not initialized") unless $sref;
244 return $$sref{$sfield}{option} ? @{$$sref{$sfield}{option}} : ();
247 =head4 aliases
249 Title : aliases
250 Usage : $schema->aliases(@fieldnames)
251 Function: get array of aliases to specified field[s]
252 Example :
253 Returns : array of valid query aliases for fields as spec'd in XML file
254 Args : [an array of] fieldname[s] in "table.column" format
256 =cut
258 sub aliases {
259 # return array of aliases to specified field
260 my $self = shift;
261 my ($sfield) = @_;
262 my $sref = $self->{schema_ref};
263 my @ret;
264 Bio::Root::Root->throw("schema not initialized") unless $sref;
265 if ($sfield) {
266 return $$sref{$sfield}{alias} ? @{$$sref{$sfield}{alias}} : ();
268 else { # all valid aliases
269 map {push @ret, @{$$sref{$_}{alias}} if $$sref{$_}{alias}} $self->fields;
270 return @ret;
274 =head4 ankh
276 Title : ankh (annotation key hash)
277 Usage : $schema->ankh(@fieldnames)
278 Function: return a has translating fields to annotation keys for the
279 spec'd fields.
280 (Annotation keys are used for parsing the tab-delimited response
281 to Bio::DB::Query::HIVQuery::_do_lanl_request.)
282 Example :
283 Returns : hash ref
284 Args : [an array of] fieldname[s] in "table.column" format
286 =cut
288 sub ankh {
289 # return hash translating sfields to annotation keys for specified sfield(s)
290 my $self = shift;
291 my %ret = ();
292 my @sfields = @_;
293 my $sref = $self->{schema_ref};
294 Bio::Root::Root->throw("schema not initialized") unless $sref;
295 foreach (@sfields) {
296 next unless $$sref{$_}{ankey};
297 $ret{$_} = {'ankey'=>$$sref{$_}{ankey},'antype'=>$$sref{$_}{antype}};
299 return %ret;
302 =head4 tablepart
304 Title : tablepart (alias: tbl)
305 Usage : $schema->tbl(@fieldnames)
306 Function: return the portion of the fieldname[s] that refer to the
307 db table
308 Example : $schema->tbl('SequenceEntry.SE_id'); # returns 'SequenceEntry'
309 Returns : table name as string
310 Args : [an array of] fieldname[s] in "table.column" format
312 =cut
314 sub tablepart {
315 # return the 'Table' part of the specified field(s)
316 my $self = shift;
317 my @sfields = @_;
318 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
319 my ($squish,@ret, %ret);
320 if ($sfields[0] eq '-s') {
321 # squish : remove duplicates from the returned array
322 $squish=1;
323 shift @sfields;
325 foreach (@sfields) {
326 push @ret, /^(.*)\./;
328 if ($squish) {
329 # arg order is clobbered
330 @ret{@ret} = undef;
331 @ret = keys %ret;
333 return (wantarray ? @ret : $ret[0]);
336 sub tbl {
337 # tablepart alias
338 shift->tablepart(@_);
341 =head4 columnpart
343 Title : columnpart (alias: col)
344 Usage : $schema->col(@fieldnames)
345 Function: return the portion of the fieldname[s] that refer to the
346 db column
347 Example : $schema->col('SequenceEntry.SE_id'); # returns 'SE_id'
348 Returns : column name as string
349 Args : [an array of] fieldname[s] in "table.column" format
351 =cut
353 sub columnpart {
354 # return the 'Column' part of the specified field(s)
355 my $self = shift;
356 my @sfields = @_;
357 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
358 my @ret;
359 foreach (@sfields) {
360 push @ret, /\.(.*)$/;
362 return (wantarray ? @ret : $ret[0]);
365 sub col {
366 # columnpart alias
367 shift->columnpart(@_);
370 =head4 primarykey
372 Title : primarykey [alias: pk]
373 Usage : $schema->pk(@tablenames);
374 Function: return the primary key of the specified table[s], as judged by
375 the syntax of the table's[s'] fieldnames
376 Example : $schema->pk('SequenceEntry') # returns 'SequenceEntry.SE_id'
377 Returns : primary key fieldname[s] in "table.column" format, or null if
378 no pk exists
379 Args : [an array of] table name[s] (fieldnames are ok, table part used)
381 =cut
383 sub primarykey {
384 # return the primary key (in Table.Column format) of specified table(s)
385 my $self = shift;
386 my @tbl = @_;
387 my @ret;
388 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
389 foreach my $tbl (@tbl) {
390 # trim column name
391 $tbl =~ s/\..*$//;
392 grep(/^$tbl$/i, $self->tables) ?
393 push(@ret, grep(/\.[0-9a-zA-Z]+_id/, grep(/$tbl/i,$self->fields))) :
394 push(@ret, "");
396 return (wantarray ? @ret : $ret[0]);
399 sub pk {
400 # primarykey alias
401 shift->primarykey(@_);
404 =head4 foreignkey
406 Title : foreignkey [alias: fk]
407 Usage : $schema->fk($intable [, $totable])
408 Function: return foreign key fieldname in table $intable referring to
409 table $totable, or all foreign keys in $intable if $totable
410 unspec'd
411 Example : $schema->fk('AUthor', 'SequenceEntry'); # returns 'AUthor_AU_SE_id'
412 Returns : foreign key fieldname[s] in "table.column" format
413 Args : tablename [, optional foreign table name] (fieldnames are ok,
414 table part used)
416 =cut
418 sub foreignkey {
419 # return foreign key in in-table ($intbl) to to-table ($totbl)
420 # or all foreign keys in in-table if to-table not specified
421 # keys returned in Table.Column format
422 my $self = shift;
423 my ($intbl, $totbl) = @_;
424 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
425 # trim col names
426 $intbl =~ s/\..*$//;
427 $totbl =~ s/\..*$// if $totbl;
428 # check if in-table exists
429 return () unless grep( /^$intbl/i, $self->tables);
430 my @ret = grep( /$intbl\.(?:[0-9a-zA-Z]+_){2,}id/i, $self->fields);
431 if ($totbl) {
432 my $tpk = $self->primarykey($totbl);
433 return (wantarray ? () : "") unless grep( /^$totbl/i, $self->tables) && $tpk;
434 ($tpk) = ($tpk =~ /\.(.*)$/);
435 @ret = grep( /$tpk$/, @ret);
436 return (wantarray ? @ret : $ret[0]);
438 else {
439 # return all foreign keys in in-table
440 return @ret;
444 sub fk {
445 # foreignkey alias
446 shift->foreignkey(@_);
449 =head4 foreigntable
451 Title : foreigntable [alias ftbl]
452 Usage : $schema->ftbl( @foreign_key_fieldnames );
453 Function: return tablename of table that foreign keys points to
454 Example : $schema->ftbl( 'AUthor.AU_SE_id' ); # returns 'SequenceEntry'
455 Returns : tablename
456 Args : [an array of] fieldname[s] in "table.column" format
458 =cut
460 sub foreigntable {
461 # return table name that foreign key(s) point(s) to
462 my $self = shift;
463 my @fk = @_;
464 my @ret;
465 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
466 foreach (@fk) {
467 my ($mnem, $fmnem) = /\.([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_.*$/;
468 next unless $mnem && $fmnem;
469 # lookup based on Table.Column format of fields
470 my $sf = [grep( /^[0-9a-zA-Z]+\.$fmnem\_/, $self->fields )]->[0];
471 next unless $sf;
472 ($sf) = ($sf =~ /^([0-9a-zA-Z]+)\./);
473 push @ret, $sf;
475 return (wantarray ? @ret : $ret[0]);
478 sub ftbl {
479 # foreigntable alias
480 shift->foreigntable(@_);
483 =head4 loadSchema
485 Title : loadHIVSchema [alias: loadSchema]
486 Usage : $schema->loadSchema( $XMLfilename )
487 Function: read (LANL DB) schema spec from XML
488 Example : $schema->loadSchema('lanl-schema.xml');
489 Returns : hashref to schema data
490 Keys are fieldnames in "table.column" format.
491 Each value is a hashref with the following properties:
492 {name} : HIVWEB 'table.column' format fieldname,
493 can be used directly in the cgi query
494 {aliases} : ref to array containing valid aliases/shortcuts for
495 {name}; can be used in routines creating the HTML query
496 {options} : ref to array containing valid matchdata for this field
497 can be used directly in the HTML query
498 {ankey} : contains the annotation key for this field used with
499 Bioperl annotation objects
500 {..attr..}: ..value_of_attr.. for this field (app-specific metadata)
501 Args :
503 =cut
505 sub loadHIVSchema {
506 my $fn = shift;
507 # look in @INC for file
508 my $dir;
509 # finding myself
510 foreach my $d (@INC) {
511 my $p = Bio::Root::IO->catfile($d, $fn);
512 my $b = Bio::Root::IO->catfile($d, qw(Bio DB HIV), $fn);
513 if (-e $p) {
514 $dir = $p
515 } elsif (-e $b) {
516 $dir = $b;
518 last if $dir;
520 Bio::Root::Root->throw("loadHIVSchema: schema file not found") unless $dir;
521 print STDERR "$dir\n";
522 my $q = XML::Simple->new(ContentKey=>'name',NormalizeSpace=>2,ForceArray=>1);
523 my %ret;
524 my $ref = $q->XMLin($dir);
525 my @sf = keys %{$$ref{sfield}};
526 foreach (@sf) {
527 my $h = $$ref{sfield}{$_};
528 $ret{$_} = $h;
529 foreach my $ptr ($$h{option}, $$h{alias}) {
530 if ($ptr) {
531 # kludge for XMLin: appears to convert to arrays, if there
532 # exists a tag without content, but to convert to hashes
533 # with content as key, if all tags possess content
534 if (ref($ptr) eq 'HASH') {
535 $ptr = [keys %{$ptr}];
537 elsif (ref($ptr) eq 'ARRAY') {
538 $ptr = [map { ref eq 'HASH' ? $_->{name} : $_ } @{$ptr}]
540 else {
541 1; # stub : doh!
545 for my $ptr ($$h{ankey}) {
546 # flatten
547 my $ank = [keys %{$ptr}]->[0];
548 if (!defined $ank) {
549 delete $$h{ankey};
551 else {
552 $h->{antype} = $ptr->{$ank}{antype};
553 $ptr = $ank;
557 return \%ret;
560 sub loadSchema {
561 my $self = shift;
562 $self->{schema_ref} = loadHIVSchema(shift);
565 # below, dangerous
567 =head4 _sfieldh
569 Title : _sfieldh
570 Usage : $schema->_sfieldh($fieldname)
571 Function: get hashref to the specified field hash
572 Example :
573 Returns : hashref
574 Args : fieldname in "table.column" format
576 =cut
578 sub _sfieldh {
579 # return reference to the specified field hash
580 my $self = shift;
581 my ($sfield) = @_;
582 return ${$self->{schema_ref}}{$sfield};
587 =head2 Class QRY - a query algebra for HIVQuery
589 =head3 SYNOPSIS
591 $Q = new QRY(
592 new R(
593 new Q('coreceptor', 'CXCR4'),
594 new Q('country', 'ZA')
597 QRY::Eq(QRY::And($Q, $Q), $Q); # returns 1
598 QRY::Eq(QRY::Or($Q, $Q), $Q); # returns 1
599 $Q2 = $Q1->clone;
600 $Q2 = new QRY(
601 new R(
602 new Q( 'coreceptor', 'CCR5' ),
603 new Q( 'country', 'ZA')
606 (QRY::And($Q, $Q2))->isnull; # returns 1
607 $Q3 = QRY::Or($Q, $Q2);
608 print $Q3->A; # prints '(CCR5 CXCR4)[coreceptor] (ZA)[country]'
610 =head3 DESCRIPTION
612 The QRY package provides a query parser for
613 L<Bio::DB::Query::HIVQuery>. Currently, the parser supports AND, OR,
614 and () operations. The structure of the LANL cgi makes it tricky to
615 perform NOTs, though this could be implemented if the desire were
616 great.
618 Two class methods do the work. C<QRY::_parse_q> does a first-pass
619 parse of the query string. C<QRY::_make_q> interprets the parse tree
620 as returned by C<QRY::_parse_q> and produces an array of hash
621 structures that can be used directly by C<Bio::DB::Query::HIVQuery>
622 query execution methods. Validation of query fields and options is
623 performed at the C<Bio::DB::Query::HIVQuery> level, not here.
625 C<QRY> objects are collections of C<R> (or request) objects, which are
626 in turn collections of C<Q> (or atomic query) objects. C<Q> objects
627 represent a query on a single field, with match data options C<OR>ed
628 together, e.g. C<(A B)[subtype]>. C<R> objects collect C<Q> objects
629 that could be processed in a single HTTP request; i.e., a set of
630 atomic queries each having different fields C<AND>ed together, such as
632 (A B)[subtype] AND ('CCR5')[coreceptor] AND (US CA)[country]
634 The C<QRY> object collects C<R>s that cannot be reduced (through
635 logical operations) to a single HTTP request, e.g.
637 ((C)[subtype] AND (SI)[phenotype]) OR ( (D)[subtype] AND (NSI)[phenotype] ),
639 which cannot be got in one go through the current LANL cgi
640 implementation (as far as I can tell). The parser will simplify
641 something like
643 ((C)[subtype] AND (SI)[phenotype]) OR ((C)[subtype] AND (NSI)[phenotype])
645 to the single request
647 (C)[subtype] AND (NSI SI)[phenotype]
649 however.
651 The operators C<&> and C<|> are overloaded to C<QRY::And> and
652 C<QRY::Or>, to get Perl precedence and grouping for free. C<bool> is
653 overloaded to get symbolic tests such as C<if ($QRY) {stuff}>. C<==>
654 is overloaded with C<QRY::Eq> for convenience. No overloading is done
655 for C<R> or C<Q>.
657 =cut
659 # a query algebra for HIVQuery
661 # Each Q object is an 'atomic' query, written as (data)[field]
662 # (a b ...)[X] equals (a)[X] | (b)[X] | ...
663 # Each R object represents a single HTTP request to the db
664 # contains an array of Q (atomic) objects (q1, q2, ...)
665 # the R object is interpreted as q1 & q2 & ...
666 # Each QRY object represents a series of HTTP requests to the db
667 # contains an array of R (request) objects (R1, R2, ...)
668 # the QRY object is interpreted as R1 | R2 | ...
670 # & and | operations are specified for each type
672 package QRY;
673 use strict;
674 $QRY::NULL = new QRY();
677 use overload
678 "|" => \&Or,
679 "&" => \&And,
680 "bool" => \&Bool,
681 "==" => \&Eq;
684 # query language emulator
685 # supports only AND and OR, any groupings
687 # syntax rules:
688 # query atom: bareword [field] OR (bareword ...) [field]
689 # only single bareword allowed between []
690 # annotation fields in {} (only bareword lists allowed between {})
691 # () can group query atoms joined by operators (AND or OR)
692 # () containing only barewords MUST be followed by a field descriptor [field]
693 # empty [] not allowed
694 # query atoms joined with AND by default
695 # barewords are associated (ORed within) the next field descriptor in the line
697 # follow the parse tree, creating new QRY objects as needed in @q, and
698 # construct a logical expression using & and | symbols.
699 # These are overloaded for doing ands and ors on QRY objects;
700 # to get the final QRY object, eval the resulting expression $q_expr.
701 # QRY object will be translated into (possibly multiple) hashes
702 # conforming to HIVQuery parameter requirements.
704 =head3 CLASS METHODS
706 =head4 _make_q
708 Title : _make_q
709 Usage : QRY::_make_q($parsetree)
710 Function: creates hash structures suitable for HIVQuery from parse tree
711 returned by QRY::_parse_q
712 Example :
713 Returns : array of hashrefs of query specs
714 Args : a hashref
716 =cut
718 sub _make_q {
719 my $ptree = shift;
720 my ($q_expr, @q, @an, $query, @dbq);
721 _make_q_guts($ptree, \$q_expr, \@q, \@an);
722 $query = eval $q_expr;
723 throw Bio::Root::Root(-class=>'Bio::Root::Exception',
724 -text=>$@,
725 -value=>$q_expr) if $@;
726 return {} if $query->isnull;
727 foreach my $rq ($query->requests) {
728 my $h = {'query'=>{}};
729 foreach ($rq->atoms) {
730 my @d = split(/\s+/, $_->dta);
731 foreach my $d (@d) {
732 $d =~ s/_/ /g;
733 $d =~ s/'//g;
735 $h->{'query'}{$_->fld} = (@d == 1) ? $d[0] : [@d];
737 $h->{'annot'} = [@an] if @an;
738 push @dbq, $h;
740 return @dbq;
743 =head4 _make_q_guts
745 Title : _make_q_guts (Internal class method)
746 Usage : _make_q_guts($ptree, $q_expr, $qarry, $anarry)
747 Function: traverses the parse tree returned from QRY::_parse_q, checking
748 syntax and creating HIVQuery-compliant query structures
749 Example :
750 Returns :
751 Args : $parse_tree (hashref), $query_expression (scalar string ref),
752 $query_array (array ref : stack for returning query structures),
753 $annotation_array (array ref : stack for returning annotation
754 fields)
756 =cut
758 sub _make_q_guts {
759 my ($ptree, $q_expr, $qarry, $anarry) = @_;
760 my (@words, $o);
761 eval { # catch
762 foreach (@{$ptree->{cont}}) {
763 m{^AND$} && do {
764 $$q_expr .= "&";
765 next;
767 m{^OR$} && do {
768 $$q_expr .= "|";
769 next;
771 m{^HASH} && do {
772 for my $dl ($_->{delim}) {
773 ($dl =~ m{\(}) && do {
774 if (grep /^HASH/, @{$_->{cont}}) {
775 $$q_expr .= "&" unless !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
776 $$q_expr .= "(";
777 _make_q_guts($_,$q_expr,$qarry,$anarry);
778 $$q_expr .= ")";
780 else {
781 my @c;
782 my $c = join(' ',@{$_->{cont}});
783 $c =~ s/,/ /g;
784 Bio::Root::Root->throw("query syntax error: unmatched ['\"]") if (@c = ($c =~ /(['"])/g)) % 2;
785 @c = split(/\s*(['"])\s*/, $c);
786 do {
787 $c = shift @c;
788 if ($c =~ m{['"]}) {
789 $c = join('', ($c, shift @c, shift @c));
790 $c =~ s/\s+/_/g;
791 push @words, $c;
793 else {
794 push @words, split(/\s+/,$c);
796 } while @c;
798 last;
800 ($dl =~ m{\[}) && do {
801 Bio::Root::Root->throw("syntax error: empty field descriptor") unless @{$_->{cont}};
802 Bio::Root::Root->throw("syntax error: more than one field descriptor in square brackets") unless @{$_->{cont}} == 1;
804 push @{$qarry}, new QRY( new R( new Q( $_->{cont}->[0], @words)));
805 # add default operation if nec
806 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
807 $$q_expr .= "\$q[".$#$qarry."]";
808 @words = ();
809 last;
811 ($dl =~ m{\{}) && do {
812 foreach my $an (@{$_->{cont}}) {
813 ($an =~ /^HASH/) && do {
814 if ($an->{delim} eq '[') {
815 push @$anarry, @{$an->{cont}};
817 else {
818 Bio::Root::Root->throw("query syntax error: only field descriptors (with or without square brackets) allowed in annotation spec");
820 next;
822 do { #else
823 push @$anarry, $an;
824 next;
827 last;
829 do {
830 1; #else stub
833 next;
835 do { # else, bareword
836 if ($o) {
837 $words[-1] .= "_$_";
839 else {
840 push @words, $_;
842 m/['"]/ && ($o = !$o);
844 } # @{ptree->{cont}}
845 Bio::Root::Root->throw("query syntax error: no search fields specified")
846 unless $$q_expr =~ /q\[[0-9]+\]/;
848 $@ ?
849 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
850 -text=>$@,
851 -value=>$$q_expr)
852 : return 1;
855 =head4 _parse_q
857 Title : _parse_q
858 Usage : QRY::_parse_q($query_string)
859 Function: perform first pass parse of a query string with some syntax
860 checking, return a parse tree suitable for QRY::_make_q
861 Example : QRY::_parse_q(" to[be] OR (not to)[be] ");
862 Returns : hashref
863 Args : query string
865 =cut
867 # parse qry string into a branching tree structure
868 # each branch tagged by the opening delimiter ( key 'delim' )
869 # content (tokens and subbranch hashes) placed in l2r order in
870 # @{p->{cont}}
871 sub _parse_q {
872 local $_;
873 my $qstr = shift;
874 my $illegal = qr/[^a-zA-Z0-9-_,\.\(\[\{\}\]\)\s'"]/;
875 my $pdlm = qr/[\{\[\(\)\]\}]/;
876 my %md = ('('=>')', '['=>']','{'=>'}');
877 my @tok = grep !/^\s*$/, split /($pdlm)/, $qstr;
878 return {} unless @tok;
879 my @pstack = ();
880 my @dstack = ();
881 my ($ptree, $p);
883 eval { #catch
884 Bio::Root::Root->throw("query syntax error: illegal character") if $qstr =~ /$illegal/;
886 $ptree = $p = {'delim'=>'*'};
887 foreach (@tok) {
888 #trim whsp
889 s/^\s+//;
890 s/\s+$//;
891 m{[\(\[\{]} && do {
892 my $new = {'delim'=>$_};
893 $p->{cont} = [] unless $p->{cont};
894 push @{$p->{cont}}, $new;
895 push @pstack, $p;
896 push @dstack, $_;
897 $p = $new;
898 next;
900 m{[\)\]\}]} && do {
901 my $d = pop @dstack;
902 if ($md{$d} eq $_) {
903 $p = pop @pstack;
904 Bio::Root::Root->throw("query syntax error: unmatched \"$_\"") unless $p;
906 else {
907 Bio::Root::Root->throw("query syntax error: saw \"$_\" before matching \"$md{$d}\"");
909 next;
911 do { # else
912 $p->{cont} = [] unless $p->{cont};
913 push @{$p->{cont}}, split(/\s+/);
917 $@ ?
918 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
919 -text=>$@,
920 -value=>"")
921 : return $ptree;
924 ## QRY constructor
926 =head3 CONSTRUCTOR
928 =head4 QRY Constructor
930 Title : QRY constructor
931 Usage : $QRY = new QRY()
932 Function:
933 Example :
934 Returns :
935 Args : array of R objects, optional
937 =cut
939 sub new {
940 my $class = shift;
941 my @args = @_;
942 my $self = {};
943 $self->{requests} = [];
944 bless($self, $class);
945 $self->put_requests(@args) if @args;
946 return $self;
949 ## QRY instance methods
951 =head3 INSTANCE METHODS
953 =head4 requests
955 Title : requests
956 Usage : $QRY->requests
957 Function: get/set array of requests comprising this QRY object
958 Example :
959 Returns :
960 Args : array of class R objects
962 =cut
964 sub requests {
965 my $self = shift;
966 $self->put_requests(@_) if @_;
967 return @{$self->{'requests'}};
970 =head4 put_requests
972 Title : put_requests
973 Usage : $QRY->put_request(@R)
974 Function: add object of class R to $QRY
975 Example :
976 Returns :
977 Args : [an array of] of class R object[s]
979 =cut
981 sub put_requests {
982 my $self = shift;
983 my @args = @_;
984 foreach (@args) {
985 Bio::Root::Root->throw('requires type R (request)') unless ref && $_->isa('R');
986 push @{$self->{requests}}, $_;
988 return @args;
991 =head4 isnull
993 Title : isnull
994 Usage : $QRY->isnull
995 Function: test if QRY object is null
996 Example :
997 Returns : 1 if null, 0 otherwise
998 Args :
1000 =cut
1002 sub isnull {
1003 my $self = shift;
1004 return ($self->requests) ? 0 : 1;
1007 =head4 A
1009 Title : A
1010 Usage : print $QRY->A
1011 Function: get a string representation of QRY object
1012 Example :
1013 Returns : string scalar
1014 Args :
1016 =cut
1018 sub A {
1019 my $self = shift;
1020 return join( "\n", map {$_->A} $self->requests );
1023 =head4 len
1025 Title : len
1026 Usage : $QRY->len
1027 Function: get number of class R objects contained by QRY object
1028 Example :
1029 Returns : scalar
1030 Args :
1032 =cut
1034 sub len {
1035 my $self = shift;
1036 return scalar @{$self->{'requests'}};
1039 =head4 clone
1041 Title : clone
1042 Usage : $QRY2 = $QRY1->clone;
1043 Function: create and return a clone of the object
1044 Example :
1045 Returns : object of class QRY
1046 Args :
1048 =cut
1050 sub clone {
1051 local $_;
1052 my $self = shift;
1053 my $ret = new QRY();
1054 foreach ($self->requests) {
1055 $ret->put_requests($_->clone);
1057 return $ret;
1060 ## QRY class methods
1062 =head3 CLASS METHODS
1064 =head4 Or
1066 Title : Or
1067 Usage : $QRY3 = QRY::Or($QRY1, $QRY2)
1068 Function: logical OR for QRY objects
1069 Example :
1070 Returns : a QRY object
1071 Args : two class QRY objects
1073 =cut
1075 sub Or {
1076 local $_;
1077 my ($q, $r, $rev_f) = @_;
1078 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1079 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1080 if ($q->isnull) {
1081 return $r->clone;
1083 elsif ($r->isnull) {
1084 return $q->clone;
1086 do {my $qq = $q; $q=$r; $r=$qq} if ($q->len > $r->len);
1087 my @rq_r = $r->requests;
1088 my @rq_q = $q->requests;
1089 my (@cand_rq, @ret_rq);
1090 # search for simplifications
1091 my @now = @rq_q;
1092 my @nxt =();
1093 foreach (@rq_r) {
1094 my $found = 0;
1095 while (my $rq = pop @now) {
1096 my @result = R::Or($rq, $_);
1097 if (@result==1) {
1098 push @cand_rq, $result[0]->clone;
1099 $found = 1;
1100 last;
1102 else {
1103 push @nxt, $rq;
1106 push @cand_rq, $_->clone unless ($found);
1107 # @now becomes unexamined @rq_q's plus failed @rq_q's
1108 @now = (@now, @nxt);
1110 push @cand_rq, map {$_->clone} @now; # add all failed @rq_q's
1111 # squeeze out redundant requests
1112 while (my $rq = pop @cand_rq) {
1113 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1115 return new QRY( @ret_rq );
1118 =head4 And
1120 Title : And
1121 Usage : $QRY3 = QRY::And($QRY1, $QRY2)
1122 Function: logical AND for QRY objects
1123 Example :
1124 Returns : a QRY object
1125 Args : two class QRY objects
1127 =cut
1129 sub And {
1130 my ($q, $r, $rev_f) = @_;
1131 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1132 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1133 return ($QRY::NULL) if ($q->isnull || $r->isnull);
1134 my (@cand_rq, @ret_rq);
1135 foreach my $rq_r ($r->requests) {
1136 foreach my $rq_q ($q->requests) {
1137 my ($rq) = R::And($rq_r, $rq_q);
1138 push @cand_rq, $rq unless $rq->isnull;
1141 return $QRY::NULL unless @cand_rq;
1142 # squeeze out redundant requests
1143 while (my $rq = pop @cand_rq) {
1144 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1146 return new QRY( @ret_rq );
1149 =head4 Bool
1151 Title : Bool
1152 Usage : QRY::Bool($QRY1)
1153 Function: allows symbolic testing of QRY object when bool overloaded
1154 Example : do {stuff} if $QRY1 *same as* do {stuff} if !$QRY1->isnull
1155 Returns :
1156 Args : a class QRY object
1158 =cut
1160 sub Bool {
1161 my $q = shift;
1162 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1163 return $q->isnull ? 0 : 1;
1166 =head4 Eq
1168 Title : Eq
1169 Usage : QRY::Eq($QRY1, $QRY2)
1170 Function: test if R objects in two QRY objects are the same
1171 (irrespective of order)
1172 Example :
1173 Returns : 1 if equal, 0 otherwise
1174 Args : two class QRY objects
1176 =cut
1178 sub Eq {
1179 my ($q, $r, $rev_f) = @_;
1180 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1181 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1182 return 0 unless $q->len == $r->len;
1183 foreach my $rq_q ($q->requests) {
1184 my $found = 0;
1185 foreach my $rq_r ($r->requests) {
1186 if (R::Eq($rq_q,$rq_r)) {
1187 $found = 1;
1188 last;
1191 return 0 unless $found;
1193 return 1;
1198 =head2 Class R - request objects for QRY algebra
1200 =head3 SYNOPSIS
1202 $R = new R( $q1, $q2 );
1203 $R->put_atoms($q3);
1204 $R->del_atoms('coreceptor', 'phenotype');
1205 return $R->clone;
1206 $R1 = new R( new Q('subtype', 'B') );
1207 $R2 = new R( new Q('subtype', 'B C'),
1208 new Q('country', 'US') );
1209 R::Eq( (R::And($R1, $R2))[0],
1210 new R( new Q('subtype', 'B' ),
1211 new Q('country', 'US') )); # returns 1
1212 QRY::Eq( new QRY(R::Or($R1, $R2)), new QRY($R1, $R2) ); # returns 1
1213 R::In( (R::And($R1, $R2))[0], $R1 ); # returns 1
1215 =head3 DESCRIPTION
1217 Class R objects contain a list of atomic queries (class Q
1218 objects). Each class R object represents a single HTTP request to the
1219 LANL DB. When converted to a DB query, the class Q objects contained
1220 by an R object are effectively C<AND>ed.
1222 =cut
1224 package R;
1225 use strict;
1226 $R::NULL = new R();
1229 ## R constructor
1231 =head3 CONSTRUCTOR
1233 =head4 R constructor
1235 Title : R constructor
1236 Usage : $R = new R()
1237 Function: create a new R (request) object
1238 Example :
1239 Returns : class R (request) object
1240 Args : optional, array of class Q objects
1242 =cut
1244 sub new {
1245 my $class = shift;
1246 my @args = @_;
1247 my $self = {};
1248 $self->{atoms} = {};
1249 bless($self, $class);
1250 $self->put_atoms(@args) if @args;
1251 return $self;
1254 ## R instance methods
1256 =head3 INSTANCE METHODS
1258 =head4 len
1260 Title : len
1261 Usage : $R->len
1262 Function: get number of class Q objects contained in R object
1263 Example :
1264 Returns : scalar
1265 Args :
1267 =cut
1269 sub len {
1270 my $self = shift;
1271 return scalar @{[keys %{$self->{'atoms'}}]};
1274 =head4 atoms
1276 Title : atoms
1277 Usage : $R->atoms( [optional $field])
1278 Function: get array of class Q (atomic query) objects in class R object
1279 Example : $R->atoms(); $R->atoms('coreceptor')
1280 Returns : array of class Q objects (all Qs or those corresponding to $field
1281 if present)
1282 Args : optional, scalar string
1284 =cut
1286 sub atoms {
1287 local $_;
1288 # returns an array of atoms
1289 # no arg: all atoms;
1290 # args: atoms with specified fields
1291 my $self = shift;
1292 my @flds = (@_ ? @_ : keys %{$self->{'atoms'}});
1293 return wantarray ? map { $self->{'atoms'}->{$_} } @flds : $self->{'atoms'}->{$flds[0]};
1296 =head4 fields
1298 Title : fields
1299 Usage : $R->fields
1300 Function: get array of fields of all Q objects contained in $R
1301 Example :
1302 Returns : array of scalars
1303 Args :
1305 =cut
1307 sub fields {
1308 my $self = shift;
1309 return keys %{$self->{'atoms'}};
1312 =head4 put_atoms
1314 Title : put_atoms
1315 Usage : $R->put_atoms( @q )
1316 Function: AND an atomic query (class Q object) to the class R object's list
1317 Example :
1318 Returns : void
1319 Args : an [array of] class Q object[s]
1321 =cut
1323 sub put_atoms {
1324 # AND this atom to the request
1325 local $_;
1326 my $self = shift;
1327 my @args = @_;
1328 foreach (@args) {
1329 Bio::Root::Root->throw('requires type Q (atom)') unless ref && $_->isa('Q');
1330 if ($self->atoms($_->fld)) {
1331 my $a = Q::qand( $self->atoms($_->fld), $_ );
1332 if ($a->isnull) {
1333 delete $self->{'atoms'}->{$_->fld};
1335 else {
1336 $self->{atoms}->{$_->fld} = $a->clone;
1339 else {
1340 $self->{atoms}->{$_->fld} = $_->clone;
1343 return;
1346 =head4 del_atoms
1348 Title : del_atoms
1349 Usage : $R->del_atoms( @qfields )
1350 Function: removes class Q objects from R object's list according to the
1351 field names given in arguments
1352 Example :
1353 Returns : the class Q objects deleted
1354 Args : scalar array of field names
1356 =cut
1358 sub del_atoms {
1359 # remove atoms by field from request
1360 local $_;
1361 my $self = shift;
1362 my @args = @_;
1363 return () unless @args;
1364 my @ret;
1365 foreach (@args) {
1366 push @ret, delete $self->{'atoms'}->{$_};
1368 return @ret;
1371 =head4 isnull
1373 Title : isnull
1374 Usage : $R->isnull
1375 Function: test if class R object is null
1376 Example :
1377 Returns : 1 if null, 0 otherwise
1378 Args :
1380 =cut
1382 sub isnull {
1383 my $self = shift;
1384 return ($self->len) ? 0 : 1;
1387 =head4 A
1389 Title : A
1390 Usage : print $R->A
1391 Function: get a string representation of class R object
1392 Example :
1393 Returns : string scalar
1394 Args :
1396 =cut
1398 sub A {
1399 my $self = shift;
1400 my @a = sort {$a->fld cmp $b->fld} $self->atoms;
1401 return join(" ", map {$_->A} @a);
1404 =head4 clone
1406 Title : clone
1407 Usage : $R2 = $R1->clone;
1408 Function: create and return a clone of the object
1409 Example :
1410 Returns : object of class R
1411 Args :
1413 =cut
1415 sub clone {
1416 local $_;
1417 my $self = shift;
1418 my $ret = new R();
1419 foreach ($self->atoms) {
1420 $ret->put_atoms($_->clone);
1422 return $ret;
1425 ## R class methods
1427 =head3 CLASS METHODS
1429 =head4 In
1431 Title : In
1432 Usage : R::In($R1, $R2)
1433 Function: tests whether the query represented by $R1 would return a subset
1434 of items returned by the query represented by $R2
1435 Example : print "R2 gets those and more" if R::In($R1, $R2);
1436 Returns : 1 if R1 is subset of R2, 0 otherwise
1437 Args : two class R objects
1439 =cut
1441 sub In {
1442 local $_;
1443 my ($s, $t) = @_;
1444 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1445 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1446 return 1 if ($s->isnull);
1447 # common fields
1448 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,$s->fields} $t->fields;
1449 return 0 unless @cf==$t->len;
1450 foreach (@cf) {
1451 my @sd = split(/\s+/, $s->atoms($_)->dta);
1452 my @td = split(/\s+/, $t->atoms($_)->dta);
1453 my @cd = grep {defined} map {my $d=$_; grep /^$d$/, @td} @sd;
1454 return 0 unless @cd==@sd;
1456 return 1;
1459 =head4 And
1461 Title : And
1462 Usage : @Rresult = R::And($R1, $R2)
1463 Function: logical AND for R objects
1464 Example :
1465 Returns : an array containing class R objects
1466 Args : two class R objects
1468 =cut
1470 sub And {
1471 local $_;
1472 my ($s, $t) = @_;
1473 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1474 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1475 return ($R::NULL) if ($s->isnull || $t->isnull);
1477 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1478 # $t has at least as many fields defined than $s ($t is more restrictive)
1480 # common fields
1481 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1482 my $ret = new R();
1483 my $v = $t->clone;
1484 $v->del_atoms(@cf);
1485 my $u = $s->clone;
1486 $u->del_atoms(@cf);
1488 # And the atoms with identical fields
1490 foreach (@cf) {
1491 my ($a) = Q::qand($s->atoms($_), $t->atoms($_));
1492 if ($a->isnull) {
1493 return $R::NULL;
1495 else {
1496 $ret->put_atoms($a);
1499 # put the private atoms
1500 $ret->put_atoms($u->atoms, $v->atoms);
1501 return ($ret);
1505 =head4 Or
1507 Title : Or
1508 Usage : @Rresult = R::Or($R1, $R2)
1509 Function: logical OR for R objects
1510 Example :
1511 Returns : an array containing class R objects
1512 Args : two class R objects
1514 =cut
1516 sub Or {
1517 local $_;
1518 my ($s, $t) = @_;
1519 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1520 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1521 if ($s->isnull) {
1522 return $t->clone;
1524 elsif ($t->isnull) {
1525 return $s->clone;
1527 return $s->clone if (R::In($t, $s));
1528 return $t->clone if (R::In($s, $t));
1530 # try simplifying
1531 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1532 # common fields
1533 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1535 if ($t->len == @cf) {
1536 # all atoms equal within fields but one? If yes, simplify...
1537 my @df = grep {!Q::qeq($s->atoms($_), $t->atoms($_))} @cf;
1538 if (@df == 1) {
1539 my ($a) = Q::qor($s->atoms($df[0]), $t->atoms($df[0]));
1540 my $ret = $s->clone;
1541 $ret->del_atoms($df[0]);
1542 $ret->put_atoms($a);
1543 return ($ret);
1547 # neither request contains the other, and the requests cannot be
1548 # simplified; reflect back (clones of) the input...
1549 return ($s->clone, $t->clone);
1553 =head4 Eq
1555 Title : Eq
1556 Usage : R::Eq($R1, $R2)
1557 Function: test if class Q objects in two R objects are the same
1558 (irrespective of order)
1559 Example :
1560 Returns : 1 if equal, 0 otherwise
1561 Args : two class R objects
1563 =cut
1565 sub Eq {
1566 local $_;
1567 my ($s, $t) = @_;
1568 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1569 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1570 my @sf = $s->fields;
1571 my @tf = $t->fields;
1572 return 0 unless @sf==@tf;
1573 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,@sf} @tf;
1574 return 0 unless @cf==@tf;
1575 foreach (@cf) {
1576 return 0 unless Q::qeq($s->atoms($_), $t->atoms($_));
1578 return 1;
1582 =head2 Class Q - atomic query objects for QRY algebra
1584 =head3 SYNOPSIS
1586 $q = new Q('coreceptor', 'CXCR4 CCR5');
1587 $u = new Q('coreceptor', 'CXCR4');
1588 $q->fld; # returns 'coreceptor'
1589 $q->dta; # returns 'CXCR4 CCR5'
1590 print $q->A; # prints '(CXCR4 CCR5)[coreceptor]
1591 Q::qeq($q, $u); # returns 0
1592 Q::qeq( Q::qor($q, $q), $q ); # returns 1
1593 Q::qin($u, $q) # returns 1
1594 Q::qeq(Q::qand($u, $q), $u ); # returns 1
1596 =head3 DESCRIPTION
1598 Class Q objects represent atomic queries, that can be described by a
1599 single LANL cgi parameter=value pair. Class R objects (requests) are
1600 built from class Qs. The logical operations at the higher levels
1601 (C<QRY, R>) ultimately depend on the lower level operations on Qs:
1602 C<qeq, qin, qand, qor>.
1604 =cut
1606 package Q;
1607 use strict;
1608 $Q::NULL = new Q();
1610 ## Q constructor
1612 =head3 CONSTRUCTOR
1614 =head4 Q constructor
1616 Title : Q constructor
1617 Usage : $q = new Q($field, $data)
1618 Function: create a new Q (atomic query) object
1619 Example :
1620 Returns : class Q object
1621 Args : optional $field, $data strings
1623 =cut
1625 sub new {
1626 local $_;
1627 my ($class,@args) = @_;
1628 my $self={};
1629 foreach (@args) { s/^\s+//; s/\s+$//; }
1630 my ($fld, @dta) = @args;
1631 $self->{fld}=$fld;
1632 $self->{dta}=join(" ", @dta);
1633 bless($self, $class);
1634 return $self;
1637 ## Q instance methods
1639 =head3 INSTANCE METHODS
1641 =head4 isnull
1643 Title : isnull
1644 Usage : $q->isnull
1645 Function: test if class Q object is null
1646 Example :
1647 Returns : 1 if null, 0 otherwise
1648 Args :
1650 =cut
1652 sub isnull {
1653 my $self = shift;
1654 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1655 return 1 unless (($self->fld && length($self->fld)) || ($self->dta && length($self->dta)));
1656 return 0;
1659 =head4 fld
1661 Title : fld
1662 Usage : $q->fld($field)
1663 Function: get/set fld (field name) property
1664 Example :
1665 Returns : scalar
1666 Args : scalar
1668 =cut
1670 sub fld {
1671 my $self = shift;
1672 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1673 my $f = shift;
1674 if ($f) {
1675 $f =~ s/^\s+//;
1676 $f =~ s/\s+$//;
1677 return $self->{fld}=$f;
1679 return $self->{fld};
1683 =head4 dta
1685 Title : dta
1686 Usage : $q->dta($data)
1687 Function: get/set dta (whsp-separated data string) property
1688 Example :
1689 Returns : scalar
1690 Args : scalar
1692 =cut
1694 sub dta {
1695 my $self = shift;
1696 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1697 my $d = join(" ", @_);
1698 if ($d) {
1699 $d =~ s/^\s+//;
1700 $d =~ s/\s+$//;
1701 return $self->{dta} = $d;
1703 return $self->{dta};
1706 =head4 A
1708 Title : A
1709 Usage : print $q->A
1710 Function: get a string representation of class Q object
1711 Example :
1712 Returns : string scalar
1713 Args :
1715 =cut
1717 sub A {
1718 my $self = shift;
1719 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1720 my @a = split(/\s+/, $self->dta);
1722 return "(".join(' ', sort {$a cmp $b} @a).")[".$self->fld."]";
1725 =head4 clone
1727 Title : clone
1728 Usage : $q2 = $q1->clone;
1729 Function: create and return a clone of the object
1730 Example :
1731 Returns : object of class Q
1732 Args :
1734 =cut
1736 sub clone {
1737 my $self = shift;
1738 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1739 my $ret = new Q ($self->fld, $self->dta);
1740 return $ret;
1743 ### Q class methods
1745 =head3 CLASS METHODS
1747 =head4 qin
1749 Title : qin
1750 Usage : Q::qin($q1, $q2)
1751 Function: tests whether the query represented by $q1 would return a subset
1752 of items returned by the query represented by $q2
1753 Example : print "q2 gets those and more" if Q::qin($q1, $q2);
1754 Returns : 1 if q1 is subset of q2, 0 otherwise
1755 Args : two class Q objects
1757 =cut
1759 sub qin {
1760 my ($a, $b) = @_;
1761 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1762 return 0 unless $a->fld eq $b->fld;
1763 return Q::qeq( $b, Q::qor($a, $b) );
1766 =head4 qeq
1768 Title : qeq
1769 Usage : Q::qeq($q1, $q2)
1770 Function: test if fld and dta properties in two class Q objects are the same
1771 (irrespective of order)
1772 Example :
1773 Returns : 1 if equal, 0 otherwise
1774 Args : two class Q objects
1776 =cut
1778 sub qeq {
1779 local $_;
1780 my ($a, $b) = @_;
1781 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1782 return 0 unless $a->fld eq $b->fld;
1783 my @ad = unique(split(/\s+/,$a->dta));
1784 my @bd = unique(split(/\s+/,$b->dta));
1785 return 0 unless @ad==@bd;
1786 my @cd = grep {defined} map {my $f = $_; grep /^$f$/, @ad} @bd;
1787 return @cd == @bd;
1790 =head4 qor
1792 Title : qor
1793 Usage : @qresult = Q::qor($q1, $q2)
1794 Function: logical OR for Q objects
1795 Example :
1796 Returns : an array of class Q objects
1797 Args : two class Q objects
1799 =cut
1801 sub qor {
1802 local $_;
1803 my @a = @_;
1804 foreach (@a) {
1805 Bio::Root::Root->throw("requires type Q (atom)") unless ref && $_->isa('Q');
1807 my @ret;
1808 my (%f, @f);
1809 @a = grep {!$_->isnull} @a;
1810 return ($Q::NULL) unless @a > 0;
1811 # list of unique flds
1812 @f = unique(map {$_->fld} @a);
1813 foreach my $f (@f) {
1814 my @fobjs = grep {$_->fld eq $f} @a;
1815 my @d = unique(map {split(/\s/, $_->dta)} @fobjs );
1816 my $r = new Q($f, @d);
1817 push @ret, $r;
1819 return @ret;
1822 =head4 qand
1824 Title : qand
1825 Usage : @qresult = Q::And($q1, $q2)
1826 Function: logical AND for R objects
1827 Example :
1828 Returns : an array of class Q objects
1829 Args : two class Q objects
1831 =cut
1833 sub qand {
1834 local $_;
1835 my ($a, $b) = @_;
1836 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1837 my @ret;
1838 if (ref $a eq 'ARRAY') {
1839 foreach my $ea (@$a) {
1840 push @ret, qand( $ea, $b );
1842 return qor(@ret); # simplify
1844 elsif (ref $b eq 'ARRAY') {
1845 foreach my $eb (@$b) {
1846 push @ret, qand( $a, $eb);
1849 return qor(@ret); # simplify
1851 else {
1852 return ($Q::NULL) if ($a->isnull || $b->isnull);
1853 if ($a->fld eq $b->fld) {
1854 # find intersection of data
1855 my (%ad, @ad, @bd);
1856 @ad = split(/\s+/, $a->dta);
1857 @ad{@ad} = (1) x @ad;
1858 @bd = split(/\s+/, $b->dta);
1859 foreach (@bd) {
1860 $ad{$_}++;
1862 my $r = new Q($a->fld,
1863 grep {$_}
1864 map {$ad{$_} == 2 ? $_ : undef} keys %ad);
1865 return (length($r->dta) > 0) ? ($r) : ($Q::NULL);
1867 else {
1868 return ($a, $b);
1873 =head3 INTERNALS
1875 =head4 unique
1877 Title : unique
1878 Usage : @ua = unique(@a)
1879 Function: return contents of @a with duplicates removed
1880 Example :
1881 Returns :
1882 Args : an array
1884 =cut
1886 sub unique {
1887 my @a = @_;
1888 my %a;
1889 @a{@a} = undef;
1890 return keys %a;
1895 =head2 Additional tools for Bio::AnnotationCollectionI
1897 =head3 SYNOPSIS
1899 $seq->annotation->put_value('patient_id', 1401)
1900 $seq->annotation->get_value('patient_ids') # returns 1401
1901 $seq->annotation->put_value('patient_group', 'MassGenH')
1902 $seq->annotation->put_value(['clinical', 'cd4count'], 503);
1903 $seq->annotation->put_value(['clinical', 'virus_load'], 150805);
1904 foreach ( qw( cd4count virus_load ) ) {
1905 $blood_readings{$_} = $seq->annonation->get_value(['clinical', $_]);
1908 =head3 DESCRIPTION
1910 C<get_value()> and C<put_value> allow easy creation of and access to an annotation collection tree with nodes of L<Bio::Annotation::SimpleValue>. These methods obiviate direct accession of the SimpleValue objects.
1912 =cut
1914 package Bio::AnnotationCollectionI;
1915 use strict;
1916 use Bio::Annotation::SimpleValue;
1918 =head2 get_value
1920 Title : get_value
1921 Usage : $ac->get_value($tagname) -or-
1922 $ac->get_value( $tag_level1, $tag_level2,... )
1923 Function: access the annotation value assocated with the given tags
1924 Example :
1925 Returns : a scalar
1926 Args : an array of tagnames that descend into the annotation tree
1928 =cut
1930 sub get_value {
1931 local $_;
1932 my $self = shift;
1933 my @args = @_;
1934 my @h;
1935 return "" unless @_;
1936 while ($_ = shift @args) {
1937 @h = $self->get_Annotations($_);
1938 if (ref($h[0]->{value})) {
1939 $self = $h[0]->{value}; # must be another Bio::AnnotationCollectionI
1941 else {
1942 last;
1945 return $h[0] && $h[0]->{value} ; # now the last value.
1948 =head2 put_value
1950 Title : put_value
1951 Usage : $ac->put_value($tagname, $value) -or-
1952 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
1953 $ac->put_value( [$tag_level1, $tag_level2, ...] )
1954 Function: create a node in an annotation tree, and assign a scalar value to it
1955 if a value is specified
1956 Example :
1957 Returns : scalar or a Bio::AnnotationCollection object
1958 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
1959 -VALUE=>$value) -or-
1960 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
1961 Note : If intervening nodes do not exist, put_value creates them, replacing
1962 existing nodes. So if $ac->put_value('x', 10) was done, then later,
1963 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed, and $ac->get_value('x') will now return the annotation collection
1964 with tagname 'y'.
1966 =cut
1968 sub put_value {
1969 local $_;
1970 my $self = shift;
1971 my @args = @_;
1972 my ($keys, $value) = $self->_rearrange([qw( KEYS VALUE )], @args);
1973 my (@keys, $lastkey);
1974 $value ||= new Bio::Annotation::Collection;
1975 @keys = (ref($keys) eq 'ARRAY') ? @$keys : ($keys);
1976 $lastkey = pop @keys;
1977 foreach (@keys) {
1978 my $a = $self->get_value($_);
1979 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
1980 $self = $a;
1982 else {
1983 # replace an old value
1984 $self->remove_Annotations($_) if $a;
1985 my $ac = new Bio::Annotation::Collection;
1986 $self->add_Annotation(new Bio::Annotation::SimpleValue(
1987 -tagname => $_,
1988 -value => $ac
1991 $self = $ac;
1994 if ($self->get_value($lastkey)) {
1995 # replace existing value
1996 ($self->get_Annotations($lastkey))[0]->{value} = $value;
1998 else {
1999 $self->add_Annotation(new Bio::Annotation::SimpleValue(
2000 -tagname=>$lastkey,
2001 -value=>$value
2004 return $value;