Sync branch with trunk
[bioperl-live.git] / Bio / DB / HIV / HIVQueryHelper.pm
blob53a38512ecde6472dbdb082709f1019094eae92d
1 # $Id: HIVQueryHelper.pm 231 2008-12-11 14:32:00Z maj $
3 # BioPerl module for Bio::DB::HIV::HIVQueryHelper
5 # Cared for by Mark A. Jensen <maj@fortinbras.us>
7 # Copyright Mark A. Jensen
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::DB::HIV::HIVQueryHelper - Routines and packages used by Bio::DB::HIV and
16 Bio::DB::Query::HIVQuery
18 =head1 SYNOPSIS
20 Used in Bio::DB::Query::HIVQuery. No need to use directly.
22 =head1 DESCRIPTION
24 C<Bio::DB::HIV::HIVQueryHelper> contains a number of packages for use
25 by L<Bio::DB::Query::HIVQuery>. Package C<HIVSchema> parses the
26 C<lanl-schema.xml> file, and allows access to it in the context of the
27 relational database it represents (see APPENDIX for excruciating
28 detail). Packages C<QRY>, C<R>, and C<Q> together create the query
29 string parser that enables NCBI-like queries to be understood by
30 C<Bio::DB::Query::HIVQuery>. They provide objects and operators to
31 perform and simplify logical expressions involving C<AND>, C<OR>, and
32 C<()> and return hash structures that can be handled by
33 C<Bio::DB::Query::HIVQuery> routines.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Reporting Bugs
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 of the bugs and their resolution. Bug reports can be submitted via
50 the web:
52 http://bugzilla.open-bio.org/
54 =head1 AUTHOR - Mark A. Jensen
56 Email maj@fortinbras.us
58 =head1 CONTRIBUTORS
60 =head1 APPENDIX
62 The rest of the documentation details each of the contained packages.
63 Internal methods are usually preceded with a _
65 =cut
67 # Let the code begin...
69 package Bio::DB::HIV::HIVQueryHelper;
70 use strict;
71 use Bio::Root::Root;
73 # globals
74 BEGIN {
75 #exceptions
76 @Bio::QueryStringSyntax::Exception::ISA = qw( Bio::Root::Exception);
81 =head2 HIVSchema - objects/methods to manipulate a version of the LANL HIV DB schema
83 =head3 SYNOPSIS
85 $schema = new HIVSchema( 'lanl-schema.xml' );
86 @tables = $schema->tables;
87 @validFields = $schema->fields;
88 @validAliases = $schema->aliases;
89 @query_aliases_for_coreceptor = $schema->aliases( 'SEQ_SAMple.SSAM_second_receptor' );
90 $pk_for_SequenceEntry = $schema->primarykey('SequenceEntry'); # returns 'SequenceEntry.SE_id'
91 $fk_for_SEQ_SAMple_to_SequenceEntry =
92 $schema->foreignkey('SEQ_SAMple', 'SequenceEntry'); # returns 'SEQ_SAMple.SSAM_SE_id'
94 $table = $schema->tablepart('SEQ_SAMple.SSAM_badseq'); # returns 'SEQ_SAMple'
95 $column = $schema->columnpart('SEQ_SAMple.SSAM_badseq'); # returns 'SSAM_badseq'
97 =head3 DESCRIPTION
99 HIVSchema methods are used in L<Bio::DB::Query::HIVQuery> for table,
100 column, primary/foreign key manipulations based on the observed Los
101 Alamos HIV Sequence Database (LANL DB) naming conventions for their
102 CGI parameters. The schema is contained in an XML file
103 (C<lanl-schema.xml>) which is read into an HIVSchema object, in turn a
104 property of the HIVQuery object. HIVSchema methods are used to build
105 correct cgi queries in a way that attempts to preserve the context of
106 the relational database the query parameters represent.
108 =cut
110 package HIVSchema;
111 # objects/methods to manipulate a version of the LANL HIV DB schema
112 # stored in XML
113 use XML::Simple;
114 use Bio::Root::Root;
115 use strict;
117 ### constructor
119 =head3 CONSTRUCTOR
121 =head4 new
123 Title : new
124 Usage : $schema = new HIVSchema( "lanl-schema.xml ");
125 Function:
126 Example :
127 Returns : an HIVSchema object
128 Args : XML filename
130 =cut
132 sub new {
133 my $class = shift;
134 my @args = @_;
135 my $self = {};
136 if ($args[0]) {
137 $self->{schema_ref} = loadHIVSchema($args[0]);
139 bless($self, $class);
140 return $self;
143 ### object methods
145 =head3 INSTANCE METHODS
147 =head4 tables
149 Title : tables
150 Usage : $schema->tables()
151 Function: get all table names in schema
152 Example :
153 Returns : array of table names
154 Args : none
156 =cut
158 sub tables {
159 # return array of all tables in schema
160 local $_;
161 my $self = shift;
162 my $sref = $self->{schema_ref};
163 Bio::Root::Root->throw("schema not initialized") unless $sref;
164 my @k = grep(/\./, keys %$sref);
165 my %ret;
166 foreach (@k) {
167 s/\..*$//;
168 $ret{$_}++;
170 @k = sort keys %ret;
171 return @k;
174 =head4 columns
176 Title : columns
177 Usage : $schema->columns( [$tablename] );
178 Function: return array of columns for specified table, or all columns in
179 schema, if called w/o args
180 Example :
181 Returns :
182 Args : tablename or fieldname string
184 =cut
186 sub columns {
187 # return array of columns for specified table
188 # all columns in schema, if called w/o args
189 local $_;
190 my $self = shift;
191 my ($tbl) = @_;
192 my $sref = $self->{schema_ref};
193 Bio::Root::Root->throw("schema not initialized") unless $sref;
194 # trim column name
195 $tbl =~ s/\..*$//;
196 # check if table exists
197 return () unless grep(/^$tbl$/i, $self->tables);
198 my @k = sort keys %$sref;
199 @k = grep (/^$tbl\./i, @k);
200 foreach (@k) {
201 s/^$tbl\.//;
203 return @k;
206 =head4 fields
208 Title : fields
209 Usage : $schema->fields();
210 Function: return array of all fields in schema, in format "table.column"
211 Example :
212 Returns : array of all fields
213 Args : none
215 =cut
217 sub fields {
218 # return array of all fields (Table.Column format) in schema
219 my $self = shift;
220 my $sref = $self->{schema_ref};
221 Bio::Root::Root->throw("schema not initialized") unless $sref;
222 my @k = sort keys %{$sref};
223 return @k;
226 =head4 options
228 Title : options
229 Usage : $schema->options(@fieldnames)
230 Function: get array of options (i.e., valid match data strings) available
231 to specified field
232 Example :
233 Returns : array of match data strings
234 Args : [array of] fieldname string[s] in "table.column" format
236 =cut
238 sub options {
239 # return array of options available to specified field
240 my $self = shift;
241 my ($sfield) = @_;
242 my $sref = $self->{schema_ref};
243 Bio::Root::Root->throw("schema not initialized") unless $sref;
244 return $$sref{$sfield}{option} ? @{$$sref{$sfield}{option}} : ();
247 =head4 aliases
249 Title : aliases
250 Usage : $schema->aliases(@fieldnames)
251 Function: get array of aliases to specified field[s]
252 Example :
253 Returns : array of valid query aliases for fields as spec'd in XML file
254 Args : [an array of] fieldname[s] in "table.column" format
256 =cut
258 sub aliases {
259 # return array of aliases to specified field
260 my $self = shift;
261 my ($sfield) = @_;
262 my $sref = $self->{schema_ref};
263 my @ret;
264 Bio::Root::Root->throw("schema not initialized") unless $sref;
265 if ($sfield) {
266 return $$sref{$sfield}{alias} ? @{$$sref{$sfield}{alias}} : ();
268 else { # all valid aliases
269 map {push @ret, @{$$sref{$_}{alias}} if $$sref{$_}{alias}} $self->fields;
270 return @ret;
274 =head4 ankh
276 Title : ankh (annotation key hash)
277 Usage : $schema->ankh(@fieldnames)
278 Function: return a has translating fields to annotation keys for the
279 spec'd fields.
280 (Annotation keys are used for parsing the tab-delimited response
281 to Bio::DB::Query::HIVQuery::_do_lanl_request.)
282 Example :
283 Returns : hash ref
284 Args : [an array of] fieldname[s] in "table.column" format
286 =cut
288 sub ankh {
289 # return hash translating sfields to annotation keys for specified sfield(s)
290 my $self = shift;
291 my %ret = ();
292 my @sfields = @_;
293 my $sref = $self->{schema_ref};
294 Bio::Root::Root->throw("schema not initialized") unless $sref;
295 foreach (@sfields) {
296 next unless $$sref{$_}{ankey};
297 $ret{$_} = {'ankey'=>$$sref{$_}{ankey},'antype'=>$$sref{$_}{antype}};
299 return %ret;
302 =head4 tablepart
304 Title : tablepart (alias: tbl)
305 Usage : $schema->tbl(@fieldnames)
306 Function: return the portion of the fieldname[s] that refer to the
307 db table
308 Example : $schema->tbl('SequenceEntry.SE_id'); # returns 'SequenceEntry'
309 Returns : table name as string
310 Args : [an array of] fieldname[s] in "table.column" format
312 =cut
314 sub tablepart {
315 # return the 'Table' part of the specified field(s)
316 my $self = shift;
317 my @sfields = @_;
318 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
319 my ($squish,@ret, %ret);
320 if ($sfields[0] eq '-s') {
321 # squish : remove duplicates from the returned array
322 $squish=1;
323 shift @sfields;
325 foreach (@sfields) {
326 push @ret, /^(.*)\./;
328 if ($squish) {
329 # arg order is clobbered
330 @ret{@ret} = undef;
331 @ret = keys %ret;
333 return (wantarray ? @ret : $ret[0]);
336 sub tbl {
337 # tablepart alias
338 shift->tablepart(@_);
341 =head4 columnpart
343 Title : columnpart (alias: col)
344 Usage : $schema->col(@fieldnames)
345 Function: return the portion of the fieldname[s] that refer to the
346 db column
347 Example : $schema->col('SequenceEntry.SE_id'); # returns 'SE_id'
348 Returns : column name as string
349 Args : [an array of] fieldname[s] in "table.column" format
351 =cut
353 sub columnpart {
354 # return the 'Column' part of the specified field(s)
355 my $self = shift;
356 my @sfields = @_;
357 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
358 my @ret;
359 foreach (@sfields) {
360 push @ret, /\.(.*)$/;
362 return (wantarray ? @ret : $ret[0]);
365 sub col {
366 # columnpart alias
367 shift->columnpart(@_);
370 =head4 primarykey
372 Title : primarykey [alias: pk]
373 Usage : $schema->pk(@tablenames);
374 Function: return the primary key of the specified table[s], as judged by
375 the syntax of the table's[s'] fieldnames
376 Example : $schema->pk('SequenceEntry') # returns 'SequenceEntry.SE_id'
377 Returns : primary key fieldname[s] in "table.column" format, or null if
378 no pk exists
379 Args : [an array of] table name[s] (fieldnames are ok, table part used)
381 =cut
383 sub primarykey {
384 # return the primary key (in Table.Column format) of specified table(s)
385 my $self = shift;
386 my @tbl = @_;
387 my @ret;
388 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
389 foreach my $tbl (@tbl) {
390 # trim column name
391 $tbl =~ s/\..*$//;
392 grep(/^$tbl$/i, $self->tables) ?
393 push(@ret, grep(/\.[0-9a-zA-Z]+_id/, grep(/$tbl/i,$self->fields))) :
394 push(@ret, "");
396 return (wantarray ? @ret : $ret[0]);
399 sub pk {
400 # primarykey alias
401 shift->primarykey(@_);
404 =head4 foreignkey
406 Title : foreignkey [alias: fk]
407 Usage : $schema->fk($intable [, $totable])
408 Function: return foreign key fieldname in table $intable referring to
409 table $totable, or all foreign keys in $intable if $totable
410 unspec'd
411 Example : $schema->fk('AUthor', 'SequenceEntry'); # returns 'AUthor_AU_SE_id'
412 Returns : foreign key fieldname[s] in "table.column" format
413 Args : tablename [, optional foreign table name] (fieldnames are ok,
414 table part used)
416 =cut
418 sub foreignkey {
419 # return foreign key in in-table ($intbl) to to-table ($totbl)
420 # or all foreign keys in in-table if to-table not specified
421 # keys returned in Table.Column format
422 my $self = shift;
423 my ($intbl, $totbl) = @_;
424 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
425 # trim col names
426 $intbl =~ s/\..*$//;
427 $totbl =~ s/\..*$// if $totbl;
428 # check if in-table exists
429 return () unless grep( /^$intbl/i, $self->tables);
430 my @ret = grep( /$intbl\.(?:[0-9a-zA-Z]+_){2,}id/i, $self->fields);
431 if ($totbl) {
432 my $tpk = $self->primarykey($totbl);
433 return (wantarray ? () : "") unless grep( /^$totbl/i, $self->tables) && $tpk;
434 ($tpk) = ($tpk =~ /\.(.*)$/);
435 @ret = grep( /$tpk$/, @ret);
436 return (wantarray ? @ret : $ret[0]);
438 else {
439 # return all foreign keys in in-table
440 return @ret;
444 sub fk {
445 # foreignkey alias
446 shift->foreignkey(@_);
449 =head4 foreigntable
451 Title : foreigntable [alias ftbl]
452 Usage : $schema->ftbl( @foreign_key_fieldnames );
453 Function: return tablename of table that foreign keys points to
454 Example : $schema->ftbl( 'AUthor.AU_SE_id' ); # returns 'SequenceEntry'
455 Returns : tablename
456 Args : [an array of] fieldname[s] in "table.column" format
458 =cut
460 sub foreigntable {
461 # return table name that foreign key(s) point(s) to
462 my $self = shift;
463 my @fk = @_;
464 my @ret;
465 Bio::Root::Root->throw("schema not initialized") unless $self->{schema_ref};
466 foreach (@fk) {
467 my ($mnem, $fmnem) = /\.([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_.*$/;
468 next unless $mnem && $fmnem;
469 # lookup based on Table.Column format of fields
470 my $sf = [grep( /^[0-9a-zA-Z]+\.$fmnem\_/, $self->fields )]->[0];
471 next unless $sf;
472 ($sf) = ($sf =~ /^([0-9a-zA-Z]+)\./);
473 push @ret, $sf;
475 return (wantarray ? @ret : $ret[0]);
478 sub ftbl {
479 # foreigntable alias
480 shift->foreigntable(@_);
483 =head4 loadSchema
485 Title : loadHIVSchema [alias: loadSchema]
486 Usage : $schema->loadSchema( $XMLfilename )
487 Function: read (LANL DB) schema spec from XML
488 Example : $schema->loadSchema('lanl-schema.xml');
489 Returns : hashref to schema data
490 Keys are fieldnames in "table.column" format.
491 Each value is a hashref with the following properties:
492 {name} : HIVWEB 'table.column' format fieldname,
493 can be used directly in the cgi query
494 {aliases} : ref to array containing valid aliases/shortcuts for
495 {name}; can be used in routines creating the HTML query
496 {options} : ref to array containing valid matchdata for this field
497 can be used directly in the HTML query
498 {ankey} : contains the annotation key for this field used with
499 Bioperl annotation objects
500 {..attr..}: ..value_of_attr.. for this field (app-specific metadata)
501 Args :
503 =cut
505 sub loadHIVSchema {
506 my $fn = shift;
507 Bio::Root::Root->throw("loadHIVSchema: schema file not found") unless -e $fn;
508 my $q = XML::Simple->new(ContentKey=>'name',NormalizeSpace=>2,ForceArray=>1);
509 my %ret;
510 my $ref = $q->XMLin($fn);
511 my @sf = keys %{$$ref{sfield}};
512 foreach (@sf) {
513 my $h = $$ref{sfield}{$_};
514 $ret{$_} = $h;
515 foreach my $ptr ($$h{option}, $$h{alias}) {
516 if ($ptr) {
517 # kludge for XMLin: appears to convert to arrays, if there
518 # exists a tag without content, but to convert to hashes
519 # with content as key, if all tags possess content
520 if (ref($ptr) eq 'HASH') {
521 $ptr = [keys %{$ptr}];
523 elsif (ref($ptr) eq 'ARRAY') {
524 $ptr = [map { ref eq 'HASH' ? $_->{name} : $_ } @{$ptr}]
526 else {
527 1; # stub : doh!
531 for my $ptr ($$h{ankey}) {
532 # flatten
533 my $ank = [keys %{$ptr}]->[0];
534 if (!defined $ank) {
535 delete $$h{ankey};
537 else {
538 $h->{antype} = $ptr->{$ank}{antype};
539 $ptr = $ank;
543 return \%ret;
546 sub loadSchema {
547 my $self = shift;
548 $self->{schema_ref} = loadHIVSchema(shift);
551 # below, dangerous
553 =head4 _sfieldh
555 Title : _sfieldh
556 Usage : $schema->_sfieldh($fieldname)
557 Function: get hashref to the specified field hash
558 Example :
559 Returns : hashref
560 Args : fieldname in "table.column" format
562 =cut
564 sub _sfieldh {
565 # return reference to the specified field hash
566 my $self = shift;
567 my ($sfield) = @_;
568 return ${$self->{schema_ref}}{$sfield};
573 =head2 Class QRY - a query algebra for HIVQuery
575 =head3 SYNOPSIS
577 $Q = new QRY(
578 new R(
579 new Q('coreceptor', 'CXCR4'),
580 new Q('country', 'ZA')
583 QRY::Eq(QRY::And($Q, $Q), $Q); # returns 1
584 QRY::Eq(QRY::Or($Q, $Q), $Q); # returns 1
585 $Q2 = $Q1->clone;
586 $Q2 = new QRY(
587 new R(
588 new Q( 'coreceptor', 'CCR5' ),
589 new Q( 'country', 'ZA')
592 (QRY::And($Q, $Q2))->isnull; # returns 1
593 $Q3 = QRY::Or($Q, $Q2);
594 print $Q3->A; # prints '(CCR5 CXCR4)[coreceptor] (ZA)[country]'
596 =head3 DESCRIPTION
598 The QRY package provides a query parser for
599 L<Bio::DB::Query::HIVQuery>. Currently, the parser supports AND, OR,
600 and () operations. The structure of the LANL cgi makes it tricky to
601 perform NOTs, though this could be implemented if the desire were
602 great.
604 Two class methods do the work. C<QRY::_parse_q> does a first-pass
605 parse of the query string. C<QRY::_make_q> interprets the parse tree
606 as returned by C<QRY::_parse_q> and produces an array of hash
607 structures that can be used directly by C<Bio::DB::Query::HIVQuery>
608 query execution methods. Validation of query fields and options is
609 performed at the C<Bio::DB::Query::HIVQuery> level, not here.
611 C<QRY> objects are collections of C<R> (or request) objects, which are
612 in turn collections of C<Q> (or atomic query) objects. C<Q> objects
613 represent a query on a single field, with match data options C<OR>ed
614 together, e.g. C<(A B)[subtype]>. C<R> objects collect C<Q> objects
615 that could be processed in a single HTTP request; i.e., a set of
616 atomic queries each having different fields C<AND>ed together, such as
618 (A B)[subtype] AND ('CCR5')[coreceptor] AND (US CA)[country]
620 The C<QRY> object collects C<R>s that cannot be reduced (through
621 logical operations) to a single HTTP request, e.g.
623 ((C)[subtype] AND (SI)[phenotype]) OR ( (D)[subtype] AND (NSI)[phenotype] ),
625 which cannot be got in one go through the current LANL cgi
626 implementation (as far as I can tell). The parser will simplify
627 something like
629 ((C)[subtype] AND (SI)[phenotype]) OR ((C)[subtype] AND (NSI)[phenotype])
631 to the single request
633 (C)[subtype] AND (NSI SI)[phenotype]
635 however.
637 The operators C<&> and C<|> are overloaded to C<QRY::And> and
638 C<QRY::Or>, to get Perl precedence and grouping for free. C<bool> is
639 overloaded to get symbolic tests such as C<if ($QRY) {stuff}>. C<==>
640 is overloaded with C<QRY::Eq> for convenience. No overloading is done
641 for C<R> or C<Q>.
643 =cut
645 # a query algebra for HIVQuery
647 # Each Q object is an 'atomic' query, written as (data)[field]
648 # (a b ...)[X] equals (a)[X] | (b)[X] | ...
649 # Each R object represents a single HTTP request to the db
650 # contains an array of Q (atomic) objects (q1, q2, ...)
651 # the R object is interpreted as q1 & q2 & ...
652 # Each QRY object represents a series of HTTP requests to the db
653 # contains an array of R (request) objects (R1, R2, ...)
654 # the QRY object is interpreted as R1 | R2 | ...
656 # & and | operations are specified for each type
658 package QRY;
659 use strict;
660 $QRY::NULL = new QRY();
663 use overload
664 "|" => \&Or,
665 "&" => \&And,
666 "bool" => \&Bool,
667 "==" => \&Eq;
670 # query language emulator
671 # supports only AND and OR, any groupings
673 # syntax rules:
674 # query atom: bareword [field] OR (bareword ...) [field]
675 # only single bareword allowed between []
676 # annotation fields in {} (only bareword lists allowed between {})
677 # () can group query atoms joined by operators (AND or OR)
678 # () containing only barewords MUST be followed by a field descriptor [field]
679 # empty [] not allowed
680 # query atoms joined with AND by default
681 # barewords are associated (ORed within) the next field descriptor in the line
683 # follow the parse tree, creating new QRY objects as needed in @q, and
684 # construct a logical expression using & and | symbols.
685 # These are overloaded for doing ands and ors on QRY objects;
686 # to get the final QRY object, eval the resulting expression $q_expr.
687 # QRY object will be translated into (possibly multiple) hashes
688 # conforming to HIVQuery parameter requirements.
690 =head3 CLASS METHODS
692 =head4 _make_q
694 Title : _make_q
695 Usage : QRY::_make_q($parsetree)
696 Function: creates hash structures suitable for HIVQuery from parse tree
697 returned by QRY::_parse_q
698 Example :
699 Returns : array of hashrefs of query specs
700 Args : a hashref
702 =cut
704 sub _make_q {
705 my $ptree = shift;
706 my ($q_expr, @q, @an, $query, @dbq);
707 _make_q_guts($ptree, \$q_expr, \@q, \@an);
708 $query = eval $q_expr;
709 throw Bio::Root::Root(-class=>'Bio::Root::Exception',
710 -text=>$@,
711 -value=>$q_expr) if $@;
712 return {} if $query->isnull;
713 foreach my $rq ($query->requests) {
714 my $h = {'query'=>{}};
715 foreach ($rq->atoms) {
716 my @d = split(/\s+/, $_->dta);
717 foreach my $d (@d) {
718 $d =~ s/_/ /g;
719 $d =~ s/'//g;
721 $h->{'query'}{$_->fld} = (@d == 1) ? $d[0] : [@d];
723 $h->{'annot'} = [@an] if @an;
724 push @dbq, $h;
726 return @dbq;
729 =head4 _make_q_guts
731 Title : _make_q_guts (Internal class method)
732 Usage : _make_q_guts($ptree, $q_expr, $qarry, $anarry)
733 Function: traverses the parse tree returned from QRY::_parse_q, checking
734 syntax and creating HIVQuery-compliant query structures
735 Example :
736 Returns :
737 Args : $parse_tree (hashref), $query_expression (scalar string ref),
738 $query_array (array ref : stack for returning query structures),
739 $annotation_array (array ref : stack for returning annotation
740 fields)
742 =cut
744 sub _make_q_guts {
745 my ($ptree, $q_expr, $qarry, $anarry) = @_;
746 my (@words, $o);
747 eval { # catch
748 foreach (@{$ptree->{cont}}) {
749 m{^AND$} && do {
750 $$q_expr .= "&";
751 next;
753 m{^OR$} && do {
754 $$q_expr .= "|";
755 next;
757 m{^HASH} && do {
758 for my $dl ($_->{delim}) {
759 ($dl =~ m{\(}) && do {
760 if (grep /^HASH/, @{$_->{cont}}) {
761 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
762 $$q_expr .= "(";
763 _make_q_guts($_,$q_expr,$qarry,$anarry);
764 $$q_expr .= ")";
766 else {
767 my @c;
768 my $c = join(' ',@{$_->{cont}});
769 $c =~ s/,/ /g;
770 Bio::Root::Root->throw("query syntax error: unmatched ['\"]") if (@c = ($c =~ /(['"])/g)) % 2;
771 @c = split(/\s*(['"])\s*/, $c);
772 do {
773 $c = shift @c;
774 if ($c =~ m{['"]}) {
775 $c = join('', ($c, shift @c, shift @c));
776 $c =~ s/\s+/_/g;
777 push @words, $c;
779 else {
780 push @words, split(/\s+/,$c);
782 } while @c;
784 last;
786 ($dl =~ m{\[}) && do {
787 Bio::Root::Root->throw("syntax error: empty field descriptor") unless @{$_->{cont}};
788 Bio::Root::Root->throw("syntax error: more than one field descriptor in square brackets") unless @{$_->{cont}} == 1;
790 push @{$qarry}, new QRY( new R( new Q( $_->{cont}->[0], @words)));
791 # add default operation if nec
792 $$q_expr .= "&" unless !$$q_expr || !length($$q_expr) || (substr($$q_expr, -1, 1) =~ /[&|(]/);
793 $$q_expr .= "\$q[".$#$qarry."]";
794 @words = ();
795 last;
797 ($dl =~ m{\{}) && do {
798 foreach my $an (@{$_->{cont}}) {
799 ($an =~ /^HASH/) && do {
800 if ($an->{delim} eq '[') {
801 push @$anarry, @{$an->{cont}};
803 else {
804 Bio::Root::Root->throw("query syntax error: only field descriptors (with or without square brackets) allowed in annotation spec");
806 next;
808 do { #else
809 push @$anarry, $an;
810 next;
813 last;
815 do {
816 1; #else stub
819 next;
821 do { # else, bareword
822 if ($o) {
823 $words[-1] .= "_$_";
825 else {
826 push @words, $_;
828 m/['"]/ && ($o = !$o);
830 } # @{ptree->{cont}}
831 Bio::Root::Root->throw("query syntax error: no search fields specified")
832 unless $$q_expr =~ /q\[[0-9]+\]/;
834 $@ ?
835 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
836 -text=>$@,
837 -value=>$$q_expr)
838 : return 1;
841 =head4 _parse_q
843 Title : _parse_q
844 Usage : QRY::_parse_q($query_string)
845 Function: perform first pass parse of a query string with some syntax
846 checking, return a parse tree suitable for QRY::_make_q
847 Example : QRY::_parse_q(" to[be] OR (not to)[be] ");
848 Returns : hashref
849 Args : query string
851 =cut
853 # parse qry string into a branching tree structure
854 # each branch tagged by the opening delimiter ( key 'delim' )
855 # content (tokens and subbranch hashes) placed in l2r order in
856 # @{p->{cont}}
857 sub _parse_q {
858 local $_;
859 my $qstr = shift;
860 my $illegal = qr/[^a-zA-Z0-9-_,\.\(\[\{\}\]\)\s'"]/;
861 my $pdlm = qr/[\{\[\(\)\]\}]/;
862 my %md = ('('=>')', '['=>']','{'=>'}');
863 my @tok = grep !/^\s*$/, split /($pdlm)/, $qstr;
864 return {} unless @tok;
865 my @pstack = ();
866 my @dstack = ();
867 my ($ptree, $p);
869 eval { #catch
870 Bio::Root::Root->throw("query syntax error: illegal character") if $qstr =~ /$illegal/;
872 $ptree = $p = {'delim'=>'*'};
873 foreach (@tok) {
874 #trim whsp
875 s/^\s+//;
876 s/\s+$//;
877 m{[\(\[\{]} && do {
878 my $new = {'delim'=>$_};
879 $p->{cont} = [] unless $p->{cont};
880 push @{$p->{cont}}, $new;
881 push @pstack, $p;
882 push @dstack, $_;
883 $p = $new;
884 next;
886 m{[\)\]\}]} && do {
887 my $d = pop @dstack;
888 if ($md{$d} eq $_) {
889 $p = pop @pstack;
890 Bio::Root::Root->throw("query syntax error: unmatched \"$_\"") unless $p;
892 else {
893 Bio::Root::Root->throw("query syntax error: saw \"$_\" before matching \"$md{$d}\"");
895 next;
897 do { # else
898 $p->{cont} = [] unless $p->{cont};
899 push @{$p->{cont}}, split(/\s+/);
903 $@ ?
904 throw Bio::Root::Root(-class=>'Bio::QueryStringSyntax::Exception',
905 -text=>$@,
906 -value=>"")
907 : return $ptree;
910 ## QRY constructor
912 =head3 CONSTRUCTOR
914 =head4 QRY Constructor
916 Title : QRY constructor
917 Usage : $QRY = new QRY()
918 Function:
919 Example :
920 Returns :
921 Args : array of R objects, optional
923 =cut
925 sub new {
926 my $class = shift;
927 my @args = @_;
928 my $self = {};
929 $self->{requests} = [];
930 bless($self, $class);
931 $self->put_requests(@args) if @args;
932 return $self;
935 ## QRY instance methods
937 =head3 INSTANCE METHODS
939 =head4 requests
941 Title : requests
942 Usage : $QRY->requests
943 Function: get/set array of requests comprising this QRY object
944 Example :
945 Returns :
946 Args : array of class R objects
948 =cut
950 sub requests {
951 my $self = shift;
952 $self->put_requests(@_) if @_;
953 return @{$self->{'requests'}};
956 =head4 put_requests
958 Title : put_requests
959 Usage : $QRY->put_request(@R)
960 Function: add object of class R to $QRY
961 Example :
962 Returns :
963 Args : [an array of] of class R object[s]
965 =cut
967 sub put_requests {
968 my $self = shift;
969 my @args = @_;
970 foreach (@args) {
971 Bio::Root::Root->throw('requires type R (request)') unless ref && $_->isa('R');
972 push @{$self->{requests}}, $_;
974 return @args;
977 =head4 isnull
979 Title : isnull
980 Usage : $QRY->isnull
981 Function: test if QRY object is null
982 Example :
983 Returns : 1 if null, 0 otherwise
984 Args :
986 =cut
988 sub isnull {
989 my $self = shift;
990 return ($self->requests) ? 0 : 1;
993 =head4 A
995 Title : A
996 Usage : print $QRY->A
997 Function: get a string representation of QRY object
998 Example :
999 Returns : string scalar
1000 Args :
1002 =cut
1004 sub A {
1005 my $self = shift;
1006 return join( "\n", map {$_->A} $self->requests );
1009 =head4 len
1011 Title : len
1012 Usage : $QRY->len
1013 Function: get number of class R objects contained by QRY object
1014 Example :
1015 Returns : scalar
1016 Args :
1018 =cut
1020 sub len {
1021 my $self = shift;
1022 return scalar @{$self->{'requests'}};
1025 =head4 clone
1027 Title : clone
1028 Usage : $QRY2 = $QRY1->clone;
1029 Function: create and return a clone of the object
1030 Example :
1031 Returns : object of class QRY
1032 Args :
1034 =cut
1036 sub clone {
1037 local $_;
1038 my $self = shift;
1039 my $ret = new QRY();
1040 foreach ($self->requests) {
1041 $ret->put_requests($_->clone);
1043 return $ret;
1046 ## QRY class methods
1048 =head3 CLASS METHODS
1050 =head4 Or
1052 Title : Or
1053 Usage : $QRY3 = QRY::Or($QRY1, $QRY2)
1054 Function: logical OR for QRY objects
1055 Example :
1056 Returns : a QRY object
1057 Args : two class QRY objects
1059 =cut
1061 sub Or {
1062 local $_;
1063 my ($q, $r, $rev_f) = @_;
1064 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1065 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1066 if ($q->isnull) {
1067 return $r->clone;
1069 elsif ($r->isnull) {
1070 return $q->clone;
1072 do {my $qq = $q; $q=$r; $r=$qq} if ($q->len > $r->len);
1073 my @rq_r = $r->requests;
1074 my @rq_q = $q->requests;
1075 my (@cand_rq, @ret_rq);
1076 # search for simplifications
1077 my @now = @rq_q;
1078 my @nxt =();
1079 foreach (@rq_r) {
1080 my $found = 0;
1081 while (my $rq = pop @now) {
1082 my @result = R::Or($rq, $_);
1083 if (@result==1) {
1084 push @cand_rq, $result[0]->clone;
1085 $found = 1;
1086 last;
1088 else {
1089 push @nxt, $rq;
1092 push @cand_rq, $_->clone unless ($found);
1093 # @now becomes unexamined @rq_q's plus failed @rq_q's
1094 @now = (@now, @nxt);
1096 push @cand_rq, map {$_->clone} @now; # add all failed @rq_q's
1097 # squeeze out redundant requests
1098 while (my $rq = pop @cand_rq) {
1099 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1101 return new QRY( @ret_rq );
1104 =head4 And
1106 Title : And
1107 Usage : $QRY3 = QRY::And($QRY1, $QRY2)
1108 Function: logical AND for QRY objects
1109 Example :
1110 Returns : a QRY object
1111 Args : two class QRY objects
1113 =cut
1115 sub And {
1116 my ($q, $r, $rev_f) = @_;
1117 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1118 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1119 return ($QRY::NULL) if ($q->isnull || $r->isnull);
1120 my (@cand_rq, @ret_rq);
1121 foreach my $rq_r ($r->requests) {
1122 foreach my $rq_q ($q->requests) {
1123 my ($rq) = R::And($rq_r, $rq_q);
1124 push @cand_rq, $rq unless $rq->isnull;
1127 return $QRY::NULL unless @cand_rq;
1128 # squeeze out redundant requests
1129 while (my $rq = pop @cand_rq) {
1130 push @ret_rq, $rq unless @cand_rq && grep {R::Eq($rq, $_)} @cand_rq;
1132 return new QRY( @ret_rq );
1135 =head4 Bool
1137 Title : Bool
1138 Usage : QRY::Bool($QRY1)
1139 Function: allows symbolic testing of QRY object when bool overloaded
1140 Example : do {stuff} if $QRY1 *same as* do {stuff} if !$QRY1->isnull
1141 Returns :
1142 Args : a class QRY object
1144 =cut
1146 sub Bool {
1147 my $q = shift;
1148 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1149 return $q->isnull ? 0 : 1;
1152 =head4 Eq
1154 Title : Eq
1155 Usage : QRY::Eq($QRY1, $QRY2)
1156 Function: test if R objects in two QRY objects are the same
1157 (irrespective of order)
1158 Example :
1159 Returns : 1 if equal, 0 otherwise
1160 Args : two class QRY objects
1162 =cut
1164 sub Eq {
1165 my ($q, $r, $rev_f) = @_;
1166 Bio::Root::Root->throw('requires type QRY') unless ref($q) && $q->isa('QRY');
1167 Bio::Root::Root->throw('requires type QRY') unless ref($r) && $r->isa('QRY');
1168 return 0 unless $q->len == $r->len;
1169 foreach my $rq_q ($q->requests) {
1170 my $found = 0;
1171 foreach my $rq_r ($r->requests) {
1172 if (R::Eq($rq_q,$rq_r)) {
1173 $found = 1;
1174 last;
1177 return 0 unless $found;
1179 return 1;
1184 =head2 Class R - request objects for QRY algebra
1186 =head3 SYNOPSIS
1188 $R = new R( $q1, $q2 );
1189 $R->put_atoms($q3);
1190 $R->del_atoms('coreceptor', 'phenotype');
1191 return $R->clone;
1192 $R1 = new R( new Q('subtype', 'B') );
1193 $R2 = new R( new Q('subtype', 'B C'),
1194 new Q('country', 'US') );
1195 R::Eq( (R::And($R1, $R2))[0],
1196 new R( new Q('subtype', 'B' ),
1197 new Q('country', 'US') )); # returns 1
1198 QRY::Eq( new QRY(R::Or($R1, $R2)), new QRY($R1, $R2) ); # returns 1
1199 R::In( (R::And($R1, $R2))[0], $R1 ); # returns 1
1201 =head3 DESCRIPTION
1203 Class R objects contain a list of atomic queries (class Q
1204 objects). Each class R object represents a single HTTP request to the
1205 LANL DB. When converted to a DB query, the class Q objects contained
1206 by an R object are effectively C<AND>ed.
1208 =cut
1210 package R;
1211 use strict;
1212 $R::NULL = new R();
1215 ## R constructor
1217 =head3 CONSTRUCTOR
1219 =head4 R constructor
1221 Title : R constructor
1222 Usage : $R = new R()
1223 Function: create a new R (request) object
1224 Example :
1225 Returns : class R (request) object
1226 Args : optional, array of class Q objects
1228 =cut
1230 sub new {
1231 my $class = shift;
1232 my @args = @_;
1233 my $self = {};
1234 $self->{atoms} = {};
1235 bless($self, $class);
1236 $self->put_atoms(@args) if @args;
1237 return $self;
1240 ## R instance methods
1242 =head3 INSTANCE METHODS
1244 =head4 len
1246 Title : len
1247 Usage : $R->len
1248 Function: get number of class Q objects contained in R object
1249 Example :
1250 Returns : scalar
1251 Args :
1253 =cut
1255 sub len {
1256 my $self = shift;
1257 return scalar @{[keys %{$self->{'atoms'}}]};
1260 =head4 atoms
1262 Title : atoms
1263 Usage : $R->atoms( [optional $field])
1264 Function: get array of class Q (atomic query) objects in class R object
1265 Example : $R->atoms(); $R->atoms('coreceptor')
1266 Returns : array of class Q objects (all Qs or those corresponding to $field
1267 if present)
1268 Args : optional, scalar string
1270 =cut
1272 sub atoms {
1273 local $_;
1274 # returns an array of atoms
1275 # no arg: all atoms;
1276 # args: atoms with specified fields
1277 my $self = shift;
1278 my @flds = (@_ ? @_ : keys %{$self->{'atoms'}});
1279 return wantarray ? map { $self->{'atoms'}->{$_} } @flds : $self->{'atoms'}->{$flds[0]};
1282 =head4 fields
1284 Title : fields
1285 Usage : $R->fields
1286 Function: get array of fields of all Q objects contained in $R
1287 Example :
1288 Returns : array of scalars
1289 Args :
1291 =cut
1293 sub fields {
1294 my $self = shift;
1295 return keys %{$self->{'atoms'}};
1298 =head4 put_atoms
1300 Title : put_atoms
1301 Usage : $R->put_atoms( @q )
1302 Function: AND an atomic query (class Q object) to the class R object's list
1303 Example :
1304 Returns : void
1305 Args : an [array of] class Q object[s]
1307 =cut
1309 sub put_atoms {
1310 # AND this atom to the request
1311 local $_;
1312 my $self = shift;
1313 my @args = @_;
1314 foreach (@args) {
1315 Bio::Root::Root->throw('requires type Q (atom)') unless ref && $_->isa('Q');
1316 if ($self->atoms($_->fld)) {
1317 my $a = Q::qand( $self->atoms($_->fld), $_ );
1318 if ($a->isnull) {
1319 delete $self->{'atoms'}->{$_->fld};
1321 else {
1322 $self->{atoms}->{$_->fld} = $a->clone;
1325 else {
1326 $self->{atoms}->{$_->fld} = $_->clone;
1329 return;
1332 =head4 del_atoms
1334 Title : del_atoms
1335 Usage : $R->del_atoms( @qfields )
1336 Function: removes class Q objects from R object's list according to the
1337 field names given in arguments
1338 Example :
1339 Returns : the class Q objects deleted
1340 Args : scalar array of field names
1342 =cut
1344 sub del_atoms {
1345 # remove atoms by field from request
1346 local $_;
1347 my $self = shift;
1348 my @args = @_;
1349 return () unless @args;
1350 my @ret;
1351 foreach (@args) {
1352 push @ret, delete $self->{'atoms'}->{$_};
1354 return @ret;
1357 =head4 isnull
1359 Title : isnull
1360 Usage : $R->isnull
1361 Function: test if class R object is null
1362 Example :
1363 Returns : 1 if null, 0 otherwise
1364 Args :
1366 =cut
1368 sub isnull {
1369 my $self = shift;
1370 return ($self->len) ? 0 : 1;
1373 =head4 A
1375 Title : A
1376 Usage : print $R->A
1377 Function: get a string representation of class R object
1378 Example :
1379 Returns : string scalar
1380 Args :
1382 =cut
1384 sub A {
1385 my $self = shift;
1386 my @a = sort {$a->fld cmp $b->fld} $self->atoms;
1387 return join(" ", map {$_->A} @a);
1390 =head4 clone
1392 Title : clone
1393 Usage : $R2 = $R1->clone;
1394 Function: create and return a clone of the object
1395 Example :
1396 Returns : object of class R
1397 Args :
1399 =cut
1401 sub clone {
1402 local $_;
1403 my $self = shift;
1404 my $ret = new R();
1405 foreach ($self->atoms) {
1406 $ret->put_atoms($_->clone);
1408 return $ret;
1411 ## R class methods
1413 =head3 CLASS METHODS
1415 =head4 In
1417 Title : In
1418 Usage : R::In($R1, $R2)
1419 Function: tests whether the query represented by $R1 would return a subset
1420 of items returned by the query represented by $R2
1421 Example : print "R2 gets those and more" if R::In($R1, $R2);
1422 Returns : 1 if R1 is subset of R2, 0 otherwise
1423 Args : two class R objects
1425 =cut
1427 sub In {
1428 local $_;
1429 my ($s, $t) = @_;
1430 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1431 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1432 return 1 if ($s->isnull);
1433 # common fields
1434 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,$s->fields} $t->fields;
1435 return 0 unless @cf==$t->len;
1436 foreach (@cf) {
1437 my @sd = split(/\s+/, $s->atoms($_)->dta);
1438 my @td = split(/\s+/, $t->atoms($_)->dta);
1439 my @cd = grep {defined} map {my $d=$_; grep /^$d$/, @td} @sd;
1440 return 0 unless @cd==@sd;
1442 return 1;
1445 =head4 And
1447 Title : And
1448 Usage : @Rresult = R::And($R1, $R2)
1449 Function: logical AND for R objects
1450 Example :
1451 Returns : an array containing class R objects
1452 Args : two class R objects
1454 =cut
1456 sub And {
1457 local $_;
1458 my ($s, $t) = @_;
1459 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1460 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1461 return ($R::NULL) if ($s->isnull || $t->isnull);
1463 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1464 # $t has at least as many fields defined than $s ($t is more restrictive)
1466 # common fields
1467 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1468 my $ret = new R();
1469 my $v = $t->clone;
1470 $v->del_atoms(@cf);
1471 my $u = $s->clone;
1472 $u->del_atoms(@cf);
1474 # And the atoms with identical fields
1476 foreach (@cf) {
1477 my ($a) = Q::qand($s->atoms($_), $t->atoms($_));
1478 if ($a->isnull) {
1479 return $R::NULL;
1481 else {
1482 $ret->put_atoms($a);
1485 # put the private atoms
1486 $ret->put_atoms($u->atoms, $v->atoms);
1487 return ($ret);
1491 =head4 Or
1493 Title : Or
1494 Usage : @Rresult = R::Or($R1, $R2)
1495 Function: logical OR for R objects
1496 Example :
1497 Returns : an array containing class R objects
1498 Args : two class R objects
1500 =cut
1502 sub Or {
1503 local $_;
1504 my ($s, $t) = @_;
1505 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1506 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1507 if ($s->isnull) {
1508 return $t->clone;
1510 elsif ($t->isnull) {
1511 return $s->clone;
1513 return $s->clone if (R::In($t, $s));
1514 return $t->clone if (R::In($s, $t));
1516 # try simplifying
1517 do { my $ss = $s; $s = $t; $t = $ss } if ( $s->len > $t->len );
1518 # common fields
1519 my @cf = grep {defined} map {my $sf = $_; grep /$sf/, $t->fields } $s->fields;
1521 if ($t->len == @cf) {
1522 # all atoms equal within fields but one? If yes, simplify...
1523 my @df = grep {!Q::qeq($s->atoms($_), $t->atoms($_))} @cf;
1524 if (@df == 1) {
1525 my ($a) = Q::qor($s->atoms($df[0]), $t->atoms($df[0]));
1526 my $ret = $s->clone;
1527 $ret->del_atoms($df[0]);
1528 $ret->put_atoms($a);
1529 return ($ret);
1533 # neither request contains the other, and the requests cannot be
1534 # simplified; reflect back (clones of) the input...
1535 return ($s->clone, $t->clone);
1539 =head4 Eq
1541 Title : Eq
1542 Usage : R::Eq($R1, $R2)
1543 Function: test if class Q objects in two R objects are the same
1544 (irrespective of order)
1545 Example :
1546 Returns : 1 if equal, 0 otherwise
1547 Args : two class R objects
1549 =cut
1551 sub Eq {
1552 local $_;
1553 my ($s, $t) = @_;
1554 Bio::Root::Root->throw('requires type R (request)') unless ref($s) && $s->isa('R');
1555 Bio::Root::Root->throw('requires type R (request)') unless ref($t) && $t->isa('R');
1556 my @sf = $s->fields;
1557 my @tf = $t->fields;
1558 return 0 unless @sf==@tf;
1559 my @cf = grep {defined} map {my $f=$_; grep /^$f$/,@sf} @tf;
1560 return 0 unless @cf==@tf;
1561 foreach (@cf) {
1562 return 0 unless Q::qeq($s->atoms($_), $t->atoms($_));
1564 return 1;
1568 =head2 Class Q - atomic query objects for QRY algebra
1570 =head3 SYNOPSIS
1572 $q = new Q('coreceptor', 'CXCR4 CCR5');
1573 $u = new Q('coreceptor', 'CXCR4');
1574 $q->fld; # returns 'coreceptor'
1575 $q->dta; # returns 'CXCR4 CCR5'
1576 print $q->A; # prints '(CXCR4 CCR5)[coreceptor]
1577 Q::qeq($q, $u); # returns 0
1578 Q::qeq( Q::qor($q, $q), $q ); # returns 1
1579 Q::qin($u, $q) # returns 1
1580 Q::qeq(Q::qand($u, $q), $u ); # returns 1
1582 =head3 DESCRIPTION
1584 Class Q objects represent atomic queries, that can be described by a
1585 single LANL cgi parameter=value pair. Class R objects (requests) are
1586 built from class Qs. The logical operations at the higher levels
1587 (C<QRY, R>) ultimately depend on the lower level operations on Qs:
1588 C<qeq, qin, qand, qor>.
1590 =cut
1592 package Q;
1593 use strict;
1594 $Q::NULL = new Q();
1596 ## Q constructor
1598 =head3 CONSTRUCTOR
1600 =head4 Q constructor
1602 Title : Q constructor
1603 Usage : $q = new Q($field, $data)
1604 Function: create a new Q (atomic query) object
1605 Example :
1606 Returns : class Q object
1607 Args : optional $field, $data strings
1609 =cut
1611 sub new {
1612 local $_;
1613 my ($class,@args) = @_;
1614 my $self={};
1615 foreach (@args) { s/^\s+//; s/\s+$//; }
1616 my ($fld, @dta) = @args;
1617 $self->{fld}=$fld;
1618 $self->{dta}=join(" ", @dta);
1619 bless($self, $class);
1620 return $self;
1623 ## Q instance methods
1625 =head3 INSTANCE METHODS
1627 =head4 isnull
1629 Title : isnull
1630 Usage : $q->isnull
1631 Function: test if class Q object is null
1632 Example :
1633 Returns : 1 if null, 0 otherwise
1634 Args :
1636 =cut
1638 sub isnull {
1639 my $self = shift;
1640 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1641 return 1 unless (($self->fld && length($self->fld)) || ($self->dta && length($self->dta)));
1642 return 0;
1645 =head4 fld
1647 Title : fld
1648 Usage : $q->fld($field)
1649 Function: get/set fld (field name) property
1650 Example :
1651 Returns : scalar
1652 Args : scalar
1654 =cut
1656 sub fld {
1657 my $self = shift;
1658 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1659 my $f = shift;
1660 if ($f) {
1661 $f =~ s/^\s+//;
1662 $f =~ s/\s+$//;
1663 return $self->{fld}=$f;
1665 return $self->{fld};
1669 =head4 dta
1671 Title : dta
1672 Usage : $q->dta($data)
1673 Function: get/set dta (whsp-separated data string) property
1674 Example :
1675 Returns : scalar
1676 Args : scalar
1678 =cut
1680 sub dta {
1681 my $self = shift;
1682 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1683 my $d = join(" ", @_);
1684 if ($d) {
1685 $d =~ s/^\s+//;
1686 $d =~ s/\s+$//;
1687 return $self->{dta} = $d;
1689 return $self->{dta};
1692 =head4 A
1694 Title : A
1695 Usage : print $q->A
1696 Function: get a string representation of class Q object
1697 Example :
1698 Returns : string scalar
1699 Args :
1701 =cut
1703 sub A {
1704 my $self = shift;
1705 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1706 my @a = split(/\s+/, $self->dta);
1708 return "(".join(' ', sort {$a cmp $b} @a).")[".$self->fld."]";
1711 =head4 clone
1713 Title : clone
1714 Usage : $q2 = $q1->clone;
1715 Function: create and return a clone of the object
1716 Example :
1717 Returns : object of class Q
1718 Args :
1720 =cut
1722 sub clone {
1723 my $self = shift;
1724 Bio::Root::Root->throw("requires type Q (atom)") unless ref($self) && $self->isa('Q');
1725 my $ret = new Q ($self->fld, $self->dta);
1726 return $ret;
1729 ### Q class methods
1731 =head3 CLASS METHODS
1733 =head4 qin
1735 Title : qin
1736 Usage : Q::qin($q1, $q2)
1737 Function: tests whether the query represented by $q1 would return a subset
1738 of items returned by the query represented by $q2
1739 Example : print "q2 gets those and more" if Q::qin($q1, $q2);
1740 Returns : 1 if q1 is subset of q2, 0 otherwise
1741 Args : two class Q objects
1743 =cut
1745 sub qin {
1746 my ($a, $b) = @_;
1747 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1748 return 0 unless $a->fld eq $b->fld;
1749 return Q::qeq( $b, Q::qor($a, $b) );
1752 =head4 qeq
1754 Title : qeq
1755 Usage : Q::qeq($q1, $q2)
1756 Function: test if fld and dta properties in two class Q objects are the same
1757 (irrespective of order)
1758 Example :
1759 Returns : 1 if equal, 0 otherwise
1760 Args : two class Q objects
1762 =cut
1764 sub qeq {
1765 local $_;
1766 my ($a, $b) = @_;
1767 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1768 return 0 unless $a->fld eq $b->fld;
1769 my @ad = unique(split(/\s+/,$a->dta));
1770 my @bd = unique(split(/\s+/,$b->dta));
1771 return 0 unless @ad==@bd;
1772 my @cd = grep {defined} map {my $f = $_; grep /^$f$/, @ad} @bd;
1773 return @cd == @bd;
1776 =head4 qor
1778 Title : qor
1779 Usage : @qresult = Q::qor($q1, $q2)
1780 Function: logical OR for Q objects
1781 Example :
1782 Returns : an array of class Q objects
1783 Args : two class Q objects
1785 =cut
1787 sub qor {
1788 local $_;
1789 my @a = @_;
1790 foreach (@a) {
1791 Bio::Root::Root->throw("requires type Q (atom)") unless ref && $_->isa('Q');
1793 my @ret;
1794 my (%f, @f);
1795 @a = grep {!$_->isnull} @a;
1796 return ($Q::NULL) unless @a > 0;
1797 # list of unique flds
1798 @f = unique(map {$_->fld} @a);
1799 foreach my $f (@f) {
1800 my @fobjs = grep {$_->fld eq $f} @a;
1801 my @d = unique(map {split(/\s/, $_->dta)} @fobjs );
1802 my $r = new Q($f, @d);
1803 push @ret, $r;
1805 return @ret;
1808 =head4 qand
1810 Title : qand
1811 Usage : @qresult = Q::And($q1, $q2)
1812 Function: logical AND for R objects
1813 Example :
1814 Returns : an array of class Q objects
1815 Args : two class Q objects
1817 =cut
1819 sub qand {
1820 local $_;
1821 my ($a, $b) = @_;
1822 Bio::Root::Root->throw('requires type Q (atom)') unless (ref $a) && $a->isa('Q') && (ref $b) && $b->isa('Q');
1823 my @ret;
1824 if (ref $a eq 'ARRAY') {
1825 foreach my $ea (@$a) {
1826 push @ret, qand( $ea, $b );
1828 return qor(@ret); # simplify
1830 elsif (ref $b eq 'ARRAY') {
1831 foreach my $eb (@$b) {
1832 push @ret, qand( $a, $eb);
1835 return qor(@ret); # simplify
1837 else {
1838 return ($Q::NULL) if ($a->isnull || $b->isnull);
1839 if ($a->fld eq $b->fld) {
1840 # find intersection of data
1841 my (%ad, @ad, @bd);
1842 @ad = split(/\s+/, $a->dta);
1843 @ad{@ad} = (1) x @ad;
1844 @bd = split(/\s+/, $b->dta);
1845 foreach (@bd) {
1846 $ad{$_}++;
1848 my $r = new Q($a->fld,
1849 grep {$_}
1850 map {$ad{$_} == 2 ? $_ : undef} keys %ad);
1851 return (length($r->dta) > 0) ? ($r) : ($Q::NULL);
1853 else {
1854 return ($a, $b);
1859 =head3 INTERNALS
1861 =head4 unique
1863 Title : unique
1864 Usage : @ua = unique(@a)
1865 Function: return contents of @a with duplicates removed
1866 Example :
1867 Returns :
1868 Args : an array
1870 =cut
1872 sub unique {
1873 my @a = @_;
1874 my %a;
1875 @a{@a} = undef;
1876 return keys %a;
1881 =head2 Additional tools for Bio::AnnotationCollectionI
1883 =head3 SYNOPSIS
1885 $seq->annotation->put_value('patient_id', 1401)
1886 $seq->annotation->get_value('patient_ids') # returns 1401
1887 $seq->annotation->put_value('patient_group', 'MassGenH')
1888 $seq->annotation->put_value(['clinical', 'cd4count'], 503);
1889 $seq->annotation->put_value(['clinical', 'virus_load'], 150805);
1890 foreach ( qw( cd4count virus_load ) ) {
1891 $blood_readings{$_} = $seq->annonation->get_value(['clinical', $_]);
1894 =head3 DESCRIPTION
1896 C<get_value()> and C<put_value> allow easy creation of and access to an annotation collection tree with nodes of L<Bio::Annotation::SimpleValue>. These methods obiviate direct accession of the SimpleValue objects.
1898 =cut
1900 package Bio::AnnotationCollectionI;
1901 use strict;
1902 use Bio::Annotation::SimpleValue;
1904 =head2 get_value
1906 Title : get_value
1907 Usage : $ac->get_value($tagname) -or-
1908 $ac->get_value( $tag_level1, $tag_level2,... )
1909 Function: access the annotation value assocated with the given tags
1910 Example :
1911 Returns : a scalar
1912 Args : an array of tagnames that descend into the annotation tree
1914 =cut
1916 sub get_value {
1917 local $_;
1918 my $self = shift;
1919 my @args = @_;
1920 my @h;
1921 return "" unless @_;
1922 while ($_ = shift @args) {
1923 @h = $self->get_Annotations($_);
1924 if (ref($h[0]->{value})) {
1925 $self = $h[0]->{value}; # must be another Bio::AnnotationCollectionI
1927 else {
1928 last;
1931 return $h[0] && $h[0]->{value} ; # now the last value.
1934 =head2 put_value
1936 Title : put_value
1937 Usage : $ac->put_value($tagname, $value) -or-
1938 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
1939 $ac->put_value( [$tag_level1, $tag_level2, ...] )
1940 Function: create a node in an annotation tree, and assign a scalar value to it
1941 if a value is specified
1942 Example :
1943 Returns : scalar or a Bio::AnnotationCollection object
1944 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
1945 -VALUE=>$value) -or-
1946 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
1947 Note : If intervening nodes do not exist, put_value creates them, replacing
1948 existing nodes. So if $ac->put_value('x', 10) was done, then later,
1949 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed, and $ac->get_value('x') will now return the annotation collection
1950 with tagname 'y'.
1952 =cut
1954 sub put_value {
1955 local $_;
1956 my $self = shift;
1957 my @args = @_;
1958 my ($keys, $value) = $self->_rearrange([qw( KEYS VALUE )], @args);
1959 my (@keys, $lastkey);
1960 $value ||= new Bio::Annotation::Collection;
1961 @keys = (ref($keys) eq 'ARRAY') ? @$keys : ($keys);
1962 $lastkey = pop @keys;
1963 foreach (@keys) {
1964 my $a = $self->get_value($_);
1965 if (ref($a) && $a->isa('Bio::Annotation::Collection')) {
1966 $self = $a;
1968 else {
1969 # replace an old value
1970 $self->remove_Annotations($_) if $a;
1971 my $ac = new Bio::Annotation::Collection;
1972 $self->add_Annotation(new Bio::Annotation::SimpleValue(
1973 -tagname => $_,
1974 -value => $ac
1977 $self = $ac;
1980 if ($self->get_value($lastkey)) {
1981 # replace existing value
1982 ($self->get_Annotations($lastkey))[0]->{value} = $value;
1984 else {
1985 $self->add_Annotation(new Bio::Annotation::SimpleValue(
1986 -tagname=>$lastkey,
1987 -value=>$value
1990 return $value;