1 # to do: support for comment, reference annotations
3 # $Id: HIVQuery.pm 232 2008-12-11 14:51:51Z maj $
5 # BioPerl module for Bio::DB::Query::LANLQuery
7 # Cared for by Mark A. Jensen <maj@fortinbras.us>
9 # Copyright Mark A. Jensen
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::DB::Query::HIVQuery - Query interface to the Los Alamos HIV Sequence Database
21 $q = new Bio::DB::Query::HIVQuery(" C[subtype] ZA[country] CXCR4[coreceptor] ");
22 $q = new Bio::DB::Query::HIVQuery(
23 -query=>{'subtype'=>'C',
25 'coreceptor'=>'CXCR4'});
27 $ac = $q->get_annotations_by_id(($q->ids)[0]);
28 $ac->get_value('Geo', 'country') # returns 'SOUTH AFRICA'
30 $db = new Bio::DB::HIV();
31 $seqio = $db->get_Stream_by_query($q); # returns annotated Bio::Seqs
33 # get subtype C sequences from South Africa and Brazil,
34 # with associated info on patient health, coreceptor use, and
37 $q = new Bio::DB::Query::HIVQuery(
39 'query' => {'subtype'=>'C',
40 'country'=>['ZA', 'BR']},
41 'annot' => ['patient_health',
43 'days_post_infection']
49 Bio::DB::Query::HIVQuery provides a query-like interface to the
50 cgi-based Los Alamos National Laboratory (LANL) HIV Sequence
51 Database. It uses Bioperl facilities to capture both sequences and
52 annotations in batch in an automated and computable way. Use with
53 L<Bio::DB::HIV> to create C<Bio::Seq> objects and annotated C<Bio::SeqIO>
58 The interface implements a simple query language emulation that understands AND,
59 OR, and parenthetical nesting. The basic query unit is
61 (match1 match2 ...)[fieldname]
63 Sequences are returned for which C<fieldname> equals C<match1 OR match2 OR ...>.
64 These units can be combined with AND, OR and parentheses. For example:
66 (B, C)[subtype] AND (2000, 2001, 2002, 2003)[year] AND ((CN)[country] OR (ZA)[country])
68 which can be shortened to
70 (B C)[subtype] (2000 2001 2002 2003)[year] (CN ZA)[country]
72 The user can specify annotation fields, that do not restrict the query, but
73 arrange for the return of the associated field data for each sequence returned.
74 Specify annotation fields between curly braces, as in:
76 (B C)[subtype] 2000[year] {country cd4_count cd8_count}
78 Annotations can be accessed off the query using methods described in APPENDIX.
80 =head2 Hash specifications for query construction
82 Single query specifications can be made as hash references provided to the
83 C<-query> argument of the constructor. There are two forms:
85 -query => { 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' }
89 -query => [ 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' ]
93 -query => { 'query' => {'country'=>'BR', 'phenotype'=>'NSI'},
94 'annot' => ['cd4_count'] }
96 In both cases, the CD4 count is included in the annotations returned, but does
97 not restrict the rest of the query.
99 To 'OR' multiple values of a field, use an anonymous array ref:
101 -query => { 'country'=>['ZA','BR','NL'], 'subtype'=>['A', 'C', 'D'] }
103 =head2 Valid query field names
105 An attempt was made to make the query field names natural and easy to
106 remember. Aliases are specified in an XML file (C<lanl-schema.xml>) that is part
107 of the distribution. Custom field aliases can be set up by modifying this file.
109 An HTML cheatsheet with valid field names, aliases, and match data can
110 be generated from the XML by using
111 C<hiv_object-E<gt>help('help.html')>. A query can also be validated
112 locally before it is unleashed on the server; see below.
116 LANL DB annotations have been organized into a number of natural
117 groupings, tagged C<Geo>, C<Patient>, C<Virus>, and <StdMap>. After a
118 successful query, each id is associated with a tree of
119 L<Bio::Annotation::SimpleValue> objects. These can be accessed with
120 methods C<get_value()> and C<put_value()> described in APPENDIX.
122 =head2 Delayed/partial query runs
124 Accessing the LANL DB involves multiple HTTP requests. The query can
125 be instructed to proceed through all (the default) or only some of
126 them, using the named parameter C<RUN_OPTION>.
128 To validate a query locally, use
130 $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>0 )
132 which will throw an exception if a field name or option is invalid.
134 To get a query count only, you can save a server hit by using
136 $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>1 )
138 and asking for C<$q-E<gt>count>. To finish the query, do
142 which picks up where you left off.
144 C<-RUN_OPTION=E<gt>2>, the default, runs the full query, returning ids and
149 You can clear the query results, retaining the same LANL session and query spec,
150 by doing C<$q-E<gt>_reset>. Change the query, and rerun with
151 C<$q-E<gt>_do_query($YOUR_RUN_OPTION)>.
157 User feedback is an integral part of the evolution of this and other
158 Bioperl modules. Send your comments and suggestions preferably to
159 the Bioperl mailing list. Your participation is much appreciated.
161 bioperl-l@bioperl.org - General discussion
162 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
164 =head2 Reporting Bugs
166 Report bugs to the Bioperl bug tracking system to help us keep track
167 of the bugs and their resolution. Bug reports can be submitted via
170 http://bugzilla.open-bio.org/
172 =head1 AUTHOR - Mark A. Jensen
174 Email maj@fortinbras.us
180 The rest of the documentation details each of the object methods.
181 Internal methods are usually preceded with a _
185 # Let the code begin...
187 package Bio
::DB
::Query
::HIVQuery
;
189 use vars
qw( $LANL_BASE $LANL_MAP_DB $LANL_MAKE_SEARCH_IF $LANL_SEARCH $SCHEMA_FILE $RUN_OPTION );
191 # Object preamble - inherits from Bio::DB::QueryI
193 use Bio::Annotation::Collection;
194 use Bio::Annotation::Comment;
195 use Bio::Annotation::Reference;
200 use Bio::DB::HIV::HIVQueryHelper;
202 use base qw(Bio::Root::Root Bio::DB::QueryI);
206 $LANL_BASE = "http://www.hiv.lanl.gov/components/sequence/HIV/advanced_search";
207 $LANL_MAP_DB = "map_db.comp";
208 $LANL_MAKE_SEARCH_IF = "make_search_if.comp";
209 $LANL_SEARCH = "search.comp";
210 $SCHEMA_FILE = Bio
::Root
::IO
->catfile(qw(Bio DB HIV lanl-schema.xml));
211 $RUN_OPTION = 2; # execute query
213 @Bio::SchemaNotInit
::Exception
::ISA
= qw( Bio::Root::Exception );
214 @Bio::WebError
::Exception
::ISA
= qw( Bio::Root::Exception );
215 @Bio::QueryNotMade
::Exception
::ISA
= qw( Bio::Root::Exception );
216 @Bio::QueryStringException
::Exception
::ISA
= qw( Bio::Root::Exception );
217 @Bio::HIVSorry
::Exception
::ISA
= qw
( Bio
::Root
::Exception
);
226 Usage : my $hiv_query = new Bio::DB::Query::HIVQuery();
227 Function: Builds a new Bio::DB::Query::HIVQuery object,
228 running a sequence query against the Los Alamos
229 HIV sequence database
230 Returns : an instance of Bio::DB::Query::HIVQuery
236 my($class,@args) = @_;
237 my $self = $class->SUPER::new
(@args);
238 my ($query, $ids, $lanl_base, $lanl_map_db, $lanl_make_search_if, $lanl_search, $schema_file,$run_option) =
239 $self->_rearrange([ qw(QUERY
249 $lanl_base && $self->lanl_base($lanl_base);
250 $lanl_map_db && $self->map_db($lanl_map_db);
251 $lanl_make_search_if && $self->make_search_if($lanl_make_search_if);
252 $lanl_search && $self->search_($lanl_search);
254 # catch this at the top
255 if (defined $schema_file) {
256 if (-e
$schema_file) {
257 $self->_schema_file($schema_file);
260 my ($p) = $self->_schema_file( [grep {$_} map {
261 my $p = Bio
::Root
::IO
->catfile($_, $schema_file);
264 $self->throw(-class=>"Bio::Root::NoSuchThing",
265 -text
=>"Schema file \"".$self->_schema_file."\" cannot be found",
266 -value
=>$self->_schema_file) unless -e
$self->_schema_file;
267 $self->_schema_file($schema_file);
270 $self->_schema_file($SCHEMA_FILE);
272 defined $run_option && do {$RUN_OPTION = $run_option};
274 $self->lanl_base || $self->lanl_base($LANL_BASE);
275 $self->map_db || $self->map_db($LANL_MAP_DB);
276 $self->make_search_if || $self->make_search_if($LANL_MAKE_SEARCH_IF);
277 $self->search_ || $self->search_($LANL_SEARCH);
278 $self->_run_option || $self->_run_option($RUN_OPTION);
281 $self->{_schema
} = HIVSchema
->new($self->_schema_file);
283 # internal storage and flags
284 $self->{'_lanl_query'} = [];
285 $self->{'_lanl_response'} = [];
286 $self->{'_annotations'} = {}; # container for annotation collections assoc. with ids
287 $self->{'_RUN_LEVEL'} = undef; # set in _do_query()
290 defined $query && $self->query($query);
291 defined $ids && $self->ids($ids);
295 $self->_do_query($self->_run_option) if $self->query;
300 =head1 QueryI compliance
305 Usage : $hiv_query->count($newval)
306 Function: return number of sequences found
308 Returns : value of count (a scalar)
309 Args : on set, new value (a scalar or undef, optional)
310 Note : count warns if it is accessed for reading before query
311 has been executed to at least level 1
317 return $self->{'count'} = shift if @_;
318 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 1)) {
319 $self->warn('Query not yet run at > level 1');
321 return $self->{'count'};
327 Usage : $hiv_query->ids($newval)
328 Function: LANL ids of returned sequences
330 Returns : value of ids (an arrayref of sequence accessions/ids)
331 Args : on set, new value (an arrayref or undef, optional)
339 $self->throw(-class=>'Bio::Root::BadParameter',
340 -text
=>'Arrayref required',
341 -value
=> ref $a) unless ref($a) eq 'ARRAY';
342 @
{$self->{'ids'}}{@
$a} = (1) x @
$a;
345 return keys %{$self->{'ids'}} if $self->{'ids'};
351 Usage : $hiv_query->query
352 Function: Get/set the submitted query hash or string
354 Returns : hashref or string
355 Args : query in hash or string form (see DESCRIPTION)
361 return $self->{'query'} = shift if @_;
362 return $self->{'query'};
365 =head1 Bio::DB::Query::HIVQuery specific methods
370 Usage : $hiv_query->help("help.html")
371 Function: get html-formatted listing of valid fields/aliases/options
372 based on current schema xml
373 Example : perl -MBio::DB::Query::HIVQuery -e "new Bio::DB::Query::HIVQuery()->help" | lynx -stdin
375 Args : optional filename; otherwise prints to stdout
380 my ($self, $fname) = @_;
382 my $schema = $self->_schema;
385 my (@tbls, @flds, @als, @opts, $fh);
387 open ($fh, ">", $fname) or $self->throw(-class=>'Bio::Root::IOException',
388 -text
=>"Error opening help html file $fname for writing",
394 @tbls = $schema->tables;
396 $h->start_html(-title
=>"HIVQuery Help")
398 print $fh $h->a({-id
=>'TOP'}, $h->h2("Valid <span style='font-variant:small-caps'>Bio::DB::Query::HIVQuery</span> query fields and match data"));
399 print $fh "Fields are organized below according to their Los Alamos HIV database tables. Use aliases in place of full field names in queries; for example:<br/>";
400 print $fh "<blockquote><code> \$hiv_query->query( '(CCR5 CXCR4)[coreceptor] ' ); </code></blockquote><br/>";
401 print $fh "rather than <br/>";
402 print $fh "<blockquote><code> \$hiv_query->query( '(CCR5 CXCR4)[SEQ_SAMple.SSAM_second_receptor]' );</code></blockquote><br/>";
403 print $fh "(which does work, however). Click hyperlinks to see valid search options within the field. The token <code><b>Any</b></code> is the wildcard for all fields.<br/><br/>";
404 print $fh $h->start_table({-style
=>"font-family:sans-serif;"}) ;
405 foreach my $tbl (@tbls) {
406 @flds = grep /^$tbl/, $schema->fields;
407 @flds = grep !/_id/, @flds;
409 $h->start_Tr({-style
=>"background-color: lightblue;"}),
410 $h->td([$h->a({-id
=>$tbl},$tbl), $h->span({-style
=>"font-style:italic"},"field aliases")]),
413 foreach my $fld (@flds) {
414 @als = reverse $schema->aliases($fld);
416 $h->Tr( $h->td( ["", $h->a({-href
=>"#opt$fld"}, shift @als)] ))
418 my @tmp = grep {$_} $schema->options($fld);
419 #print STDERR join(', ',@tmp)."\n";
421 {-style
=>"font-family:sans-serif;font-size:small"},
425 "<i>Valid options for</i> <b>$fld</b>: "),
427 @tmp ?
$h->code(join(", ", @tmp)) : $h->i("free text")
430 "<i>Other aliases</i>: "),
432 @als ?
$h->code(join(",",@als)) : "<i>none</i>"
437 $h->a({-href
=>"#$tbl"}, $h->small('BACK')),
438 $h->a({-href
=>"#TOP"}, $h->small('TOP'))
444 print $fh $h->end_table;
446 print $fh $h->end_html;
451 =head1 Annotation manipulation methods
453 =head2 get_annotations_by_ids
455 Title : get_annotations_by_ids (or ..._by_id)
456 Usage : $ac = $hiv_query->get_annotations_by_ids(@ids)
457 Function: Get the Bio::Annotation::Collection for these sequence ids
459 Returns : A Bio::Annotation::Collection object
460 Args : an array of sequence ids
464 sub get_annotations_by_ids
{
468 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
469 $self->warn('Requires query run at level 2');
472 @ret = map {$self->{'_annotations'}->{$_}} @ids if exists($self->{'_annotations'});
474 return (wantarray ?
@ret : $ret[0]) if @ret;
479 sub get_annotations_by_id
{
480 shift->get_annotations_by_ids(@_);
483 =head2 add_annotations_for_id
485 Title : add_annotations_for_id
486 Usage : $hiv_query->add_annotations_for_id( $id ) to create a new
487 empty collection for $id
488 $hiv_query->add_annotations_for_id( $id, $ac ) to associate
490 Function: Associate a Bio::Annotation::Collection with this sequence id
492 Returns : a Bio::Annotation::Collection object
493 Args : sequence id [, Bio::Annotation::Collection object]
497 sub add_annotations_for_id
{
500 $ac = new Bio
::Annotation
::Collection
unless defined $ac;
501 $self->throw(-class=>'Bio::Root::BadParameter'
502 -text
=>'Bio::Annotation::Collection required at arg 2',
503 -value
=>"") unless ref($ac) eq 'Bio::Annotation::Collection';
505 $self->{'_annotations'}->{$id} = $ac unless exists($self->{'_annotations'}->{$id});
509 =head2 remove_annotations_for_ids
511 Title : remove_annotations_for_ids (or ..._for_id)
512 Usage : $hiv_query->remove_annotations_for_ids( @ids)
513 Function: Remove annotation collection for this sequence id
515 Returns : An array of the previous annotation collections for these ids
516 Args : an array of sequence ids
520 sub remove_annotations_for_ids
{
525 push @ac, delete $self->{'_annotations'}->{$_};
531 sub remove_annotations_for_id
{
532 shift->remove_annotations_for_ids(@_);
535 =head2 remove_annotations
537 Title : remove_annotations
538 Usage : $hiv_query->remove_annotations()
539 Function: Remove all annotation collections for this object
541 Returns : The previous annotation collection hash for this object
546 sub remove_annotations
{
549 my $ach = $self->{'_annotations'};
550 $self->{'_annotations'} = {};
557 Usage : $ac->get_value($tagname) -or-
558 $ac->get_value( $tag_level1, $tag_level2,... )
559 Function: access the annotation value assocated with the given tags
562 Args : an array of tagnames that descend into the annotation tree
563 Note : this is a L<Bio::AnnotationCollectionI> method added in
564 L<Bio::DB::HIV::HIVQueryHelper>
571 Usage : $ac->put_value($tagname, $value) -or-
572 $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or-
573 $ac->put_value( [$tag_level1, $tag_level2, ...] )
574 Function: create a node in an annotation tree, and assign a scalar value to it
575 if a value is specified
577 Returns : scalar or a Bio::AnnotationCollection object
578 Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname,
580 \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value )
581 Notes : This is a L<Bio::AnnotationCollectionI> method added in
582 L<Bio::DB::HIV::HIVQueryHelper>.
583 If intervening nodes do not exist, put_value creates them, replacing
584 existing nodes. So if $ac->put_value('x', 10) was done, then later,
585 $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed,
586 and $ac->get_value('x') will now return the annotation collection
591 =head1 GenBank accession manipulation methods
593 =head2 get_accessions
595 Title : get_accessions
596 Usage : $hiv_query->get_accessions()
597 Function: Return an array of GenBank accessions associated with these
598 sequences (available only after a query is subjected to a
599 full run (i.e., when $RUN_OPTION == 2)
601 Returns : array of gb accession numbers, or () if none found for this query
609 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
610 $self->warn('Requires query run at level 2');
613 my @ac = $self->get_annotations_by_ids($self->ids);
615 push @ret, $_->get_value('accession');
620 =head2 get_accessions_by_ids
622 Title : get_accessions_by_ids (or ..._by_id)
623 Usage : $hiv_query->get_accessions_by_ids(@ids)
624 Function: Return an array of GenBank accessions associated with these
625 LANL ids (available only after a query is subjected to a
626 full run (i.e., when $RUN_OPTION == 2)
628 Returns : array of gb accession numbers, or () if none found for this query
633 sub get_accessions_by_ids
{
637 if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) {
638 $self->warn('Requires query run at level 2');
641 my @ac = $self->get_annotations_by_ids(@ids);
643 push @ret, $_->get_value('accession');
645 return wantarray ?
@ret : $ret[0];
649 sub get_accessions_by_id
{
650 shift->get_accessions_by_ids(@_);
655 =head1 Query control methods
660 Usage : $hiv_query->_do_query or $hiv_query->_do_query($run_level)
661 Function: Execute the query according to argument or $RUN_OPTION
663 extent of query reflects the value of argument
664 0 : validate only (no HTTP action)
665 1 : return sequence count only
666 2 : return sequence ids (full query, returns with annotations)
667 noop if current _RUN_LEVEL of query is >= argument or $RUN_OPTION,
669 Returns : actual _RUN_LEVEL (0, 1, or 2) achieved
670 Args : desired run level (optional, global $RUN_OPTION is default)
676 $rl = $RUN_OPTION unless $rl;
677 $self->throw(-class=>"Bio::Root::BadParameter",
678 -text
=>"Invalid run option \"$RUN_OPTION\"",
679 -value
=>$RUN_OPTION) unless grep /^$RUN_OPTION$/, (0, 1, 2);
680 (!defined($self->{'_RUN_LEVEL'})) && do {
681 $self->_create_lanl_query();
682 $self->{'_RUN_LEVEL'} = 0;
684 ($rl > 0) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 0)) && do {
685 $self->_do_lanl_request();
686 $self->{'_RUN_LEVEL'} = 1;
688 ($rl > 1) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 1)) && do {
689 $self->_parse_lanl_response();
690 $self->{'_RUN_LEVEL'} = 2;
692 return $self->{'_RUN_LEVEL'};
698 Usage : $hiv_query->_reset
699 Function: Resets query storage, count, and ids, while retaining session id,
700 original query string, and db schema
711 $self->{'_annotations'} = {};
712 $self->{'_lanl_response'} = [];
713 $self->{'_lanl_query'} = [];
714 $self->{'_RUN_LEVEL'} = undef;
721 Usage : $hiv_query->_session_id($newval)
722 Function: Get/set HIV db session id (initialized in _do_lanl_request)
724 Returns : value of _session_id (a scalar)
725 Args : on set, new value (a scalar or undef, optional)
732 return $self->{'_session_id'} = shift if @_;
733 return $self->{'_session_id'};
739 Usage : $hiv_query->_run_option($newval)
740 Function: Get/set HIV db query run option (see _do_query for values)
742 Returns : value of _run_option (a scalar)
743 Args : on set, new value (a scalar or undef, optional)
750 return $self->{'_run_option'} = shift if @_;
751 return $self->{'_run_option'};
761 Usage : $hiv_query->add_id($id)
762 Function: Add new id to ids
772 ${$self->{'ids'}}{$id}++;
779 return $self->{'lanl_base'} = shift if @_;
780 return $self->{'lanl_base'};
786 Usage : $obj->map_db($newval)
789 Returns : value of map_db (a scalar)
790 Args : on set, new value (a scalar or undef, optional)
796 return $self->{'map_db'} = shift if @_;
797 return $self->{'map_db'};
800 =head2 make_search_if
802 Title : make_search_if
803 Usage : $obj->make_search_if($newval)
806 Returns : value of make_search_if (a scalar)
807 Args : on set, new value (a scalar or undef, optional)
813 return $self->{'make_search_if'} = shift if @_;
814 return $self->{'make_search_if'};
820 Usage : $obj->search_($newval)
823 Returns : value of search_ (a scalar)
824 Args : on set, new value (a scalar or undef, optional)
830 return $self->{'search_'} = shift if @_;
831 return $self->{'search_'};
838 Function: return the full map_db uri ("Database Map")
840 Returns : scalar string
847 return $self->lanl_base."/".$self->map_db;
851 =head2 _make_search_if_uri
853 Title : _make_search_if_uri
855 Function: return the full make_search_if uri ("Make Search Interface")
857 Returns : scalar string
862 sub _make_search_if_uri
{
864 return $self->lanl_base."/".$self->make_search_if;
871 Function: return the full search cgi uri ("Search Database")
873 Returns : scalar string
880 return $self->lanl_base."/".$self->search_;
886 Usage : $hiv_query->_schema_file($newval)
889 Returns : value of _schema_file (an XML string or filename)
890 Args : on set, new value (an XML string or filename, or undef, optional)
897 return $self->{'_schema_file'} = shift if @_;
898 return $self->{'_schema_file'};
904 Usage : $hiv_query->_schema($newVal)
907 Returns : value of _schema (an HIVSchema object in package
908 L<Bio::DB::HIV::HIVQueryHelper>)
909 Args : none (field set directly in new())
917 return $self->{'_schema'} :
918 $self->throw(-class=>'Bio::SchemaNotInit::Exception',
919 -text
=>"DB schema not initialized",
927 Usage : $hiv_query->_lanl_query(\@query_parms)
928 Function: pushes \@query_parms onto @{$self->{'_lanl_query'}
930 Returns : value of _lanl_query (an arrayref)
931 Args : on set, new value (an arrayref or undef, optional)
938 return $self->{'_lanl_query'} unless $a;
939 if (ref $a eq 'ARRAY') {
940 push @
{$self->{'_lanl_query'}}, $a;
944 $self->throw(-class=>'Bio::Root::BadParameter',
945 -text
=>'Array ref required for argument.',
951 =head2 _lanl_response
953 Title : _lanl_response
954 Usage : $hiv_query->_lanl_response($response)
955 Function: pushes $response onto @{$hiv_query->{'_lanl_response'}}
957 Returns : value of _lanl_response (an arrayref of HTTP::Response objects)
958 Args : on set, new value (an HTTP::Response object or undef, optional)
966 $self->throw(-class=>'Bio::Root::BadParameter',
967 -text
=>'Requires an HTTP::Response object',
968 -value
=> ref $r) unless ref($r) eq 'HTTP::Response';
969 push @
{$self->{'_lanl_response'}}, $r;
972 return $self->{'_lanl_response'};
975 =head2 _create_lanl_query
977 Title : _create_lanl_query
978 Usage : $hiv_query->_create_lanl_query()
979 Function: validate query hash or string, prepare for _do_lanl_request
981 Returns : 1 if successful; throws exception on invalid query
986 sub _create_lanl_query
{
988 my (%inhash, @query, @qhashes);
989 my ($schema, @validFields, @validAliases);
993 $self->throw(-class=>'Bio::Root::NoSuchThing',
994 -text
=>'Query not specified',
998 ref eq 'HASH' && do {
1000 if ( grep /HASH/, map {ref} values %inhash ) {
1001 # check for {query=>{},annot=>[]} style
1002 $self->throw(-class=>'Bio::Root::BadParameter',
1003 -text
=>'Query style unrecognized',
1004 -value
=>"") unless defined $inhash{query
};
1009 ref eq 'ARRAY' && do {
1010 $inhash{'query'} = {@
$_};
1011 push @qhashes, \
%inhash;
1016 @qhashes = $self->_parse_query_string($_);
1019 $schema = $self->_schema;
1020 @validFields = $schema->fields;
1021 @validAliases = $schema->aliases;
1023 # validate args based on the xml specification file
1024 # only checks blanks and fields with explicitly specified options
1025 # text fields can put anything, and the query will be run before
1026 # an error is caught in these
1027 foreach my $qh (@qhashes) {
1028 foreach my $k (keys %{$$qh{'query'}}) {
1031 if (grep /^$k$/, @validFields) {
1034 elsif (grep /^$k$/, @validAliases) {
1035 foreach (@validFields) {
1036 if (grep (/^$k$/, $schema->aliases($_))) {
1040 # $fld contains the field corresp. to the alias
1044 $self->throw(-class=>'Bio::Root::BadParameter',
1045 -text
=>"Invalid field or alias \"$k\"",
1048 # validate matchdata
1049 my $vf = $schema->_sfieldh($fld);
1050 my @md = (ref($qh->{'query'}{$k}) eq 'ARRAY') ? @
{$qh->{'query'}{$k}} : $qh->{'query'}{$k};
1051 if ($$vf{type
} eq 'text') {
1053 $self->throw(-class=>'Bio::Root::BadParameter',
1054 -text
=>'Value for field \"$k\" cannot be empty',
1056 if ($_ eq "") && ($$vf{blank_ok
} eq 'false');
1059 elsif ($$vf{type
} eq 'option') {
1060 foreach my $md (@md) {
1061 $self->throw(-class=>'Bio::Root::BadParameter',
1062 -text
=>"Invalid value \"".$md."\" for field \"$fld\"",
1064 unless $$vf{option
} && grep {defined $_ && /^$md$/} @
{$$vf{option
}};
1067 # validated; add to query
1069 push @query, ($fld => $_);
1072 if ($qh->{'annot'}) {
1073 # validate the column names to be included in the query
1074 # to obtain annotations
1075 my @annot_cols = @
{$qh->{'annot'}};
1076 foreach my $k (@annot_cols) {
1079 if (grep /^$k$/, @validFields) {
1082 elsif (grep /^$k$/, @validAliases) {
1083 foreach (@validFields) {
1084 if (grep (/^$k$/, $schema->aliases($_))) {
1088 # $fld should contain the field corresp. to the alias
1092 $self->throw(-class=>'Bio::Root::NoSuchThing',
1093 -text
=>"Invalid field or alias \"$k\"",
1096 # lazy: 'Any' may not be the right default (but appears to
1097 # be, based on the lanl html)
1098 push @query, ($fld => 'Any');
1102 # insure that LANL and GenBank ids are retrieved
1103 push @query, ('SequenceEntry.SE_id' => 'Any') unless grep /SequenceEntry\.SE_id/, @query;
1104 push @query, ('SequenceAccessions.SA_GenBankAccession' => 'Any')
1105 unless grep /SequenceAccessions\.SA_GenBankAccession/, @query;
1107 # an "order" field is required by the LANL CGI
1108 # if not specified, default to SE_id
1110 push @query, ('order'=>'SequenceEntry.SE_id') unless grep /order/, @query;
1112 # @query now contains sfield=>matchdata pairs, as specified by user
1113 # include appropriate indexes to create correct automatic joins
1114 # established by the LANL CGI
1115 my (@qtbl, @qpk, @qfk);
1117 # the tables represented in query:
1118 my %q = @query; # squish the tables in the current query into hash keys
1119 @qtbl = $schema->tbl('-s', keys %q);
1122 # more than one table, see if they can be connected
1123 # get primary keys of query tables
1124 @qpk = $schema->pk(@qtbl);
1126 # these tables have primary keys
1127 # $schema->tbl('-s', $schema->pk(@qtbl));
1128 # these tables have foreign keys
1129 # map { $schema->tbl('-s',$schema->fk($_)) } @qtbl;
1130 # these are the tables that the foreign keys point to
1131 # $schema->ftbl($schema->fk(@qtbl));
1133 foreach my $pt ($schema->tbl('-s',@qpk)) {
1134 foreach my $ft (map { $schema->tbl('-s',$schema->fk($_)) } @qtbl) {
1135 push @qfk, $schema->fk($ft, $pt);
1138 # add the fields not currently in the query
1139 foreach (@qpk, @qfk) {
1141 if (!grep(/^$fld$/,keys %q)) {
1142 # lazy: 'Any' may not be the right default (but appears to
1143 # be, based on the lanl html)
1144 push @query, ($_ => 'Any');
1149 # set object property
1150 $self->_lanl_query([@query]);
1155 # _do_lanl_request : post the queries created by _create_lanl_query
1157 # @args (or {@args}) should be unaliased Table.Column=>Matchdata
1158 # pairs (these will be used directly in the POSTs)
1160 =head2 _do_lanl_request
1162 Title : _do_lanl_request
1163 Usage : $hiv_query->_do_lanl_request()
1164 Function: Perform search request on _create_lanl_query-validated query
1166 Returns : 1 if successful
1171 sub _do_lanl_request
{
1173 my (@queries, @query, @interface,$interfGet,$searchGet,$response);
1174 my ($numseqs, $count);
1177 if (!$self->_lanl_query) {
1178 $self->throw(-class=>"Bio::Root::BadParameter",
1179 -text
=>"_lanl_query empty, run _create_lanl_request first",
1183 @queries = @
{$self->_lanl_query};
1187 ## search site specific CGI parms
1188 my @search_pms = ('action'=>'Search');
1189 my @searchif_pms = ('action'=>'Search Interface');
1190 my @download_pms = ('action Download.x'=>1, 'action Download.y'=>1);
1192 ## HTML-testing regexps
1193 my $tags_re = qr{(?:\s*<[^>]+>\s*)};
1194 my $session_id_re = qr{<input.*name="id".*value="([0-9a-f]+)"}m;
1195 my $search_form_re = qr{<form[^>]*action=".*/search.comp"};
1196 my $seqs_found_re = qr{Displaying$tags_re*(?:\s*[0-9-]*\s*)*$tags_re*of$tags_re*\s*([0-9]+)$tags_re*sequences found};
1197 my $no_seqs_found_re = qr{Sorry.*no sequences found};
1198 my $too_many_re = qr{too many records: $tags_re*([0-9]+)};
1200 foreach my $q (@queries) {
1202 # default query control parameters
1206 translate
=>'FALSE' # nucleotides
1211 # pull out commands, designated by the COMMAND pseudo-table...
1212 my @commands = map { $query[$_] =~ s/^COMMAND\.// ?
@query[$_..$_+1] : () } (0..$#query-1);
1213 @query = map { $query[$_] =~ /^COMMAND/ ?
() : @query[2*$_..2*$_+1] } (0..($#query-1)/2);
1216 # set control parameters explicitly made in query
1217 foreach my $cp (keys %qctrl) {
1218 if (!grep( /^$cp$/, @query)) {
1219 push @query, ($cp, $qctrl{$cp});
1223 # note that @interface must be an array, since a single 'key' (the table)
1224 # can be associated with multiple 'values' (the columns) in the POST
1226 # squish fieldnames into hash keys
1228 @interface = grep {defined} map {my ($tbl,$col) = /^(.*)\.(.*)$/} keys %q;
1229 eval { # encapsulate communication errors here, defer biothrows...
1231 #mark the useragent should be setable from outside (so we can modify timeouts, etc)
1232 my $ua = new Bio
::WebAgent
(timeout
=> 90);
1233 my $idPing = $ua->get($self->_map_db_uri);
1234 $idPing->is_success || do {$response=$idPing; die "Connect failed"};
1235 # get the session id
1236 if (!$self->_session_id) {
1237 ($self->{'_session_id'}) = ($idPing->content =~ /$session_id_re/);
1238 $self->_session_id || do {$response=$idPing; die "Session not established";};
1241 # strange bug: if action=>'Search+Interface' below (note "+"),
1242 # the response to the search (in $searchGet) shows the correct
1243 # >number< of sequences found, but also an error "No sequences
1244 # match" and an SQL barf. Changing the "+" to a " " sets up the
1245 # interface to lead to the actual sequences being delivered as
1247 $interfGet = $ua->post($self->_make_search_if_uri, [@interface, @searchif_pms, id
=>$self->_session_id]);
1248 $interfGet->is_success || do {$response=$interfGet,die "Interface request failed";};
1249 # see if a search form was returned...
1251 $interfGet->content =~ /$search_form_re/ || do {$response=$interfGet, die "Interface request failed";};
1253 $searchGet = $ua->post($self->_search_uri, [@query, @commands, @search_pms, id
=>$self->_session_id]);
1254 $searchGet->is_success || do {$response=$searchGet, die "Search failed";};
1255 for ($searchGet->content) {
1256 /$no_seqs_found_re/ && do {
1257 $response=$searchGet;
1258 die "No sequences found";
1261 /$too_many_re/ && do {
1262 $response=$searchGet;
1263 die "Too many records ($1): must be <10000";
1266 /$seqs_found_re/ && do {
1273 $response=$searchGet->content;
1274 die "Search failed (response not parsed)";
1277 $response = $ua->post($self->_search_uri, [@download_pms, id
=>$self->_session_id]);
1278 $response->is_success || die "Query failed";
1279 # $response->content is a tab-separated value table of sequences
1280 # and metadata, first line starts with \# and contains fieldnames
1283 # throw, if necessary
1285 ($@
!~ "No sequences found") &&
1286 $self->throw(-class=>'Bio::WebError::Exception',
1291 $self->_lanl_response($response);
1295 $self->warn("No sequences found for this query") unless $count;
1296 $self->count($count);
1297 return 1; # made it.
1301 =head2 _parse_lanl_response
1303 Title : _parse_lanl_response
1304 Usage : $hiv_query->_parse_lanl_response()
1305 Function: Parse the tab-separated-value response obtained by _do_lanl_request
1306 for sequence ids, accessions, and annotations
1308 Returns : 1 if successful
1313 sub _parse_lanl_response
{
1315 ### handle parsing and merging multiple responses into the query object
1316 ### (ids and annotations)
1319 my ($seqGet) = (@_);
1320 my (@data, @cols, %antbl, %antype,%anxlt, @ankeys );
1322 my ($schema, @retseqs, %rec, $ac);
1324 'country' => 'isolation_country',
1325 'coreceptor' => 'second_receptor',
1326 'patient health' => 'health_status'
1329 $schema = $self->_schema;
1331 $self->_lanl_response ||
1332 $self->throw(-class=>"Bio::QueryNotMade::Exception",
1333 -text
=>"Query not yet performed; call _do_lanl_request()",
1335 foreach my $rsp (@
{$self->_lanl_response}) {
1336 @data = split("\r|\n", $rsp->content);
1337 $numseq += ( shift(@data) =~ /Number.*:\s([0-9]+)/ )[0];
1338 @cols = split(/\t/, shift @data);
1340 # mappings from column headings to annotation keys
1341 # squish into hash keys
1342 my %q = @
{ shift @
{$self->_lanl_query} };
1343 %antbl = $schema->ankh(keys %q);
1344 foreach (values %antbl) {
1345 $antype{$_->{ankey
}} = $_->{antype
};
1346 push @ankeys, $_->{ankey
};
1350 ### conversion kludge for specials
1351 $k = $specials{lc $k} if (grep /$k/i, keys %specials);
1354 ($k) = grep (/$k$/i, keys %antbl);
1356 $anxlt{$_} = $antbl{$k}->{ankey
};
1360 @rec{@cols} = split /\t/;
1362 $self->add_id($rec{'SE id'});
1363 $ac = $self->add_annotations_for_id($rec{'SE id'});
1365 # need to handle reference, comment, dblink annots
1368 my $k = $anxlt{$_}; # annot key
1370 my $t = $antype{$k}; # annot type
1371 my $d = $rec{$_}; # the data
1373 \$ac->put_value(-KEYS=>[\$t, \$k], -VALUE=>\$d);
1377 $ac->put_value('accession', $rec{Accession
});
1381 return 1; # made it.
1384 =head2 _parse_query_string
1386 Title : _parse_query_string
1387 Usage : $hiv_query->_parse_query_string($str)
1388 Function: Parses a query string using query language emulator QRY
1389 : in L<Bio::DB::Query::HIVQueryHelper>
1391 Returns : arrayref of hash structures suitable for passing to _create_lanl_query
1392 Args : a string scalar
1396 sub _parse_query_string
{
1398 my $qstring = shift;
1400 #syntax errors thrown in QRY (in HIVQueryHelper module)
1401 $ptree = QRY
::_parse_q
( $qstring );
1402 @ret = QRY
::_make_q
($ptree);
1411 Usage : $hiv_query->_sorry("-president=>Powell")
1412 Function: Throws an exception for unsupported option or parameter
1415 Args : scalar string
1422 $self->throw(-class=>"Bio::HIVSorry::Exception",
1423 -text
=>"Sorry, option/parameter \"$parm\" not (yet) supported. See manpage to complain.",