4 # command-line script for HIV sequence queries
5 # using HIV.pm and HIVQuery.pm
9 bp_hivq.PL - an interactive command-line interface to L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
14 hivq> query C[subtype] SI[phenotype]
17 Query: C[subtype] SI[phenotype]
22 hivq> run D[subtype] SI[phenotype]
26 Query: D[subtype] SI[phenotype]
32 The BioPerl modules L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
33 together allow batch queries against the Los Alamos National
34 Laboratories' HIV Sequence Database using a simple query
35 language. C<bp_hivq.PL> provides both an example of the use of these
36 modules, and a standalone interactive command-line interface to the
37 LANL HIV DB. Simple commands allow the user to retrieve HIV sequences
38 and annotations using the query language implemented in
39 L<Bio::DB::Query::HIVQuery>. Visit the man pages for those modules for
44 Run the script using C<perl bp_hivq.PL> or, in Unix, C<./bp_hivq.PL>. You will see
49 prompt. Type commands with queries to retrieve sequence and annotation
50 data. See the SYNOPSIS for a sample session. Available commands
55 The LANL database is pretty complex and extensive. Use the C<find> facility to
56 explore the available database tables and fields. To identify aliases for a particular field, use C<find alias [fieldname]>. For example, to find a short alias to the
57 weirdly named field C<seq_sample.ssam_second_receptor>, do
59 hivq> find alias seq_sample.ssam_second_receptor
63 coreceptor second_receptor
65 Now, instead of the following query
67 hivq> run C[subtype] CCR5[seq_sample.ssam_second_receptor]
71 hivq> run C[subtype] CCR5[coreceptor]
73 Use the C<outfile> command to set the file that receives the retrieved
74 sequences. You can change the current output file simply by issuing a
75 new C<outfile> command during the session. The output file defaults to
78 Use the C<query> command to validate a query without hitting the
79 database. Use the C<prerun> or C<count> commands to get a count of
80 sequence hits for a query without retrieving the data. Use C<run> or
81 C<do> to perform a complete query, retrieving sequence data into the
82 currently set output files.
84 To process C<bp_hivq.PL> commands in batch, create a text file
85 (C<bp_hivq.cmd>, for example) containing desired commands one per
86 line. Then execute the following from the shell:
88 $ cat bp_hivq.cmd | perl bp_hivq.PL
92 Here is a complete list of commands. Options in single brackets (C<[req_option]>) are
93 required; options in double brackets (C<[[opt_option]]>) are optional.
95 confirm : Toggle interactive confirmation before
98 find : Explore database schema
99 find tables Display all database tables
100 find fields Display all database fields (columns)
101 find fields [table] Display all fields in [table]
102 find alias [field] Display valid aliases for [field]
103 help [[command]] : Show command help
104 if [[command]] not specified, list all
106 id : Display current session id
107 outfile [filename] : Set file for collecting retrieved data
108 ping : Check if LANL DB is available
109 prerun [[query]] : Execute query but retreive hit count only
110 if [[query]] not specified, use current query
111 query [query] : Validate and set current query
112 run [[query]] : Execute query and retrieve data
113 if [[query]] not specified, use current query
114 state : Display current state of the script
116 bye : Alias for 'exit'
117 config : Alias for 'state'
118 count : Alias for 'prerun'
120 out : Alias for 'outfile'
121 quit : Alias for 'exit'
125 -v : verbose; turns on the internal debug() function
131 User feedback is an integral part of the evolution of this and other
132 Bioperl modules. Send your comments and suggestions preferably to
133 the Bioperl mailing list. Your participation is much appreciated.
135 bioperl-l@bioperl.org - General discussion
136 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
138 =head2 Reporting Bugs
140 Report bugs to the Bioperl bug tracking system to help us keep track
141 of the bugs and their resolution. Bug reports can be submitted via the
144 https://github.com/bioperl/bioperl-live/issues
146 =head1 AUTHOR - Mark A. Jensen
148 Mark A. Jensen E<lt>maj@fortinbras.usE<gt>
157 use Bio
::DB
::Query
::HIVQuery
;
164 my $db = new Bio
::DB
::HIV
;
166 my $q = new Bio
::DB
::Query
::HIVQuery
();
167 my $schema = $q->_schema;
191 'outfile' => {'text'=>'[stdout]','handle'=>\
*STDOUT
},
199 'bye' => 'bye: End script',
200 'config' => 'config: Print current configuration state of hivq',
201 'confirm' => 'confirm: Toggle user confirmation of query execution',
202 'count' => 'count [[query string]]: Get number of sequences available for query',
203 'do' => 'do [[query string]]: Run query to completion',
204 'exit' => 'exit: End script',
205 'find' => join("\n","find tables: Print all LANL table names",
206 "find fields: Print all field names",
207 "find fields [tablename]: Print all fields in specified table",
208 "find aliases [fieldname]: Print all valid aliases for specified field",
209 "find options [fieldname/alias]: Print valid match options for specified field"),
210 'help' => 'help [[command]]: Get description of command',
211 'id' => 'id: Print current session id',
212 'out' => 'out [output filename]: Set filename to hold returned sequences',
213 'outfile' => 'outfile [output filename]: Set filename to hold returned sequences',
214 'ping' => 'ping: Ping LANL database',
215 'prerun' => 'prerun [[query string]]: Get number of sequences available',
216 'query' => 'query [[query string]]: Validate and set current query string',
217 'quit' => 'quit: End script',
218 'run' => 'run [[query string]]: Run query to completion',
219 'state' => 'state: Print current configuration state of hivq',
222 debug
("We have arrived.");
225 my $prompt = "hivq> ";
226 my $term = new Term
::ReadLine
'hivq';
230 for ( $cmd = $term->readline($prompt) ) {
233 $term->addhistory($_);
235 $tok[0] = lc $tok[0]; # normalize
237 (!$_ || /^\#/) && do {
240 !(grep(/^$tok[0]$/, @cmds)) && do {
241 error
("Unrecognized command \'$tok[0]\'");
244 (m/^quit$/ || m/^bye$/ || m/^exit$/) && do {
245 my $fh = $state{outfile
}->{handle
};
246 unless ($fh == \
*STDOUT
) {
253 $state{confirm
} = !$state{confirm
};
254 print "User confirmation before running query: ". ($state{confirm
} ?
"yes" : "no")."\n";
257 (m/^outfile$/ || m/^out$/) && do {
259 error
('Output filename not specified');
264 open $fh, ">$tok[1]" or die $!;
270 my $oldfh = $state{outfile
}->{handle
};
271 if ($state{outfile
}->{text
} ne '[stdout]') {
274 $state{outfile
}->{handle
} = $fh;
275 $state{outfile
}->{text
} = $tok[1];
280 my $query = join(' ', @tok[1..$#tok]);
287 print "Current query: $state{query}\n";
290 print "No query set\n";
302 $state{session_id
} = $q->_session_id;
303 $state{query
} = $query;
306 (m/^prerun$/ || m/^count$/) && do {
307 my $query = join(' ', @tok[1..$#tok]);
310 error
('No query currently set');
313 elsif ($q->{'_RUN_LEVEL'} >= 1 &&
314 ($q->query() eq $state{query
})) {
318 # else, fallthrough to prerun current query
333 $state{confirm
} && do { next CMD
unless getConfirm
(); };
334 $state{session_id
} = $q->_session_id;
335 $state{query
} = $query;
345 $state{curct
} = $q->count;
346 $state{query
} = $q->query;
351 (m/^run$/ || m/^do$/) && do {
352 my $query = join(' ', @tok[1..$#tok]);
354 unless (defined $q->{'_RUN_LEVEL'} && $q->{'_RUN_LEVEL'} < 2) {
355 error
('No query currently set');
370 $state{query
} = $query;
372 $state{session_id
} = $q->_session_id;
375 $state{confirm
} && do { next CMD
unless getConfirm
(); };
378 $seqio = $db->get_Stream_by_query($q);
386 $state{curct
} = $q->count;
392 error
('No sequences returned');
405 if (grep(/$tok[1]/, @cmds)) {
406 print $help{$tok[1]}, "\n";
409 error
("Command \'$tok[1]\' unrecognized\n");
413 (m/^state$/ || m/^config$/) && do {
414 foreach my $k (sort keys %state) {
415 if (ref($state{$k}) eq 'HASH') {
416 print "$k: ".$state{$k}->{text
}."\n";
420 if ($k eq 'confirm') {
421 print $state{$k} ?
"yes" : "no";
425 print "$state{$k}\n";
432 error
("Command currently unimplemented\n");
443 if ($msg =~ /MSG:/) {
444 ($msg) = grep (/^MSG:/, split(/\n|\r/,$msg));
446 $msg =~ s/\sat\s.*$//;
448 print STDERR
"hivq: $msg\n";
453 print (($state{curct
} ?
$state{curct
} : "No")
455 . ($state{curct
}>1 ?
"s" : "")
457 print "Query: ".$state{query
}."\n";
464 while (my $seq = $seqio->next_seq) {
466 $nameline .= $seq->annotation->get_value('Special', 'accession').
468 # loop through categories:
469 foreach my $cat ($seq->annotation->get_keys()) {
470 foreach my $an ($seq->annotation->get_keys($cat)) {
471 next if ($an eq 'accession');
472 my $value = $seq->annotation->get_value($cat, $an);
473 # next line: kludge to skip if there's an annotation
474 # object instead of a value (I believe this is a bug)
476 $nameline .= "\t".join('=', "'$an'", "'".$value."'");
479 push @ret, $nameline."\n";
480 push @ret, $seq->seq()."\n";
488 error
("No sequences to output");
490 my $fh = $state{outfile
}->{handle
};
492 print "Download complete\n";
497 print "Run query? [y/n]";
499 ($ans =~ /^[yY]/) && do {return 1;};
504 print "Available commands:\n";
505 outputInColumns
(\
@cmds);
511 for my $arg ($tok[1]) {
513 print $help{find
},"\n";
516 ($arg =~ m/^tables$/) && do {
517 outputInColumns
([$schema->tables]);
520 ($arg =~ m/^fields$/) && do {
523 outputInColumns
([$schema->fields]);
526 unless (grep /^$tbl$/, $schema->tables) {
527 error
("Table \'$tbl\' not valid");
530 outputInColumns
([grep /^$tbl/, $schema->fields()]);
534 ($arg =~ /^options$/) && do {
536 my @aliases = $schema->aliases($fld);
538 unless (grep /^$fld$/, $schema->aliases) {
539 error
("Field \'$tok[2]\' not valid");
542 foreach ($schema->fields) {
543 if ( grep /$fld/, $schema->aliases($_) ) {
548 # on success: $fld is set to valid field
550 my @options = sort {$a cmp $b} $schema->options($fld);
551 @options = (@options ?
@options : ('Free text'));
552 outputInColumns
(\
@options);
556 ($arg =~ /^alias/) && ($arg = $tok[2]);
557 if (grep /$arg/, $schema->fields) {
558 my @aliases = sort $schema->aliases($arg);
560 outputInColumns
(\
@aliases);
563 error
("No aliases to field \'$arg\'");
567 error
("Field \'$arg\' not valid");
577 sub outputInColumns
{
578 my ($items, $n, $w) = @_;
581 my $maxl = length([sort {length($b)<=>length($a)} @items]->[0]);
582 $n ||= int(60/($maxl+3)) || 1;
584 my $coll = int(@items/$n);
585 $coll == @items/$n ?
$coll : ++$coll;
587 for my $j (0..$n-1) {
589 $t[$j] = [@items[$j*$coll..$j*$coll+$coll-1]];
592 $t[$j] = [@items[$j*$coll..$#items]];
595 @items = map { my $j = $_; map { $t[$_]->[$j] || () } (0..$#t) } (0..scalar(@
{$t[0]})-1);
597 $_ .= (' ' x
($w-length($_)));
599 while ($i < @items) {
600 print join('', map { $_ || () } @items[$i..$i+$n-1] ),"\n";
607 print STDERR
shift()."\n" if $opt_v;