2 #$Id: hivq.PL 362 2009-01-23 22:45:51Z maj $#
4 # command-line script for HIV sequence queries
5 # using HIV.pm and HIVQuery.pm
9 hivq.PL - an interactive command-line interface to L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
14 hivq> query C[subtype] SI[phenotype]
17 Query: C[subtype] SI[phenotype]
22 hivq> run D[subtype] SI[phenotype]
26 Query: D[subtype] SI[phenotype]
32 The BioPerl modules L<Bio::DB::HIV> and L<Bio::DB::Query::HIVQuery>
33 together allow batch queries against the Los Alamos National
34 Laboratories' HIV Sequence Database using a simple query
35 language. C<hivq.PL> provides both an example of the use of these
36 modules, and a standalone interactive command-line interface to the
37 LANL HIV DB. Simple commands allow the user to retrieve HIV sequences
38 and annotations using the query language implemented in
39 L<Bio::DB::Query::HIVQuery>. Visit the man pages for those modules for
44 Run the script using C<perl hivq.PL> or, in Unix, C<./hivq.PL>. You will see
49 prompt. Type commands with queries to retrieve sequence and annotation
50 data. See the SYNOPSIS for a sample session. Available commands
55 The LANL database is pretty complex and extensive. Use the C<find> facility to
56 explore the available database tables and fields. To identify aliases for a particular field, use C<find alias [fieldname]>. For example, to find a short alias to the
57 weirdly named field C<seq_sample.ssam_second_receptor>, do
59 hivq> find alias seq_sample.ssam_second_receptor
63 coreceptor second_receptor
65 Now, instead of the following query
67 hivq> run C[subtype] CCR5[seq_sample.ssam_second_receptor]
71 hivq> run C[subtype] CCR5[coreceptor]
73 Use the C<outfile> command to set the file that receives the retrieved
74 sequences. You can change the current output file simply by issuing a
75 new C<outfile> command during the session. The output file defaults to
78 Use the C<query> command to validate a query without hitting the
79 database. Use the C<prerun> or C<count> commands to get a count of
80 sequence hits for a query without retrieving the data. Use C<run> or
81 C<do> to perform a complete query, retrieving sequence data into the
82 currently set output files.
84 To process C<hivq.PL> commands in batch, create a text file
85 (C<hivq.cmd>, for example) containing desired commands one per
86 line. Then execute the following from the shell:
88 $ cat hivq.cmd | perl hivq.PL
92 Here is a complete list of commands. Options in single brackets (C<[req_option]>) are
93 required; options in double brackets (C<[[opt_option]]>) are optional.
95 confirm : Toggle interactive confirmation before
98 find : Explore database schema
99 find tables Display all database tables
100 find fields Display all database fields (columns)
101 find fields [table] Display all fields in [table]
102 find alias [field] Display valid aliases for [field]
103 help [[command]] : Show command help
104 if [[command]] not specified, list all
106 id : Display current session id
107 outfile [filename] : Set file for collecting retrieved data
108 ping : Check if LANL DB is available
109 prerun [[query]] : Execute query but retreive hit count only
110 if [[query]] not specified, use current query
111 query [query] : Validate and set current query
112 run [[query]] : Execute query and retrieve data
113 if [[query]] not specified, use current query
114 state : Display current state of the script
116 bye : Alias for 'exit'
117 config : Alias for 'state'
118 count : Alias for 'prerun'
120 out : Alias for 'outfile'
121 quit : Alias for 'exit'
125 -v : verbose; turns on the internal debug() function
131 User feedback is an integral part of the evolution of this and other
132 Bioperl modules. Send your comments and suggestions preferably to
133 the Bioperl mailing list. Your participation is much appreciated.
135 bioperl-l@bioperl.org - General discussion
136 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
138 =head2 Reporting Bugs
140 Report bugs to the Bioperl bug tracking system to help us keep track
141 of the bugs and their resolution. Bug reports can be submitted via the
144 https://redmine.open-bio.org/projects/bioperl/
146 =head1 AUTHOR - Mark A. Jensen
148 Mark A. Jensen E<lt>maj@fortinbras.usE<gt>
156 use Bio::DB::Query::HIVQuery;
163 my $db = new Bio::DB::HIV;
165 my $q = new Bio::DB::Query::HIVQuery();
166 my $schema = $q->_schema;
190 'outfile' => {'text'=>'[stdout]','handle'=>\*STDOUT},
198 'bye' => 'bye: End script',
199 'config' => 'config: Print current configuration state of hivq',
200 'confirm' => 'confirm: Toggle user confirmation of query execution',
201 'count' => 'count [[query string]]: Get number of sequences available for query',
202 'do' => 'do [[query string]]: Run query to completion',
203 'exit' => 'exit: End script',
204 'find' => join("\n","find tables: Print all LANL table names",
205 "find fields: Print all field names",
206 "find fields [tablename]: Print all fields in specified table",
207 "find aliases [fieldname]: Print all valid aliases for specified field",
208 "find options [fieldname/alias]: Print valid match options for specified field"),
209 'help' => 'help [[command]]: Get description of command',
210 'id' => 'id: Print current session id',
211 'out' => 'out [output filename]: Set filename to hold returned sequences',
212 'outfile' => 'outfile [output filename]: Set filename to hold returned sequences',
213 'ping' => 'ping: Ping LANL database',
214 'prerun' => 'prerun [[query string]]: Get number of sequences available',
215 'query' => 'query [[query string]]: Validate and set current query string',
216 'quit' => 'quit: End script',
217 'run' => 'run [[query string]]: Run query to completion',
218 'state' => 'state: Print current configuration state of hivq',
221 debug("We have arrived.");
224 my $prompt = "hivq> ";
225 my $term = new Term::ReadLine 'hivq';
229 for ( $cmd = $term->readline($prompt) ) {
232 $term->addhistory($_);
234 $tok[0] = lc $tok[0]; # normalize
236 (!$_ || /^\#/) && do {
239 !(grep(/^$tok[0]$/, @cmds)) && do {
240 error("Unrecognized command \'$tok[0]\'");
243 (m/^quit$/ || m/^bye$/ || m/^exit$/) && do {
244 my $fh = $state{outfile}->{handle};
245 unless ($fh == \*STDOUT) {
252 $state{confirm} = !$state{confirm};
253 print "User confirmation before running query: ". ($state{confirm} ? "yes" : "no")."\n";
256 (m/^outfile$/ || m/^out$/) && do {
258 error('Output filename not specified');
263 open $fh, ">$tok[1]" or die $!;
269 my $oldfh = $state{outfile}->{handle};
270 if ($state{outfile}->{text} ne '[stdout]') {
273 $state{outfile}->{handle} = $fh;
274 $state{outfile}->{text} = $tok[1];
279 my $query = join(' ', @tok[1..$#tok]);
286 print "Current query: $state{query}\n";
289 print "No query set\n";
301 $state{session_id} = $q->_session_id;
302 $state{query} = $query;
305 (m/^prerun$/ || m/^count$/) && do {
306 my $query = join(' ', @tok[1..$#tok]);
309 error('No query currently set');
312 elsif ($q->{'_RUN_LEVEL'} >= 1 &&
313 ($q->query() eq $state{query})) {
317 # else, fallthrough to prerun current query
332 $state{confirm} && do { next CMD unless getConfirm(); };
333 $state{session_id} = $q->_session_id;
334 $state{query} = $query;
344 $state{curct} = $q->count;
345 $state{query} = $q->query;
350 (m/^run$/ || m/^do$/) && do {
351 my $query = join(' ', @tok[1..$#tok]);
353 unless (defined $q->{'_RUN_LEVEL'} && $q->{'_RUN_LEVEL'} < 2) {
354 error('No query currently set');
369 $state{query} = $query;
371 $state{session_id} = $q->_session_id;
374 $state{confirm} && do { next CMD unless getConfirm(); };
377 $seqio = $db->get_Stream_by_query($q);
385 $state{curct} = $q->count;
391 error('No sequences returned');
404 if (grep(/$tok[1]/, @cmds)) {
405 print $help{$tok[1]}, "\n";
408 error("Command \'$tok[1]\' unrecognized\n");
412 (m/^state$/ || m/^config$/) && do {
413 foreach my $k (sort keys %state) {
414 if (ref($state{$k}) eq 'HASH') {
415 print "$k: ".$state{$k}->{text}."\n";
419 if ($k eq 'confirm') {
420 print $state{$k} ? "yes" : "no";
424 print "$state{$k}\n";
431 error("Command currently unimplemented\n");
442 if ($msg =~ /MSG:/) {
443 ($msg) = grep (/^MSG:/, split(/\n|\r/,$msg));
445 $msg =~ s/\sat\s.*$//;
447 print STDERR "hivq: $msg\n";
452 print (($state{curct} ? $state{curct} : "No")
454 . ($state{curct}>1 ? "s" : "")
456 print "Query: ".$state{query}."\n";
463 while (my $seq = $seqio->next_seq) {
465 $nameline .= $seq->annotation->get_value('Special', 'accession').
467 # loop through categories:
468 foreach my $cat ($seq->annotation->get_keys()) {
469 foreach my $an ($seq->annotation->get_keys($cat)) {
470 next if ($an eq 'accession');
471 my $value = $seq->annotation->get_value($cat, $an);
472 # next line: kludge to skip if there's an annotation
473 # object instead of a value (I believe this is a bug)
475 $nameline .= "\t".join('=', "'$an'", "'".$value."'");
478 push @ret, $nameline."\n";
479 push @ret, $seq->seq()."\n";
487 error("No sequences to output");
489 my $fh = $state{outfile}->{handle};
491 print "Download complete\n";
496 print "Run query? [y/n]";
498 ($ans =~ /^[yY]/) && do {return 1;};
503 print "Available commands:\n";
504 outputInColumns(\@cmds);
510 for my $arg ($tok[1]) {
512 print $help{find},"\n";
515 ($arg =~ m/^tables$/) && do {
516 outputInColumns([$schema->tables]);
519 ($arg =~ m/^fields$/) && do {
522 outputInColumns([$schema->fields]);
525 unless (grep /^$tbl$/, $schema->tables) {
526 error("Table \'$tbl\' not valid");
529 outputInColumns([grep /^$tbl/, $schema->fields()]);
533 ($arg =~ /^options$/) && do {
535 my @aliases = $schema->aliases($fld);
537 unless (grep /^$fld$/, $schema->aliases) {
538 error("Field \'$tok[2]\' not valid");
541 foreach ($schema->fields) {
542 if ( grep /$fld/, $schema->aliases($_) ) {
547 # on success: $fld is set to valid field
549 my @options = sort {$a cmp $b} $schema->options($fld);
550 @options = (@options ? @options : ('Free text'));
551 outputInColumns(\@options);
555 ($arg =~ /^alias/) && ($arg = $tok[2]);
556 if (grep /$arg/, $schema->fields) {
557 my @aliases = sort $schema->aliases($arg);
559 outputInColumns(\@aliases);
562 error("No aliases to field \'$arg\'");
566 error("Field \'$arg\' not valid");
576 sub outputInColumns {
577 my ($items, $n, $w) = @_;
580 my $maxl = length([sort {length($b)<=>length($a)} @items]->[0]);
581 $n ||= int(60/($maxl+3)) || 1;
583 my $coll = int(@items/$n);
584 $coll == @items/$n ? $coll : ++$coll;
586 for my $j (0..$n-1) {
588 $t[$j] = [@items[$j*$coll..$j*$coll+$coll-1]];
591 $t[$j] = [@items[$j*$coll..$#items]];
594 @items = map { my $j = $_; map { $t[$_]->[$j] || () } (0..$#t) } (0..scalar(@{$t[0]})-1);
596 $_ .= (' ' x ($w-length($_)));
598 while ($i < @items) {
599 print join('', map { $_ || () } @items[$i..$i+$n-1] ),"\n";
606 print STDERR shift()."\n" if $opt_v;