Minor edits
[bioperl-live.git] / examples / bioperl.pl
blobde65ce7c6ef6dbf0b1ae3e0458752f5a26831f3e
1 #!/usr/bin/perl
3 # bioperl.pl
4 # cjm@fruitfly.org
6 use strict;
7 use lib '.';
8 no strict "vars";
9 use Data::Dumper;
10 use Bio::Perl;
11 use Bio::SeqIO;
12 use Getopt::Long;
13 my $h = {};
15 GetOptions($h,
16 "file|f=s",
18 my @cmds = get_default_cmds();
19 shell($h, \@cmds, @ARGV);
21 # prepare for some seriously hacky code....
22 sub shell {
23 my $h = shift;
24 my @cmds = @{shift || []};
25 my @args = @_;
26 my $prompt = $ENV{BIOPERL_PROMPT} || "BioPerl> ";
27 my $quit = 0;
28 my @lines = ();
29 my $r;
30 my $rv;
31 my $seq;
32 my @pseqs = ();
33 my $seqio;
34 my $wseqio;
35 my $fastadb;
36 my $options =
37 {echo=>0, chatty=>10};
39 my $loadfn = $h->{'file'};
40 if ($loadfn) {
41 @lines = ("load '$loadfn'");
44 sub hr {
45 print "\n===============================\n";
48 sub nl {
49 print "\n";
52 sub demo {
53 if (! -d 't/data') {
54 print "To run the demo, you must be in the bioperl directory\n";
56 @lines =
57 split(/\n/,
59 %keep = %$options;
60 +format ''
61 +outformat ''
62 +echo 1
63 # BioPerl shell utility - Demo
65 # We're now going to take a tour
66 # through some of the features of
67 # this tool.
70 # This demo will go through some of
71 # the major commands, feeding you
72 # the commands as you go. all you have
73 # to do is hit <ENTER> every time
74 # you see the prompt $prompt
75 # you will then see the output of
76 # the command on your terminal window.
77 # type 'q' to end the tour
78 # at any time.
80 waitenter
81 # PARSING GENBANK RECORDS
82 # -----------------------
83 # to parse genbank files, use
84 # the read_seq() method, or
85 # simply use the '<' command.
87 # First of all we're going to
88 # take a look at the file
89 # 't/data/test.genbank'
90 # Let's examine the file itself
91 # using the unix command "cat"
92 # (you can use any unix command
93 # using the ! at the beginning
94 # of a line)
95 ^!cat t/data/test.genbank
96 waitenter
97 # Ok, you can see this is a
98 # typical file of genbank records.
99 # Let's get the first sequence
100 # from the file
101 ^<t/data/test.genbank
102 waitenter
103 # we have parsed the first
104 # record of the file, and placed
105 # the sequence object into
106 # the variable $seq
108 # if you are familiar with perl
109 # objects and the bioperl object
110 # model, you can interact with
111 # the object; for instance, to
112 # display the residues we use the
113 # seq() method like this:
114 ^print $seq->seq()
115 waitenter
117 # we can cycle through all the
118 # sequences in the file using
119 # the ',' command.
121 waitenter
122 # this fetched the second sequence
123 # and placed it in the $seq variable
125 # we can change the output format
126 # by setting the 'outformat' parameter
127 # like this:
128 ^+outformat fasta
130 waitenter
131 # now the sequences are output in
132 # fasta format
133 # to change to embl format:
134 ^+outformat embl
136 waitenter
137 # we can also fetch _all_ seqs from
138 # a file; for this example we will
139 # use t/data/swiss.dat, which is in
140 # swiss format. usually bioperl can guess
141 # the file format from the file extension
142 # but this isn't possible here, so we
143 # must help by setting the input format:
144 ^+format swiss
145 # now lets get all the sequences, like this:
146 ^<*t/data/swiss.dat
147 waitenter
148 # typing <* is equivalent to
149 # using the read_seqs() function,
150 # like this:
151 ^read_seqs('t/data/swiss.dat')
152 waitenter
153 # we now have all the sequences in
154 # the array @seqs
155 # we can write these all out as fasta
156 ^+outformat fasta
158 # we can also write these out to a file:
159 ^>*myfile.tmp
160 ^!cat myfile.tmp
162 # RANDOM ACCESS OF FASTA FILES
163 # END
164 +echo 0
165 %$options = %keep
168 @lines =
169 map {
170 s/^ *//;
172 } @lines;
175 sub error {
176 if ($error) {
177 print "Error:\n$error";
179 else {
180 print "No errors have been reported\n";
184 sub fmt {
185 $options->{format} = shift if @_;
186 print "format=$options->{format}\n";
189 # should this move to Bio::Perl ?
190 sub seqio {
191 my $filename = shift;
192 $options->{format} = shift if @_;
194 if( !defined $filename ) {
195 warn "read_sequence($filename) - usage incorrect";
198 if( defined $options->{format} ) {
199 $seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $options->{format});
200 } else {
201 $seqio = Bio::SeqIO->new( '-file' => $filename);
204 $seqio;
207 sub wseqio {
208 my $filename = shift;
209 $options->{format} = shift if @_;
211 my @args = ();
212 if ($filename && $filename !~ /^\>/) {
213 $filename = ">$filename";
215 push(@args, -file => "$filename") if $filename;
216 push(@args, -fh => \*STDOUT) unless $filename;
217 push(@args, -format => $options->{outformat}) if $options->{outformat};
218 $wseqio = Bio::SeqIO->new( @args );
220 $wseqio;
223 sub show_seq {
224 return unless $seq;
225 if ($wseqio) {
226 $wseqio->write_seq($seq);
228 else {
229 printf "seq display id: %s\n", $seq->display_id;
233 sub addseq {
234 push(@pseqs, @_);
235 while (scalar(@pseqs) > 50) {
236 # todo - history variable
237 shift @pseqs;
241 sub next_seq {
242 if ($seqio) {
243 eval {
244 $seq = $seqio->next_seq;
246 if ($@) {
247 $error = $@;
248 print "There was an error getting the seq. Type 'error'\n";
249 print "for full details\n";
250 print "(Maybe you have to explicitly set the format?)";
252 addseq($seq);
254 else {
255 print STDERR "use read_seq first\n";
257 show_seq;
258 $seq;
261 sub next_seqs {
262 @seqs = ();
263 if ($seqio) {
264 while ($seq = $seqio->next_seq) {
265 printf "%s\n", $seq->display_id;
266 push(@seqs, $seq);
269 $seq = $seqs[$#seqs] if @seqs;
270 @seqs
273 sub read_seq {
274 seqio(@_);
275 next_seq();
278 sub read_seqs {
279 seqio(@_);
280 next_seqs();
283 sub write_seq {
284 wseqio(@_);
285 $wseqio->write_seq($seq) if $seq;
288 sub write_seqs {
289 wseqio(@_);
290 map {
291 $wseqio->write_seq($_)
292 } @seqs;
295 sub pod {
296 if (!-d "Bio") {
297 print "You need to be in the bioperl directory!\n";
299 else {
300 my $mod = shift;
301 unix("pod2text", "Bio/$mod.pm");
304 sub fastadb {
305 require "Bio/DB/Fasta.pm";
306 my $f = shift;
307 $fastadb = Bio::DB::Fasta->new($f);
308 print "Set \$fastadb object\n";
309 $fastadb;
312 sub subseq {
313 if (!$fastadb) {
314 fastadb(shift);
316 $seq = $fastadb->get_Seq_by_id(shift);
317 if (@_) {
318 printf "%s\n",
319 $seq->subseq(@_);
321 $seq;
324 sub load {
325 open(F, shift);
326 @lines = map {chomp;$_} <F>;
327 close(F);
330 sub waitenter {
331 print "<hit ENTER to continue>";
332 <STDIN>;
335 sub showintro {
337 print "This is a text-based commandline interface to BioPerl;\n";
338 print "\n";
341 sub checkoptions {
344 sub showoptions {
345 my $k = shift;
346 my @k = defined $k ? ($k) : keys %$options;
347 foreach my $ok ($k) {
348 my $v = sprintf("%s", $options->{$k});
349 if ($v =~ /HASH/) {
350 # hide perl internal details
351 # from user; if they are experienced
352 # perlhackers they can just
353 # type "x $options" to see the
354 # gory details
355 $v = "undisplayable";
357 printf("%20s:%s\n",
358 $ok,
359 $b);
363 sub set {
364 my ($k,$v) = @_;
365 if (defined($v)) {
366 $options->{$k} = $v;
367 checkoptions;
369 else {
370 showoptions($k);
372 # if ($k eq "format") {
373 # seqio;
375 if ($k eq "outformat") {
376 wseqio;
380 sub echo {
381 my $e = shift;
382 if (defined($e)) {
383 set("echo", $e);
385 else {
386 set("echo", !$options->{echo});
390 sub options {
391 map {print "$_ = $options->{$_}\n"} keys%$options;
394 sub showcommands {
396 print "BioPerl Shell Commands:\n";
397 my $layout = "%5s : %-20s - %s\n";
398 printf $layout, "cmd", "function", "summary";
399 printf "%s\n", ("-" x 40);
400 foreach my $c (@cmds) {
401 my $sc = $c->{shortcut};
402 $sc =~ s/\\//g;
403 printf($layout,
404 $sc,
405 $c->{'func'} . "()",
406 $c->{'summary'}
412 sub showexamples {
413 print "\nExamples:\n-------\n";
416 sub showvariables {
418 print "Shell variables:\n";
419 print q[
420 $seq : Bio::SeqI object
421 $seqio : Bio::SeqIO object
422 @pseqs : array of previous Bio::SeqI objects
427 sub welcome {
428 print "Welcome to the BioPerl shell interface!\n\n";
429 print "\n\nType 'help' for instructions\n";
430 print "\n\nType 'demo' for demonstration\n";
431 print "\n\nThis is ALPHA software - commands may change\n";
432 print "-lots more commands need to be added to take full\n";
433 print "advantage of the bioperl functionality\n\n";
436 sub help {
437 my $topic = shift;
438 my $c;
439 if ($topic) {
440 ($c) = grep {$_->{func} eq $topic} @cmds;
442 if ($c) {
443 print "Function: $c->{func}\n";
444 print "Shortcut: $c->{shortcut}\n" if $c->{shortcut};
445 print "Summary: $c->{summary}\n" if $c->{summary};
446 print "=======\n";
447 print "$c->{docs}\n" if $c->{docs};
449 elsif ($topic eq "advanced") {
453 else {
455 print "\nBioPerl Shell Help\n\n";
456 showintro;
457 waitenter;
458 showcommands;
459 waitenter;
460 showvariables;
461 waitenter;
462 showexamples;
466 print "Type \"demo\" for an interactive demo of commands\n\n";
467 print "Type \"help advanced\" for advanced options\n\n";
473 sub p {
474 print shift;
475 print "\n";
478 sub x {
479 print Dumper shift;
480 print "\n";
483 # trick to allow barewords as keywords...
484 sub advanced {"advanced"}
486 sub unix {
487 my @cmds = @_;
488 my $c = join(" ", @cmds);
489 print `$c`;
492 welcome;
493 require Term::ReadLine;
494 require Shell;
496 checkoptions;
497 print "\n";
498 my $termline = shift || Term::ReadLine->new($prompt);
500 my $rcfile = "$ENV{HOME}/.goshellrc";
501 if (-f $rcfile) {
502 open(F, $rcfile);
503 @lines = <F>;
504 close(F);
507 my $end_signal = "";
508 while (!$quit) {
509 if ($end_signal) {
510 @lines = ($lines);
511 while ($end_signal && ($line = $termline->readline("? "))) {
512 if($line !~ /$end_signal/) {
513 $lines[0].= "\n$line";
515 else {
516 $end_signal = "";
519 next;
521 my $line =
522 @lines ? shift @lines : $termline->readline($prompt);
523 if ($line =~ /^\^/) {
524 $line =~ s/^\^//;
525 print "$prompt$line";
526 my $e = <STDIN>;
527 if ($e =~ /^q/) {
528 $line = "";
529 @lines = ();
532 if ($options->{echo} && $line !~ /\+?wait/) {
533 if ($line =~ /^\#/) {
534 print "$line\n";
536 else {
537 print "$prompt$line\n";
539 if ($options->{sleep}) {
540 sleep $options->{sleep};
542 if ($options->{wait}) {
543 sleep $options->{wait};
546 my ($cmd, @w) = split(' ',$line);
548 $_ = $cmd;
549 if (/^\<\<(.*)/) {
550 $end_signal = $1;
553 # check for shortcuts
554 my $selected;
555 foreach my $c (@cmds) {
556 my $shortcut = $c->{'shortcut'};
557 next unless $shortcut;
558 if ($line =~ /^$shortcut(.*)/) {
559 if (!defined($selected) ||
560 length($shortcut) > length($selected->{shortcut} || "")) {
561 # get the most specific match
562 $selected = $c;
566 if ($selected) {
567 my $shortcut = $selected->{'shortcut'};
568 if ($line =~ /^$shortcut(.*)/) {
569 my @w = map {"'".$_."'" } split(' ', $1);
570 $line = $selected->{'func'}." ".join(", ", @w);
574 $rv = eval $line;
575 # print "\n";
576 # print "RV=$rv;;;\n";
577 if ($@) {
578 print STDERR $@;
580 if ($options->{sleep}) {
581 sleep $options->{sleep};
583 if ($options->{wait}) {
584 sleep $options->{wait};
585 $options->{wait} = 0;
591 sub get_default_cmds {
592 my @cmds =
595 func => 'read_seq',
596 shortcut => '\<',
597 summary => 'read a Seq from a file',
601 func => 'next_seq',
602 shortcut => ',',
603 summary => 'get the next Seq',
607 func => 'read_seqs',
608 shortcut => '\<\*',
609 summary => 'read all Seqs from a file',
613 func => 'write_seq',
614 shortcut => '\>',
615 summary => 'write a Seq to screen/file',
619 func => 'write_seqs',
620 shortcut => '\>\*',
621 summary => 'write a Seq to screen/file',
625 func => 'fastadb',
626 shortcut => 'fa',
627 summary => 'fast fasta access',
631 func => 'subseq',
632 summary => 'get a subseq from a fastadb',
636 func => 'set',
637 shortcut => '\+',
638 summary => 'set a shell parameter',
642 func => 'unix',
643 shortcut => '\!',
644 summary => 'run a unix command',
648 func => 'x',
649 summary => 'display variable (and internals) using dumper',
653 return @cmds;