Sync that last bit with trunk. I'll have to merge that over to the tag for the next RC.
[bioperl-live.git] / examples / tools / seq_pattern.pl
blob4f1a4751d7d73ae1a8f1ac979cc49e9880d54bfa
1 #!/usr/bin/env perl
3 #-----------------------------------------------------------------------------
4 # PROGRAM : seq_pattern.pl
5 # PURPOSE : This is a simple driver used to test the Bio::Tools::SeqPattern.pm
6 # module for working with sequence patterns (regexps that recognize
7 # nucleotide or peptide sequences).
8 # AUTHOR : Steve Chervitz (sac@bioperl.org)
9 # CREATED : 28 Aug 1997
10 # REVISION: $Id$
11 # USAGE : seq_pattern.pl -h
12 # COMMENTS:
13 # This is a driver script for the Bio::Tools::SeqPattern.pm Bioperl module
14 # that can be used for working with both nucleotide and peptide sequence and
15 # offers features such as:
17 # -- generate reverse complement of sequence pattern
18 # -- ensuring pattern has no invalid characters
19 # -- untainting pattern
20 # -- expanding ambiguity codes.
22 # Functionality is not yet complete but it may be of use as-is.
24 # INSTALLATION
25 # Edit the use lib "...." line to point the directory
26 # containing your Bioperl modules.
28 # DOCUMENTATION:
29 # http://genome-www.stanford.edu/perlOOP/bioperl/lib/Bio/Tools/SeqPattern.pm.html
31 #-----------------------------------------------------------------------------
33 use lib "/Users/steve/lib/perl";
34 use Bio::Tools::SeqPattern ();
35 use Getopt::Std;
37 $opt_h = 0;
38 $opt_n = 0;
39 $opt_p = 0;
40 $opt_r = 0;
42 getopts('hnprv:');
43 $pat = $ARGV[0] || '';
45 $opt_h and die <<"QQ_USAGE_QQ";
47 Usage: seq_pattern.pl [-n|p|r|h] 'REGEXP'
49 regexp : full-regular expression for a nucleotide or peptide sequence.
50 Must be listed *after* one of the following options:
51 -n : interpret regexp as a nucleotide pattern.
52 -p : interpret regexp as a peptide pattern.
53 -r : output only the reverse complement of the nucleotide pattern.
54 -h : print usage.
56 QQ_USAGE_QQ
59 ## Nucleotide test patterns (most are based on actual patterns submitted by users):
61 %nucpat = (1 =>'YR...CG(CCG){5,7}CG[^G]TN{10,}[SA]{4}NN(ACA){2,}GCGTTT.{20,40}GT>',
62 2 =>'cggnnn[ta][ta][ta]n{3,5}[ta][ta][ta]nnnccg',
63 3 =>'<ATGX{6,10}RTTRTT',
64 4 =>'cggnnnwwwn{3,5}wwwnnnccg',
65 5 =>'(CCCCT)N{1,200}(agggg)N{1,200}(agggg)',
66 6 =>'cccct{2,}',
67 7 =>'(a){10,40}',
68 8 =>'(cag){36,}',
69 9 =>'rgaatgx{2,}ygtttca(cag){5,}',
70 10 =>'yattgtt(n){20,80}yattgtt',
71 11 =>'yattgtt(aca){20,80}yattgtt',
72 12 =>'TATAAAN{30,100}[AT][CAT][AT]YCAAR[CAT][AT][CAT]',
73 13 =>'TGACTC[N]{1,300}TGACTC',
74 14 =>'TGACTCN*GAGTCAN*GAGTCAN*TGACTC',
75 15 =>'TGACTC(TCA)*GAGTCA',
76 16 =>'TGACTCN*GAG(TCA)*GAGTCA',
77 17 =>'[at][at]ttcacatgy',
80 %peppat = (1 =>'<X{10,}[WFY]XXXDN[BK][ST]Z{5,}>',
81 2 =>'<x{10,40}[gas]x[gasct]x*[gascdn]x[gas]x{0,10}[bst]{8,}x{0,8}>',
84 #----------------------
85 # Main
87 if($opt_r) {
88 print Bio::Tools::SeqPattern->new(-SEQ =>$pat, -TYPE =>'Dna')->revcom->str,"\n";
90 } else {
91 test_nuc($pat) if ($opt_n and !$opt_p);
92 test_pep($pat) if ($opt_p and !$opt_n);
93 (test_nuc($pat), test_pep($pat)) if !($opt_p or $opt_n);
96 exit 0;
98 #----------------------
100 sub test_nuc {
101 # Create nucleotide pattern object:
102 my $pat = shift;
103 $pat ||= $nucpat{9};
105 $npat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Dna');
107 print "\nNucleotide Pattern:\n";
108 print "-----------------------\n";
109 printf "%18s: %s\n", 'Type', $npat->type;
110 printf "%18s: %s\n", 'Original',$npat->str;
111 printf "%18s: %s\n", 'Expanded', $npat->expand;
112 printf "%18s: %s\n", 'Reverse-Comp', $npat->revcom->str;
113 printf "%18s: %s\n", 'Rev-Comp+Expanded', $npat->revcom(1)->str; # Hate this syntax. May change.
114 print "\n";
118 sub test_pep {
119 # Create peptide pattern object:
120 my $pat = shift;
121 $pat ||= $peppat{1};
123 $ppat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Amino');
125 print "\nPeptide Pattern:\n";
126 print "-----------------------\n";
127 printf "%18s: %s\n", 'Type', $ppat->type;
128 printf "%18s: %s\n", 'Original',$ppat->str;
129 printf "%18s: %s\n", 'Expanded', $ppat->expand;
130 print "\n";