examples/tools/seq_pattern.pl

   1 #!/usr/bin/env perl
   2
   3 #-----------------------------------------------------------------------------
   4 # PROGRAM : seq_pattern.pl
   5 # PURPOSE : This is a simple driver used to test the Bio::Tools::SeqPattern.pm
   6 #           module for working with sequence patterns (regexps that recognize
   7 #           nucleotide or peptide sequences).
   8 # AUTHOR  : Steve Chervitz (sac@bioperl.org)
   9 # CREATED : 28 Aug 1997
  10 # REVISION: $Id$
  11 # USAGE   : seq_pattern.pl -h
  12 # COMMENTS:
  13 #    This is a driver script for the Bio::Tools::SeqPattern.pm Bioperl module
  14 #    that can be used for working with both nucleotide and peptide sequence and
  15 #    offers features such as:
  16 #
  17 #      -- generate reverse complement of sequence pattern
  18 #      -- ensuring pattern has no invalid characters
  19 #      -- untainting pattern
  20 #      -- expanding ambiguity codes.
  21 #
  22 #     Functionality is not yet complete but it may be of use as-is.
  23 #
  24 # INSTALLATION
  25 #    Edit the use lib "...." line to point the directory
  26 #    containing your Bioperl modules.
  27 #
  28 # DOCUMENTATION:
  29 #  http://genome-www.stanford.edu/perlOOP/bioperl/lib/Bio/Tools/SeqPattern.pm.html
  30 #
  31 #-----------------------------------------------------------------------------
  32
  33 use lib "/Users/steve/lib/perl";
  34 use Bio::Tools::SeqPattern ();
  35 use Getopt::Std;
  36
  37 $opt_h = 0;
  38 $opt_n = 0;
  39 $opt_p = 0;
  40 $opt_r = 0;
  41
  42 getopts('hnprv:');
  43 $pat = $ARGV[0] || '';
  44
  45 $opt_h and die <<"QQ_USAGE_QQ";
  46
  47 Usage: seq_pattern.pl [-n|p|r|h] 'REGEXP'
  48
  49  regexp : full-regular expression for a nucleotide or peptide sequence.
  50           Must be listed *after* one of the following options:
  51  -n     : interpret regexp as a nucleotide pattern.
  52  -p     : interpret regexp as a peptide pattern.
  53  -r     : output only the reverse complement of the nucleotide pattern.
  54  -h     : print usage.
  55
  56 QQ_USAGE_QQ
  57
  58
  59 ## Nucleotide test patterns (most are based on actual patterns submitted by users):
  60
  61 %nucpat = (1 =>'YR...CG(CCG){5,7}CG[^G]TN{10,}[SA]{4}NN(ACA){2,}GCGTTT.{20,40}GT>',
  62            2 =>'cggnnn[ta][ta][ta]n{3,5}[ta][ta][ta]nnnccg',
  63            3 =>'<ATGX{6,10}RTTRTT',
  64            4 =>'cggnnnwwwn{3,5}wwwnnnccg',
  65            5 =>'(CCCCT)N{1,200}(agggg)N{1,200}(agggg)',
  66            6 =>'cccct{2,}',
  67            7 =>'(a){10,40}',
  68            8 =>'(cag){36,}',
  69            9 =>'rgaatgx{2,}ygtttca(cag){5,}',
  70            10 =>'yattgtt(n){20,80}yattgtt',
  71            11 =>'yattgtt(aca){20,80}yattgtt',
  72            12 =>'TATAAAN{30,100}[AT][CAT][AT]YCAAR[CAT][AT][CAT]',
  73            13 =>'TGACTC[N]{1,300}TGACTC',
  74            14 =>'TGACTCN*GAGTCAN*GAGTCAN*TGACTC',
  75            15 =>'TGACTC(TCA)*GAGTCA',
  76            16 =>'TGACTCN*GAG(TCA)*GAGTCA',
  77            17 =>'[at][at]ttcacatgy',
  78            );
  79
  80 %peppat = (1 =>'<X{10,}[WFY]XXXDN[BK][ST]Z{5,}>',
  81            2 =>'<x{10,40}[gas]x[gasct]x*[gascdn]x[gas]x{0,10}[bst]{8,}x{0,8}>',
  82            );
  83
  84 #----------------------
  85 # Main
  86
  87 if($opt_r) {
  88     print Bio::Tools::SeqPattern->new(-SEQ =>$pat, -TYPE =>'Dna')->revcom->str,"\n";
  89
  90 } else {
  91     test_nuc($pat) if ($opt_n and !$opt_p);
  92     test_pep($pat) if ($opt_p and !$opt_n);
  93     (test_nuc($pat), test_pep($pat)) if !($opt_p or $opt_n);
  94 }
  95
  96 exit 0;
  97
  98 #----------------------
  99
 100 sub test_nuc {
 101 # Create nucleotide pattern object:
 102     my $pat = shift;
 103     $pat ||= $nucpat{9};
 104
 105     $npat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Dna');
 106
 107     print "\nNucleotide Pattern:\n";
 108     print "-----------------------\n";
 109     printf "%18s: %s\n", 'Type',    $npat->type;
 110     printf "%18s: %s\n", 'Original',$npat->str;
 111     printf "%18s: %s\n", 'Expanded', $npat->expand;
 112     printf "%18s: %s\n", 'Reverse-Comp', $npat->revcom->str;
 113     printf "%18s: %s\n", 'Rev-Comp+Expanded', $npat->revcom(1)->str; # Hate this syntax. May change.
 114     print "\n";
 115 }
 116
 117
 118 sub test_pep {
 119 # Create peptide pattern object:
 120     my $pat = shift;
 121     $pat ||= $peppat{1};
 122
 123     $ppat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Amino');
 124
 125     print "\nPeptide Pattern:\n";
 126     print "-----------------------\n";
 127     printf "%18s: %s\n", 'Type',    $ppat->type;
 128     printf "%18s: %s\n", 'Original',$ppat->str;
 129     printf "%18s: %s\n", 'Expanded', $ppat->expand;
 130     print "\n";
 131 }
 132
 133
 134
 135
 136
 137