5 # random sequence generator #
6 # -c=1 option will cause prot sequences to be built
7 # using vertebrate aa frequencies,
8 # with option -a putting a 1st methionine residues on. Frequencies are
9 # calculated from the NCBI human RefSeq protein sequences
10 # -c and -a only affect protein sequences
11 # -a only works in conjunction with -c
12 # -n number of random sequences, default = 1
17 my ($length,$type,$filename,$comp,$met);
19 $USAGE = 'usage: generate_random_seq.pl --length=1000 --type=dna --filename=/tmp/test.seq --number=50';
21 my %alphabets = ( 'dna' => [qw(C A G T)],
22 'rna' => [qw(C A G U)],
23 'prot'=> [qw( A C D E F G H I K L M N P Q R S T V W Y)],
25 # make random num from 1-10000. numbers in this array reflect the frequency,
26 # e.g., a random number from 1.744 = A, 745-991 = C etc;
27 my @aa_frequencies = qw(744 991 1398 2017 2378 3104 3349 3726 4239 5273 5443
28 5749 6410 6848 7455 8263 8760 9340 9488 9713 10000);
33 'l|length:s' => \
$length,
34 't|type|m|alphabet:s' => \
$type,
35 'f|file|filename:s' => \
$filename,
36 'c|composition:s' => \
$comp,
37 'a|methionine:s' => \
$met,
38 'n|number:s' => \
$number
41 assert
( $type && defined ($alphabets{lc $type}),
43 assert
( $length && $length =~ /^\d+$/, $USAGE );
45 foreach my $num (1..$number) {
47 my $alphabet = $alphabets{lc $type};
48 my $sspace = scalar @
$alphabet;
49 if (!$comp || $type ne 'prot') {
50 foreach ( 1..$length ) {
51 $sequence .= $alphabet->[ int rand($sspace) ];
53 }elsif ($type eq 'prot') {
54 $sequence = build_seq
($length, \
@aa_frequencies);
56 my $seq = Bio
::PrimarySeq
->new(-seq
=> $sequence,
57 -display_id
=> 'randomseq'.$num);
58 my %args = (-format
=> 'fasta');
59 if( $filename ) { $args{-file
} = ">>$filename" }
60 my $seqio = Bio
::SeqIO
->new(%args);
61 $seqio->write_seq($seq);
64 sub assert
{ die $_[1] unless( $_[0] ); }
67 #takes seqlen and ref to frequency data as parameters
69 my $str = ($met)?
'M':'';
72 my $aa = int(rand (10000)) ;
74 while ($pf->[$j] < $aa && $j <19) {
77 $str .= $alphabets{'prot'}[$j];
79 print "str is $str\n";