6 bp_dbsplit - script to split an input set of database(s) into smaller pieces
10 bp_dbsplit.PLS --size 50 [-i inputfile] [-if inputformat] [-of outputformat]
11 [--prefix outputprefix] [ < file1 file2 OR file1 file2]
15 This script will take as input a list of filenames or a single file or
16 from STDIN a sequence database and split the database into separate
17 files of X sequences each. You specify X with the C<--size/-s>
18 parameter. The input and output sequence format is any that is
19 supported by bioperl (fasta,embl,genbank,gcg, swissprot, etc).
21 You can specify the input data either as a single file with -i
22 filename, or as one or more files given as arguments, like
24 % bp_dbsplit file1 file2
26 or as a stream of sequence data on STDIN with
28 % cat file1 file2 file3 | bp_dbsplit
30 You'll want to use the C<--prefix> to specify what the output prefix will be.
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 of the bugs and their resolution. Bug reports can be submitted via the web:
50 https://github.com/bioperl/bioperl-live/issues
54 Jason Stajich, jason-at-bioperl-dot-org
61 use Bio
::SeqIO
::MultiFile
;
# Main flow of the splitter: pick defaults, read options, choose an input
# reader, then copy sequences into numbered output files "$prefix.$count".
# NOTE(review): several lines of this script are not visible in this excerpt
# (declarations of $dbsize/$prefix/$in/$count/$scount, the opening of the
# option list, and the if/else conditions around the readers) -- confirm
# against the full file before changing anything.

# Both the input and output format default to 'fasta'; $infile receives no
# value from this list assignment and so starts out undefined.
66 my ($informat,$outformat,$infile) = ( 'fasta', 'fasta');
# Option specs, each bound by reference to the variable it fills:
#   s | size   -> $dbsize
#   of         -> $outformat
#   p | prefix -> $prefix
# NOTE(review): presumably arguments to Getopt::Long's GetOptions (the call
# itself is not visible here); under that module ':s' marks an optional
# string value.
69 's|size:s' => \
$dbsize,
71 'of:s' => \
$outformat,
73 'p|prefix:s' => \
$prefix,
# Take the next remaining command-line argument as the input file.
# NOTE(review): any condition guarding this statement is outside this excerpt.
77 $infile = shift @ARGV;
# If no prefix was given, fall back in order to: the input file name, the
# first remaining argument, and finally the literal 'db'.
79 $prefix ||= $infile || $ARGV[0] || 'db';
# Reader over every file still left in @ARGV; the format falls back to
# 'fasta' when $informat is false.
83 $in = new Bio
::SeqIO
::MultiFile
(-files
=> [@ARGV],
84 -format
=> $informat || 'fasta');
# Reader over the single file named by $infile.
# NOTE(review): the rest of this argument list is not visible here.
86 $in = new Bio
::SeqIO
(-file
=> $infile,
# Reader built with a format only (no -file / -files argument); presumably
# this is the STDIN case described in the documentation -- TODO confirm.
89 $in = new Bio
::SeqIO
(-format
=> $informat);
# Writer for the current output piece, named "$prefix.$count"; the leading
# '>' in the -file value asks Bio::SeqIO to open the file for writing.
92 my $out = new Bio
::SeqIO
(-format
=> $outformat,
93 -file
=> ">$prefix.$count");
# Copy sequences one at a time from the reader to the current writer.
95 while( my $seq = $in->next_seq ) {
# $scount is incremented for every sequence; once it exceeds $dbsize (and
# $count is true) a new writer is opened for "$prefix.$count".
# NOTE(review): the statements that close the old writer and update
# $count / $scount are not visible in this excerpt.
96 if( ++$scount > $dbsize && $count ) {
100 $out = new Bio
::SeqIO
(-format
=> $outformat,
101 -file
=> ">$prefix.$count");
# Every sequence read is written to whichever writer is current.
104 $out->write_seq($seq);