t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / scripts / seq / bp_seqcut.pl
blob86f7a12a7bb04454d5f61c5b07fa4a0fd86929e4
1 #!/usr/bin/perl
3 =head1 NAME
5 bp_seqcut.pl - cut FASTA sequences with a given range
7 =head1 USAGE
9 bp_seqcut.pl [options -h,-s,-e,-f,-w] <FILES>...
10 bp_seqcut.pl [options -h,-f,-w] s-e <FILES>...
12 -h this help message
13 -s which residue to start cutting on
14 -e which residue to finish cutting on
15 -f format of the files, defaults to FASTA but you can specify anything supported by SeqIO from BioPerl
16 -w hard wrap width, this might not be supported depending on which format you are using
18 =head1 Description
20 A script to cut FASTA sequences with a given range `fastacut -s 1 -e 10 *.fasta` or `fastacut 1-10 *.fasta`.
21 This is just a convenience wrapper around the Bio::SeqIO module. Useful if you wish to trim out a section of an
22 alignment to build a profile of a specific region of sequence.
24 =head1 AUTHOR
26 B<Matt Oates> - I<Matt.Oates@bristol.ac.uk>
28 =head1 FEEDBACK
30 =head2 Mailing Lists
32 User feedback is an integral part of the evolution of this and other
33 Bioperl modules. Send your comments and suggestions preferably to
34 the Bioperl mailing list. Your participation is much appreciated.
36 bioperl-l@bioperl.org - General discussion
37 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39 =head2 Reporting Bugs
41 Report bugs to the Bioperl bug tracking system to help us keep track
42 of the bugs and their resolution. Bug reports can be submitted via
43 email or the web:
45 https://github.com/bioperl/bioperl-live/issues
47 =head1 EDIT HISTORY
49 2010-11-22 - Matt Oates
50 First features added.
52 =head1 DEPENDENCY
54 B<Getopt::Long> Used to parse command line options.
55 B<Pod::Usage> Used for usage and help output.
56 B<Bio::SeqIO> Used to cut up sequences and parse FASTA.
58 =cut
60 use strict;
61 use warnings;
62 use Getopt::Long; #Deal with command line options
63 use Pod::Usage; #Print a usage man page from the POD comments after __END__
64 use Bio::SeqIO;
66 # Command Line Options
67 my $help; #Same again but this time should we output the POD man page defined after __END__
68 my $format = "Fasta";
69 my $start;
70 my $end;
71 my $width = 72; #Default for Jalview output
72 my $outfile = '/dev/stdout';
74 #Set command line flags and parameters.
75 GetOptions("help|h!" => \$help,
76 "start|s=s" => \$start,
77 "format|f=s" => \$format,
78 "end|e=s" => \$end,
79 "width|w=s" => \$width,
80 "outfile|o=s" => \$outfile,
81 ) or die "Fatal Error: Problem parsing command-line ".$!;
83 #Get other command line arguments that weren't optional flags.
84 ($start,$end) = split (/-/, shift) unless ($start and $end);
85 my @files = @ARGV;
87 #Print out some help if it was asked for or if no arguments were given.
88 pod2usage(-exitstatus => 0, -verbose => 2) if $help;
90 pod2usage(-exitstatus => 0, -verbose => 1, -msg => 'Please specify the sequence files you wish to cut.')
91 unless scalar @files;
93 pod2usage(-exitstatus => 0, -verbose => 1, -msg => 'Please specify the region you wish to cut -s 1 -e 10 or 1-10.')
94 unless defined $end;
96 my $out = Bio::SeqIO->newFh(-file => ">$outfile", -format => $format) or die "Couldn't open selected output sequence file.";
98 #Open and iterate over all sequence in all files
99 foreach my $file (@files) {
100 my $in = Bio::SeqIO->new(-file => $file, -format => $format);
101 while ( my $seq = $in->next_seq() ) {
102 #Alter the ID to be postfixed with '/s-e'
103 $seq->display_id($seq->display_id."/$start-$end");
104 #Edit the sequence we have cut out
105 my $sequence = $seq->subseq($start,$end);
106 $sequence =~ s/([^\n]{0,$width})/$1\n/gi;
107 chomp $sequence;
108 $seq->seq($sequence);
109 #Print the sequence back out
110 print $out $seq;
115 __END__