maint: restructure to use Dist::Zilla
[bioperl-live.git] / bin / bp_extract_feature_seq
blob0fe742fea4424189e426ac46701d8de2601f14ee
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use Bio::SeqIO;
5 use Getopt::Long;
6 # Author Jason Stajich <jason@bioperl.org>
8 =head1 NAME
10 bp_extract_feature_seq - extract the corresponding sequence for a specified feature type
12 =head1 SYNOPSIS
14 bp_extract_feature_seq [--format FORMAT] [--feature CDS] [--output FILE] [--input] FILE
16 =head1 DESCRIPTION
18 This script will extract the sequence for all the features you specify.
20 =head1 OPTIONS
22 =over
24 =item B<-i>, B<--input>
26 Specifies the sequence file to be read.
28 =item B<--format>
30 Format of the file specified by B<--input>. If not given, it will try to guess the
31 correct format from the file extension.
33 =item B<--feature>
35 Feature to be extracted. By default, it extracts the CDS feature.
37 =item B<-o>, B<--output>
39 File where the extracted features will be saved. If not specified, STDOUT is used.
41 =back
43 =head1 FEEDBACK
45 =head2 Mailing Lists
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to
49 the Bioperl mailing list. Your participation is much appreciated.
51 L<bioperl-l@bioperl.org> - General discussion
52 L<http://bioperl.org/wiki/Mailing_lists> - About the mailing lists
54 =head2 Reporting Bugs
56 Report bugs to the Bioperl bug tracking system to help us keep track
57 of the bugs and their resolution. Bug reports can be submitted via
58 email or the web:
60 L<https://github.com/bioperl/bioperl-live/issues>
62 =head1 AUTHOR
64 Jason Stajich <jason-at-bioperl-dot-org>
66 =cut
68 my ($input,$format,$featuretype,$output);
69 $featuretype ='CDS';
70 GetOptions(
71 'i|input:s' => \$input,
72 'format:s' => \$format,
73 'feature:s' => \$featuretype,
74 'o|output:s'=> \$output);
76 $input || shift if @ARGV;
78 my $in = new Bio::SeqIO(-file => $input,
79 -format => $format);
80 my $out;
81 if ($output ) {
82 $out = new Bio::SeqIO(-file => ">$output", -format => 'fasta');
83 } else {
84 $out = new Bio::SeqIO(-format => 'fasta'); # use STDOUT for output
87 my $count = 1;
88 while( my $seq = $in->next_seq ) {
89 foreach my $f ( grep { $_->primary_tag =~ /$featuretype/i }
90 $seq->get_SeqFeatures ) {
91 my $s = $f->spliced_seq;
92 if( $featuretype =~ /gene|CDS/ ) {
93 $s->display_id($f->has_tag('gene') ? join(',',sort $f->each_tag_value('gene')) :
94 $f->has_tag('label') ? join(',',$f->each_tag_value('label')):
95 $s->display_id);
96 } else {
97 $s->display_id(sprintf("%s_%s_%d",
98 $seq->display_id,
99 $f->primary_tag,
100 $count++));
102 $out->write_seq($s);