bin/bp_extract_feature_seq

   1 #!/usr/bin/perl
   2 use strict;
   3 use warnings;
   4 use Bio::SeqIO;
   5 use Getopt::Long;
   6 # Author Jason Stajich <jason@bioperl.org>
   7
   8 =head1 NAME
   9
  10 bp_extract_feature_seq - extract the corresponding sequence for a specified feature type
  11
  12 =head1 SYNOPSIS
  13
  14 bp_extract_feature_seq [--format FORMAT] [--feature CDS] [--output FILE] [--input] FILE
  15
  16 =head1 DESCRIPTION
  17
  18 This script will extract the sequence for all the features you specify.
  19
  20 =head1 OPTIONS
  21
  22 =over
  23
  24 =item B<-i>, B<--input>
  25
  26 Specifies the sequence file to be read.
  27
  28 =item B<--format>
  29
  30 Format of the file specified by B<--input>. If not given, it will try to guess the
  31 correct format from the file extension.
  32
  33 =item B<--feature>
  34
  35 Feature to be extracted. By default, it extracts the CDS feature.
  36
  37 =item B<-o>, B<--output>
  38
  39 File where the extracted features will be saved. If not specified, STDOUT is used.
  40
  41 =back
  42
  43 =head1 FEEDBACK
  44
  45 =head2 Mailing Lists
  46
  47 User feedback is an integral part of the evolution of this and other
  48 Bioperl modules. Send your comments and suggestions preferably to
  49 the Bioperl mailing list. Your participation is much appreciated.
  50
  51   L<bioperl-l@bioperl.org>                  - General discussion
  52   L<http://bioperl.org/wiki/Mailing_lists>  - About the mailing lists
  53
  54 =head2 Reporting Bugs
  55
  56 Report bugs to the Bioperl bug tracking system to help us keep track
  57 of the bugs and their resolution. Bug reports can be submitted via
  58 email or the web:
  59
  60   L<https://github.com/bioperl/bioperl-live/issues>
  61
  62 =head1 AUTHOR
  63
  64  Jason Stajich <jason-at-bioperl-dot-org>
  65
  66 =cut
  67
  68 my ($input,$format,$featuretype,$output);
  69 $featuretype ='CDS';
  70 GetOptions(
  71            'i|input:s' => \$input,
  72            'format:s'  => \$format,
  73            'feature:s' => \$featuretype,
  74            'o|output:s'=> \$output);
  75
  76 $input || shift if @ARGV;
  77
  78 my $in = new Bio::SeqIO(-file => $input,
  79                         -format => $format);
  80 my $out;
  81 if ($output ) {
  82     $out = new Bio::SeqIO(-file => ">$output", -format => 'fasta');
  83 } else {
  84     $out = new Bio::SeqIO(-format => 'fasta'); # use STDOUT for output
  85 }
  86
  87 my $count = 1;
  88 while( my $seq = $in->next_seq ) {
  89     foreach my $f ( grep { $_->primary_tag =~ /$featuretype/i }
  90                     $seq->get_SeqFeatures ) {
  91         my $s = $f->spliced_seq;
  92         if( $featuretype =~ /gene|CDS/ ) {
  93             $s->display_id($f->has_tag('gene') ? join(',',sort $f->each_tag_value('gene')) :
  94                            $f->has_tag('label') ? join(',',$f->each_tag_value('label')):
  95                            $s->display_id);
  96         } else {
  97             $s->display_id(sprintf("%s_%s_%d",
  98                                    $seq->display_id,
  99                                    $f->primary_tag,
 100                                    $count++));
 101         }
 102         $out->write_seq($s);
 103     }
 104 }