scripts/index/bp_seqret.pl

   1 #!/usr/bin/perl
   2 # -*-Perl-*- mode (for emacs)
   3
   4 =head1 NAME
   5
   6 bp_seqret - bioperl implementation of sequence fetch from local db (like EMBOSS seqret)
   7
   8 =head1 USAGE
   9
  10 bp_seqret [-f/--format outputformat] [-o/--out/--outfile outfile] [-d/--db dbname] [-i/--id/-s/--seqname seqname1]
  11
  12 Example usage:
  13
  14    bp_seqret -f fasta -db db.fa -i seq1 -i seq2 > output.fas
  15    bp_seqret db.fa:seq1 output.fas
  16    bp_seqret db.fa:seq1 -o output.fas
  17    bp_seqret -db db.fa -o output.fas seq1 seq2 seq3
  18    bp_seqret -db db.fa seq1 seq2 seq3 output.fas
  19    bp_seqret -db db.fa seq1 seq2 seq3 - > output.fas
  20
  21 The DB is expected to be a Fasta formatted sequence file with multiple
  22 sequences.
  23
  24 Output format is Fasta by default.
  25
  26 If no output filename is provided then output is written to STDOUT.
  27 Providing '-' as the output filename will accomplish the same thing.
  28
  29
  30 =head1 AUTHOR
  31
  32 Jason Stajich jason_AT_bioperl-dot-org
  33
  34 =cut
  35
  36 use strict;
  37 use warnings;
  38 use Bio::DB::Fasta;
  39 use Bio::SeqIO;
  40 use Getopt::Long;
  41
  42 my $dbname;
  43 my @names;
  44 my $format = 'fasta';
  45 my $outfile;
  46 my ($start,$end);
  47 GetOptions(
  48            'f|format:s'   => \$format,
  49            'o|out|outfile:s' => \$outfile,
  50            's|sbegin|begin|start:s'  => \$start,
  51            'e|send|end|stop:s'       => \$end,
  52            'd|db|dbname:s'   => \$dbname,
  53            'i|id|seqname:s' => \@names);
  54
  55
  56 if( ! $dbname ) {
  57     die "need a dbname\n" unless @ARGV;
  58     $dbname = shift @ARGV;
  59     if( $dbname =~ s/^([^:]+):// ) {
  60         push @names, $dbname;
  61         $dbname = $1;
  62     }
  63 }
  64
  65 my $db = Bio::DB::Fasta->new($dbname, -glob => "*.{fa,fas,fsa,fasta,pep,aa,seq,cds,peps}");
  66 if( ! $outfile ) {
  67     $outfile = pop @ARGV;
  68 }
  69 my $out;
  70 if( $outfile ) {
  71     $out = Bio::SeqIO->new(-format => $format,
  72                            -file   => ">$outfile");
  73 } else {
  74     $out = Bio::SeqIO->new(-format => $format);
  75 }
  76 for my $nm ( @names ) {
  77     my $seq;
  78     if( $start || $end ) {
  79         $seq = $db->seq($nm, $start => $end);
  80     } else {
  81         $seq = $db->seq($nm);
  82     }
  83     if( $seq ) {
  84         my ($id,$desc) = split(/\s+/,$db->header($nm),2);
  85         if( $start && $end ) {
  86             $id = sprintf("%s_%d-%d",$id,$start || 0,$end || 0);
  87         }
  88
  89         $out->write_seq(Bio::PrimarySeq->new(-display_id => $id,
  90                                              -description => $desc,
  91                                              -seq => $seq));
  92     } else {
  93         warn("$nm not found\n");
  94     }
  95 }
  96