#!/usr/bin/perl
#
# examples/db/rfetch.pl
#
# Fetch sequence data via the OBDA registry system and write it to STDOUT.
#
# usage: rfetch [-i <file_with_accession_list>] [-a] [-v] [-d <database>]
#               [-f <format>] [-s <start> -e <end>] [id ...]
#
#   -d, --database  name handed to the registry's get_database
#                   (default: embl_biosql)
#   -f, --format    Bio::SeqIO output format (default: fasta)
#   -i, --input     file of identifiers; the first whitespace-separated
#                   field of every non-blank line is used
#   -a, --acc       look identifiers up with get_Seq_by_acc instead of
#                   get_Seq_by_id
#   -s, --start     with -e: truncate every sequence via trunc(start,end)
#   -e, --end       with -s: truncate every sequence via trunc(start,end)
#   -v, --verbose   report each identifier on STDERR before fetching it
#
# When -i is not given the identifiers are the remaining command-line
# arguments.  Identifiers that cannot be fetched are reported on STDERR and
# skipped; the script then exits with status 1 instead of 0.
#

use strict;
use warnings;

use Bio::DB::Registry;
use Bio::SeqIO;
use Getopt::Long;

my $USAGE = "usage: rfetch [-i <file_with_accession_list>] [-a] [-v] "
          . "[-d <database>] [-f <format>] [-s <start> -e <end>] [id ...]\n";

my $database = 'embl_biosql';
my $start    = undef;
my $end      = undef;
my $format   = 'fasta';
my $file     = undef;
my $acc      = undef;
my $verbose  = undef;

# Abort on unknown or malformed options rather than running with a
# half-parsed command line.
GetOptions(
            'd|database:s' => \$database,
            's|start:i'    => \$start,
            'e|end:i'      => \$end,
            'f|format:s'   => \$format,
            'i|input:s'    => \$file,
            'a|acc'        => \$acc,
            'v|verbose'    => \$verbose,
           ) or die $USAGE;

# Truncation is only applied when both bounds are present; tell the user
# when a lone bound is being ignored instead of dropping it silently.
my $truncate = defined $start && defined $end;
if( !$truncate && ( defined $start || defined $end ) ) {
  warn "Both -s and -e are required to truncate; ignoring the single bound given\n";
}

my $registry = Bio::DB::Registry->new();

# Fail here with a clear message rather than on the first method call made
# on an undefined database handle.
my $db = $registry->get_database($database);
die "Could not get database '$database' from the registry\n"
  unless defined $db;

my $seqout = Bio::SeqIO->new( '-format' => $format, '-fh' => \*STDOUT);

# Collect the identifiers either from the input file or from @ARGV.
my @ids;

if( defined $file ) {
  open my $in, '<', $file or die "Could not read file '$file': $!\n";
  while( my $line = <$in> ) {
    # split ' ' drops leading whitespace; a blank line yields no field.
    my ($id) = split ' ', $line;
    next unless defined $id;
    push(@ids,$id);
  }
  close $in;
} else {
  @ids = @ARGV;
}

# Number of identifiers for which the database returned nothing.
my $missing = 0;

foreach my $id ( @ids ) {
  if( $verbose ){
    print STDERR "fetching $id\n";
  }

  my $seq = $acc ? $db->get_Seq_by_acc($id)
                 : $db->get_Seq_by_id($id);

  # Keep going with the rest of the batch when one lookup comes back empty.
  unless( defined $seq ) {
    warn "Could not fetch '$id' from database '$database', skipping\n";
    $missing++;
    next;
  }

  if( $truncate ) {
    $seq = $seq->trunc($start,$end);
  }

  $seqout->write_seq($seq);
}

exit( $missing ? 1 : 0 );