fix spelling errors, fixes #3228
[bioperl-live.git] / scripts / searchio / hmmer_to_table.PLS
blob8c0c5db62bdc89a8f3f6d4f19873f5f7f5d5774e
#!perl -w
use strict;
use warnings;

=head1 NAME

hmmer_to_table - turn HMMER output into tabular format

=head1 SYNOPSIS

 hmmer_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ...

=head1 DESCRIPTION

Command line options:

  -e/--evalue   evalue    -- filter by evalue (rows with a larger
                             e-value are dropped)
  -b/--bitscore bitscore  -- filter by bitscore (rows with a smaller
                             score are dropped)
  --header                -- boolean flag to print column header
  -o/--out                -- optional outputfile to write data,
                             otherwise will write to STDOUT
  -h/--help               -- show this documentation

Input is read from the files named on the command line, or from STDIN
when no file is given.

Not technically a SearchIO script as this doesn't use any Bioperl
components, but it is useful and fast.  The output is tab-delimited,
one row per line of the "Parsed for domains:" table, with these columns:

  query sequence/domain (these are flip-flopped for hmmsearch / hmmpfam)
  query start
  query end
  domain/sequence name or PFAM accession
  hit start
  hit end
  score
  e-value
  domain/sequence name (these are flip-flopped for hmmsearch / hmmpfam)

The 4th and 9th columns currently carry the same value: the name found
in the first column of the domain table.

=head1 AUTHOR - Jason Stajich

Jason Stajich jason_at_bioperl-dot-org

=cut

use Getopt::Long;

my ($evalue, $bitscore, $header, $outfile);
GetOptions(
    'b|bitscore|bits:f' => \$bitscore,
    'e|evalue:f'        => \$evalue,
    'header'            => \$header,
    'o|out|outfile:s'   => \$outfile,
    # exec() replaces this process with perldoc; the exit is only
    # reached when perldoc could not be started.
    'h|help'            => sub { exec('perldoc', $0); exit; },
);

# Pick the output handle: the named file if -o was given, STDOUT otherwise.
my $outfh;
if ($outfile) {
    # Three-argument open so that characters in the file name are never
    # interpreted as part of the open mode.
    open($outfh, '>', $outfile) or die("$outfile: $!");
}
else {
    $outfh = \*STDOUT;
}

my @fields = qw(QNAME QSTART QEND HACCESSION HSTART HEND SCORE EVALUE HNAME);
if ($header) {
    print $outfh join("\t", @fields), "\n";
}

# %dat remembers the most recent "Query" name so it can be attached to
# every row of the domain table that follows it.
my %dat;
while (<>) {
    if (s/^Query(\s+(sequence|HMM))?:\s+//) {
        # The substitution stripped the "Query...:" label; what is left
        # (minus trailing whitespace) is the query name.
        s/\s+$//;
        $dat{'Query'} = $_;
    }
    elsif (/^Parsed for domains:/) {
        # Small state machine over the domain table:
        #   0 = waiting for the column header line
        #   1 = header seen, waiting for the dashed separator
        #   2 = inside the data rows
        my $ready = 0;
      DOMAIN_LINE:
        while (<>) {
            # Both alternatives are anchored at the start of the line so
            # that a data row whose name merely begins with "Model" is
            # not mistaken for the header.
            if (/^(?:Model|Sequence)\s+Domain/) { $ready = 1; }
            elsif ($ready && /^\-\-/) { $ready = 2; }
            elsif (/^Alignments of/) {
                # End of the table: forget the query and resume the
                # outer scan for the next report.
                undef %dat;
                last DOMAIN_LINE;
            }
            elsif ($ready == 2) {
                if (my ($name, $qstart, $qend, $hstart, $hend,
                        $score, $hit_evalue) =
                    (m!^(\S+)\s+         # domain name
                       \d+\/\d+\s+       # num/num (ie 1 of 2)
                       (\d+)\s+(\d+).+?  # sequence start and end
                       (\d+)\s+(\d+)\s+  # hmm start and end
                       \S+\s+            # []
                       (\S+)\s+          # score
                       (\S+)             # evalue
                       \s*$!x)) {
                    # Apply the optional command line filters.
                    next DOMAIN_LINE
                        if (defined $bitscore && $score < $bitscore);
                    next DOMAIN_LINE
                        if (defined $evalue && $hit_evalue > $evalue);
                    print $outfh join("\t",
                                      $dat{'Query'},
                                      $qstart, $qend,
                                      $name,
                                      $hstart, $hend,
                                      $score, $hit_evalue,
                                      $name), "\n";
                }
            }
        }
    }
}

# Buffered write errors only surface at close, so check it for the file
# this script opened itself.
if ($outfile) {
    close($outfh) or die("$outfile: $!");
}