Merge pull request #150 from jkeenan/missing_manifest
[bioperl-live.git] / scripts / searchio / bp_hmmer_to_table.pl
blob885be7c3e90cc940a99986d9c78ae6b59717a864
1 #!perl
2 use strict;
3 use warnings;
5 =head1 NAME
7 bp_hmmer_to_table - turn HMMER output into tabular format
9 =head1 SYNOPSIS
11 bp_hmmer_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ...
13 =head1 DESCRIPTION
15 Command line options:
16 -e/--evalue evalue -- filter by evalue
17 -b/--bitscore bitscore -- filter by bitscore
18 --header -- boolean flag to print column header
19 -o/--out -- optional outputfile to write data,
20 otherwise will write to STDOUT
21 -h/--help -- show this documentation
23 Not technically a SearchIO script as this doesn't use any Bioperl
24 components but is a useful and fast. The output is tabular output.
26 query sequence/domain (these are flip-flopped for hmmsearch / hmmpfam)
27 query start
28 query end
29 domain/sequence name or PFAM accession
30 hit start
31 hit end
32 score
33 e-value
34 domain/sequence name (these are flip-flopped for hmmsearch / hmmpfam)
36 =head1 AUTHOR - Jason Stajich
38 Jason Stajich jason_at_bioperl-dot-org
40 =cut
42 use Getopt::Long;
44 my ($evalue,$bitscore,$header,$outfile);
45 GetOptions(
46 'b|bitscore|bits:f' => \$bitscore,
47 'e|evalue:f' => \$evalue,
48 'header' => \$header,
49 'o|out|outfile:s' => \$outfile,
50 'h|help' => sub { exec('perldoc',$0); exit; }
53 my $outfh;
54 if( $outfile ) {
55 open $outfh, '>', $outfile or die "Could not write file '$outfile': $!\n";
56 } else {
57 $outfh = \*STDOUT;
60 my @fields = qw(QNAME QSTART QEND HACCESSION HSTART HEND SCORE EVALUE HNAME);
61 if( $header ) {
62 print $outfh join("\t", @fields), "\n";
64 my %dat;
65 while(<>) {
66 if( s/^Query(\s+(sequence|HMM))?:\s+// ) {
67 s/\s+$//;
68 $dat{'Query'} = $_;
69 } elsif( /^Parsed for domains:/ ) {
70 my $ready = 0;
71 while(<>) {
72 if(/^Model|Sequence\s+Domain/ ) { $ready = 1; }
73 elsif( $ready && /^\-\-/) { $ready = 2; }
74 elsif( /^Alignments of/ ) { undef %dat; last; }
75 elsif( $ready == 2 ) {
76 if( my ($n,$domainnum,$domainct, @vals) =
77 (m!^(\S+)\s+ # domain name
78 (\d+)\/(\d+)\s+ # num/num (ie 1 of 2)
79 (\d+)\s+(\d+).+? # sequence start and end
80 (\d+)\s+(\d+)\s+ # hmm start and end
81 \S+\s+ # []
82 (\S+)\s+ # score
83 (\S+) # evalue
84 \s*$!ox) ) {
85 next if( defined $bitscore && $vals[4] < $bitscore );
86 next if (defined $evalue && $vals[5] > $evalue);
87 print $outfh join("\t",
88 $dat{'Query'},
89 $vals[0], $vals[1],
90 $n,
91 $vals[2],$vals[3],
92 $vals[4],$vals[5],
93 $n),"\n";