5 fastm9_to_table - turn FASTA -m 9 output into NCBI -m 9 tabular output
9 fastm9_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ...
14 -e/--evalue evalue -- filter by evalue
15 -b/--bitscore bitscore -- filter by bitscore
16 --header -- boolean flag to print column header
17 -o/--out -- optional outputfile to write data,
18 otherwise will write to STDOUT
19 -h/--help -- show this documentation
21 Not technically a SearchIO script as this doesn't use any Bioperl
22 components but is a useful and fast. The output is tabular output
23 with the standard NCBI -m9 columns.
31 query start (if on rev-strand start > end)
33 hit start (if on rev-strand start > end)
38 Additionally 3 more columns are provided
47 =head1 AUTHOR - Jason Stajich
49 Jason Stajich jason_at_bioperl-dot-org
58 my ($evalue,$bitscore,$header,$outfile) = ( 10, 0);
60 'b|bitscore|bits:f' => \
$bitscore,
61 'e|evalue:f' => \
$evalue,
63 'o|out|outfile:s' => \
$outfile,
64 'h|help' => sub { exec('perldoc',$0); exit; }
69 open($outfh, ">$outfile") || die("$outfile: $!");
79 my @fields = qw(qname hname percid alen mmcount gapcount
80 qstart qend hstart hend evalue score bits fs sw-score
81 percsim qlen hlen qgap hgap);
82 print $outfh "#",uc(join("", map{ sprintf("%-10s",$_) } @fields)), "\n" if $header;
86 if( /^\s*\d+>>>(\S+).+/ ) {
88 if( /\-\s+(\d+)\s+(aa|nt)\s+$/ ){
91 } elsif( $hitsection && /^>>>\Q$data{'qname'}/ ) {
93 } elsif( /^The best scores are:/ ) {
96 } elsif( $hitsection ) {
97 if( s/^(\S+)\s+(.+)\(\s*(\d+)\)\s+// ) {
98 my ($hit, $desc,$hitlen) = ($1,$2,$3);
99 my ($dir) = ( s/^\[(r|f)\]\s+// );
100 my @line = split(/\s+/,$_);
101 $data{'hname'} = $hit;
102 $data{'hlen'} = $hitlen;
103 $data{'score'} = shift @line;
104 $data{'bits'} = shift @line;
105 $data{'evalue'} = shift @line;
106 $data{'percid'} = shift @line;
108 $data{'percsim'} = shift @line;
109 $data{'sw-score'} = shift @line;
110 $data{'alen'} = shift @line;
111 $data{'qstart'} = shift @line;
112 $data{'qend'} = shift @line;
113 $data{'pn0'} = shift @line; # pn0
114 $data{'px0'} = shift @line; # px0
115 $data{'hstart'} = shift @line; # an1
116 $data{'hend'} = shift @line; # ax1
117 $data{'pn1'} = shift @line; # pn1
118 $data{'px1'} = shift @line; # px1
120 $data{'qgap'} = shift @line;
121 $data{'hgap'} = shift @line;
122 $data{'gapcount'} = $data{'qgap'} + $data{'hgap'};
123 $data{'fs'} = shift @line;
125 $data{'mmcount'} = $data{'alen'} - ( int($data{'percid'} * $data{'alen'}) + $data{'gapcount'});
126 #next if( $data{'evalue'} > $evalue ||
127 # $data{'bits'} < $bitscore );
129 for ( $data{'percid'}, $data{'percsim'} ) {
130 $_ = sprintf("%.2f",$_*100);
132 print $outfh join( "\t",map { $data{$_} } @fields),"\n";
134 # print STDERR "unrecognized line \n$linestr";
137 # warn("skipping a line like this: $_");