RT 103272 - fix for deleting features that have decimals
[bioperl-live.git] / scripts / utilities / bp_search2tribe.pl
blob6138741281c36a09f9b3e647dd6a47887adf4534
1 #!perl
3 # Author: Jason Stajich <jason-at-bioperl-dot-org>
4 # Description: Turn SearchIO parseable report(s) into a TRIBE matrix
6 =head1 NAME
8 bp_search2tribe - Turn SearchIO parseable reports(s) into TRIBE matrix
10 =head1 SYNOPSIS
12 Usage:
13 bp_search2tribe [-o outputfile] [-f reportformat] [-w/--weight] file1 file2 ..
15 =head1 DESCRIPTION
17 This script is probably too slow for most people's uses. It is better
18 to use something like scripts/searchio/fastam9_to_table, -m 9 output
19 from BLAST, or the blast2table from the BLAST O'Reilly book to get a
20 tabular output from these programs and then feed the table into MCL
21 with the mcxdeblast script and the --m9 option.
23 This script will turn a protein Search report (BLASTP, FASTP, SSEARCH)
24 into a Markov Matrix for TribeMCL clustering.
26 The options are:
28 -o filename - the output filename [default STDOUT]
29 -f format - search result format (blast, fasta)
30 (ssearch is fasta format). default is blast.
31 -w or --weight VALUE - Change the default weight for E(0.0) hits
32 to VALUE (default=200 (i.e. 1e-200) )
33 -h - this help menu
35 Additionally specify the filenames you want to process on the
36 command-line. If no files are specified then STDIN input is assumed.
37 You specify this by doing: bp_search2tribe E<lt> file1 file2 file3
39 =head1 AUTHOR
41 Jason Stajich, jason-at-bioperl-dot-org
43 =cut
45 use strict;
46 use warnings;
47 use Bio::SearchIO;
48 use Bio::SearchIO::FastHitEventBuilder; # employ a speedup
49 use Getopt::Long;
50 use constant DEFAULT_WEIGHT => 200;
51 use constant DEFAULT_FORMAT => 'blast';
53 my ($format,@files,$output,$weight);
54 $weight = DEFAULT_WEIGHT; # default weight value
55 $format = DEFAULT_FORMAT;
57 my ($help);
59 GetOptions(
60 'f|format:s' => \$format,
61 'o|output:s' => \$output,
62 'w|weight:i' => \$weight,
63 'h|help' => sub{ exec('perldoc',$0);
64 exit(0)
68 my $outfh;
69 if( $output ) {
70 open $outfh, '>', $output or die "Could not write output file '$output': $!\n";
71 } else {
72 $outfh = *STDOUT;
75 my $parser = new Bio::SearchIO(-format => $format, -fh => \*ARGV);
77 # Let's throw away HSP events
78 $parser->attach_EventHandler(new Bio::SearchIO::FastHitEventBuilder);
79 while( my $report = $parser->next_result ) {
80 my $q = $report->query_name;
81 while( my $hit = $report->next_hit ) {
82 my $evalue = $hit->significance;
83 $evalue =~ s/^e/1e/i;
85 if( $evalue == 0 ) {
86 $evalue = "1e-$weight";
87 } else {
88 $evalue = sprintf("%e",$evalue);
91 print $outfh join("\t",$q,$hit->name, split('e-',$evalue)), "\n";