3 # Author: Jason Stajich <jason-at-bioperl-dot-org>
4 # Description: Turn SearchIO parseable report(s) into a TRIBE matrix
8 bp_search2tribe - Turn SearchIO parseable report(s) into a TRIBE matrix
13 bp_search2tribe [-o outputfile] [-f reportformat] [-w/--weight] file1 file2 ..
17 This script is probably too slow for most people's uses. It is better
18 to use something like scripts/searchio/fastam9_to_table, -m 9 output
19 from BLAST, or the blast2table from the BLAST O'Reilly book to get a
20 tabular output from these programs and then feed the table into MCL
21 with the mcxdeblast script and the --m9 option.
23 This script will turn a protein Search report (BLASTP, FASTP, SSEARCH)
24 into a Markov Matrix for TribeMCL clustering.
28 -o filename - the output filename [default STDOUT]
29 -f format - search result format (blast, fasta)
30 (ssearch is fasta format). default is blast.
31 -w or --weight VALUE - Weight assigned to hits whose E-value is 0.0;
32 VALUE is the exponent, so the default of 200 means 1e-200.
35 Additionally specify the filenames you want to process on the
36 command-line. If no files are specified then STDIN input is assumed.
37 For example: bp_search2tribe E<lt> file1 (or, for several reports: cat file1 file2 file3 | bp_search2tribe)
41 Jason Stajich, jason-at-bioperl-dot-org
48 use Bio
::SearchIO
::FastHitEventBuilder
; # employ a speedup
# Built-in defaults for the command-line options.
use constant {
    DEFAULT_WEIGHT => 200,        # exponent used for E(0.0) hits, i.e. 1e-200
    DEFAULT_FORMAT => 'blast',    # search report format assumed when -f is absent
};

# Option state, pre-loaded with the defaults above where one exists.
my $format = DEFAULT_FORMAT;
my @files;
my $output;
my $weight = DEFAULT_WEIGHT;
60 'f|format:s' => \
$format,
61 'o|output:s' => \
$output,
62 'w|weight:i' => \
$weight,
63 'h|help' => sub{ exec('perldoc',$0);
70 open $outfh, '>', $output or die "Could not write output file '$output': $!\n";
# Build the report parser for the requested format. It reads from the ARGV
# filehandle, i.e. the files named on the command line, or STDIN when no
# files are given.
# The constructor is called as Class->new rather than with the indirect
# object form ("new Class (...)"), which Perl can misparse and which is
# disabled by newer feature bundles.
my $parser = Bio::SearchIO->new(
    -format => $format,
    -fh     => \*ARGV,
);
# Let's throw away HSP events: attach the FastHitEventBuilder handler, which
# this script uses as a speedup since only hit-level data is printed.
# The handler is constructed with Class->new instead of the indirect object
# form ("new Class"), which Perl can misparse.
$parser->attach_EventHandler( Bio::SearchIO::FastHitEventBuilder->new );
79 while( my $report = $parser->next_result ) {
80 my $q = $report->query_name;
81 while( my $hit = $report->next_hit ) {
82 my $evalue = $hit->significance;
86 $evalue = "1e-$weight";
88 $evalue = sprintf("%e",$evalue);
91 print $outfh join("\t",$q,$hit->name, split('e-',$evalue)), "\n";