maint: remove hard tabs and trailing whitespace
[bioperl-live.git] / examples / searchio / hitwriter.pl
blob16b7536ada5c39a57cf8f01b55f9a91d2453c1b9
1 #!/usr/bin/perl
3 # Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
4 # for producing tab-delimited output of hit data from a Blast report
5 # input stream.
7 # Each row in the output represents data for a single hit.
8 # For hits containing multiple HSPs, the output information represents a
9 # summary across all HSPs.
11 # This parser represents a new and improved version of Bio::Tools::Blast.
13 # Usage:
14 # STDIN: stream containing one or more BLAST or PSI-BLAST reports.
15 # STDOUT: none, but generates an output file "hitwriter.out"
16 # containing tab-delimited data on a per-hit basis.
17 # STDERR: Progress info and any errors.
19 # In this example, we create a SearchIO parser that screens out hits
20 # based on expect (or P) scores and a default HitTableWriter. This writer
21 # provides the same functionality as the original Bio::Tools::Blast::table()
22 # function (i.e., a tab-delimited summary of each hit per row).
23 # HitTableWriter, however, is customizable so you can specify just the columns
24 # you want to have in the output table.
26 # For more documentation about the writer, including
27 # a complete list of columns, execute:
28 # perldoc Bio::SearchIO::Writer::HitTableWriter.
30 # For more documentation about working with Blast result objects,
31 # see docs for these modules:
32 # Bio::Search::Result::BlastResult
33 # Bio::Search::Iteration::IterationI
34 # Bio::Search::Hit::BlastHit
35 # Bio::Search::HSP::BlastHSP
37 # For more documentation about the Blast parser, see docs for
38 # Bio::SearchIO
40 # Author: Steve Chervitz <sac@bioperl.org>
42 use strict;
43 use lib '../../';
45 use Bio::SearchIO;
46 use Bio::SearchIO::Writer::HitTableWriter;
48 # These are the columns that will be in the output table of BLAST results.
49 my @columns = qw(
50 query_name
51 query_length
52 hit_name
53 hit_length
54 num_hsps
55 expect
56 frac_aligned_query
57 frac_identical_query
58 length_aln_query
59 gaps_total
60 strand_query
61 strand_hit
64 # The following columns require HSP alignment data:
65 # num_hsps
66 # frac_identical_query
67 # length_aln_query
68 # gaps_total
69 # strand_query
70 # strand_hit
72 print STDERR "\nUsing SearchIO->new()\n";
74 # Note that all parameters for the $in, $out, and $writer objects are optional.
75 # Default in = STDIN; Default out = STDOUT; Default writer = all columns
76 # In this example, we're reading from STDIN and writing to a file
77 # called "hitwriter.out"
78 # TODO: write hitless reports to STDERR and note if filtered.
79 my $in = Bio::SearchIO->new( -format => 'blast',
80 -fh => \*ARGV,
81 -signif => 0.1,
82 # -verbose=> 2
84 my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns
86 my $out = Bio::SearchIO->new( -format => 'blast',
87 -writer => $writer,
88 -file => ">hitwriter.out" );
89 # Need to keep a separate count of reports with hits
90 # to know when to include labels. The first report may be hitless,
91 # so we can't use $in->result_count
92 my $hit_count = 0;
93 while ( my $blast = $in->next_result() ) {
94 printf STDERR "\nReport %d: $blast\n", $in->result_count;
96 printf STDERR "query=%s, length=%d\n", $blast->query_name, $blast->query_length;
98 if( $blast->hits ) {
99 print STDERR "# hits= ", $blast->num_hits, "\n";
100 $hit_count++;
101 my @hits= $blast->hits;
102 print STDERR "frac_aligned_query= ", $hits[0]->frac_aligned_query, "\n";
104 $out->write_result($blast, $hit_count==1 );
106 else {
107 print STDERR "Hitless Blast Report ";
108 print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
111 ## For a simple progress monitor, uncomment this line:
112 #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
115 printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
116 printf STDERR "Output sent to file: %s\n", $out->file if $out->file;