tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / examples / searchio / hitwriter.pl
blobf41b4e13487449fa6599cc58e685044b7845ee01
1 #!/usr/bin/perl
3 # Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
4 # for producing tab-delimited output of hit data from a Blast report
5 # input stream.
7 # Each row in the output represents data for a single hit.
8 # For hits containing multiple HSPs, the output information represents a
9 # summary across all HSPs.
11 # This parser represents a new and improved version of Bio::Tools::Blast.
13 # Usage:
14 # STDIN: stream containing one or more BLAST or PSI-BLAST reports.
15 # STDOUT: none, but generates an output file "hitwriter.out"
16 # containing tab-delimited data on a per-hit basis.
17 # STDERR: Progress info and any errors.
19 # In this example, we create a SearchIO parser that screens out hits
20 # based on expect (or P) scores and a default HitTableWriter. This writer
21 # provides the same functionality as the original Bio::Tools::Blast::table()
22 # function (i.e., a tab-delimited summary of each hit per row).
23 # HitTableWriter, however, is customizable so you can specify just the columns
24 # you want to have in the output table.
26 # For more documentation about the writer, including
27 # a complete list of columns, execute:
28 # perldoc Bio::SearchIO::Writer::HitTableWriter.
30 # For more documentation about working with Blast result objects,
31 # see docs for these modules:
32 # Bio::Search::Result::BlastResult
33 # Bio::Search::Iteration::IterationI
34 # Bio::Search::Hit::BlastHit
35 # Bio::Search::HSP::BlastHSP
37 # For more documentation about the Blast parser, see docs for
38 # Bio::SearchIO
40 # Author: Steve Chervitz <sac@bioperl.org>
41 # Revision: $Id$
43 use strict;
44 use lib '../../';
46 use Bio::SearchIO;
47 use Bio::SearchIO::Writer::HitTableWriter;
49 # These are the columns that will be in the output table of BLAST results.
50 my @columns = qw(
51 query_name
52 query_length
53 hit_name
54 hit_length
55 num_hsps
56 expect
57 frac_aligned_query
58 frac_identical_query
59 length_aln_query
60 gaps_total
61 strand_query
62 strand_hit
65 # The following columns require HSP alignment data:
66 # num_hsps
67 # frac_identical_query
68 # length_aln_query
69 # gaps_total
70 # strand_query
71 # strand_hit
73 print STDERR "\nUsing SearchIO->new()\n";
75 # Note that all parameters for the $in, $out, and $writer objects are optional.
76 # Default in = STDIN; Default out = STDOUT; Default writer = all columns
77 # In this example, we're reading from STDIN and writing to a file
78 # called "hitwriter.out"
79 # TODO: write hitless reports to STDERR and note if filtered.
80 my $in = Bio::SearchIO->new( -format => 'blast',
81 -fh => \*ARGV,
82 -signif => 0.1,
83 # -verbose=> 2
85 my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns
87 my $out = Bio::SearchIO->new( -format => 'blast',
88 -writer => $writer,
89 -file => ">hitwriter.out" );
90 # Need to keep a separate count of reports with hits
91 # to know when to include labels. The first report may be hitless,
92 # so we can't use $in->result_count
93 my $hit_count = 0;
94 while ( my $blast = $in->next_result() ) {
95 printf STDERR "\nReport %d: $blast\n", $in->result_count;
97 printf STDERR "query=%s, length=%d\n", $blast->query_name, $blast->query_length;
99 if( $blast->hits ) {
100 print STDERR "# hits= ", $blast->num_hits, "\n";
101 $hit_count++;
102 my @hits= $blast->hits;
103 print STDERR "frac_aligned_query= ", $hits[0]->frac_aligned_query, "\n";
105 $out->write_result($blast, $hit_count==1 );
107 else {
108 print STDERR "Hitless Blast Report ";
109 print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
112 ## For a simple progress monitor, uncomment this line:
113 #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
116 printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
117 printf STDERR "Output sent to file: %s\n", $out->file if $out->file;