changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / scripts / searchio / bp_parse_hmmsearch.pl
blob3d93499ac937e17a11f56f1b7a38c4e0ccc168f0
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
6 =head1 NAME
8 bp_parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with
9 different output options
11 =head1 SYNOPSIS
13 bp_parse_hmmsearch [--po] [--ps] -s hmmsearch_file
15 bp_parse_hmmsearch [--po] [--ps] -m index_file
17 =head1 DESCRIPTION
19 =head2 Mandatory Options:
21 -s HMMSEARCH file to parse.
22 -m INDEX file that contains a list of HMMSEARCH files for multiple
23 parsing.
25 =head2 Special Options:
27 --po Print only the hits that have positive scores.
28 --ps Print the total of positive scores found.
29 --help Show this documentation.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Reporting Bugs
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution. Bug reports can be submitted via the
46 web:
48 https://github.com/bioperl/bioperl-live/issues
50 =head1 AUTHOR
52 Mauricio Herrera Cuadra <mauricio at open-bio.org>
54 =cut
56 # Modules, pragmas and variables to use
57 use Bio::SearchIO;
58 use Getopt::Long;
59 use vars qw($opt_s $opt_m $opt_po $opt_ps $opt_help);
61 # Gets options from the command line
62 GetOptions qw(-s:s -m:s --po --ps --help);
64 # Print documentation if help switch was given
65 exec('perldoc', $0) and exit() if $opt_help;
67 # If no mandatory options are given prints an error and exits
68 if (!$opt_s && !$opt_m) {
69 print "ERROR: No HMMSEARCH or INDEX file has been specified.\n Use
70 '--help' switch for documentation.\n" and exit();
71 } elsif ($opt_s && $opt_m) {
72 print "ERROR: You must select only one option (-s or -m) for input.\n
73 Use '--help' switch for documentation.\n" and exit();
76 # Initializes a counter for the domain positive scores if the option
77 # was given
78 my $pos_scores = 0 if $opt_ps;
80 # If single file mode was selected
81 if ($opt_s) {
82 parse_hmmsearch($opt_s);
84 # Prints the total domain positive scores if the option was given
85 if ($opt_ps) {
86 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
87 - - - -\n";
88 print "Total domain positive scores: $pos_scores\n";
91 # If multiple files mode was selected
92 } elsif ($opt_m) {
94 # Opens the INDEX file sent as input
95 open my $FH, '<', $opt_m or die "Could not read INDEX file '$opt_m': $!\n";
97 # Cycle that extracts one line for every loop until finding the
98 # end of file
99 while (my $line = <$FH>) {
101 # Deletes the new line characters from the line
102 chomp $line;
104 # Parses the result file in turn
105 parse_hmmsearch($line);
106 print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
107 = = = =\n";
110 # Prints the total domain positive scores if the option was given
111 print "Total domain positive scores: $pos_scores\n" if $opt_ps;
113 # Closes INDEX files
114 close $FH;
117 # Exits the program
118 exit();
120 # Subroutine that parses a HMMSEARCH results file
121 sub parse_hmmsearch {
123 # Gets the parameters sent to the function
124 my ($file) = @_;
126 # Creates a new Bio::SearchIO object
127 my $in = new Bio::SearchIO(
128 -format => 'hmmer',
129 -file => $file,
132 # Loops through the results file
133 while (my $result = $in->next_result()) {
135 # Prints program name and version (these are values from
136 # Bio::Search::Result::GenericResult methods)
137 print $result->algorithm(), " ", $result->algorithm_version(), "\n";
138 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
139 - - - -\n";
141 # Prints HMM file and sequence database (these are values from
142 # Bio::Search::Result::HMMERResult methods)
143 print "HMM file:\t\t\t", $result->hmm_name(), "\n";
144 print "Sequence database:\t\t", $result->sequence_file(), "\n";
145 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
146 -\n";
148 # Prints some values from Bio::Search::Result::GenericResult
149 # methods
150 print "Query HMM:\t\t\t", $result->query_name(), "\n";
151 print "Accession:\t\t\t", $result->query_accession(), "\n";
152 print "Description:\t\t\t", $result->query_description(), "\n";
153 print "Total hits:\t\t\t", $result->num_hits(), "\n";
155 # Loops through the sequence in turn
156 while (my $hit = $result->next_hit()) {
158 # If only positive scores option was given and the score
159 # in turn is greater than zero
160 if ($opt_po) {
161 printHits($hit) if ($hit->score() >= 0);
163 # Prints all hits otherwise
164 } else {
165 printHits($hit);
171 # Subroutine that prints the values from a Bio::Search::Hit::HitI
172 # object
173 sub printHits {
175 # Gets the parameters sent to the function
176 my ($hit) = @_;
178 # Prints some values from Bio::Search::Hit::HitI methods
179 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
180 print "Hit ", $hit->rank(), "\n";
181 print "Sequence:\t\t\t", $hit->name(), "\n";
182 print "Description:\t\t\t", $hit->description(), "\n";
183 print "Score:\t\t\t\t", $hit->score(), "\n";
184 print "E-value:\t\t\t", $hit->significance(), "\n";
185 print "Number of domains:\t\t", $hit->num_hsps(), "\n";
187 # Loops through the domain in turn
188 while (my $hsp = $hit->next_hsp()) {
190 # Prints some values from Bio::Search::HSP::HSPI methods
191 print " - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
192 print " Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n";
193 print " seq-f:\t\t\t", $hsp->start('hit'), "\n";
194 print " seq-t:\t\t\t", $hsp->end('hit'), "\n";
195 print " hmm-f:\t\t\t", $hsp->start(), "\n";
196 print " hmm-t:\t\t\t", $hsp->end(), "\n";
197 print " score:\t\t\t", $hsp->score(), "\n";
198 $pos_scores++ if ($hsp->score() >= 0) && $opt_ps;
199 print " E-value:\t\t\t", $hsp->evalue(), "\n";
200 my $hmm_string = $hsp->query_string();
201 $hmm_string =~ s/<-\*$//;
202 print " hmm string:\t\t\t", $hmm_string, "\n";
203 print " homology string:\t\t", $hsp->homology_string(), "\n";
204 print " hit string:\t\t\t", $hsp->hit_string(), "\n";