8 bp_parse_hmmsearch - parse single/multiple HMMSEARCH results file(s) with
9 different output options
13 bp_parse_hmmsearch [--po] [--ps] -s hmmsearch_file
15 bp_parse_hmmsearch [--po] [--ps] -m index_file
19 =head2 Mandatory Options:
21 -s HMMSEARCH file to parse.
22 -m INDEX file that contains a list of HMMSEARCH files for multiple
25 =head2 Special Options:
27 --po Print only the hits that have positive scores.
28 --ps Print the total of positive scores found.
29 --help Show this documentation.
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Report bugs to the Bioperl bug tracking system to help us keep track
45 of the bugs and their resolution. Bug reports can be submitted via the
48 https://github.com/bioperl/bioperl-live/issues
52 Mauricio Herrera Cuadra <mauricio at open-bio.org>
56 # Modules, pragmas and variables to use
59 use vars
qw($opt_s $opt_m $opt_po $opt_ps $opt_help);
61 # Gets options from the command line
62 GetOptions qw(-s:s -m:s --po --ps --help);
64 # Print documentation if help switch was given
65 exec('perldoc', $0) and exit() if $opt_help;
67 # If no mandatory options are given prints an error and exits
68 if (!$opt_s && !$opt_m) {
69 print "ERROR: No HMMSEARCH or INDEX file has been specified.\n Use
70 '--help' switch for documentation.\n" and exit();
71 } elsif ($opt_s && $opt_m) {
72 print "ERROR: You must select only one option (-s or -m) for input.\n
73 Use '--help' switch for documentation.\n" and exit();
76 # Initializes a counter for the domain positive scores if the option
78 my $pos_scores = 0 if $opt_ps;
80 # If single file mode was selected
82 parse_hmmsearch
($opt_s);
84 # Prints the total domain positive scores if the option was given
86 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
88 print "Total domain positive scores: $pos_scores\n";
91 # If multiple files mode was selected
94 # Opens the INDEX file sent as input
95 open my $FH, '<', $opt_m or die "Could not read INDEX file '$opt_m': $!\n";
97 # Cycle that extracts one line for every loop until finding the
99 while (my $line = <$FH>) {
101 # Deletes the new line characters from the line
104 # Parses the result file in turn
105 parse_hmmsearch
($line);
106 print "= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
110 # Prints the total domain positive scores if the option was given
111 print "Total domain positive scores: $pos_scores\n" if $opt_ps;
120 # Subroutine that parses a HMMSEARCH results file
121 sub parse_hmmsearch
{
123 # Gets the parameters sent to the function
126 # Creates a new Bio::SearchIO object
127 my $in = new Bio
::SearchIO
(
132 # Loops through the results file
133 while (my $result = $in->next_result()) {
135 # Prints program name and version (these are values from
136 # Bio::Search::Result::GenericResult methods)
137 print $result->algorithm(), " ", $result->algorithm_version(), "\n";
138 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
141 # Prints HMM file and sequence database (these are values from
142 # Bio::Search::Result::HMMERResult methods)
143 print "HMM file:\t\t\t", $result->hmm_name(), "\n";
144 print "Sequence database:\t\t", $result->sequence_file(), "\n";
145 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
148 # Prints some values from Bio::Search::Result::GenericResult
150 print "Query HMM:\t\t\t", $result->query_name(), "\n";
151 print "Accession:\t\t\t", $result->query_accession(), "\n";
152 print "Description:\t\t\t", $result->query_description(), "\n";
153 print "Total hits:\t\t\t", $result->num_hits(), "\n";
155 # Loops through the sequence in turn
156 while (my $hit = $result->next_hit()) {
158 # If only positive scores option was given and the score
159 # in turn is greater than zero
161 printHits
($hit) if ($hit->score() >= 0);
163 # Prints all hits otherwise
171 # Subroutine that prints the values from a Bio::Search::Hit::HitI
175 # Gets the parameters sent to the function
178 # Prints some values from Bio::Search::Hit::HitI methods
179 print "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
180 print "Hit ", $hit->rank(), "\n";
181 print "Sequence:\t\t\t", $hit->name(), "\n";
182 print "Description:\t\t\t", $hit->description(), "\n";
183 print "Score:\t\t\t\t", $hit->score(), "\n";
184 print "E-value:\t\t\t", $hit->significance(), "\n";
185 print "Number of domains:\t\t", $hit->num_hsps(), "\n";
187 # Loops through the domain in turn
188 while (my $hsp = $hit->next_hsp()) {
190 # Prints some values from Bio::Search::HSP::HSPI methods
191 print " - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n";
192 print " Domain:\t\t\t", $hsp->rank(), " of ", $hit->num_hsps(), "\n";
193 print " seq-f:\t\t\t", $hsp->start('hit'), "\n";
194 print " seq-t:\t\t\t", $hsp->end('hit'), "\n";
195 print " hmm-f:\t\t\t", $hsp->start(), "\n";
196 print " hmm-t:\t\t\t", $hsp->end(), "\n";
197 print " score:\t\t\t", $hsp->score(), "\n";
198 $pos_scores++ if ($hsp->score() >= 0) && $opt_ps;
199 print " E-value:\t\t\t", $hsp->evalue(), "\n";
200 my $hmm_string = $hsp->query_string();
201 $hmm_string =~ s/<-\*$//;
202 print " hmm string:\t\t\t", $hmm_string, "\n";
203 print " homology string:\t\t", $hsp->homology_string(), "\n";
204 print " hit string:\t\t\t", $hsp->hit_string(), "\n";