From 0874c639bde87c29b69153b09ab1661753f2bd5a Mon Sep 17 00:00:00 2001 From: "Francisco J. Ossandon" Date: Mon, 6 Jan 2014 16:06:53 -0300 Subject: [PATCH] Hmmer3: Added support to retrieve the CS line using a "consensus_structure" method and the corresponding tests. --- Bio/Search/HSP/GenericHSP.pm | 29 ++++++++++++++++++++++++++++- Bio/SearchIO/hmmer3.pm | 20 +++++++++++++++----- t/SearchIO/hmmer.t | 25 +++++++++++++++++-------- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/Bio/Search/HSP/GenericHSP.pm b/Bio/Search/HSP/GenericHSP.pm index 54b602283..844c43ddc 100644 --- a/Bio/Search/HSP/GenericHSP.pm +++ b/Bio/Search/HSP/GenericHSP.pm @@ -473,6 +473,33 @@ sub homology_string{ return $previous; } +=head2 consensus_structure + + Title : consensus_structure + Usage : my $cs_string = $hsp->consensus_structure; + Function: Retrieves the consensus structure line for this HSP as a string (HMMer3). + : If the model had any consensus structure or reference line annotation + : that it inherited from a multiple alignment (#=GC SS cons, + : #=GC RF annotation in Stockholm files), that information is shown + : as CS or RF annotation line. + Returns : string + Args : [optional] string to set for consensus structure + +=cut + +sub consensus_structure { + my ($self,$value) = @_; + my $previous = $self->{CS_SEQ}; + if( defined $value || ! defined $previous ) { + $value = $previous = '' unless defined $value; + $self->{CS_SEQ} = $value; + # do some housekeeping so we know when to + # re-run _calculate_seq_positions + $self->{'_sequenceschanged'} = 1; + } + return $previous; +} + =head2 posterior_string Title : posterior_string @@ -483,7 +510,7 @@ sub homology_string{ : A 0 means 0-5%, 1 means 5-15%, and so on; 9 means 85-95%, : and a * means 95-100% posterior probability. Returns : string - Args : [optional] string to set for homology sequence + Args : [optional] string to set for posterior probability =cut diff --git a/Bio/SearchIO/hmmer3.pm b/Bio/SearchIO/hmmer3.pm index efcba2c62..efed529a6 100644 --- a/Bio/SearchIO/hmmer3.pm +++ b/Bio/SearchIO/hmmer3.pm @@ -124,6 +124,7 @@ BEGIN { 'Hsp_hitgaps' => 'HSP-hit_gaps', 'Hsp_querygaps' => 'HSP-query_gaps', 'Hsp_qseq' => 'HSP-query_seq', + 'Hsp_csline' => 'HSP-cs_seq', 'Hsp_hseq' => 'HSP-hit_seq', 'Hsp_midline' => 'HSP-homology_seq', 'Hsp_pline' => 'HSP-pp_seq', @@ -495,7 +496,8 @@ sub next_result { $qalistart, $qalistop, $score, $ceval, '', '', - '', '' + '', '', + '' ); my $info = $hit_list[ $hitinfo{"$name.$dom_counter"} ]; if ( !defined $info ) { @@ -526,7 +528,7 @@ sub next_result { my $max_count = 3; my $lastdomain; my $hsp; - my ( $hline, $midline, $qline, $pline ); + my ( $csline, $hline, $midline, $qline, $pline ); while ( defined( $_ = $self->_readline ) ) { if ( $_ =~ m/^\>\>/ @@ -544,6 +546,7 @@ sub next_result { $count = 0; my $key = $name . "_" . $domainnum; $hsp = $hsp_list[ $hspinfo{"$key.$dom_counter"} ]; + $csline = $$hsp[-5]; $hline = $$hsp[-4]; $midline = $$hsp[-3]; $qline = $$hsp[-2]; @@ -552,8 +555,9 @@ sub next_result { } # model data track, some reports don't have - elsif ( $_ =~ m/\s+\S+\sCS$/ ) { - my $modeltrack = $_; + elsif ( $_ =~ m/\s+\S+\s(?:CS|RF)$/ ) { + my @data = split( " ", $_ ); + $csline .= $data[-2]; $max_count++; $count++; next; @@ -601,6 +605,7 @@ sub next_result { $pline .= $data[-2]; $count = 0; $max_count = 3; + $$hsp[-5] = $csline; $$hsp[-4] = $hline; $$hsp[-3] = $midline; $$hsp[-2] = $qline; @@ -823,6 +828,11 @@ sub next_result { } ); $self->element( + { 'Name' => 'Hsp_csline', + 'Data' => shift @$hsp + } + ); + $self->element( { 'Name' => 'Hsp_hseq', 'Data' => shift @$hsp } @@ -1035,7 +1045,7 @@ sub characters { my ( $self, $data ) = @_; if ( $self->in_element('hsp') - && $data->{'Name'} =~ /Hsp\_(qseq|hseq|pline|midline)/o + && $data->{'Name'} =~ /Hsp\_(qseq|hseq|csline|pline|midline)/o && defined $data->{'Data'} ) { $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $data->{'Data'}; diff --git a/t/SearchIO/hmmer.t b/t/SearchIO/hmmer.t index 57912a201..4cd1c94f8 100644 --- a/t/SearchIO/hmmer.t +++ b/t/SearchIO/hmmer.t @@ -6,7 +6,7 @@ BEGIN { use lib '.'; use Bio::Root::Test; - test_begin( -tests => 420 ); + test_begin( -tests => 423 ); use_ok('Bio::SearchIO'); } @@ -62,8 +62,6 @@ while ( $result = $searchio->next_result ) { 'lf+g+L + +t+e Lk++F+k G iv++ +++D + t++s+Gf+F+++ ++ + A + +++++gr+++ ', 'Check for homology string' ); - # Hmmpfam don't have PP string, this is a test to check for side effects - is( $hsp->posterior_string, ''); is( length( $hsp->homology_string ), length( $hsp->hit_string ), 'Check if homology string and hit string have an equal length' @@ -72,15 +70,18 @@ while ( $result = $searchio->next_result ) { length( $hsp->homology_string ), 'Check if query string and homology string have an equal length' ); + # Hmmpfam don't have PP or CS strings, these are tests to check for side effects + is( $hsp->posterior_string, '' ); + is( $hsp->consensus_structure, '' ); } } if ( defined( $hit = $result->next_model ) ) { if ( defined( $hsp = $hit->next_domain ) ) { - is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); - is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); - is( $hsp->query->start, 124, 'Check for query alifrom value' ); - is( $hsp->query->end, 194, 'Check for query ali to value' ); - is( $hsp->score, 75.5, 'Check for hsp score' ); + is( $hsp->hit->start, 1, 'Check for hit hmmfrom value' ); + is( $hsp->hit->end, 77, 'Check for hit hmm to value' ); + is( $hsp->query->start, 124, 'Check for query alifrom value' ); + is( $hsp->query->end, 194, 'Check for query ali to value' ); + is( $hsp->score, 75.5, 'Check for hsp score' ); float_is( $hsp->evalue, 1.1e-18, 'Check for hsp c-Evalue' ); is( $hsp->query_string, 'LFVGALKDDHDEQSIRDYFQHFGNIVDINIVID-----KETGKKRGFAFVEFDDYDPVDKVVL-QKQHQLNGKMVDV', @@ -503,6 +504,10 @@ while ( $result = $searchio->next_result ) { is( $hsp->query->end, 40, 'Check for query hmm to value' ); is( $hsp->score, -4.3, 'Check for hsp score' ); float_is( $hsp->evalue, 1, 'Check for hsp c-Evalue' ); + is( $hsp->consensus_structure, + '', + 'Check for consensus structure string' + ); is( $hsp->query_string, 'laallAl', 'Check for query string' @@ -567,6 +572,10 @@ while ( $result = $searchio->next_result ) { is( $hsp->query->end, 1021, 'Check for query hmm to value' ); is( $hsp->score, 616.6, 'Check for hsp score' ); float_is( $hsp->evalue, 3.9e-189, 'Check for hsp c-Evalue' ); + is( $hsp->consensus_structure, + 'S-TTEEEEEEEETTSEEEEEEEESTTS-HHHHHHHHHHHHHHHGGGS-HHHHHH-EEEEEEECCECEEEEEEESSSTS-HHHHHHHHHHCTHHHHHTSTTEEEEEESS.--EEEEEEE-HHHHHCTT--HHHHHHHHHHHSSB-EEEECTT-SB-EEEE-SB---SCCHHCT-EEEETTSEEEEHHHCEEEEEEESSSS-EEEETTCEEEEEEEEEETTSBHHHHHHHHHHHHHCCGGGSSTTEEEEEEEESHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSSHCCCHHHHHHHHHHHHHHHHHHHHTT--EEHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCSS-HHHHHHHHHHHHCCHHHHHHHHHHHHCCGGGGSBHHHHHHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHCS----TT-CC..............................CHHHHHHHHHHHHHHHHHHHHHHHHHSCHHHHHHHHHHHHH.HHHHHCCS-BESS----TSEEEEEEE-STTC-HHHHHHHHHHHHHHHH...TTTTEEEEEEEESESSSS..E........CTTEEEEEEEE--CTTS-SCCCSHHHHHHHHHHHC.CTSTSSEEEEEE-SSSCCCSSSSSEEEEEEE.TSSSCHHHHHHHHHHHHHHHCCSTTEECEEESS-S-EEEEEEEE-HHHHHHCTB-HHHHHHHHHHHHT-..EEEEEEEETTE...EEEEEEEE-GGGSSSGGGGCC-EEEETTSE.EEECGGCEEEEEEEE-SEEEEETTCEEEEEEEEESTTS...-HHHHHHHHHHCCTT..SSTTEEEEEECHHHHHHHHCCCHHHHHHHHHHHHHHHHHHHCTSSSTCHHHHTTHHHHHHHHHHHHHHTT--BSHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCTTTBHHHHHHHHHHHHCHHHHHHHHHHHHHCCHHHHTT-STTHHHHHHHHHHHHHHHHHHHHCHHHHHHHHHHHHH', + 'Check for consensus structure string' + ); is( $hsp->query_string, 'gldglkyvsSqSseglssitvtFedgtdidiArqqvqnrlqeaknkLPeevqepgiskiktssseilvlavtskdgsltktdlrdlaesnikdqlsrveGVgdvqliGgsekavriwldpqklaklgltltdvvsalkeqnvqvaaGqlegqqeelliraqgrlqsaediekiivksqdgskvrlrDvAkvelgaeeeriaatlngkpavllavkklpganaievvkavkekleelketlPegveivvvydttefvrasieeVvktlleaivLvvlvlflFLqnlratlipaiavPlsllgtfavlkalglsiNlltlfgLvlAiGlvvDdAiVvvEnverkleeegekpleaalksmkeiegalvaialvllavfvPilflgGveGklfrqfaltivlaillsvlvaltltPalcallLkarkeekek..............................gffrefnrlfdalerrYekllekvlrhravvllvalllvvg.slllfvripkeflPeedegvlvtsvqlppgvsleqtekvlkqvekilk...ekpevesvfavtGfafagdta........gqnsakvfisLkpekerkeeektvealierlrkel.ekikganvellapiqlreletlsgvrlelqvklfgddleaLseareqllaalkqlpeladvrseqqedepqlqvkidrekaaalGvsiadinetlstalgg..syvndfieegr...vvkvvvqleedlrsspedlkklyvrnkkgk.mvplsavakieeekgpnsierenglrsveisgevaegd...slgeaeeavekiakqvklPagvgiewtglseqeqeagnsllllvalalllvflvLaalyeslsdpllvlltvPlalvGallalllrglelsviaqvGlilliGlavkNailivefakelrekeglsleeAileaaklRLrPiLMTalaailGvlPLalstGaGselqqplgivvlGGlvtstvLtlllvPvlYvlva', 'Check for query string' -- 2.11.4.GIT