From dbb4291205e28b10ae566e6fbb6ac2c4858f8baf Mon Sep 17 00:00:00 2001 From: Michael Coleman Date: Mon, 28 Apr 2008 13:21:00 -0500 Subject: [PATCH] validate: add --no-saturation flag, back to binary search of FDR, bug fixes Add a few checks to give proper FDR calculation even under extreme circumstances (more decoys than reals). Return to binary search for FDR search. --- greylag_validate.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/greylag_validate.py b/greylag_validate.py index 72c5133..8c7fc8b 100755 --- a/greylag_validate.py +++ b/greylag_validate.py @@ -305,6 +305,8 @@ def PPV(reals, decoys): false_positives = 2*decoys true_positives = reals - decoys + if true_positives <= 0: + return 0 return float(true_positives) / (true_positives + false_positives) @@ -454,7 +456,7 @@ def try_fdr(fdr_guess, options, remaining_spectrum_info, saturation_spectrum_inf break remaining_spectrum_info = remaining_spectrum_info_1 - if saturation_spectrum_info: + if not options.no_saturation and saturation_spectrum_info: remaining_spectrum_info_1 = saturate(remaining_spectrum_info_1, saturation_spectrum_info) debug_fdr(remaining_spectrum_info_1) @@ -512,6 +514,8 @@ def search_adjusting_fdr(options, spectrum_info_0): break low_fdr, low_results = high_fdr, high_results high_fdr = min(1.0, high_fdr * FDR_INFLATION_FACTOR) + if high_fdr == 1.0: + break high_results = try_fdr(high_fdr, options, spectrum_info, spectrum_info_0) else: @@ -528,21 +532,19 @@ def search_adjusting_fdr(options, spectrum_info_0): assert high_fdr >= low_fdr if (high_fdr - low_fdr) <= CONVERGENCE_FACTOR*options.fdr: break - #guess_fdr = (high_fdr + low_fdr) / 2.0 - - # interpolation isn't always better? - #guess_fdr = (low_fdr - # + (((options.fdr - low_results[0]) - # / (high_results[0] - low_results[0])) - # * (high_fdr - low_fdr))) + # - seems to be slower than the naive method? # Bias is empirically determined. It's supposed to speed # convergence, but should not affect the results. # FIX: better method possible? - GUESS_BIAS = 1.15 - prop = ((options.fdr - low_results[0]) - / (high_results[0] - low_results[0])) - guess_fdr = low_fdr + prop**GUESS_BIAS * (high_fdr - low_fdr) + #GUESS_BIAS = 1.15 + #prop = ((options.fdr - low_results[0]) + # / (high_results[0] - low_results[0])) + #prop = min(1, max(0, prop)) + #guess_fdr = low_fdr + prop**GUESS_BIAS * (high_fdr - low_fdr) + + #### + guess_fdr = (high_fdr + low_fdr) / 2.0 guess_results = try_fdr(guess_fdr, options, spectrum_info, spectrum_info_0) @@ -603,6 +605,8 @@ def main(args=sys.argv[1:]): help="try to adjust internal FDR to achieve a final FDR closer to" " requested FDR (this takes longer, but may find more read ids in some" " cases)") + pa("--no-saturation", action="store_true", dest="no_saturation", + help="skip peptide saturation") pa("-v", "--verbose", action="store_true", dest="verbose", help="be verbose") pa("--output-peptides", dest="output_peptides", -- 2.11.4.GIT