Merge branch 'maint-0.4.5' into maint-0.4.6
[tor.git] / src / test / test_prob_distr.c
blob0eca435ab5c2236af39543d2fb675d297d47ea47
1 /* Copyright (c) 2018-2021, The Tor Project, Inc. */
2 /* See LICENSE for licensing information */
4 /**
5 * \file test_prob_distr.c
6 * \brief Test probability distributions.
7 * \detail
9 * For each probability distribution we do two kinds of tests:
11 * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions
12 * and the various relationships between them for each distribution. We also
13 * do deterministic tests on their sampling functions. Test vectors for
14 * these tests were computed from alternative implementations and were
15 * eyeballed to make sure they make sense
16 * (e.g. src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr
17 * with 200-bit precision and is then tested in test_logit_logistic()).
19 * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from
20 * the given distributions is distributed properly. The stochastic tests are
21 * slow and their false positive rate is not well suited for CI, so they are
22 * currently disabled-by-default and put into 'tests-slow'.
25 #define PROB_DISTR_PRIVATE
27 #include "orconfig.h"
29 #include "test/test.h"
31 #include "core/or/or.h"
33 #include "lib/math/prob_distr.h"
34 #include "lib/math/fp.h"
35 #include "lib/crypt_ops/crypto_rand.h"
36 #include "test/rng_test_helpers.h"
38 #include <float.h>
39 #include <math.h>
40 #include <stdbool.h>
41 #include <stddef.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
46 /**
47 * Return floor(d) converted to size_t, as a workaround for complaints
48 * under -Wbad-function-cast for (size_t)floor(d).
50 static size_t
51 floor_to_size_t(double d)
53 double integral_d = floor(d);
54 return (size_t)integral_d;
57 /**
58 * Return ceil(d) converted to size_t, as a workaround for complaints
59 * under -Wbad-function-cast for (size_t)ceil(d).
61 static size_t
62 ceil_to_size_t(double d)
64 double integral_d = ceil(d);
65 return (size_t)integral_d;
69 * Geometric(p) distribution, supported on {1, 2, 3, ...}.
71 * Compute the probability mass function Geom(n; p) of the number of
72 * trials before the first success when success has probability p.
74 static double
75 logpmf_geometric(unsigned n, double p)
77 /* This is actually a check against 1, but we do >= so that the compiler
78 does not raise a -Wfloat-equal */
79 if (p >= 1) {
80 if (n == 1)
81 return 0;
82 else
83 return -HUGE_VAL;
85 return (n - 1)*log1p(-p) + log(p);
88 /**
89 * Compute the logistic function, translated in output by 1/2:
90 * logistichalf(x) = logistic(x) - 1/2. Well-conditioned on the entire
91 * real plane, with maximum condition number 1 at 0.
93 * This implementation gives relative error bounded by 5 eps.
95 static double
96 logistichalf(double x)
99 * Rewrite this with the identity
101 * 1/(1 + e^{-x}) - 1/2
102 * = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x})
103 * = (1/2 - e^{-x}/2)/(1 + e^{-x})
104 * = (1 - e^{-x})/[2 (1 + e^{-x})]
105 * = -(e^{-x} - 1)/[2 (1 + e^{-x})],
107 * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))].
109 * Suppose exp has error d0, + has error d1, expm1 has error
110 * d2, and / has error d3, so we evaluate
112 * -(1 + d2) (1 + d3) (e^{-x} - 1)
113 * / [2 (1 + d1) (1 + (1 + d0) e^{-x})].
115 * In the denominator,
117 * 1 + (1 + d0) e^{-x}
118 * = 1 + e^{-x} + d0 e^{-x}
119 * = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})),
121 * so the relative error of the numerator is
123 * d' = d2 + d3 + d2 d3,
124 * and of the denominator,
125 * d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x})
126 * = d1 + d0 L(-x) + d0 d1 L(-x),
128 * where L(-x) is logistic(-x). By Lemma 1 the relative error
129 * of the quotient is bounded by
131 * 2|d2 + d3 + d2 d3 - d1 - d0 L(x) + d0 d1 L(x)|,
133 * Since 0 < L(x) < 1, this is bounded by
135 * 2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1|
136 * <= 4 eps + 2 eps^2.
138 if (x < log(DBL_EPSILON/8)) {
140 * Avoid overflow in e^{-x}. When x < log(eps/4), we
141 * we further have x < logit(eps/4), so that
142 * logistic(x) < eps/4. Hence the relative error of
143 * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and
144 * so the relative error of -1/2 from logistic(x) - 1/2
145 * is bounded by eps.
147 return -0.5;
148 } else {
149 return -expm1(-x)/(2*(1 + exp(-x)));
154 * Compute the log of the sum of the exps. Caller should arrange the
155 * array in descending order to minimize error because I don't want to
156 * deal with using temporary space and the one caller in this file
157 * arranges that anyway.
159 * Warning: This implementation does not handle infinite or NaN inputs
160 * sensibly, because I don't need that here at the moment. (NaN, or
161 * -inf and +inf together, should yield NaN; +inf and finite should
162 * yield +inf; otherwise all -inf should be ignored because exp(-inf) =
163 * 0.)
165 static double
166 logsumexp(double *A, size_t n)
168 double maximum, sum;
169 size_t i;
171 if (n == 0)
172 return log(0);
174 maximum = A[0];
175 for (i = 1; i < n; i++) {
176 if (A[i] > maximum)
177 maximum = A[i];
180 sum = 0;
181 for (i = n; i --> 0;)
182 sum += exp(A[i] - maximum);
184 return log(sum) + maximum;
188 * Compute log(1 - e^x). Defined only for negative x so that e^x < 1.
189 * This is the complement of a probability in log space.
191 static double
192 log1mexp(double x)
196 * We want to compute log on [0, 1/2) but log1p on [1/2, +inf),
197 * so partition x at -log(2) = log(1/2).
199 if (-log(2) < x)
200 return log(-expm1(x));
201 else
202 return log1p(-exp(x));
206 * Tests of numerical errors in computing logit, logistic, and the
207 * various cdfs, sfs, icdfs, and isfs.
210 #define arraycount(A) (sizeof(A)/sizeof(A[0]))
212 /** Return relative error between <b>actual</b> and <b>expected</b>.
213 * Special cases: If <b>expected</b> is zero or infinite, return 1 if
214 * <b>actual</b> is equal to <b>expected</b> and 0 if not, since the
215 * usual notion of relative error is undefined but we only use this
216 * for testing relerr(e, a) <= bound. If either is NaN, return NaN,
217 * which has the property that NaN <= bound is false no matter what
218 * bound is.
220 * Beware: if you test !(relerr(e, a) > bound), then then the result
221 * is true when a is NaN because NaN > bound is false too. See
222 * CHECK_RELERR for correct use to decide when to report failure.
224 static double
225 relerr(double expected, double actual)
228 * To silence -Wfloat-equal, we have to test for equality using
229 * inequalities: we have (fabs(expected) <= 0) iff (expected == 0),
230 * and (actual <= expected && actual >= expected) iff actual ==
231 * expected whether expected is zero or infinite.
233 if (fabs(expected) <= 0 || tor_isinf(expected)) {
234 if (actual <= expected && actual >= expected)
235 return 0;
236 else
237 return 1;
238 } else {
239 return fabs((expected - actual)/expected);
243 /** Check that relative error of <b>expected</b> and <b>actual</b> is within
244 * <b>relerr_bound</b>. Caller must arrange to have i and relerr_bound in
245 * scope. */
246 #define CHECK_RELERR(expected, actual) do { \
247 double check_expected = (expected); \
248 double check_actual = (actual); \
249 const char *str_expected = #expected; \
250 const char *str_actual = #actual; \
251 double check_relerr = relerr(expected, actual); \
252 if (!(relerr(check_expected, check_actual) <= relerr_bound)) { \
253 log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)" \
254 " = %.17e > %.17e\n", \
255 __func__, __LINE__, (unsigned) i, \
256 str_expected, check_expected, \
257 str_actual, check_actual, \
258 check_relerr, relerr_bound); \
259 ok = false; \
261 } while (0)
263 /* Check that a <= b.
264 * Caller must arrange to have i in scope. */
265 #define CHECK_LE(a, b) do { \
266 double check_a = (a); \
267 double check_b = (b); \
268 const char *str_a = #a; \
269 const char *str_b = #b; \
270 if (!(check_a <= check_b)) { \
271 log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n", \
272 __func__, __LINE__, (unsigned) i, \
273 str_a, check_a, str_b, check_b); \
274 ok = false; \
276 } while (0)
279 * Test the logit and logistic functions. Confirm that they agree with
280 * the cdf, sf, icdf, and isf of the standard Logistic distribution.
281 * Confirm that the sampler for the standard logistic distribution maps
282 * [0, 1] into the right subinterval for the inverse transform, for
283 * this implementation.
285 static void
286 test_logit_logistic(void *arg)
288 (void) arg;
290 static const struct {
291 double x; /* x = logit(p) */
292 double p; /* p = logistic(x) */
293 double phalf; /* p - 1/2 = logistic(x) - 1/2 */
294 } cases[] = {
295 { -HUGE_VAL, 0, -0.5 },
296 { -1000, 0, -0.5 },
297 { -710, 4.47628622567513e-309, -0.5 },
298 { -708, 3.307553003638408e-308, -0.5 },
299 { -2, .11920292202211755, -.3807970779778824 },
300 { -1.0000001, .2689414017088022, -.23105859829119776 },
301 { -1, .2689414213699951, -.23105857863000487 },
302 { -0.9999999, .26894144103118883, -.2310585589688111 },
303 /* see src/test/prob_distr_mpfr_ref.c for computation */
304 { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 },
305 { -4.000000000533334e-5, .49999, -.00001 },
306 { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 },
307 { -4e-9, .499999999, -1e-9 },
308 { -4e-16, .5, -1e-16 },
309 { -4e-300, .5, -1e-300 },
310 { 0, .5, 0 },
311 { 4e-300, .5, 1e-300 },
312 { 4e-16, .5, 1e-16 },
313 { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 },
314 { 4e-9, .500000001, 1e-9 },
315 { 4.0000000005333336e-5, .50001, .00001 },
316 { 8.000042667076272e-3, .502, .002 },
317 { 0.9999999, .7310585589688111, .2310585589688111 },
318 { 1, .7310585786300049, .23105857863000487 },
319 { 1.0000001, .7310585982911977, .23105859829119774 },
320 { 2, .8807970779778823, .3807970779778824 },
321 { 708, 1, .5 },
322 { 710, 1, .5 },
323 { 1000, 1, .5 },
324 { HUGE_VAL, 1, .5 },
326 double relerr_bound = 3e-15; /* >10eps */
327 size_t i;
328 bool ok = true;
330 for (i = 0; i < arraycount(cases); i++) {
331 double x = cases[i].x;
332 double p = cases[i].p;
333 double phalf = cases[i].phalf;
336 * cdf is logistic, icdf is logit, and symmetry for
337 * sf/isf.
339 CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1));
340 CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1));
341 CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1));
342 CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1));
344 CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2));
345 CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2));
346 CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2);
347 CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2);
349 CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5));
350 CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5));
351 CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2);
352 CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2);
354 CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2));
355 CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2));
358 * For p near 0 and p near 1/2, the arithmetic of
359 * translating by 1 loses precision.
361 if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) {
362 CHECK_RELERR(icdf_logistic(p, 0, 1),
363 (icdf_logistic(p, 1, 2) - 1)/2);
364 CHECK_RELERR(isf_logistic(p, 0, 1),
365 (isf_logistic(p, 1, 2) - 1)/2);
368 CHECK_RELERR(p, logistic(x));
369 CHECK_RELERR(phalf, logistichalf(x));
372 * On the interior floating-point numbers, either logit or
373 * logithalf had better give the correct answer.
375 * For probabilities near 0, we can get much finer resolution with
376 * logit, and for probabilities near 1/2, we can get much finer
377 * resolution with logithalf by representing them using p - 1/2.
379 * E.g., we can write -.00001 for phalf, and .49999 for p, but the
380 * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64
381 * arithmetic. So test logit(.49999) which should give the same
382 * answer as logithalf(-1.0000000000010001e-5), namely
383 * -4.000000000537333e-5, and also test logithalf(-.00001) which
384 * gives -4.000000000533334e-5 instead -- but don't expect
385 * logit(.49999) to give -4.000000000533334e-5 even though it looks
386 * like 1/2 - .00001.
388 * A naive implementation of logit will just use log(p/(1 - p)) and
389 * give the answer -4.000000000551673e-05 for .49999, which is
390 * wrong in a lot of digits, which happens because log is
391 * ill-conditioned near 1 and thus amplifies whatever relative
392 * error we made in computing p/(1 - p).
394 if ((0 < p && p < 1) || tor_isinf(x)) {
395 if (phalf >= p - 0.5 && phalf <= p - 0.5)
396 CHECK_RELERR(x, logit(p));
397 if (p >= 0.5 + phalf && p <= 0.5 + phalf)
398 CHECK_RELERR(x, logithalf(phalf));
401 CHECK_RELERR(-phalf, logistichalf(-x));
402 if (fabs(phalf) < 0.5 || tor_isinf(x))
403 CHECK_RELERR(-x, logithalf(-phalf));
404 if (p < 1 || tor_isinf(x)) {
405 CHECK_RELERR(1 - p, logistic(-x));
406 if (p > .75 || tor_isinf(x))
407 CHECK_RELERR(-x, logit(1 - p));
408 } else {
409 CHECK_LE(logistic(-x), 1e-300);
413 for (i = 0; i <= 100; i++) {
414 double p0 = (double)i/100;
416 CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0));
417 CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0));
418 CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))),
419 sample_logistic(0, 1, p0));
420 CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))),
421 sample_logistic(1, 1, p0));
424 if (!ok)
425 printf("fail logit/logistic / logistic cdf/sf\n");
427 tt_assert(ok);
429 done:
434 * Test the cdf, sf, icdf, and isf of the LogLogistic distribution.
436 static void
437 test_log_logistic(void *arg)
439 (void) arg;
441 static const struct {
442 /* x is a point in the support of the LogLogistic distribution */
443 double x;
444 /* 'p' is the probability that a random variable X for a given LogLogistic
445 * probability distribution will take value less-or-equal to x */
446 double p;
447 /* 'np' is the probability that a random variable X for a given LogLogistic
448 * probability distribution will take value greater-or-equal to x. */
449 double np;
450 } cases[] = {
451 { 0, 0, 1 },
452 { 1e-300, 1e-300, 1 },
453 { 1e-17, 1e-17, 1 },
454 { 1e-15, 1e-15, .999999999999999 },
455 { .1, .09090909090909091, .90909090909090909 },
456 { .25, .2, .8 },
457 { .5, .33333333333333333, .66666666666666667 },
458 { .75, .42857142857142855, .5714285714285714 },
459 { .9999, .49997499874993756, .5000250012500626 },
460 { .99999999, .49999999749999996, .5000000025 },
461 { .999999999999999, .49999999999999994, .5000000000000002 },
462 { 1, .5, .5 },
464 double relerr_bound = 3e-15;
465 size_t i;
466 bool ok = true;
468 for (i = 0; i < arraycount(cases); i++) {
469 double x = cases[i].x;
470 double p = cases[i].p;
471 double np = cases[i].np;
473 CHECK_RELERR(p, cdf_log_logistic(x, 1, 1));
474 CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1));
475 CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1));
476 CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2));
477 CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2));
478 CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2));
479 if (2*sqrt(DBL_MIN) < x) {
480 CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5));
481 CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5));
482 CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5));
485 CHECK_RELERR(np, sf_log_logistic(x, 1, 1));
486 CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1));
487 CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1));
488 CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2));
489 CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2));
490 CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2));
491 if (2*sqrt(DBL_MIN) < x) {
492 CHECK_RELERR(np, sf_log_logistic(x*x, 1, .5));
493 CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5));
494 CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5));
497 CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1));
498 CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1));
499 CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1));
500 CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2));
501 CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2));
502 CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2));
503 if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
504 CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5));
505 CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5));
506 CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5));
509 CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1));
510 CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1));
511 CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1));
512 CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2));
513 CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2));
514 CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2));
515 if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
516 CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5));
517 CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5));
518 CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5));
521 CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
522 CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1));
523 CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1));
524 CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
525 CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2));
526 CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2));
527 CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2));
528 CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5));
529 CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5));
531 if (np < .9) {
532 CHECK_RELERR(x, isf_log_logistic(np, 1, 1));
533 CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1));
534 CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1));
535 CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2));
536 CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2));
537 CHECK_RELERR(sqrt(x)*2, isf_log_logistic(np, 2, 2));
538 CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5));
539 CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5));
540 CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5));
542 CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1));
543 CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1));
544 CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1));
545 CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2));
546 CHECK_RELERR(1/(2*sqrt(x)),
547 icdf_log_logistic(np, .5, 2));
548 CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2));
549 CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5));
550 CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5));
551 CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5));
554 CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1));
555 CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1));
556 CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1));
557 CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2));
558 CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2));
559 CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2));
560 CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5));
561 CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5));
562 CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5));
565 for (i = 0; i <= 100; i++) {
566 double p0 = (double)i/100;
568 CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0));
569 CHECK_RELERR((1 - 0.5*p0)/(0.5*p0),
570 sample_log_logistic(1, p0));
573 if (!ok)
574 printf("fail log logistic cdf/sf\n");
576 tt_assert(ok);
578 done:
583 * Test the cdf, sf, icdf, isf of the Weibull distribution.
585 static void
586 test_weibull(void *arg)
588 (void) arg;
590 static const struct {
591 /* x is a point in the support of the Weibull distribution */
592 double x;
593 /* 'p' is the probability that a random variable X for a given Weibull
594 * probability distribution will take value less-or-equal to x */
595 double p;
596 /* 'np' is the probability that a random variable X for a given Weibull
597 * probability distribution will take value greater-or-equal to x. */
598 double np;
599 } cases[] = {
600 { 0, 0, 1 },
601 { 1e-300, 1e-300, 1 },
602 { 1e-17, 1e-17, 1 },
603 { .1, .09516258196404043, .9048374180359595 },
604 { .5, .3934693402873666, .6065306597126334 },
605 { .6931471805599453, .5, .5 },
606 { 1, .6321205588285577, .36787944117144233 },
607 { 10, .9999546000702375, 4.5399929762484854e-5 },
608 { 36, .9999999999999998, 2.319522830243569e-16 },
609 { 37, .9999999999999999, 8.533047625744066e-17 },
610 { 38, 1, 3.1391327920480296e-17 },
611 { 100, 1, 3.720075976020836e-44 },
612 { 708, 1, 3.307553003638408e-308 },
613 { 710, 1, 4.47628622567513e-309 },
614 { 1000, 1, 0 },
615 { HUGE_VAL, 1, 0 },
617 double relerr_bound = 3e-15;
618 size_t i;
619 bool ok = true;
621 for (i = 0; i < arraycount(cases); i++) {
622 double x = cases[i].x;
623 double p = cases[i].p;
624 double np = cases[i].np;
626 CHECK_RELERR(p, cdf_weibull(x, 1, 1));
627 CHECK_RELERR(p, cdf_weibull(x/2, .5, 1));
628 CHECK_RELERR(p, cdf_weibull(x*2, 2, 1));
629 /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits. */
630 if (x <= 0 ||
631 sqrt(DBL_MIN) <= x) {
632 CHECK_RELERR(p, cdf_weibull(x*x, 1, .5));
633 CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5));
634 CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5));
636 CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2));
637 CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2));
638 CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2));
639 CHECK_RELERR(np, sf_weibull(x, 1, 1));
640 CHECK_RELERR(np, sf_weibull(x/2, .5, 1));
641 CHECK_RELERR(np, sf_weibull(x*2, 2, 1));
642 CHECK_RELERR(np, sf_weibull(x*x, 1, .5));
643 CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5));
644 CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5));
645 if (x >= 10) {
647 * exp amplifies the error of sqrt(x)^2
648 * proportionally to exp(x); for large inputs
649 * this is significant.
651 double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON));
652 relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON;
653 if (relerr_bound < 3e-15)
655 * The tests are written only to 16
656 * decimal places anyway even if your
657 * `double' is, say, i387 binary80, for
658 * whatever reason.
660 relerr_bound = 3e-15;
661 CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2));
662 CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2));
663 CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2));
666 if (p <= 0.75) {
668 * For p near 1, not enough precision near 1 to
669 * recover x.
671 CHECK_RELERR(x, icdf_weibull(p, 1, 1));
672 CHECK_RELERR(x/2, icdf_weibull(p, .5, 1));
673 CHECK_RELERR(x*2, icdf_weibull(p, 2, 1));
675 if (p >= 0.25 && !tor_isinf(x) && np > 0) {
677 * For p near 0, not enough precision in np
678 * near 1 to recover x. For 0, isf gives inf,
679 * even if p is precise enough for the icdf to
680 * work.
682 CHECK_RELERR(x, isf_weibull(np, 1, 1));
683 CHECK_RELERR(x/2, isf_weibull(np, .5, 1));
684 CHECK_RELERR(x*2, isf_weibull(np, 2, 1));
688 for (i = 0; i <= 100; i++) {
689 double p0 = (double)i/100;
691 CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2));
692 CHECK_RELERR(3*sqrt(-log1p(-p0/2)),
693 sample_weibull(1, p0, 3, 2));
696 if (!ok)
697 printf("fail Weibull cdf/sf\n");
699 tt_assert(ok);
701 done:
706 * Test the cdf, sf, icdf, and isf of the generalized Pareto
707 * distribution.
709 static void
710 test_genpareto(void *arg)
712 (void) arg;
714 struct {
715 /* xi is the 'xi' parameter of the generalized Pareto distribution, and the
716 * rest are the same as in the above tests */
717 double xi, x, p, np;
718 } cases[] = {
719 { 0, 0, 0, 1 },
720 { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 },
721 { 1e-300, .1, .09516258196404043, .9048374180359595 },
722 { 1e-300, 1, .6321205588285577, .36787944117144233 },
723 { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 },
724 { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 },
725 { 1e-16, 1e-200, 9.999999999999998e-201, 1 },
726 { 1e-16, 1e-16, 1e-16, 1 },
727 { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 },
728 { 1e-16, .1, .09516258196404043, .9048374180359595 },
729 { 1e-16, 1, .6321205588285577, .36787944117144233 },
730 { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 },
731 { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 },
732 { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 },
733 { 1, 1e-300, 1e-300, 1 },
734 { 1, 1e-16, 1e-16, .9999999999999999 },
735 { 1, .1, .09090909090909091, .9090909090909091 },
736 { 1, 1, .5, .5 },
737 { 1, 10, .9090909090909091, .0909090909090909 },
738 { 1, 100, .9900990099009901, .0099009900990099 },
739 { 1, 1000, .999000999000999, 9.990009990009992e-4 },
740 { 10, 1e-300, 1e-300, 1 },
741 { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 },
742 { 10, .1, .06696700846319258, .9330329915368074 },
743 { 10, 1, .21320655780322778, .7867934421967723 },
744 { 10, 10, .3696701667040189, .6303298332959811 },
745 { 10, 100, .49886285755007337, .5011371424499267 },
746 { 10, 1000, .6018968102992647, .3981031897007353 },
748 double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 };
749 size_t i, j;
750 double relerr_bound = 3e-15;
751 bool ok = true;
753 for (i = 0; i < arraycount(cases); i++) {
754 double xi = cases[i].xi;
755 double x = cases[i].x;
756 double p = cases[i].p;
757 double np = cases[i].np;
759 CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi));
760 CHECK_RELERR(p, cdf_genpareto(x*2, 0, 2, xi));
761 CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi));
762 CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi));
763 CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi));
764 CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi));
766 if (p < .5) {
767 CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi));
768 CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi));
769 CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi));
771 if (np < .5) {
772 CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi));
773 CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi));
774 CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi));
778 for (i = 0; i < arraycount(xi_array); i++) {
779 for (j = 0; j <= 100; j++) {
780 double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100);
782 /* This is actually a check against 0, but we do <= so that the compiler
783 does not raise a -Wfloat-equal */
784 if (fabs(xi_array[i]) <= 0) {
786 * When xi == 0, the generalized Pareto
787 * distribution reduces to an
788 * exponential distribution.
790 CHECK_RELERR(-log(p0/2),
791 sample_genpareto(0, p0, 0));
792 CHECK_RELERR(-log1p(-p0/2),
793 sample_genpareto(1, p0, 0));
794 } else {
795 CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i],
796 sample_genpareto(0, p0, xi_array[i]));
797 CHECK_RELERR((j == 0 ? DBL_MIN :
798 expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]),
799 sample_genpareto(1, p0, xi_array[i]));
802 CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]),
803 sample_genpareto(0, p0, xi_array[i]));
804 CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]),
805 sample_genpareto(1, p0, xi_array[i]));
809 tt_assert(ok);
811 done:
816 * Test the deterministic sampler for uniform distribution on [a, b].
818 * This currently only tests whether the outcome lies within [a, b].
820 static void
821 test_uniform_interval(void *arg)
823 (void) arg;
824 struct {
825 /* Sample from a uniform distribution with parameters 'a' and 'b', using
826 * 't' as the sampling index. */
827 double t, a, b;
828 } cases[] = {
829 { 0, 0, 0 },
830 { 0, 0, 1 },
831 { 0, 1.0000000000000007, 3.999999999999995 },
832 { 0, 4000, 4000 },
833 { 0.42475836677491291, 4000, 4000 },
834 { 0, -DBL_MAX, DBL_MAX },
835 { 0.25, -DBL_MAX, DBL_MAX },
836 { 0.5, -DBL_MAX, DBL_MAX },
838 size_t i = 0;
839 bool ok = true;
841 for (i = 0; i < arraycount(cases); i++) {
842 double t = cases[i].t;
843 double a = cases[i].a;
844 double b = cases[i].b;
846 CHECK_LE(a, sample_uniform_interval(t, a, b));
847 CHECK_LE(sample_uniform_interval(t, a, b), b);
849 CHECK_LE(a, sample_uniform_interval(1 - t, a, b));
850 CHECK_LE(sample_uniform_interval(1 - t, a, b), b);
852 CHECK_LE(sample_uniform_interval(t, -b, -a), -a);
853 CHECK_LE(-b, sample_uniform_interval(t, -b, -a));
855 CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a);
856 CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a));
859 tt_assert(ok);
861 done:
865 /********************** Stochastic tests ****************************/
868 * Psi test, sometimes also called G-test. The psi test statistic,
869 * suitably scaled, has chi^2 distribution, but the psi test tends to
870 * have better statistical power in practice to detect deviations than
871 * the chi^2 test does. (The chi^2 test statistic is the first term of
872 * the Taylor expansion of the psi test statistic.) The psi test is
873 * generic, for any CDF; particular distributions might have higher-
874 * power tests to distinguish them from predictable deviations or bugs.
876 * We choose the psi critical value so that a single psi test has
877 * probability below alpha = 1% of spuriously failing even if all the
878 * code is correct. But the false positive rate for a suite of n tests
879 * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n. For n = 10,
880 * this is about 10%, and for n = 100 it is well over 50%.
882 * Given that these tests will run with every CI job, we want to drive down the
883 * false positive rate. We can drive it down by running each test four times,
884 * and accepting it if it passes at least once; in that case, it is as if we
885 * used Binom(4; 2, alpha) = alpha^4 as the false positive rate for each test,
886 * and for n = 10 tests, it would be 9.99999959506e-08. If each CI build has 14
887 * jobs, then the chance of a CI build failing is 1.39999903326e-06, which
888 * means that a CI build will break with probability 50% after about 495106
889 * builds.
891 * The critical value for a chi^2 distribution with 100 degrees of
892 * freedom and false positive rate alpha = 1% was taken from:
894 * NIST/SEMATECH e-Handbook of Statistical Methods, Section
895 * 1.3.6.7.4 `Critical Values of the Chi-Square Distribution',
896 * <https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
897 * retrieved 2018-10-28.
900 static const size_t NSAMPLES = 100000;
901 /* Number of chances we give to the test to succeed. */
902 static const unsigned NTRIALS = 4;
903 /* Number of times we want the test to pass per NTRIALS. */
904 static const unsigned NPASSES_MIN = 1;
906 #define PSI_DF 100 /* degrees of freedom */
907 static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */
910 * Perform a psi test on an array of sample counts, C, adding up to N
911 * samples, and an array of log expected probabilities, logP,
912 * representing the null hypothesis for the distribution of samples
913 * counted. Return false if the psi test rejects the null hypothesis,
914 * true if otherwise.
916 static bool
917 psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N)
919 double psi = 0;
920 double c = 0; /* Kahan compensation */
921 double t, u;
922 size_t i;
924 for (i = 0; i < PSI_DF; i++) {
926 * c*log(c/(n*p)) = (1/n) * f*log(f/p) where f = c/n is
927 * the frequency, and f*log(f/p) ---> 0 as f ---> 0, so
928 * this is a reasonable choice. Further, any mass that
929 * _fails_ to turn up in this bin will inflate another
930 * bin instead, so we don't really lose anything by
931 * ignoring empty bins even if they have high
932 * probability.
934 if (C[i] == 0)
935 continue;
936 t = C[i]*(log((double)C[i]/N) - logP[i]) - c;
937 u = psi + t;
938 c = (u - psi) - t;
939 psi = u;
941 psi *= 2;
943 return psi <= PSI_CRITICAL;
946 static bool
947 test_stochastic_geometric_impl(double p)
949 const struct geometric_t geometric = {
950 .base = GEOMETRIC(geometric),
951 .p = p,
953 double logP[PSI_DF] = {0};
954 unsigned ntry = NTRIALS, npass = 0;
955 unsigned i;
956 size_t j;
958 /* Compute logP[i] = Geom(i + 1; p). */
959 for (i = 0; i < PSI_DF - 1; i++)
960 logP[i] = logpmf_geometric(i + 1, p);
962 /* Compute logP[n-1] = log (1 - (P[0] + P[1] + ... + P[n-2])). */
963 logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1));
965 while (ntry --> 0) {
966 size_t C[PSI_DF] = {0};
968 for (j = 0; j < NSAMPLES; j++) {
969 double n_tmp = dist_sample(&geometric.base);
971 /* Must be an integer. (XXX -Wfloat-equal) */
972 tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp);
974 /* Must be a positive integer. */
975 tor_assert(n_tmp >= 1);
977 /* Probability of getting a value in the billions is negligible. */
978 tor_assert(n_tmp <= (double)UINT_MAX);
980 unsigned n = (unsigned) n_tmp;
982 if (n > PSI_DF)
983 n = PSI_DF;
984 C[n - 1]++;
987 if (psi_test(C, logP, NSAMPLES)) {
988 if (++npass >= NPASSES_MIN)
989 break;
993 if (npass >= NPASSES_MIN) {
994 /* printf("pass %s sampler\n", "geometric"); */
995 return true;
996 } else {
997 printf("fail %s sampler\n", "geometric");
998 return false;
1003 * Divide the support of <b>dist</b> into histogram bins in <b>logP</b>. Start
1004 * at the 1st percentile and ending at the 99th percentile. Pick the bin
1005 * boundaries using linear interpolation so that they are uniformly spaced.
1007 * In each bin logP[i] we insert the expected log-probability that a sampled
1008 * value will fall into that bin. We will use this as the null hypothesis of
1009 * the psi test.
1011 * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_n =
1012 * +inf, and x_i = i*(hi - lo)/(n - 2).
1014 static void
1015 bin_cdfs(const struct dist_t *dist, double lo, double hi, double *logP,
1016 size_t n)
1018 #define CDF(x) dist_cdf(dist, x)
1019 #define SF(x) dist_sf(dist, x)
1020 const double w = (hi - lo)/(n - 2);
1021 double halfway = dist_icdf(dist, 0.5);
1022 double x_0, x_1;
1023 size_t i;
1024 size_t n2 = ceil_to_size_t((halfway - lo)/w);
1026 tor_assert(lo <= halfway);
1027 tor_assert(halfway <= hi);
1028 tor_assert(n2 <= n);
1030 x_1 = lo;
1031 logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */
1032 for (i = 1; i < n2; i++) {
1033 x_0 = x_1;
1034 /* do the linear interpolation */
1035 x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w);
1036 /* set the expected log-probability */
1037 logP[i] = log(CDF(x_1) - CDF(x_0));
1039 x_0 = hi;
1040 logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */
1042 /* In this loop we are filling out the high part of the array. We are using
1043 * SF because in these cases the CDF is near 1 where precision is lower. So
1044 * instead we are using SF near 0 where the precision is higher. We have
1045 * SF(t) = 1 - CDF(t). */
1046 for (i = 1; i < n - n2; i++) {
1047 x_1 = x_0;
1048 /* do the linear interpolation */
1049 x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w);
1050 /* set the expected log-probability */
1051 logP[n - i - 1] = log(SF(x_0) - SF(x_1));
1053 #undef SF
1054 #undef CDF
1058 * Draw NSAMPLES samples from dist, counting the number of samples x in
1059 * the ith bin C[i] if x_{i-1} <= x < x_i, where x_-1 = -inf, x_n =
1060 * +inf, and x_i = i*(hi - lo)/(n - 2).
1062 static void
1063 bin_samples(const struct dist_t *dist, double lo, double hi, size_t *C,
1064 size_t n)
1066 const double w = (hi - lo)/(n - 2);
1067 size_t i;
1069 for (i = 0; i < NSAMPLES; i++) {
1070 double x = dist_sample(dist);
1071 size_t bin;
1073 if (x < lo)
1074 bin = 0;
1075 else if (x < hi)
1076 bin = 1 + floor_to_size_t((x - lo)/w);
1077 else
1078 bin = n - 1;
1079 tor_assert(bin < n);
1080 C[bin]++;
1085 * Carry out a Psi test on <b>dist</b>.
1087 * Sample NSAMPLES from dist, putting them in bins from -inf to lo to
1088 * hi to +inf, and apply up to two psi tests. True if at least one psi
1089 * test passes; false if not. False positive rate should be bounded by
1090 * 0.01^2 = 0.0001.
1092 static bool
1093 test_psi_dist_sample(const struct dist_t *dist)
1095 double logP[PSI_DF] = {0};
1096 unsigned ntry = NTRIALS, npass = 0;
1097 double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2));
1098 double hi = dist_isf(dist, 1/(double)(PSI_DF + 2));
1100 /* Create the null hypothesis in logP */
1101 bin_cdfs(dist, lo, hi, logP, PSI_DF);
1103 /* Now run the test */
1104 while (ntry --> 0) {
1105 size_t C[PSI_DF] = {0};
1106 bin_samples(dist, lo, hi, C, PSI_DF);
1107 if (psi_test(C, logP, NSAMPLES)) {
1108 if (++npass >= NPASSES_MIN)
1109 break;
1113 /* Did we fail or succeed? */
1114 if (npass >= NPASSES_MIN) {
1115 /* printf("pass %s sampler\n", dist_name(dist));*/
1116 return true;
1117 } else {
1118 printf("fail %s sampler\n", dist_name(dist));
1119 return false;
1123 static void
1124 write_stochastic_warning(void)
1126 if (tinytest_cur_test_has_failed()) {
1127 printf("\n"
1128 "NOTE: This is a stochastic test, and we expect it to fail from\n"
1129 "time to time, with some low probability. If you see it fail more\n"
1130 "than one trial in 100, though, please tell us.\n\n");
1134 static void
1135 test_stochastic_uniform(void *arg)
1137 (void) arg;
1139 const struct uniform_t uniform01 = {
1140 .base = UNIFORM(uniform01),
1141 .a = 0,
1142 .b = 1,
1144 const struct uniform_t uniform_pos = {
1145 .base = UNIFORM(uniform_pos),
1146 .a = 1.23,
1147 .b = 4.56,
1149 const struct uniform_t uniform_neg = {
1150 .base = UNIFORM(uniform_neg),
1151 .a = -10,
1152 .b = -1,
1154 const struct uniform_t uniform_cross = {
1155 .base = UNIFORM(uniform_cross),
1156 .a = -1.23,
1157 .b = 4.56,
1159 const struct uniform_t uniform_subnormal = {
1160 .base = UNIFORM(uniform_subnormal),
1161 .a = 4e-324,
1162 .b = 4e-310,
1164 const struct uniform_t uniform_subnormal_cross = {
1165 .base = UNIFORM(uniform_subnormal_cross),
1166 .a = -4e-324,
1167 .b = 4e-310,
1169 bool ok = true, tests_failed = true;
1171 testing_enable_reproducible_rng();
1173 ok &= test_psi_dist_sample(&uniform01.base);
1174 ok &= test_psi_dist_sample(&uniform_pos.base);
1175 ok &= test_psi_dist_sample(&uniform_neg.base);
1176 ok &= test_psi_dist_sample(&uniform_cross.base);
1177 ok &= test_psi_dist_sample(&uniform_subnormal.base);
1178 ok &= test_psi_dist_sample(&uniform_subnormal_cross.base);
1180 tt_assert(ok);
1182 tests_failed = false;
1184 done:
1185 if (tests_failed) {
1186 write_stochastic_warning();
1188 testing_disable_reproducible_rng();
1191 static bool
1192 test_stochastic_logistic_impl(double mu, double sigma)
1194 const struct logistic_t dist = {
1195 .base = LOGISTIC(dist),
1196 .mu = mu,
1197 .sigma = sigma,
1200 /* XXX Consider some fancier logistic test. */
1201 return test_psi_dist_sample(&dist.base);
1204 static bool
1205 test_stochastic_log_logistic_impl(double alpha, double beta)
1207 const struct log_logistic_t dist = {
1208 .base = LOG_LOGISTIC(dist),
1209 .alpha = alpha,
1210 .beta = beta,
1213 /* XXX Consider some fancier log logistic test. */
1214 return test_psi_dist_sample(&dist.base);
1217 static bool
1218 test_stochastic_weibull_impl(double lambda, double k)
1220 const struct weibull_t dist = {
1221 .base = WEIBULL(dist),
1222 .lambda = lambda,
1223 .k = k,
1226 // clang-format off
1228 * XXX Consider applying a Tiku-Singh test:
1230 * M.L. Tiku and M. Singh, `Testing the two-parameter
1231 * Weibull distribution', Communications in Statistics --
1232 * Theory and Methods A10(9), 1981, 907--918.
1233 https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true
1235 // clang-format on
1236 return test_psi_dist_sample(&dist.base);
1239 static bool
1240 test_stochastic_genpareto_impl(double mu, double sigma, double xi)
1242 const struct genpareto_t dist = {
1243 .base = GENPARETO(dist),
1244 .mu = mu,
1245 .sigma = sigma,
1246 .xi = xi,
1249 /* XXX Consider some fancier GPD test. */
1250 return test_psi_dist_sample(&dist.base);
1253 static void
1254 test_stochastic_genpareto(void *arg)
1256 bool ok = 0;
1257 bool tests_failed = true;
1258 (void) arg;
1260 testing_enable_reproducible_rng();
1262 ok = test_stochastic_genpareto_impl(0, 1, -0.25);
1263 tt_assert(ok);
1264 ok = test_stochastic_genpareto_impl(0, 1, -1e-30);
1265 tt_assert(ok);
1266 ok = test_stochastic_genpareto_impl(0, 1, 0);
1267 tt_assert(ok);
1268 ok = test_stochastic_genpareto_impl(0, 1, 1e-30);
1269 tt_assert(ok);
1270 ok = test_stochastic_genpareto_impl(0, 1, 0.25);
1271 tt_assert(ok);
1272 ok = test_stochastic_genpareto_impl(-1, 1, -0.25);
1273 tt_assert(ok);
1274 ok = test_stochastic_genpareto_impl(1, 2, 0.25);
1275 tt_assert(ok);
1277 tests_failed = false;
1279 done:
1280 if (tests_failed) {
1281 write_stochastic_warning();
1283 testing_disable_reproducible_rng();
1286 static void
1287 test_stochastic_geometric(void *arg)
1289 bool ok = 0;
1290 bool tests_failed = true;
1292 (void) arg;
1294 testing_enable_reproducible_rng();
1296 ok = test_stochastic_geometric_impl(0.1);
1297 tt_assert(ok);
1298 ok = test_stochastic_geometric_impl(0.5);
1299 tt_assert(ok);
1300 ok = test_stochastic_geometric_impl(0.9);
1301 tt_assert(ok);
1302 ok = test_stochastic_geometric_impl(1);
1303 tt_assert(ok);
1305 tests_failed = false;
1307 done:
1308 if (tests_failed) {
1309 write_stochastic_warning();
1311 testing_disable_reproducible_rng();
1314 static void
1315 test_stochastic_logistic(void *arg)
1317 bool ok = 0;
1318 bool tests_failed = true;
1319 (void) arg;
1321 testing_enable_reproducible_rng();
1323 ok = test_stochastic_logistic_impl(0, 1);
1324 tt_assert(ok);
1325 ok = test_stochastic_logistic_impl(0, 1e-16);
1326 tt_assert(ok);
1327 ok = test_stochastic_logistic_impl(1, 10);
1328 tt_assert(ok);
1329 ok = test_stochastic_logistic_impl(-10, 100);
1330 tt_assert(ok);
1332 tests_failed = false;
1334 done:
1335 if (tests_failed) {
1336 write_stochastic_warning();
1338 testing_disable_reproducible_rng();
1341 static void
1342 test_stochastic_log_logistic(void *arg)
1344 bool ok = 0;
1345 (void) arg;
1347 testing_enable_reproducible_rng();
1349 ok = test_stochastic_log_logistic_impl(1, 1);
1350 tt_assert(ok);
1351 ok = test_stochastic_log_logistic_impl(1, 10);
1352 tt_assert(ok);
1353 ok = test_stochastic_log_logistic_impl(M_E, 1e-1);
1354 tt_assert(ok);
1355 ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2);
1356 tt_assert(ok);
1358 done:
1359 write_stochastic_warning();
1360 testing_disable_reproducible_rng();
1363 static void
1364 test_stochastic_weibull(void *arg)
1366 bool ok = 0;
1367 (void) arg;
1369 testing_enable_reproducible_rng();
1371 ok = test_stochastic_weibull_impl(1, 0.5);
1372 tt_assert(ok);
1373 ok = test_stochastic_weibull_impl(1, 1);
1374 tt_assert(ok);
1375 ok = test_stochastic_weibull_impl(1, 1.5);
1376 tt_assert(ok);
1377 ok = test_stochastic_weibull_impl(1, 2);
1378 tt_assert(ok);
1379 ok = test_stochastic_weibull_impl(10, 1);
1380 tt_assert(ok);
1382 done:
1383 write_stochastic_warning();
1384 testing_disable_reproducible_rng();
1385 UNMOCK(crypto_rand);
1388 struct testcase_t prob_distr_tests[] = {
1389 { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL },
1390 { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL },
1391 { "weibull", test_weibull, TT_FORK, NULL, NULL },
1392 { "genpareto", test_genpareto, TT_FORK, NULL, NULL },
1393 { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL },
1394 END_OF_TESTCASES
1397 struct testcase_t slow_stochastic_prob_distr_tests[] = {
1398 { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL },
1399 { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL },
1400 { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL },
1401 { "stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL },
1402 { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL,
1403 NULL },
1404 { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL },
1405 END_OF_TESTCASES