u8-line: Fix read past end of buffer in u8_line_find_pos().
[pspp.git] / lib / tukey / ptukey.c
blob415355bb2c61e55fecf2c8f82a5718d1719b5689
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 /* This file is taken from the R project source code, and modified.
19 The original copyright notice is reproduced below: */
22 * Mathlib : A C Library of Special Functions
23 * Copyright (C) 1998 Ross Ihaka
24 * Copyright (C) 2000--2007 The R Development Core Team
26 * This program is free software; you can redistribute it and/or modify
27 * it under the terms of the GNU General Public License as published by
28 * the Free Software Foundation; either version 2 of the License, or
29 * (at your option) any later version.
31 * This program is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, a copy is available at
38 * http://www.r-project.org/Licenses/
40 * SYNOPSIS
42 * #include <Rmath.h>
43 * double ptukey(q, rr, cc, df, lower_tail, log_p);
45 * DESCRIPTION
47 * Computes the probability that the maximum of rr studentized
48 * ranges, each based on cc means and with df degrees of freedom
49 * for the standard error, is less than q.
51 * The algorithm is based on that of the reference.
53 * REFERENCE
55 * Copenhaver, Margaret Diponzio & Holland, Burt S.
56 * Multiple comparisons of simple effects in
57 * the two-way analysis of variance with fixed effects.
58 * Journal of Statistical Computation and Simulation,
59 * Vol.30, pp.1-15, 1988.
64 #include <config.h>
66 #include "libpspp/compiler.h"
67 #include "tukey.h"
69 #include <gsl/gsl_sf_gamma.h>
70 #include <gsl/gsl_cdf.h>
71 #include <assert.h>
72 #include <math.h>
74 #define R_D__0 (log_p ? -INFINITY : 0.) /* 0 */
75 #define R_D__1 (log_p ? 0. : 1.) /* 1 */
76 #define R_DT_0 (lower_tail ? R_D__0 : R_D__1) /* 0 */
77 #define R_DT_1 (lower_tail ? R_D__1 : R_D__0) /* 1 */
79 #define R_D_val(x) (log_p ? log(x) : (x)) /* x in pF(x,..) */
80 #define R_D_Clog(p) (log_p ? log1p(-(p)) : (0.5 - (p) + 0.5)) /* [log](1-p) */
81 #define R_DT_val(x) (lower_tail ? R_D_val(x) : R_D_Clog(x))
84 #define ME_PRECISION 8
87 static inline double
88 pnorm(double x, double mu, double sigma, int lower_tail, int log_p)
90 assert (lower_tail == 1);
91 assert (log_p == 0);
92 assert (sigma == 1.0);
94 return gsl_cdf_gaussian_P (x - mu, sigma);
98 static double
99 wprob (double w, double rr, double cc)
101 const double M_1_SQRT_2PI = 1 / sqrt (2 * M_PI);
104 /* wprob() :
106 This function calculates probability integral of Hartley's
107 form of the range.
109 w = value of range
110 rr = no. of rows or groups
111 cc = no. of columns or treatments
112 ir = error flag = 1 if pr_w probability > 1
113 pr_w = returned probability integral from (0, w)
115 program will not terminate if ir is raised.
117 bb = upper limit of legendre integration
118 iMax = maximum acceptable value of integral
119 nleg = order of legendre quadrature
120 ihalf = int ((nleg + 1) / 2)
121 wlar = value of range above which wincr1 intervals are used to
122 calculate second part of integral,
123 else wincr2 intervals are used.
124 C1, C2, C3 = values which are used as cutoffs for terminating
125 or modifying a calculation.
127 M_1_SQRT_2PI = 1 / sqrt(2 * pi); from abramowitz & stegun, p. 3.
128 M_SQRT2 = sqrt(2)
129 xleg = legendre 12-point nodes
130 aleg = legendre 12-point coefficients
132 #define nleg 12
133 #define ihalf 6
135 /* looks like this is suboptimal for double precision.
136 (see how C1-C3 are used) <MM>
138 /* const double iMax = 1.; not used if = 1 */
139 static const double C1 = -30.;
140 static const double C2 = -50.;
141 static const double C3 = 60.;
142 static const double bb = 8.;
143 static const double wlar = 3.;
144 static const double wincr1 = 2.;
145 static const double wincr2 = 3.;
146 static const double xleg[ihalf] = {
147 0.981560634246719250690549090149,
148 0.904117256370474856678465866119,
149 0.769902674194304687036893833213,
150 0.587317954286617447296702418941,
151 0.367831498998180193752691536644,
152 0.125233408511468915472441369464
154 static const double aleg[ihalf] = {
155 0.047175336386511827194615961485,
156 0.106939325995318430960254718194,
157 0.160078328543346226334652529543,
158 0.203167426723065921749064455810,
159 0.233492536538354808760849898925,
160 0.249147045813402785000562436043
162 double a, ac, pr_w, b, binc, blb, c, cc1,
163 pminus, pplus, qexpo, qsqz, rinsum, wi, wincr, xx;
164 long double bub, einsum, elsum;
165 int j, jj;
168 qsqz = w * 0.5;
170 /* if w >= 16 then the integral lower bound (occurs for c=20) */
171 /* is 0.99999999999995 so return a value of 1. */
173 if (qsqz >= bb)
174 return 1.0;
176 /* find (f(w/2) - 1) ^ cc */
177 /* (first term in integral of hartley's form). */
179 pr_w = 2 * pnorm (qsqz, 0., 1., 1, 0) - 1.; /* erf(qsqz / M_SQRT2) */
180 /* if pr_w ^ cc < 2e-22 then set pr_w = 0 */
181 if (pr_w >= exp (C2 / cc))
182 pr_w = pow (pr_w, cc);
183 else
184 pr_w = 0.0;
186 /* if w is large then the second component of the */
187 /* integral is small, so fewer intervals are needed. */
189 if (w > wlar)
190 wincr = wincr1;
191 else
192 wincr = wincr2;
194 /* find the integral of second term of hartley's form */
195 /* for the integral of the range for equal-length */
196 /* intervals using legendre quadrature. limits of */
197 /* integration are from (w/2, 8). two or three */
198 /* equal-length intervals are used. */
200 /* blb and bub are lower and upper limits of integration. */
202 blb = qsqz;
203 binc = (bb - qsqz) / wincr;
204 bub = blb + binc;
205 einsum = 0.0;
207 /* integrate over each interval */
209 cc1 = cc - 1.0;
210 for (wi = 1; wi <= wincr; wi++)
212 elsum = 0.0;
213 a = 0.5 * (bub + blb);
215 /* legendre quadrature with order = nleg */
217 b = 0.5 * (bub - blb);
219 for (jj = 1; jj <= nleg; jj++)
221 if (ihalf < jj)
223 j = (nleg - jj) + 1;
224 xx = xleg[j - 1];
226 else
228 j = jj;
229 xx = -xleg[j - 1];
231 c = b * xx;
232 ac = a + c;
234 /* if exp(-qexpo/2) < 9e-14, */
235 /* then doesn't contribute to integral */
237 qexpo = ac * ac;
238 if (qexpo > C3)
239 break;
241 pplus = 2 * pnorm (ac, 0., 1., 1, 0);
242 pminus = 2 * pnorm (ac, w, 1., 1, 0);
244 /* if rinsum ^ (cc-1) < 9e-14, */
245 /* then doesn't contribute to integral */
247 rinsum = (pplus * 0.5) - (pminus * 0.5);
248 if (rinsum >= exp (C1 / cc1))
250 rinsum =
251 (aleg[j - 1] * exp (-(0.5 * qexpo))) * pow (rinsum, cc1);
252 elsum += rinsum;
255 elsum *= (((2.0 * b) * cc) * M_1_SQRT_2PI);
256 einsum += elsum;
257 blb = bub;
258 bub += binc;
261 /* if pr_w ^ rr < 9e-14, then return 0 */
262 pr_w = einsum + pr_w;
263 if (pr_w <= exp (C1 / rr))
264 return 0.;
266 pr_w = pow (pr_w, rr);
267 if (pr_w >= 1.) /* 1 was iMax was eps */
268 return 1.;
269 return pr_w;
270 } /* wprob() */
272 double
273 ptukey (double q, double rr, double cc, double df, int lower_tail, int log_p)
275 /* function ptukey() [was qprob() ]:
277 q = value of studentized range
278 rr = no. of rows or groups
279 cc = no. of columns or treatments
280 df = degrees of freedom of error term
281 ir[0] = error flag = 1 if wprob probability > 1
282 ir[1] = error flag = 1 if qprob probability > 1
284 qprob = returned probability integral over [0, q]
286 The program will not terminate if ir[0] or ir[1] are raised.
288 All references in wprob to Abramowitz and Stegun
289 are from the following reference:
291 Abramowitz, Milton and Stegun, Irene A.
292 Handbook of Mathematical Functions.
293 New York: Dover publications, Inc. (1970).
295 All constants taken from this text are
296 given to 25 significant digits.
298 nlegq = order of legendre quadrature
299 ihalfq = int ((nlegq + 1) / 2)
300 eps = max. allowable value of integral
301 eps1 & eps2 = values below which there is
302 no contribution to integral.
304 d.f. <= dhaf: integral is divided into ulen1 length intervals. else
305 d.f. <= dquar: integral is divided into ulen2 length intervals. else
306 d.f. <= deigh: integral is divided into ulen3 length intervals. else
307 d.f. <= dlarg: integral is divided into ulen4 length intervals.
309 d.f. > dlarg: the range is used to calculate integral.
311 M_LN2 = log(2)
313 xlegq = legendre 16-point nodes
314 alegq = legendre 16-point coefficients
316 The coefficients and nodes for the legendre quadrature used in
317 qprob and wprob were calculated using the algorithms found in:
319 Stroud, A. H. and Secrest, D.
320 Gaussian Quadrature Formulas.
321 Englewood Cliffs,
322 New Jersey: Prentice-Hall, Inc, 1966.
324 All values matched the tables (provided in same reference)
325 to 30 significant digits.
327 f(x) = .5 + erf(x / sqrt(2)) / 2 for x > 0
329 f(x) = erfc(-x / sqrt(2)) / 2 for x < 0
331 where f(x) is standard normal c. d. f.
333 if degrees of freedom large, approximate integral
334 with range distribution.
336 #define nlegq 16
337 #define ihalfq 8
339 /* const double eps = 1.0; not used if = 1 */
340 static const double eps1 = -30.0;
341 static const double eps2 = 1.0e-14;
342 static const double dhaf = 100.0;
343 static const double dquar = 800.0;
344 static const double deigh = 5000.0;
345 static const double dlarg = 25000.0;
346 static const double ulen1 = 1.0;
347 static const double ulen2 = 0.5;
348 static const double ulen3 = 0.25;
349 static const double ulen4 = 0.125;
350 static const double xlegq[ihalfq] = {
351 0.989400934991649932596154173450,
352 0.944575023073232576077988415535,
353 0.865631202387831743880467897712,
354 0.755404408355003033895101194847,
355 0.617876244402643748446671764049,
356 0.458016777657227386342419442984,
357 0.281603550779258913230460501460,
358 0.950125098376374401853193354250e-1
360 static const double alegq[ihalfq] = {
361 0.271524594117540948517805724560e-1,
362 0.622535239386478928628438369944e-1,
363 0.951585116824927848099251076022e-1,
364 0.124628971255533872052476282192,
365 0.149595988816576732081501730547,
366 0.169156519395002538189312079030,
367 0.182603415044923588866763667969,
368 0.189450610455068496285396723208
370 double ans, f2, f21, f2lf, ff4, otsum, qsqz, rotsum, t1, twa1, ulen, wprb;
371 int i, j, jj;
373 assert (! (isnan (q) || isnan (rr) || isnan (cc) || isnan (df)));
375 if (q <= 0)
376 return R_DT_0;
378 /* df must be > 1 */
379 /* there must be at least two values */
380 assert (! (df < 2 || rr < 1 || cc < 2));
382 if (!isfinite (q))
383 return R_DT_1;
385 if (df > dlarg)
386 return R_DT_val (wprob (q, rr, cc));
388 /* calculate leading constant */
390 f2 = df * 0.5;
391 /* lgammafn(u) = log(gamma(u)) */
392 f2lf = ((f2 * log (df)) - (df * M_LN2)) - gsl_sf_lngamma (f2);
393 f21 = f2 - 1.0;
395 /* integral is divided into unit, half-unit, quarter-unit, or */
396 /* eighth-unit length intervals depending on the value of the */
397 /* degrees of freedom. */
399 ff4 = df * 0.25;
400 if (df <= dhaf)
401 ulen = ulen1;
402 else if (df <= dquar)
403 ulen = ulen2;
404 else if (df <= deigh)
405 ulen = ulen3;
406 else
407 ulen = ulen4;
409 f2lf += log (ulen);
411 /* integrate over each subinterval */
413 ans = 0.0;
415 for (i = 1; i <= 50; i++)
417 otsum = 0.0;
419 /* legendre quadrature with order = nlegq */
420 /* nodes (stored in xlegq) are symmetric around zero. */
422 twa1 = (2 * i - 1) * ulen;
424 for (jj = 1; jj <= nlegq; jj++)
426 if (ihalfq < jj)
428 j = jj - ihalfq - 1;
429 t1 = (f2lf + (f21 * log (twa1 + (xlegq[j] * ulen))))
430 - (((xlegq[j] * ulen) + twa1) * ff4);
432 else
434 j = jj - 1;
435 t1 = (f2lf + (f21 * log (twa1 - (xlegq[j] * ulen))))
436 + (((xlegq[j] * ulen) - twa1) * ff4);
440 /* if exp(t1) < 9e-14, then doesn't contribute to integral */
441 if (t1 >= eps1)
443 if (ihalfq < jj)
445 qsqz = q * sqrt (((xlegq[j] * ulen) + twa1) * 0.5);
447 else
449 qsqz = q * sqrt (((-(xlegq[j] * ulen)) + twa1) * 0.5);
452 /* call wprob to find integral of range portion */
454 wprb = wprob (qsqz, rr, cc);
455 rotsum = (wprb * alegq[j]) * exp (t1);
456 otsum += rotsum;
458 /* end legendre integral for interval i */
459 /* L200: */
462 /* if integral for interval i < 1e-14, then stop.
463 * However, in order to avoid small area under left tail,
464 * at least 1 / ulen intervals are calculated.
466 if (i * ulen >= 1.0 && otsum <= eps2)
467 break;
469 /* end of interval i */
470 /* L330: */
472 ans += otsum;
475 assert (otsum <= eps2); /* not converged */
477 if (ans > 1.)
478 ans = 1.;
479 return R_DT_val (ans);