expressions: Merge all the little generator programs into generate.pl.
[pspp.git] / src / math / wilcoxon-sig.c
bloba1b43dae506429f70833ddb2c9ae53f3356c707c
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Thanks to Rob van Son for writing the original version of this
18 file. This version has been completely rewritten; only the
19 name of the function has been retained. In the process of
20 rewriting, it was sped up from O(2**N) to O(N**3). */
22 #include <config.h>
23 #include "wilcoxon-sig.h"
24 #include <assert.h>
25 #include <limits.h>
26 #include <math.h>
27 #include <stdlib.h>
28 #include "xalloc.h"
/* For integers N and W, with 0 <= N < CHAR_BIT * sizeof (unsigned long int),
   calculates and returns the value of the function S(N,W),
   defined as the number of subsets of 1, 2, 3, ..., N that sum
   to at least W.  There are 2**N subsets of N items, so S(N,W)
   is in the range 0...2**N.

   There are a few trivial cases:

   * For W <= 0, S(N,W) = 2**N.

   * For W > N*(N+1)/2, S(N,W) = 0.

   * S(1,1) = 1.

   Notably, these trivial cases include all values of W for N = 1.

   (As a special case, this implementation returns 0 whenever N = 0,
   regardless of W.)

   Now consider the remaining, nontrivial cases, that is, N > 1 and
   1 <= W <= N*(N+1)/2.  In this case, apply the following identity:

   S(N,W) = S(N-1, W) + S(N-1, W-N).

   The first term on the right-hand side is the number of subsets
   that do not include N that sum to at least W; the second term is
   the number of subsets that do include N that sum to at least W.

   Then we repeatedly apply the identity to the result, reducing the
   value of N by 1 each time until we reach N=1.  Some expansions
   yield trivial cases, e.g. if W - N <= 0 (in which case we add a
   2**(N-1) term to the final result) or if W is greater than the
   largest sum obtainable with the new N.

   Here is an example (a second argument <= 0 is written as 0):

   S(7,7) = S(6,7) + S(6,0)
          = S(6,7) + 64

          = (S(5,7) + S(5,1)) + 64

          = (S(4,7) + S(4,2)) + (S(4,1) + S(4,0)) + 64
          = S(4,7) + S(4,2) + S(4,1) + 80

          = (S(3,7) + S(3,3)) + (S(3,2) + S(3,0)) + (S(3,1) + S(3,0)) + 80
          = S(3,3) + S(3,2) + S(3,1) + 96

          = (S(2,3) + S(2,0)) + (S(2,2) + S(2,0)) + (S(2,1) + S(2,0)) + 96
          = S(2,3) + S(2,2) + S(2,1) + 108

          = (S(1,3) + S(1,1)) + (S(1,2) + S(1,0)) + (S(1,1) + S(1,0)) + 108
          = 2*S(1,1) + 112

          = 114

   This function runs in O(N*W) = O(N**3) time.  It seems very
   likely that it can be made to run in O(N**2) time or perhaps
   even better, but N is, practically speaking, quite small.
   Plus, the return value may be as large as 2**N, so N must not
   be larger than 31 on 32-bit systems anyhow, and even 63**3 is
   only 250,047.
*/
static unsigned long int
count_sums_to_W (unsigned long int n, unsigned long int w)
{
  /* array[J] holds the number of pending S(n,J) terms, that is, the
     number of subsets of the already-processed items that leave a
     remaining target of J.  These counts grow exponentially with
     the number of processed items (they are bounded only by 2**N),
     so the elements must be as wide as the result itself. */
  unsigned long int *array;
  unsigned long int total;
  unsigned long int max;

  assert (n < CHAR_BIT * sizeof (unsigned long int));
  if (n == 0)
    return 0;
  else if (w == 0)
    return 1UL << n;            /* Shift in unsigned long, not int. */
  else if (w > n * (n + 1) / 2)
    return 0;
  else if (n == 1)
    return 1;

  array = xcalloc (w + 1, sizeof *array);
  array[w] = 1;

  max = w;
  total = 0;
  for (; n > 1; n--)
    {
      /* Terms S(n,I) with I above the largest possible sum are 0. */
      unsigned long int max_sum = n * (n + 1) / 2;
      unsigned long int i;

      if (max_sum < max)
        max = max_sum;

      /* Expand each S(n,I) into S(n-1,I) + S(n-1,I-n).  The first
         term simply stays in array[I].  Iterating upward is safe
         for the in-place update because I-n < I has already been
         visited in this pass. */
      for (i = 1; i <= max; i++)
        if (array[i] != 0)
          {
            if (i <= n)
              /* I - n <= 0: S(n-1,I-n) is the trivial 2**(n-1). */
              total += array[i] << (n - 1);
            else
              array[i - n] += array[i];
          }
    }

  /* Only S(1,1) = 1 is nonzero among the remaining S(1,J), J >= 1. */
  total += array[1];
  free (array);
  return total;
}
/* Returns the exact, two-tailed level of significance for the
   Wilcoxon Matched-Pairs Signed-Ranks test, given the sum of ranks
   of the positive (or negative) samples W and the sample size N.

   Returns -1 if the exact significance level cannot be
   calculated because N is out of the supported range, that is,
   N < 0 or N >= CHAR_BIT * sizeof (unsigned long int).

   W is not validated; it is expected to lie in 0...N*(N+1)/2. */
double
LevelOfSignificanceWXMPSR (double w, long int n)
{
  unsigned long int max_w;
  double p;

  /* Limit N to valid range that won't cause integer overflow. */
  if (n < 0 || (unsigned long int) n >= CHAR_BIT * sizeof (unsigned long int))
    return -1;

  /* Reflect W into the upper half of its range so that only the
     upper tail has to be counted.  The midpoint must be computed in
     floating point: with integer division an odd MAX_W would leave
     a W just below the true midpoint unreflected and yield a
     "probability" greater than 1. */
  max_w = n * (n + 1) / 2;
  if (w < max_w / 2.0)
    w = max_w - w;

  /* 2**N is computed in unsigned long; shifting a plain int by 31
     or more bits is undefined. */
  p = count_sums_to_W (n, ceil (w)) / (double) (1UL << n) * 2;

  /* When W is exactly the midpoint both tails include it, so
     doubling one tail can exceed 1; a significance level cannot. */
  return p > 1 ? 1 : p;
}