expressions: Merge all the little generator programs into generate.pl.
[pspp.git] / src / math / levene.c
blob098fcc5a1ba9feba488060e3406f54f84364c998
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "levene.h"
20 #include <math.h>
22 #include "libpspp/misc.h"
23 #include "libpspp/hmap.h"
24 #include "data/value.h"
25 #include "data/val-type.h"
27 #include <gl/xalloc.h>
28 #include <assert.h>
30 struct lev
32 struct hmap_node node;
33 union value group;
35 double t_bar;
36 double z_mean;
37 double n;
40 typedef unsigned int hash_func (const struct levene *, const union value *v);
41 typedef bool cmp_func (const struct levene *, const union value *v0, const union value *v1);
43 struct levene
45 /* Width of the categorical variable */
46 int gvw ;
48 /* The value dividing the groups. Valid only for dichotomous categorical variable.*/
49 const union value *cutpoint;
52 /* A hashtable of struct lev objects indexed by union value */
53 struct hmap hmap;
55 hash_func *hash;
56 cmp_func *cmp;
59 /* A state variable indicating how many passes have been done */
60 int pass;
62 double grand_n;
63 double z_grand_mean;
65 double denominator;
69 static unsigned int
70 unique_hash (const struct levene *nl, const union value *val)
72 return value_hash (val, nl->gvw, 0);
75 static bool
76 unique_cmp (const struct levene *nl, const union value *val0, const union value *val1)
78 return value_equal (val0, val1, nl->gvw);
81 static unsigned int
82 cutpoint_hash (const struct levene *nl, const union value *val)
84 int x = value_compare_3way (val, nl->cutpoint, nl->gvw);
86 return (x < 0);
89 static bool
90 cutpoint_cmp (const struct levene *nl, const union value *val0, const union value *val1)
92 int x = value_compare_3way (val0, nl->cutpoint, nl->gvw);
94 int y = value_compare_3way (val1, nl->cutpoint, nl->gvw);
96 if ( x == 0) x = 1;
97 if ( y == 0) y = 1;
99 return ( x == y);
104 static struct lev *
105 find_group (const struct levene *nl, const union value *target)
107 struct lev *l = NULL;
109 HMAP_FOR_EACH_WITH_HASH (l, struct lev, node, nl->hash (nl, target), &nl->hmap)
111 if (nl->cmp (nl, &l->group, target))
112 break;
113 l = NULL;
115 return l;
119 struct levene *
120 levene_create (int indep_width, const union value *cutpoint)
122 struct levene *nl = xzalloc (sizeof *nl);
124 hmap_init (&nl->hmap);
126 nl->gvw = indep_width;
127 nl->cutpoint = cutpoint;
129 nl->hash = cutpoint ? cutpoint_hash : unique_hash;
130 nl->cmp = cutpoint ? cutpoint_cmp : unique_cmp;
132 return nl;
136 /* Data accumulation. First pass */
137 void
138 levene_pass_one (struct levene *nl, double value, double weight, const union value *gv)
140 struct lev *lev = find_group (nl, gv);
142 if ( nl->pass == 0 )
144 nl->pass = 1;
146 assert (nl->pass == 1);
148 if ( NULL == lev)
150 struct lev *l = xzalloc (sizeof *l);
151 value_clone (&l->group, gv, nl->gvw);
152 hmap_insert (&nl->hmap, &l->node, nl->hash (nl, &l->group));
153 lev = l;
156 lev->n += weight;
157 lev->t_bar += value * weight;
159 nl->grand_n += weight;
162 /* Data accumulation. Second pass */
163 void
164 levene_pass_two (struct levene *nl, double value, double weight, const union value *gv)
166 struct lev *lev = NULL;
168 if ( nl->pass == 1 )
170 struct lev *next;
171 struct lev *l;
173 nl->pass = 2;
175 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
177 l->t_bar /= l->n;
180 assert (nl->pass == 2);
182 lev = find_group (nl, gv);
184 lev->z_mean += fabs (value - lev->t_bar) * weight;
185 nl->z_grand_mean += fabs (value - lev->t_bar) * weight;
188 /* Data accumulation. Third pass */
189 void
190 levene_pass_three (struct levene *nl, double value, double weight, const union value *gv)
192 double z;
193 struct lev *lev = NULL;
195 if ( nl->pass == 2 )
197 struct lev *next;
198 struct lev *l;
200 nl->pass = 3;
202 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
204 l->z_mean /= l->n;
207 nl->z_grand_mean /= nl->grand_n;
210 assert (nl->pass == 3);
211 lev = find_group (nl, gv);
213 z = fabs (value - lev->t_bar);
214 nl->denominator += pow2 (z - lev->z_mean) * weight;
218 /* Return the value of the levene statistic */
219 double
220 levene_calculate (struct levene *nl)
222 struct lev *next;
223 struct lev *l;
225 double numerator = 0.0;
226 double nn = 0.0;
228 /* The Levene calculation requires three passes.
229 Normally this should have been done prior to calling this function.
230 However, in abnormal circumstances (eg. the dataset is empty) there
231 will have been no passes.
233 assert (nl->pass == 0 || nl->pass == 3);
235 if ( nl->pass == 0 )
236 return SYSMIS;
238 nl->denominator *= hmap_count (&nl->hmap) - 1;
240 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
242 numerator += l->n * pow2 (l->z_mean - nl->z_grand_mean);
243 nn += l->n;
246 numerator *= nn - hmap_count (&nl->hmap);
248 return numerator / nl->denominator;
251 void
252 levene_destroy (struct levene *nl)
254 struct lev *next;
255 struct lev *l;
257 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
259 value_destroy (&l->group, nl->gvw);
260 free (l);
263 hmap_destroy (&nl->hmap);
264 free (nl);