1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "libpspp/misc.h"
23 #include "libpspp/hmap.h"
24 #include "data/value.h"
25 #include "data/val-type.h"
27 #include <gl/xalloc.h>
32 struct hmap_node node
;
40 typedef unsigned int hash_func (const struct levene
*, const union value
*v
);
41 typedef bool cmp_func (const struct levene
*, const union value
*v0
, const union value
*v1
);
45 /* Width of the categorical variable */
48 /* The value dividing the groups. Valid only for dichotomous categorical variable.*/
49 const union value
*cutpoint
;
52 /* A hashtable of struct lev objects indexed by union value */
59 /* A state variable indicating how many passes have been done */
70 unique_hash (const struct levene
*nl
, const union value
*val
)
72 return value_hash (val
, nl
->gvw
, 0);
76 unique_cmp (const struct levene
*nl
, const union value
*val0
, const union value
*val1
)
78 return value_equal (val0
, val1
, nl
->gvw
);
82 cutpoint_hash (const struct levene
*nl
, const union value
*val
)
84 int x
= value_compare_3way (val
, nl
->cutpoint
, nl
->gvw
);
90 cutpoint_cmp (const struct levene
*nl
, const union value
*val0
, const union value
*val1
)
92 int x
= value_compare_3way (val0
, nl
->cutpoint
, nl
->gvw
);
94 int y
= value_compare_3way (val1
, nl
->cutpoint
, nl
->gvw
);
105 find_group (const struct levene
*nl
, const union value
*target
)
107 struct lev
*l
= NULL
;
109 HMAP_FOR_EACH_WITH_HASH (l
, struct lev
, node
, nl
->hash (nl
, target
), &nl
->hmap
)
111 if (nl
->cmp (nl
, &l
->group
, target
))
120 levene_create (int indep_width
, const union value
*cutpoint
)
122 struct levene
*nl
= xzalloc (sizeof *nl
);
124 hmap_init (&nl
->hmap
);
126 nl
->gvw
= indep_width
;
127 nl
->cutpoint
= cutpoint
;
129 nl
->hash
= cutpoint
? cutpoint_hash
: unique_hash
;
130 nl
->cmp
= cutpoint
? cutpoint_cmp
: unique_cmp
;
136 /* Data accumulation. First pass */
138 levene_pass_one (struct levene
*nl
, double value
, double weight
, const union value
*gv
)
140 struct lev
*lev
= find_group (nl
, gv
);
146 assert (nl
->pass
== 1);
150 struct lev
*l
= xzalloc (sizeof *l
);
151 value_clone (&l
->group
, gv
, nl
->gvw
);
152 hmap_insert (&nl
->hmap
, &l
->node
, nl
->hash (nl
, &l
->group
));
157 lev
->t_bar
+= value
* weight
;
159 nl
->grand_n
+= weight
;
162 /* Data accumulation. Second pass */
164 levene_pass_two (struct levene
*nl
, double value
, double weight
, const union value
*gv
)
166 struct lev
*lev
= NULL
;
175 HMAP_FOR_EACH_SAFE (l
, next
, struct lev
, node
, &nl
->hmap
)
180 assert (nl
->pass
== 2);
182 lev
= find_group (nl
, gv
);
184 lev
->z_mean
+= fabs (value
- lev
->t_bar
) * weight
;
185 nl
->z_grand_mean
+= fabs (value
- lev
->t_bar
) * weight
;
188 /* Data accumulation. Third pass */
190 levene_pass_three (struct levene
*nl
, double value
, double weight
, const union value
*gv
)
193 struct lev
*lev
= NULL
;
202 HMAP_FOR_EACH_SAFE (l
, next
, struct lev
, node
, &nl
->hmap
)
207 nl
->z_grand_mean
/= nl
->grand_n
;
210 assert (nl
->pass
== 3);
211 lev
= find_group (nl
, gv
);
213 z
= fabs (value
- lev
->t_bar
);
214 nl
->denominator
+= pow2 (z
- lev
->z_mean
) * weight
;
218 /* Return the value of the levene statistic */
220 levene_calculate (struct levene
*nl
)
225 double numerator
= 0.0;
228 /* The Levene calculation requires three passes.
229 Normally this should have been done prior to calling this function.
230 However, in abnormal circumstances (eg. the dataset is empty) there
231 will have been no passes.
233 assert (nl
->pass
== 0 || nl
->pass
== 3);
238 nl
->denominator
*= hmap_count (&nl
->hmap
) - 1;
240 HMAP_FOR_EACH_SAFE (l
, next
, struct lev
, node
, &nl
->hmap
)
242 numerator
+= l
->n
* pow2 (l
->z_mean
- nl
->z_grand_mean
);
246 numerator
*= nn
- hmap_count (&nl
->hmap
);
248 return numerator
/ nl
->denominator
;
252 levene_destroy (struct levene
*nl
)
257 HMAP_FOR_EACH_SAFE (l
, next
, struct lev
, node
, &nl
->hmap
)
259 value_destroy (&l
->group
, nl
->gvw
);
263 hmap_destroy (&nl
->hmap
);