1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include <gsl/gsl_cdf.h>
23 #include "data/format.h"
26 #include "data/variable.h"
27 #include "data/case.h"
28 #include "data/dictionary.h"
29 #include "data/dataset.h"
30 #include "data/casereader.h"
31 #include "data/casewriter.h"
32 #include "data/subcase.h"
33 #include "data/value.h"
35 #include "math/percentiles.h"
36 #include "math/sort.h"
38 #include "libpspp/cast.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/array.h"
41 #include "libpspp/str.h"
42 #include "libpspp/misc.h"
44 #include "output/pivot-table.h"
47 #define N_(msgid) msgid
48 #define _(msgid) gettext (msgid)
53 struct hmap_node node
;
61 const struct variable
*var
;
62 struct val_node
**sorted_array
;
71 val_node_cmp_3way (const void *a_
, const void *b_
, const void *aux
)
73 const struct variable
*indep_var
= aux
;
74 const struct val_node
*const *a
= a_
;
75 const struct val_node
*const *b
= b_
;
77 return value_compare_3way (&(*a
)->val
, &(*b
)->val
, var_get_width (indep_var
));
81 show_frequencies (const struct n_sample_test
*nst
, const struct results
*results
, int n_vals
, const struct dictionary
*);
84 show_test_statistics (const struct n_sample_test
*nst
, const struct results
*results
, int, const struct dictionary
*);
87 static struct val_node
*
88 find_value (const struct hmap
*map
, const union value
*val
,
89 const struct variable
*var
)
91 struct val_node
*foo
= NULL
;
92 size_t hash
= value_hash (val
, var_get_width (var
), 0);
93 HMAP_FOR_EACH_WITH_HASH (foo
, struct val_node
, node
, hash
, map
)
94 if (value_equal (val
, &foo
->val
, var_get_width (var
)))
101 median_execute (const struct dataset
*ds
,
102 struct casereader
*input
,
103 enum mv_class exclude
,
104 const struct npar_test
*test
,
108 const struct dictionary
*dict
= dataset_dict (ds
);
109 const struct variable
*wvar
= dict_get_weight (dict
);
112 const struct median_test
*mt
= UP_CAST (test
, const struct median_test
,
115 const struct n_sample_test
*nst
= UP_CAST (test
, const struct n_sample_test
,
118 const bool n_sample_test
= (value_compare_3way (&nst
->val2
, &nst
->val1
,
119 var_get_width (nst
->indep_var
)) > 0);
121 struct results
*results
= XCALLOC (nst
->n_vars
, struct results
);
123 for (v
= 0; v
< nst
->n_vars
; ++v
)
127 double median
= mt
->median
;
128 const struct variable
*var
= nst
->vars
[v
];
130 struct hmap map
= HMAP_INITIALIZER (map
);
131 struct casereader
*r
= casereader_clone (input
);
135 if (n_sample_test
== false)
137 struct val_node
*vn
= XZALLOC (struct val_node
);
138 value_clone (&vn
->val
, &nst
->val1
, var_get_width (nst
->indep_var
));
139 hmap_insert (&map
, &vn
->node
, value_hash (&nst
->val1
,
140 var_get_width (nst
->indep_var
), 0));
142 vn
= xzalloc (sizeof *vn
);
143 value_clone (&vn
->val
, &nst
->val2
, var_get_width (nst
->indep_var
));
144 hmap_insert (&map
, &vn
->node
, value_hash (&nst
->val2
,
145 var_get_width (nst
->indep_var
), 0));
148 if (median
== SYSMIS
)
150 struct percentile
*ptl
;
151 struct order_stats
*os
;
153 struct casereader
*rr
;
155 struct casewriter
*writer
;
156 subcase_init_var (&sc
, var
, SC_ASCEND
);
157 rr
= casereader_clone (r
);
158 writer
= sort_create_writer (&sc
, casereader_get_proto (rr
));
160 for (; (c
= casereader_read (rr
)) != NULL
;)
162 if (var_is_value_missing (var
, case_data (c
, var
)) & exclude
)
168 cc
+= dict_get_case_weight (dict
, c
, &warn
);
169 casewriter_write (writer
, c
);
171 subcase_destroy (&sc
);
172 casereader_destroy (rr
);
174 rr
= casewriter_make_reader (writer
);
176 ptl
= percentile_create (0.5, cc
);
179 order_stats_accumulate (&os
, 1,
185 median
= percentile_calculate (ptl
, PC_HAVERAGE
);
186 statistic_destroy (&ptl
->parent
.parent
);
189 results
[v
].median
= median
;
192 for (; (c
= casereader_read (r
)) != NULL
; case_unref (c
))
194 struct val_node
*vn
;
195 const double weight
= dict_get_case_weight (dict
, c
, &warn
);
196 const union value
*val
= case_data (c
, var
);
197 const union value
*indep_val
= case_data (c
, nst
->indep_var
);
199 if (var_is_value_missing (var
, case_data (c
, var
)) & exclude
)
206 int width
= var_get_width (nst
->indep_var
);
207 /* Ignore out of range values */
209 value_compare_3way (indep_val
, &nst
->val1
, width
) < 0
211 value_compare_3way (indep_val
, &nst
->val2
, width
) > 0
218 vn
= find_value (&map
, indep_val
, nst
->indep_var
);
221 if (n_sample_test
== true)
223 int width
= var_get_width (nst
->indep_var
);
224 vn
= xzalloc (sizeof *vn
);
225 value_clone (&vn
->val
, indep_val
, width
);
227 hmap_insert (&map
, &vn
->node
, value_hash (indep_val
, width
, 0));
235 if (val
->f
<= median
)
242 casereader_destroy (r
);
246 struct val_node
*vn
= NULL
;
249 HMAP_FOR_EACH (vn
, struct val_node
, node
, &map
)
255 results
[v
].n
= count
;
256 results
[v
].sorted_array
= XCALLOC (hmap_count (&map
), struct val_node
*);
257 results
[v
].var
= var
;
259 HMAP_FOR_EACH (vn
, struct val_node
, node
, &map
)
261 double e_0j
= r_0
* (vn
->le
+ vn
->gt
) / count
;
262 double e_1j
= r_1
* (vn
->le
+ vn
->gt
) / count
;
264 results
[v
].chisq
+= pow2 (vn
->le
- e_0j
) / e_0j
;
265 results
[v
].chisq
+= pow2 (vn
->gt
- e_1j
) / e_1j
;
267 results
[v
].sorted_array
[x
++] = vn
;
273 sort (results
[v
].sorted_array
, x
, sizeof (*results
[v
].sorted_array
),
274 val_node_cmp_3way
, nst
->indep_var
);
279 casereader_destroy (input
);
281 show_frequencies (nst
, results
, n_vals
, dict
);
282 show_test_statistics (nst
, results
, n_vals
, dict
);
284 for (v
= 0; v
< nst
->n_vars
; ++v
)
287 const struct results
*rs
= results
+ v
;
289 for (i
= 0; i
< n_vals
; ++i
)
291 struct val_node
*vn
= rs
->sorted_array
[i
];
292 value_destroy (&vn
->val
, var_get_width (nst
->indep_var
));
295 free (rs
->sorted_array
);
303 show_frequencies (const struct n_sample_test
*nst
, const struct results
*results
, int n_vals
, const struct dictionary
*dict
)
305 struct pivot_table
*table
= pivot_table_create (N_("Frequencies"));
306 pivot_table_set_weight_var (table
, dict_get_weight (dict
));
308 struct pivot_dimension
*indep
= pivot_dimension_create__ (
309 table
, PIVOT_AXIS_COLUMN
, pivot_value_new_variable (nst
->indep_var
));
310 indep
->root
->show_label
= true;
311 for (int i
= 0; i
< n_vals
; ++i
)
312 pivot_category_create_leaf_rc (
313 indep
->root
, pivot_value_new_var_value (
314 nst
->indep_var
, &results
->sorted_array
[i
]->val
), PIVOT_RC_COUNT
);
315 pivot_dimension_create (table
, PIVOT_AXIS_ROW
, N_("Statistics"),
316 N_("> Median"), N_("≤ Median"));
318 struct pivot_dimension
*dep
= pivot_dimension_create (
319 table
, PIVOT_AXIS_ROW
, N_("Dependent Variables"));
321 for (int v
= 0; v
< nst
->n_vars
; ++v
)
323 const struct results
*rs
= &results
[v
];
325 int dep_idx
= pivot_category_create_leaf (
326 dep
->root
, pivot_value_new_variable (rs
->var
));
328 for (int indep_idx
= 0; indep_idx
< n_vals
; indep_idx
++)
330 const struct val_node
*vn
= rs
->sorted_array
[indep_idx
];
331 pivot_table_put3 (table
, indep_idx
, 0, dep_idx
,
332 pivot_value_new_number (vn
->gt
));
333 pivot_table_put3 (table
, indep_idx
, 1, dep_idx
,
334 pivot_value_new_number (vn
->le
));
338 pivot_table_submit (table
);
342 show_test_statistics (const struct n_sample_test
*nst
,
343 const struct results
*results
,
345 const struct dictionary
*dict
)
347 struct pivot_table
*table
= pivot_table_create (N_("Test Statistics"));
348 pivot_table_set_weight_var (table
, dict_get_weight (dict
));
350 pivot_dimension_create (table
, PIVOT_AXIS_COLUMN
, N_("Statistics"),
351 N_("N"), PIVOT_RC_COUNT
,
353 N_("Chi-Square"), PIVOT_RC_OTHER
,
354 N_("df"), PIVOT_RC_COUNT
,
355 N_("Asymp. Sig."), PIVOT_RC_SIGNIFICANCE
);
357 struct pivot_dimension
*variables
= pivot_dimension_create (
358 table
, PIVOT_AXIS_ROW
, N_("Variables"));
360 for (int v
= 0; v
< nst
->n_vars
; ++v
)
362 double df
= n_vals
- 1;
363 const struct results
*rs
= &results
[v
];
365 int var_idx
= pivot_category_create_leaf (
366 variables
->root
, pivot_value_new_variable (rs
->var
));
373 gsl_cdf_chisq_Q (rs
->chisq
, df
),
375 for (size_t i
= 0; i
< sizeof entries
/ sizeof *entries
; i
++)
377 struct pivot_value
*value
378 = pivot_value_new_number (entries
[i
]);
380 value
->numeric
.format
= *var_get_print_format (rs
->var
);
381 pivot_table_put2 (table
, i
, var_idx
, value
);
385 pivot_table_submit (table
);