GnmFunc: make this a GObject.
[gnumeric.git] / src / tools / analysis-wilcoxon-mann-whitney.c
blob1e615db41a4bc00139b190a18263b427b3e6e4de
/*
 * analysis-wilcoxon-mann-whitney.c:
 *
 * Author:
 *   Andreas J. Guelzow  <aguelzow@pyrshep.ca>
 *
 * (C) Copyright 2010 by Andreas J. Guelzow  <aguelzow@pyrshep.ca>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses/>.
 */
24 #include <gnumeric-config.h>
25 #include <glib/gi18n-lib.h>
26 #include <gnumeric.h>
27 #include <tools/analysis-wilcoxon-mann-whitney.h>
28 #include <tools/analysis-tools.h>
29 #include <value.h>
30 #include <ranges.h>
31 #include <expr.h>
32 #include <func.h>
33 #include <numbers.h>
35 static
36 GnmExpr const *analysis_tool_combine_area (GnmValue *val_1, GnmValue *val_2, Workbook *wb)
38 GnmFunc *fd_array;
39 GnmExpr const *expr;
41 if (VALUE_IS_CELLRANGE (val_1) && VALUE_IS_CELLRANGE (val_2) &&
42 val_1->v_range.cell.a.sheet == val_2->v_range.cell.a.sheet) {
43 GnmRange r_1, r_2;
44 gboolean combined = FALSE;
46 range_init_rangeref (&r_1, &val_1->v_range.cell);
47 range_init_rangeref (&r_2, &val_2->v_range.cell);
49 if (r_1.start.row == r_2.start.row &&
50 range_height (&r_1) == range_height (&r_2)) {
51 if (r_1.end.col == r_2.start.col - 1) {
52 combined = TRUE;
53 r_1.end.col = r_2.end.col;
54 } else if (r_2.end.col == r_1.start.col - 1) {
55 combined = TRUE;
56 r_1.start.col = r_2.start.col;
58 } else if (r_1.start.col == r_2.start.col &&
59 range_width (&r_1) == range_width (&r_2)) {
60 if (r_1.end.row == r_2.start.row - 1) {
61 combined = TRUE;
62 r_1.end.row = r_2.end.row;
63 } else if (r_2.end.row == r_1.start.row - 1) {
64 combined = TRUE;
65 r_1.start.row = r_2.start.row;
69 if (combined) {
70 GnmValue *val = value_new_cellrange_r (val_1->v_range.cell.a.sheet, &r_1);
71 return gnm_expr_new_constant (val);
75 fd_array = gnm_func_lookup_or_add_placeholder ("ARRAY");
76 gnm_func_inc_usage (fd_array);
78 expr = gnm_expr_new_funcall2 (fd_array,
79 gnm_expr_new_constant (value_dup (val_1)),
80 gnm_expr_new_constant (value_dup (val_2)));
82 gnm_func_dec_usage (fd_array);
84 return expr;
87 static gboolean
88 analysis_tool_wilcoxon_mann_whitney_engine_run (data_analysis_output_t *dao,
89 analysis_tools_data_generic_b_t *info)
91 GnmFunc *fd_count;
92 GnmFunc *fd_sum;
93 GnmFunc *fd_rows;
94 GnmFunc *fd_rank_avg;
95 GnmFunc *fd_rank;
96 GnmFunc *fd_min;
97 GnmFunc *fd_normdist;
98 GnmFunc *fd_sqrt;
99 GnmFunc *fd_if;
100 GnmFunc *fd_isblank;
102 GnmExpr const *expr_total;
103 GnmExpr const *expr_pop_1;
104 GnmExpr const *expr_pop_2;
105 GnmExpr const *expr_u;
106 GnmExpr const *expr_count_total;
108 GnmValue *val_1 = value_dup (info->range_1);
109 GnmValue *val_2 = value_dup (info->range_2);
110 Workbook *wb = dao->sheet ? dao->sheet->workbook : NULL;
112 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
113 gnm_func_inc_usage (fd_count);
114 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
115 gnm_func_inc_usage (fd_sum);
116 fd_rows = gnm_func_lookup_or_add_placeholder ("ROWS");
117 gnm_func_inc_usage (fd_rows);
118 fd_rank_avg = gnm_func_lookup_or_add_placeholder ("RANK.AVG");
119 gnm_func_inc_usage (fd_rank_avg);
120 fd_rank = gnm_func_lookup_or_add_placeholder ("RANK");
121 gnm_func_inc_usage (fd_rank);
122 fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
123 gnm_func_inc_usage (fd_min);
124 fd_normdist = gnm_func_lookup_or_add_placeholder ("NORMDIST");
125 gnm_func_inc_usage (fd_normdist);
126 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
127 gnm_func_inc_usage (fd_sqrt);
128 fd_if = gnm_func_lookup_or_add_placeholder ("IF");
129 gnm_func_inc_usage (fd_if);
130 fd_isblank = gnm_func_lookup_or_add_placeholder ("ISBLANK");
131 gnm_func_inc_usage (fd_isblank);
133 dao_set_italic (dao, 0, 0, 0, 8);
134 dao_set_italic (dao, 0, 1, 3, 1);
135 dao_set_merge (dao, 0, 0, 3, 0);
136 dao_set_cell (dao, 0, 0, _("Wilcoxon-Mann-Whitney Test"));
137 set_cell_text_col (dao, 0, 2, _("/Rank-Sum"
138 "/N"
139 "/U"
140 "/Ties"
141 "/Statistic"
142 "/U-Statistic"
143 "/p-Value"));
144 dao_set_cell (dao, 3, 1, _("Total"));
146 /* Label */
147 analysis_tools_write_label_ftest (val_1, dao, 1, 1, info->labels, 1);
148 analysis_tools_write_label_ftest (val_2, dao, 2, 1, info->labels, 2);
150 expr_total = analysis_tool_combine_area (val_1, val_2, wb);
151 expr_pop_1 = gnm_expr_new_constant (val_1);
152 expr_pop_2 = gnm_expr_new_constant (val_2);
154 /* =sum(if(isblank(region1),0,rank.avg(region1,combined_regions,1))) */
156 dao_set_cell_array_expr (dao, 1, 2,
157 gnm_expr_new_funcall1
158 (fd_sum,
159 gnm_expr_new_funcall3
160 (fd_if,
161 gnm_expr_new_funcall1
162 (fd_isblank,
163 gnm_expr_copy (expr_pop_1)),
164 gnm_expr_new_constant (value_new_int (0)),
165 gnm_expr_new_funcall3
166 (fd_rank_avg,
167 gnm_expr_copy (expr_pop_1),
168 gnm_expr_copy (expr_total),
169 gnm_expr_new_constant (value_new_int (1))))));
170 dao_set_cell_array_expr (dao, 2, 2,
171 gnm_expr_new_funcall1
172 (fd_sum,
173 gnm_expr_new_funcall3
174 (fd_if,
175 gnm_expr_new_funcall1
176 (fd_isblank,
177 gnm_expr_copy (expr_pop_2)),
178 gnm_expr_new_constant (value_new_int (0)),
179 gnm_expr_new_funcall3
180 (fd_rank_avg,
181 gnm_expr_copy (expr_pop_2),
182 gnm_expr_copy (expr_total),
183 gnm_expr_new_constant (value_new_int (1))))));
185 expr_count_total = gnm_expr_new_funcall1
186 (fd_count, gnm_expr_copy (expr_total));
187 dao_set_cell_expr (dao, 3, 2,
188 gnm_expr_new_binary
189 (gnm_expr_new_binary
190 (gnm_expr_copy (expr_count_total),
191 GNM_EXPR_OP_MULT,
192 gnm_expr_new_binary
193 (gnm_expr_copy (expr_count_total),
194 GNM_EXPR_OP_ADD,
195 gnm_expr_new_constant (value_new_int (1)))),
196 GNM_EXPR_OP_DIV,
197 gnm_expr_new_constant (value_new_int (2))));
199 dao_set_cell_expr (dao, 1, 3,
200 gnm_expr_new_funcall1
201 (fd_count,
202 expr_pop_1));
203 dao_set_cell_expr (dao, 2, 3,
204 gnm_expr_new_funcall1
205 (fd_count,
206 expr_pop_2));
207 dao_set_cell_expr (dao, 3, 3,
208 gnm_expr_new_funcall1
209 (fd_count,
210 gnm_expr_copy (expr_total)));
212 expr_u = gnm_expr_new_binary
213 (make_cellref (0,- 2), GNM_EXPR_OP_SUB,
214 gnm_expr_new_binary
215 (gnm_expr_new_binary
216 (make_cellref (0,- 1),
217 GNM_EXPR_OP_MULT,
218 gnm_expr_new_binary
219 (make_cellref (0,- 1),
220 GNM_EXPR_OP_ADD,
221 gnm_expr_new_constant (value_new_int (1)))),
222 GNM_EXPR_OP_DIV,
223 gnm_expr_new_constant (value_new_int (2))));
225 dao_set_cell_expr (dao, 1, 4, gnm_expr_copy (expr_u));
226 dao_set_cell_expr (dao, 2, 4, expr_u);
227 dao_set_cell_expr (dao, 3, 4,
228 gnm_expr_new_binary
229 (make_cellref (-2,-1),
230 GNM_EXPR_OP_MULT,
231 make_cellref (-1,-1)));
233 dao_set_cell_array_expr (dao, 1, 5,
234 gnm_expr_new_funcall1
235 (fd_sum,
236 gnm_expr_new_binary
237 (gnm_expr_new_funcall2
238 (fd_rank_avg,
239 gnm_expr_copy (expr_total),
240 gnm_expr_copy (expr_total)),
241 GNM_EXPR_OP_SUB,
242 gnm_expr_new_funcall2
243 (fd_rank,
244 gnm_expr_copy (expr_total),
245 gnm_expr_copy (expr_total)))));
247 if (dao_cell_is_visible (dao, 2, 4)) {
248 GnmExpr const *expr_prod;
249 GnmExpr const *expr_sqrt;
250 GnmExpr const *expr_normdist;
252 expr_prod = gnm_expr_new_binary
253 (make_cellref (0,-5),
254 GNM_EXPR_OP_MULT,
255 make_cellref (1,-5));
256 expr_sqrt = gnm_expr_new_funcall1
257 (fd_sqrt,
258 gnm_expr_new_binary
259 (gnm_expr_new_binary
260 (gnm_expr_copy(expr_prod),
261 GNM_EXPR_OP_MULT,
262 gnm_expr_new_binary
263 (gnm_expr_new_binary
264 (make_cellref (0,-5),
265 GNM_EXPR_OP_ADD,
266 make_cellref (1,-5)),
267 GNM_EXPR_OP_ADD,
268 gnm_expr_new_constant (value_new_int (1)))),
269 GNM_EXPR_OP_DIV,
270 gnm_expr_new_constant (value_new_int (12))));
271 expr_normdist = gnm_expr_new_funcall4
272 (fd_normdist,
273 make_cellref (0,-1),
274 gnm_expr_new_binary
275 (expr_prod,
276 GNM_EXPR_OP_DIV,
277 gnm_expr_new_constant (value_new_int (2))),
278 expr_sqrt,
279 gnm_expr_new_constant (value_new_bool (TRUE)));
281 dao_set_cell_expr (dao, 1, 6,
282 gnm_expr_new_funcall2
283 (fd_min,
284 make_cellref (0,-4),
285 make_cellref (1,-4)));
286 dao_set_cell_expr (dao, 1, 7,
287 gnm_expr_new_funcall2
288 (fd_min,
289 make_cellref (0,-3),
290 make_cellref (1,-3)));
292 dao_set_cell_expr (dao, 1, 8,
293 gnm_expr_new_binary
294 (gnm_expr_new_constant (value_new_int (2)),
295 GNM_EXPR_OP_MULT,
296 expr_normdist));
297 dao_set_cell_comment (dao, 1, 8,
298 _("This p-value is calculated using a\n"
299 "normal approximation, so it is\n"
300 "only valid for large samples of\n"
301 "at least 15 observations in each\n"
302 "population, and few if any ties."));
303 } else {
304 dao_set_cell_na (dao, 1, 6);
305 dao_set_cell_comment (dao, 1, 6,
306 _("Since there is insufficient space\n"
307 "for the third column of output,\n"
308 "this value is not calculated."));
309 dao_set_cell_na (dao, 1, 7);
310 dao_set_cell_comment (dao, 1, 7,
311 _("Since there is insufficient space\n"
312 "for the third column of output,\n"
313 "this value is not calculated."));
314 dao_set_cell_na (dao, 1, 8);
315 dao_set_cell_comment (dao, 1, 8,
316 _("Since there is insufficient space\n"
317 "for the third column of output,\n"
318 "this value is not calculated."));
322 gnm_expr_free (expr_count_total);
324 gnm_expr_free (expr_total);
326 gnm_func_dec_usage (fd_count);
327 gnm_func_dec_usage (fd_sum);
328 gnm_func_dec_usage (fd_rows);
329 gnm_func_dec_usage (fd_rank_avg);
330 gnm_func_dec_usage (fd_rank);
331 gnm_func_dec_usage (fd_min);
332 gnm_func_dec_usage (fd_normdist);
333 gnm_func_dec_usage (fd_sqrt);
334 gnm_func_dec_usage (fd_if);
335 gnm_func_dec_usage (fd_isblank);
337 dao_redraw_respan (dao);
338 return 0;
341 gboolean
342 analysis_tool_wilcoxon_mann_whitney_engine
343 (G_GNUC_UNUSED GOCmdContext *gcc,
344 data_analysis_output_t *dao, gpointer specs,
345 analysis_tool_engine_t selector, gpointer result)
347 switch (selector) {
348 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
349 return (dao_command_descriptor
350 (dao, _("Wilcoxon-Mann-Whitney Test (%s)"), result)
351 == NULL);
352 case TOOL_ENGINE_UPDATE_DAO:
353 dao_adjust (dao, 4, 9);
354 return FALSE;
355 case TOOL_ENGINE_CLEAN_UP:
356 return analysis_tool_generic_b_clean (specs);
357 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
358 return FALSE;
359 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
360 dao_prepare_output (NULL, dao, _("Wilcoxon-Mann-Whitney Test"));
361 return FALSE;
362 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
363 return dao_format_output (dao, _("Wilcoxon-Mann-Whitney Test"));
364 case TOOL_ENGINE_PERFORM_CALC:
365 default:
366 return analysis_tool_wilcoxon_mann_whitney_engine_run (dao, specs);
368 return TRUE; /* We shouldn't get here */