1.12.42
[gnumeric.git] / src / tools / analysis-tools.c
blob59ee76897bae28a5983ffe0c0441d1e8aed17790
1 /*
2 * analysis-tools.c:
4 * Authors:
5 * Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
6 * Andreas J. Guelzow <aguelzow@taliesin.ca>
8 * (C) Copyright 2000, 2001 by Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
9 * (C) Copyright 2002, 2004 by Andreas J. Guelzow <aguelzow@taliesin.ca>
11 * Modified 2001 to use range_* functions of mathfunc.h
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, see <https://www.gnu.org/licenses/>.
27 #include <gnumeric-config.h>
28 #include <glib/gi18n-lib.h>
29 #include <gnumeric.h>
30 #include <tools/analysis-tools.h>
32 #include <mathfunc.h>
33 #include <func.h>
34 #include <expr.h>
35 #include <position.h>
36 #include <tools/tools.h>
37 #include <value.h>
38 #include <cell.h>
39 #include <sheet.h>
40 #include <ranges.h>
41 #include <parse-util.h>
42 #include <style.h>
43 #include <regression.h>
44 #include <sheet-style.h>
45 #include <workbook.h>
46 #include <collect.h>
47 #include <gnm-format.h>
48 #include <sheet-object-cell-comment.h>
49 #include <workbook-control.h>
50 #include <command-context.h>
51 #include <sheet-object-graph.h>
52 #include <graph.h>
53 #include <goffice/goffice.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <math.h>
60 const GnmExpr *
61 make_cellref (int dx, int dy)
63 GnmCellRef r;
64 r.sheet = NULL;
65 r.col = dx;
66 r.col_relative = TRUE;
67 r.row = dy;
68 r.row_relative = TRUE;
69 return gnm_expr_new_cellref (&r);
72 const GnmExpr *
73 make_rangeref (int dx0, int dy0, int dx1, int dy1)
75 GnmCellRef a, b;
76 GnmValue *val;
78 a.sheet = NULL;
79 a.col = dx0;
80 a.col_relative = TRUE;
81 a.row = dy0;
82 a.row_relative = TRUE;
83 b.sheet = NULL;
84 b.col = dx1;
85 b.col_relative = TRUE;
86 b.row = dy1;
87 b.row_relative = TRUE;
89 val = value_new_cellrange_unsafe (&a, &b);
90 return gnm_expr_new_constant (val);
94 typedef struct {
95 char *format;
96 GPtrArray *data_lists;
97 gboolean read_label;
98 gboolean ignore_non_num;
99 guint length;
100 Sheet *sheet;
101 } data_list_specs_t;
104 * cb_adjust_areas:
105 * @data:
106 * @user_data:
109 static void
110 cb_adjust_areas (gpointer data, G_GNUC_UNUSED gpointer user_data)
112 GnmValue *range = (GnmValue *)data;
114 if (range == NULL || !VALUE_IS_CELLRANGE (range)) {
115 return;
118 range->v_range.cell.a.col_relative = 0;
119 range->v_range.cell.a.row_relative = 0;
120 range->v_range.cell.b.col_relative = 0;
121 range->v_range.cell.b.row_relative = 0;
125 * analysis_tools_remove_label:
128 static void
129 analysis_tools_remove_label (GnmValue *val,
130 gboolean labels, group_by_t group_by)
132 if (labels) {
133 switch (group_by) {
134 case GROUPED_BY_ROW:
135 val->v_range.cell.a.col++;
136 break;
137 case GROUPED_BY_COL:
138 case GROUPED_BY_BIN:
139 case GROUPED_BY_AREA:
140 default:
141 val->v_range.cell.a.row++;
142 break;
150 * analysis_tools_write_label:
151 * @val: range to extract label from
152 * @dao: data_analysis_output_t, where to write to
153 * @info: analysis_tools_data_generic_t info
154 * @x: output col number
155 * @y: output row number
156 * @i: default col/row number
160 void
161 analysis_tools_write_label (GnmValue *val, data_analysis_output_t *dao,
162 analysis_tools_data_generic_t *info,
163 int x, int y, int i)
165 char const *format = NULL;
167 if (info->labels) {
168 GnmValue *label = value_dup (val);
170 label->v_range.cell.b = label->v_range.cell.a;
171 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
172 analysis_tools_remove_label (val, info->labels, info->group_by);
173 } else {
174 switch (info->group_by) {
175 case GROUPED_BY_ROW:
176 format = _("Row %i");
177 break;
178 case GROUPED_BY_COL:
179 format = _("Column %i");
180 break;
181 case GROUPED_BY_BIN:
182 format = _("Bin %i");
183 break;
184 case GROUPED_BY_AREA:
185 default:
186 format = _("Area %i");
187 break;
190 dao_set_cell_printf (dao, x, y, format, i);
195 * analysis_tools_write_label:
196 * @val: range to extract label from
197 * @dao: data_analysis_output_t, where to write to
198 * @labels: analysis_tools_data_generic_t infowhether the
199 * @val contains label info
200 * @group_by: grouping info
201 * @x: output col number
202 * @y: output row number
203 * @i: default col/row number
207 static void
208 analysis_tools_write_a_label (GnmValue *val, data_analysis_output_t *dao,
209 gboolean labels, group_by_t group_by,
210 int x, int y)
212 if (labels) {
213 GnmValue *label = value_dup (val);
215 label->v_range.cell.b = label->v_range.cell.a;
216 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
217 analysis_tools_remove_label (val, labels, group_by);
218 } else {
219 char const *str = ((group_by == GROUPED_BY_ROW) ? "row" : "col");
220 char const *label = ((group_by == GROUPED_BY_ROW) ? _("Row") : _("Column"));
222 GnmFunc *fd_concatenate;
223 GnmFunc *fd_cell;
225 fd_concatenate = gnm_func_lookup_or_add_placeholder ("CONCATENATE");
226 gnm_func_inc_usage (fd_concatenate);
227 fd_cell = gnm_func_lookup_or_add_placeholder ("CELL");
228 gnm_func_inc_usage (fd_cell);
230 dao_set_cell_expr (dao, x, y, gnm_expr_new_funcall3
231 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
232 gnm_expr_new_constant (value_new_string (" ")),
233 gnm_expr_new_funcall2 (fd_cell,
234 gnm_expr_new_constant (value_new_string (str)),
235 gnm_expr_new_constant (value_dup (val)))));
237 gnm_func_dec_usage (fd_concatenate);
238 gnm_func_dec_usage (fd_cell);
243 * analysis_tools_write_label_ftest:
244 * @val: range to extract label from
245 * @dao: data_analysis_output_t, where to write to
246 * @info: analysis_tools_data_generic_t info
247 * @x: output col number
248 * @y: output row number
249 * @i: default col/row number
253 void
254 analysis_tools_write_label_ftest (GnmValue *val, data_analysis_output_t *dao,
255 int x, int y, gboolean labels, int i)
257 cb_adjust_areas (val, NULL);
259 if (labels) {
260 GnmValue *label = value_dup (val);
262 label->v_range.cell.b = label->v_range.cell.a;
263 dao_set_cell_expr (dao, x, y, gnm_expr_new_constant (label));
265 if ((val->v_range.cell.b.col - val->v_range.cell.a.col) <
266 (val->v_range.cell.b.row - val->v_range.cell.a.row))
267 val->v_range.cell.a.row++;
268 else
269 val->v_range.cell.a.col++;
270 } else {
271 dao_set_cell_printf (dao, x, y, _("Variable %i"), i);
276 * cb_cut_into_cols:
277 * @data:
278 * @user_data:
281 static void
282 cb_cut_into_cols (gpointer data, gpointer user_data)
284 GnmValue *range = (GnmValue *)data;
285 GnmValue *col_value;
286 GSList **list_of_units = (GSList **) user_data;
287 gint col;
289 if (range == NULL) {
290 return;
292 if (!VALUE_IS_CELLRANGE (range) ||
293 (range->v_range.cell.b.sheet != NULL &&
294 range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
295 value_release (range);
296 return;
299 cb_adjust_areas (data, NULL);
301 if (range->v_range.cell.a.col == range->v_range.cell.b.col) {
302 *list_of_units = g_slist_prepend (*list_of_units, range);
303 return;
306 for (col = range->v_range.cell.a.col; col <= range->v_range.cell.b.col; col++) {
307 col_value = value_dup (range);
308 col_value->v_range.cell.a.col = col;
309 col_value->v_range.cell.b.col = col;
310 *list_of_units = g_slist_prepend (*list_of_units, col_value);
312 value_release (range);
313 return;
317 * cb_cut_into_rows:
318 * @data:
319 * @user_data:
322 static void
323 cb_cut_into_rows (gpointer data, gpointer user_data)
325 GnmValue *range = (GnmValue *)data;
326 GnmValue *row_value;
327 GSList **list_of_units = (GSList **) user_data;
328 gint row;
330 if (range == NULL) {
331 return;
333 if (!VALUE_IS_CELLRANGE (range) ||
334 (range->v_range.cell.b.sheet != NULL &&
335 range->v_range.cell.b.sheet != range->v_range.cell.a.sheet)) {
336 value_release (range);
337 return;
340 cb_adjust_areas (data, NULL);
342 if (range->v_range.cell.a.row == range->v_range.cell.b.row) {
343 *list_of_units = g_slist_prepend (*list_of_units, range);
344 return;
347 for (row = range->v_range.cell.a.row; row <= range->v_range.cell.b.row; row++) {
348 row_value = value_dup (range);
349 row_value->v_range.cell.a.row = row;
350 row_value->v_range.cell.b.row = row;
351 *list_of_units = g_slist_prepend (*list_of_units, row_value);
353 value_release (range);
354 return;
359 * prepare_input_range:
360 * @input_range: (inout) (element-type GnmRange) (transfer full):
361 * @group_by:
363 void
364 prepare_input_range (GSList **input_range, group_by_t group_by)
366 GSList *input_by_units = NULL;
368 switch (group_by) {
369 case GROUPED_BY_ROW:
370 g_slist_foreach (*input_range, cb_cut_into_rows, &input_by_units);
371 g_slist_free (*input_range);
372 *input_range = g_slist_reverse (input_by_units);
373 return;
374 case GROUPED_BY_COL:
375 g_slist_foreach (*input_range, cb_cut_into_cols, &input_by_units);
376 g_slist_free (*input_range);
377 *input_range = g_slist_reverse (input_by_units);
378 return;
379 case GROUPED_BY_AREA:
380 default:
381 g_slist_foreach (*input_range, cb_adjust_areas, NULL);
382 return;
386 typedef struct {
387 gboolean init;
388 gint size;
389 gboolean hom;
390 } homogeneity_check_t;
394 * cb_check_hom:
395 * @data:
396 * @user_data:
399 static void
400 cb_check_hom (gpointer data, gpointer user_data)
402 GnmValue *range = (GnmValue *)data;
403 homogeneity_check_t *state = (homogeneity_check_t *) user_data;
404 gint this_size;
406 if (!VALUE_IS_CELLRANGE (range)) {
407 state->hom = FALSE;
408 return;
411 this_size = (range->v_range.cell.b.col - range->v_range.cell.a.col + 1) *
412 (range->v_range.cell.b.row - range->v_range.cell.a.row + 1);
414 if (state->init) {
415 if (state->size != this_size)
416 state->hom = FALSE;
417 } else {
418 state->init = TRUE;
419 state->size = this_size;
421 return;
425 * gnm_check_input_range_list_homogeneity:
426 * @input_range:
428 * Check that all columns have the same size
431 static gboolean
432 gnm_check_input_range_list_homogeneity (GSList *input_range)
434 homogeneity_check_t state = { FALSE, 0, TRUE };
436 g_slist_foreach (input_range, cb_check_hom, &state);
438 return state.hom;
442 /***** Some general routines ***********************************************/
444 static gint
445 float_compare (gnm_float const *a, gnm_float const *b)
447 if (*a < *b)
448 return -1;
449 else if (*a == *b)
450 return 0;
451 else
452 return 1;
455 gnm_float *
456 range_sort (gnm_float const *xs, int n)
458 if (n <= 0)
459 return NULL;
460 else {
461 gnm_float *ys = g_new (gnm_float, n);
462 memcpy (ys, xs, n * sizeof (gnm_float));
463 qsort (ys, n, sizeof (ys[0]),
464 (int (*) (const void *, const void *))&float_compare);
465 return ys;
471 * Set a column of text from a string like "/first/second/third" or "|foo|bar|baz".
473 void
474 set_cell_text_col (data_analysis_output_t *dao, int col, int row, const char *text)
476 gboolean leave = FALSE;
477 char *copy, *orig_copy;
478 char sep = *text;
479 if (sep == 0) return;
481 copy = orig_copy = g_strdup (text + 1);
482 while (!leave) {
483 char *p = copy;
484 while (*copy && *copy != sep)
485 copy++;
486 if (*copy)
487 *copy++ = 0;
488 else
489 leave = TRUE;
490 dao_set_cell_value (dao, col, row++, value_new_string (p));
492 g_free (orig_copy);
497 * Set a row of text from a string like "/first/second/third" or "|foo|bar|baz".
499 void
500 set_cell_text_row (data_analysis_output_t *dao, int col, int row, const char *text)
502 gboolean leave = 0;
503 char *copy, *orig_copy;
504 char sep = *text;
505 if (sep == 0) return;
507 copy = orig_copy = g_strdup (text + 1);
508 while (!leave) {
509 char *p = copy;
510 while (*copy && *copy != sep)
511 copy++;
512 if (*copy)
513 *copy++ = 0;
514 else
515 leave = TRUE;
516 dao_set_cell_value (dao, col++, row, value_new_string (p));
518 g_free (orig_copy);
521 gboolean
522 analysis_tool_generic_clean (gpointer specs)
524 analysis_tools_data_generic_t *info = specs;
526 range_list_destroy (info->input);
527 info->input = NULL;
528 return FALSE;
531 gboolean
532 analysis_tool_generic_b_clean (gpointer specs)
534 analysis_tools_data_generic_b_t *info = specs;
536 value_release (info->range_1);
537 info->range_1 = NULL;
538 value_release (info->range_2);
539 info->range_2 = NULL;
540 return FALSE;
545 int analysis_tool_calc_length (analysis_tools_data_generic_t *info)
547 int result = 1;
548 GSList *dataset;
550 for (dataset = info->input; dataset; dataset = dataset->next) {
551 GnmValue *current = dataset->data;
552 int given_length;
554 if (info->group_by == GROUPED_BY_AREA) {
555 given_length = (current->v_range.cell.b.row - current->v_range.cell.a.row + 1) *
556 (current->v_range.cell.b.col - current->v_range.cell.a.col + 1);
557 } else
558 given_length = (info->group_by == GROUPED_BY_COL) ?
559 (current->v_range.cell.b.row - current->v_range.cell.a.row + 1) :
560 (current->v_range.cell.b.col - current->v_range.cell.a.col + 1);
561 if (given_length > result)
562 result = given_length;
564 if (info->labels)
565 result--;
566 return result;
570 * analysis_tool_get_function:
571 * @name: name of function
572 * @dao:
574 * Returns: (transfer full): the function named @name or a placeholder.
575 * The usage count of the function is incremented.
577 GnmFunc *
578 analysis_tool_get_function (char const *name,
579 data_analysis_output_t *dao)
581 GnmFunc *fd;
583 fd = gnm_func_lookup_or_add_placeholder (name);
584 gnm_func_inc_usage (fd);
585 return fd;
590 /************* Correlation Tool *******************************************
592 * The correlation tool calculates the correlation coefficient of two
593 * data sets. The two data sets can be grouped by rows or by columns.
594 * The results are given in a table which can be printed out in a new
595 * sheet, in a new workbook, or simply into an existing sheet.
599 gboolean
600 analysis_tool_table (data_analysis_output_t *dao,
601 analysis_tools_data_generic_t *info,
602 gchar const *title, gchar const *functionname,
603 gboolean full_table)
605 GSList *inputdata, *inputexpr = NULL;
606 GnmFunc *fd = NULL;
608 guint col, row;
610 dao_set_italic (dao, 0, 0, 0, 0);
611 dao_set_cell_printf (dao, 0, 0, "%s", title);
613 fd = gnm_func_lookup_or_add_placeholder (functionname);
614 gnm_func_inc_usage (fd);
616 for (col = 1, inputdata = info->input; inputdata != NULL;
617 inputdata = inputdata->next, col++) {
618 GnmValue *val = NULL;
620 val = value_dup (inputdata->data);
622 /* Label */
623 dao_set_italic (dao, col, 0, col, 0);
624 analysis_tools_write_label (val, dao, info,
625 col, 0, col);
627 inputexpr = g_slist_prepend (inputexpr,
628 (gpointer) gnm_expr_new_constant (val));
630 inputexpr = g_slist_reverse (inputexpr);
632 for (row = 1, inputdata = info->input; inputdata != NULL;
633 inputdata = inputdata->next, row++) {
634 GnmValue *val = value_dup (inputdata->data);
635 GSList *colexprlist;
637 /* Label */
638 dao_set_italic (dao, 0, row, 0, row);
639 analysis_tools_write_label (val, dao, info,
640 0, row, row);
642 for (col = 1, colexprlist = inputexpr; colexprlist != NULL;
643 colexprlist = colexprlist->next, col++) {
644 GnmExpr const *colexpr = colexprlist->data;
646 if ((!full_table) && (col < row))
647 continue;
649 dao_set_cell_expr
650 (dao, row, col,
651 gnm_expr_new_funcall2
652 (fd,
653 gnm_expr_new_constant (value_dup (val)),
654 gnm_expr_copy (colexpr)));
657 value_release (val);
660 g_slist_free_full (inputexpr, (GDestroyNotify)gnm_expr_free);
661 if (fd) gnm_func_dec_usage (fd);
663 dao_redraw_respan (dao);
664 return FALSE;
667 static gboolean
668 analysis_tool_correlation_engine_run (data_analysis_output_t *dao,
669 analysis_tools_data_generic_t *info)
671 return analysis_tool_table (dao, info, _("Correlations"),
672 "CORREL", FALSE);
675 gboolean
676 analysis_tool_correlation_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
677 analysis_tool_engine_t selector, gpointer result)
679 analysis_tools_data_generic_t *info = specs;
681 switch (selector) {
682 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
683 return (dao_command_descriptor (dao, _("Correlation (%s)"), result)
684 == NULL);
685 case TOOL_ENGINE_UPDATE_DAO:
686 prepare_input_range (&info->input, info->group_by);
687 if (!gnm_check_input_range_list_homogeneity (info->input)) {
688 info->err = info->group_by + 1;
689 return TRUE;
691 dao_adjust (dao, 1 + g_slist_length (info->input),
692 1 + g_slist_length (info->input));
693 return FALSE;
694 case TOOL_ENGINE_CLEAN_UP:
695 return analysis_tool_generic_clean (specs);
696 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
697 return FALSE;
698 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
699 dao_prepare_output (NULL, dao, _("Correlation"));
700 return FALSE;
701 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
702 return dao_format_output (dao, _("Correlation"));
703 case TOOL_ENGINE_PERFORM_CALC:
704 default:
705 return analysis_tool_correlation_engine_run (dao, specs);
707 return TRUE; /* We shouldn't get here */
713 /************* Covariance Tool ********************************************
715 * The covariance tool calculates the covariance of two data sets.
716 * The two data sets can be grouped by rows or by columns. The
717 * results are given in a table which can be printed out in a new
718 * sheet, in a new workbook, or simply into an existing sheet.
722 static gboolean
723 analysis_tool_covariance_engine_run (data_analysis_output_t *dao,
724 analysis_tools_data_generic_t *info)
726 return analysis_tool_table (dao, info, _("Covariances"),
727 "COVAR", FALSE);
730 gboolean
731 analysis_tool_covariance_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
732 analysis_tool_engine_t selector, gpointer result)
734 analysis_tools_data_generic_t *info = specs;
736 switch (selector) {
737 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
738 return (dao_command_descriptor (dao, _("Covariance (%s)"), result)
739 == NULL);
740 case TOOL_ENGINE_UPDATE_DAO:
741 prepare_input_range (&info->input, info->group_by);
742 if (!gnm_check_input_range_list_homogeneity (info->input)) {
743 info->err = info->group_by + 1;
744 return TRUE;
746 dao_adjust (dao, 1 + g_slist_length (info->input),
747 1 + g_slist_length (info->input));
748 return FALSE;
749 case TOOL_ENGINE_CLEAN_UP:
750 return analysis_tool_generic_clean (specs);
751 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
752 return FALSE;
753 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
754 dao_prepare_output (NULL, dao, _("Covariance"));
755 return FALSE;
756 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
757 return dao_format_output (dao, _("Covariance"));
758 case TOOL_ENGINE_PERFORM_CALC:
759 default:
760 return analysis_tool_covariance_engine_run (dao, specs);
762 return TRUE; /* We shouldn't get here */
768 /************* Descriptive Statistics Tool *******************************
770 * Descriptive Statistics Tool calculates some useful statistical
771 * information such as the mean, standard deviation, sample variance,
772 * skewness, kurtosis, and standard error about the given variables.
773 * The results are given in a table which can be printed out in a new
774 * sheet, in a new workbook, or simply into an existing sheet.
778 typedef struct {
779 gnm_float mean;
780 gint error_mean;
781 gnm_float var;
782 gint error_var;
783 gint len;
784 } desc_stats_t;
786 static void
787 summary_statistics (data_analysis_output_t *dao,
788 analysis_tools_data_descriptive_t *info)
790 guint col;
791 GSList *data = info->base.input;
792 GnmFunc *fd_mean;
793 GnmFunc *fd_median;
794 GnmFunc *fd_mode;
795 GnmFunc *fd_stdev;
796 GnmFunc *fd_var;
797 GnmFunc *fd_kurt;
798 GnmFunc *fd_skew;
799 GnmFunc *fd_min;
800 GnmFunc *fd_max;
801 GnmFunc *fd_sum;
802 GnmFunc *fd_count;
803 GnmFunc *fd_sqrt;
805 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
806 gnm_func_inc_usage (fd_mean);
807 fd_median = gnm_func_lookup_or_add_placeholder (info->use_ssmedian ? "SSMEDIAN" : "MEDIAN");
808 gnm_func_inc_usage (fd_median);
809 fd_mode = gnm_func_lookup_or_add_placeholder ("MODE");
810 gnm_func_inc_usage (fd_mode);
811 fd_stdev = gnm_func_lookup_or_add_placeholder ("STDEV");
812 gnm_func_inc_usage (fd_stdev);
813 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
814 gnm_func_inc_usage (fd_var);
815 fd_kurt = gnm_func_lookup_or_add_placeholder ("KURT");
816 gnm_func_inc_usage (fd_kurt);
817 fd_skew = gnm_func_lookup_or_add_placeholder ("SKEW");
818 gnm_func_inc_usage (fd_skew);
819 fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
820 gnm_func_inc_usage (fd_min);
821 fd_max = gnm_func_lookup_or_add_placeholder ("MAX");
822 gnm_func_inc_usage (fd_max);
823 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
824 gnm_func_inc_usage (fd_sum);
825 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
826 gnm_func_inc_usage (fd_count);
827 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
828 gnm_func_inc_usage (fd_sqrt);
830 dao_set_cell (dao, 0, 0, NULL);
832 dao_set_italic (dao, 0, 1, 0, 13);
834 * Note to translators: in the following string and others like it,
835 * the "/" is a separator character that can be changed to anything
836 * if the translation needs the slash; just use, say, "|" instead.
838 * The items are bundled like this to increase translation context.
840 set_cell_text_col (dao, 0, 1, _("/Mean"
841 "/Standard Error"
842 "/Median"
843 "/Mode"
844 "/Standard Deviation"
845 "/Sample Variance"
846 "/Kurtosis"
847 "/Skewness"
848 "/Range"
849 "/Minimum"
850 "/Maximum"
851 "/Sum"
852 "/Count"));
854 for (col = 0; data != NULL; data = data->next, col++) {
855 GnmExpr const *expr;
856 GnmExpr const *expr_min;
857 GnmExpr const *expr_max;
858 GnmExpr const *expr_var;
859 GnmExpr const *expr_count;
860 GnmValue *val_org = value_dup (data->data);
862 dao_set_italic (dao, col + 1, 0, col+1, 0);
863 /* Note that analysis_tools_write_label may modify val_org */
864 analysis_tools_write_label (val_org, dao, &info->base,
865 col + 1, 0, col + 1);
867 /* Mean */
868 expr = gnm_expr_new_funcall1
869 (fd_mean,
870 gnm_expr_new_constant (value_dup (val_org)));
871 dao_set_cell_expr (dao, col + 1, 1, expr);
873 /* Standard Deviation */
874 expr = gnm_expr_new_funcall1
875 (fd_stdev,
876 gnm_expr_new_constant (value_dup (val_org)));
877 dao_set_cell_expr (dao, col + 1, 5, expr);
879 /* Sample Variance */
880 expr_var = gnm_expr_new_funcall1
881 (fd_var,
882 gnm_expr_new_constant (value_dup (val_org)));
883 dao_set_cell_expr (dao, col + 1, 6, gnm_expr_copy (expr_var));
885 /* Median */
886 expr = gnm_expr_new_funcall1
887 (fd_median,
888 gnm_expr_new_constant (value_dup (val_org)));
889 dao_set_cell_expr (dao, col + 1, 3, expr);
891 /* Mode */
892 expr = gnm_expr_new_funcall1
893 (fd_mode,
894 gnm_expr_new_constant (value_dup (val_org)));
895 dao_set_cell_expr (dao, col + 1, 4, expr);
897 /* Kurtosis */
898 expr = gnm_expr_new_funcall1
899 (fd_kurt,
900 gnm_expr_new_constant (value_dup (val_org)));
901 dao_set_cell_expr (dao, col + 1, 7, expr);
903 /* Skewness */
904 expr = gnm_expr_new_funcall1
905 (fd_skew,
906 gnm_expr_new_constant (value_dup (val_org)));
907 dao_set_cell_expr (dao, col + 1, 8, expr);
909 /* Minimum */
910 expr_min = gnm_expr_new_funcall1
911 (fd_min,
912 gnm_expr_new_constant (value_dup (val_org)));
913 dao_set_cell_expr (dao, col + 1, 10, gnm_expr_copy (expr_min));
915 /* Maximum */
916 expr_max = gnm_expr_new_funcall1
917 (fd_max,
918 gnm_expr_new_constant (value_dup (val_org)));
919 dao_set_cell_expr (dao, col + 1, 11, gnm_expr_copy (expr_max));
921 /* Range */
922 expr = gnm_expr_new_binary (expr_max, GNM_EXPR_OP_SUB, expr_min);
923 dao_set_cell_expr (dao, col + 1, 9, expr);
925 /* Sum */
926 expr = gnm_expr_new_funcall1
927 (fd_sum,
928 gnm_expr_new_constant (value_dup (val_org)));
929 dao_set_cell_expr (dao, col + 1, 12, expr);
931 /* Count */
932 expr_count = gnm_expr_new_funcall1
933 (fd_count,
934 gnm_expr_new_constant (val_org));
935 dao_set_cell_expr (dao, col + 1, 13, gnm_expr_copy (expr_count));
937 /* Standard Error */
938 expr = gnm_expr_new_funcall1
939 (fd_sqrt,
940 gnm_expr_new_binary (expr_var,
941 GNM_EXPR_OP_DIV,
942 expr_count));
943 dao_set_cell_expr (dao, col + 1, 2, expr);
946 gnm_func_dec_usage (fd_mean);
947 gnm_func_dec_usage (fd_median);
948 gnm_func_dec_usage (fd_mode);
949 gnm_func_dec_usage (fd_stdev);
950 gnm_func_dec_usage (fd_var);
951 gnm_func_dec_usage (fd_kurt);
952 gnm_func_dec_usage (fd_skew);
953 gnm_func_dec_usage (fd_min);
954 gnm_func_dec_usage (fd_max);
955 gnm_func_dec_usage (fd_sum);
956 gnm_func_dec_usage (fd_count);
957 gnm_func_dec_usage (fd_sqrt);
960 static void
961 confidence_level (data_analysis_output_t *dao,
962 analysis_tools_data_descriptive_t *info)
964 guint col;
965 char *buffer;
966 char *format;
967 GSList *data = info->base.input;
968 GnmFunc *fd_mean;
969 GnmFunc *fd_var;
970 GnmFunc *fd_count;
971 GnmFunc *fd_tinv;
972 GnmFunc *fd_sqrt;
974 format = g_strdup_printf (_("/%%%s%%%% CI for the Mean from"
975 "/to"), GNM_FORMAT_g);
976 buffer = g_strdup_printf (format, info->c_level * 100);
977 g_free (format);
978 dao_set_italic (dao, 0, 1, 0, 2);
979 set_cell_text_col (dao, 0, 1, buffer);
980 g_free (buffer);
982 dao_set_cell (dao, 0, 0, NULL);
984 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
985 gnm_func_inc_usage (fd_mean);
986 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
987 gnm_func_inc_usage (fd_var);
988 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
989 gnm_func_inc_usage (fd_count);
990 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
991 gnm_func_inc_usage (fd_tinv);
992 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
993 gnm_func_inc_usage (fd_sqrt);
996 for (col = 0; data != NULL; data = data->next, col++) {
997 GnmExpr const *expr;
998 GnmExpr const *expr_mean;
999 GnmExpr const *expr_var;
1000 GnmExpr const *expr_count;
1001 GnmValue *val_org = value_dup (data->data);
1003 dao_set_italic (dao, col+1, 0, col+1, 0);
1004 /* Note that analysis_tools_write_label may modify val_org */
1005 analysis_tools_write_label (val_org, dao, &info->base, col + 1, 0, col + 1);
1007 expr_mean = gnm_expr_new_funcall1
1008 (fd_mean,
1009 gnm_expr_new_constant (value_dup (val_org)));
1011 expr_var = gnm_expr_new_funcall1
1012 (fd_var,
1013 gnm_expr_new_constant (value_dup (val_org)));
1015 expr_count = gnm_expr_new_funcall1
1016 (fd_count,
1017 gnm_expr_new_constant (val_org));
1019 expr = gnm_expr_new_binary
1020 (gnm_expr_new_funcall2
1021 (fd_tinv,
1022 gnm_expr_new_constant (value_new_float (1 - info->c_level)),
1023 gnm_expr_new_binary
1024 (gnm_expr_copy (expr_count),
1025 GNM_EXPR_OP_SUB,
1026 gnm_expr_new_constant (value_new_int (1)))),
1027 GNM_EXPR_OP_MULT,
1028 gnm_expr_new_funcall1
1029 (fd_sqrt,
1030 gnm_expr_new_binary (expr_var,
1031 GNM_EXPR_OP_DIV,
1032 expr_count)));
1034 dao_set_cell_expr (dao, col + 1, 1,
1035 gnm_expr_new_binary
1036 (gnm_expr_copy (expr_mean),
1037 GNM_EXPR_OP_SUB,
1038 gnm_expr_copy (expr)));
1039 dao_set_cell_expr (dao, col + 1, 2,
1040 gnm_expr_new_binary (expr_mean,
1041 GNM_EXPR_OP_ADD,
1042 expr));
1045 gnm_func_dec_usage (fd_mean);
1046 gnm_func_dec_usage (fd_var);
1047 gnm_func_dec_usage (fd_count);
1048 gnm_func_dec_usage (fd_tinv);
1049 gnm_func_dec_usage (fd_sqrt);
1052 static void
1053 kth_smallest_largest (data_analysis_output_t *dao,
1054 analysis_tools_data_descriptive_t *info,
1055 char const* func, char const* label, int k)
1057 guint col;
1058 GSList *data = info->base.input;
1059 GnmFunc *fd = gnm_func_lookup_or_add_placeholder (func);
1060 gnm_func_inc_usage (fd);
1062 dao_set_italic (dao, 0, 1, 0, 1);
1063 dao_set_cell_printf (dao, 0, 1, label, k);
1065 dao_set_cell (dao, 0, 0, NULL);
1067 for (col = 0; data != NULL; data = data->next, col++) {
1068 GnmExpr const *expr = NULL;
1069 GnmValue *val = value_dup (data->data);
1071 dao_set_italic (dao, col + 1, 0, col + 1, 0);
1072 analysis_tools_write_label (val, dao, &info->base,
1073 col + 1, 0, col + 1);
1075 expr = gnm_expr_new_funcall2
1076 (fd,
1077 gnm_expr_new_constant (val),
1078 gnm_expr_new_constant (value_new_int (k)));
1080 dao_set_cell_expr (dao, col + 1, 1, expr);
1083 gnm_func_dec_usage (fd);
1086 /* Descriptive Statistics
1088 static gboolean
1089 analysis_tool_descriptive_engine_run (data_analysis_output_t *dao,
1090 analysis_tools_data_descriptive_t *info)
1092 if (info->summary_statistics) {
1093 summary_statistics (dao, info);
1094 dao->offset_row += 16;
1095 if (dao->rows <= dao->offset_row)
1096 goto finish_descriptive_tool;
1098 if (info->confidence_level) {
1099 confidence_level (dao, info);
1100 dao->offset_row += 4;
1101 if (dao->rows <= dao->offset_row)
1102 goto finish_descriptive_tool;
1104 if (info->kth_largest) {
1105 kth_smallest_largest (dao, info, "LARGE", _("Largest (%d)"),
1106 info->k_largest);
1107 dao->offset_row += 4;
1108 if (dao->rows <= dao->offset_row)
1109 goto finish_descriptive_tool;
1111 if (info->kth_smallest)
1112 kth_smallest_largest (dao, info, "SMALL", _("Smallest (%d)"),
1113 info->k_smallest);
1115 finish_descriptive_tool:
1117 dao_redraw_respan (dao);
1118 return 0;
1121 gboolean
1122 analysis_tool_descriptive_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1123 analysis_tool_engine_t selector, gpointer result)
1125 analysis_tools_data_descriptive_t *info = specs;
1127 switch (selector) {
1128 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1129 return (dao_command_descriptor (dao, _("Descriptive Statistics (%s)"), result)
1130 == NULL);
1131 case TOOL_ENGINE_UPDATE_DAO:
1132 prepare_input_range (&info->base.input, info->base.group_by);
1133 dao_adjust (dao, 1 + g_slist_length (info->base.input),
1134 (info->summary_statistics ? 16 : 0) +
1135 (info->confidence_level ? 4 : 0) +
1136 (info->kth_largest ? 4 : 0) +
1137 (info->kth_smallest ? 4 : 0 ) - 1);
1138 return FALSE;
1139 case TOOL_ENGINE_CLEAN_UP:
1140 return analysis_tool_generic_clean (specs);
1141 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1142 return FALSE;
1143 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1144 dao_prepare_output (NULL, dao, _("Descriptive Statistics"));
1145 return FALSE;
1146 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1147 return dao_format_output (dao, _("Descriptive Statistics"));
1148 case TOOL_ENGINE_PERFORM_CALC:
1149 default:
1150 return analysis_tool_descriptive_engine_run (dao, specs);
1152 return TRUE; /* We shouldn't get here */
1157 /************* Sampling Tool *********************************************
1159 * The sampling tool takes a sample from a given data set. The sample can be
1160 * a random sample, where a given number of data points are selected
1161 * randomly from the data set. The sample can also be a periodic
1162 * sample where, for example, every fourth data element is selected to
1163 * the sample. The results are given in a table which can be printed
1164 * out in a new sheet, in a new workbook, or simply into an existing
1165 * sheet.
1170 static gboolean
1171 analysis_tool_sampling_engine_run (data_analysis_output_t *dao,
1172 analysis_tools_data_sampling_t *info)
1174 GSList *l;
1175 gint col = 0;
1176 guint ct;
1177 GnmFunc *fd_index = NULL;
1178 GnmFunc *fd_randdiscrete = NULL;
1179 gint source;
1181 if (info->base.labels || info->periodic) {
1182 fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
1183 gnm_func_inc_usage (fd_index);
1185 if (!info->periodic) {
1186 fd_randdiscrete = gnm_func_lookup_or_add_placeholder ("RANDDISCRETE");
1187 gnm_func_inc_usage (fd_randdiscrete);
1190 for (l = info->base.input, source = 1; l; l = l->next, source++) {
1191 GnmValue *val = value_dup ((GnmValue *)l->data);
1192 GnmValue *val_c = NULL;
1193 GnmExpr const *expr_title = NULL;
1194 GnmExpr const *expr_input = NULL;
1195 char const *format = NULL;
1196 guint offset = info->periodic ? ((info->offset == 0) ? info->period : info->offset): 0;
1197 GnmEvalPos ep;
1199 eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
1201 dao_set_italic (dao, col, 0, col + info->number - 1, 0);
1203 if (info->base.labels) {
1204 val_c = value_dup (val);
1205 switch (info->base.group_by) {
1206 case GROUPED_BY_ROW:
1207 val->v_range.cell.a.col++;
1208 break;
1209 case GROUPED_BY_COL:
1210 val->v_range.cell.a.row++;
1211 break;
1212 default:
1213 offset++;
1214 break;
1216 expr_title = gnm_expr_new_funcall1 (fd_index,
1217 gnm_expr_new_constant (val_c));
1218 for (ct = 0; ct < info->number; ct++)
1219 dao_set_cell_expr (dao, col+ct, 0, gnm_expr_copy (expr_title));
1220 gnm_expr_free (expr_title);
1221 } else {
1222 switch (info->base.group_by) {
1223 case GROUPED_BY_ROW:
1224 format = _("Row %d");
1225 break;
1226 case GROUPED_BY_COL:
1227 format = _("Column %d");
1228 break;
1229 default:
1230 format = _("Area %d");
1231 break;
1233 for (ct = 0; ct < info->number; ct++)
1234 dao_set_cell_printf (dao, col+ct, 0, format, source);
1237 expr_input = gnm_expr_new_constant (value_dup (val));
1240 if (info->periodic) {
1241 guint i;
1242 gint height = value_area_get_height (val, &ep);
1243 gint width = value_area_get_width (val, &ep);
1244 GnmExpr const *expr_period;
1246 for (i=0; i < info->size; i++, offset += info->period) {
1247 gint x_offset;
1248 gint y_offset;
1250 if (info->row_major) {
1251 y_offset = (offset - 1)/width + 1;
1252 x_offset = offset - (y_offset - 1) * width;
1253 } else {
1254 x_offset = (offset - 1)/height + 1;
1255 y_offset = offset - (x_offset - 1) * height;
1258 expr_period = gnm_expr_new_funcall3
1259 (fd_index, gnm_expr_copy (expr_input),
1260 gnm_expr_new_constant (value_new_int (y_offset)),
1261 gnm_expr_new_constant (value_new_int (x_offset)));
1263 for (ct = 0; ct < info->number; ct += 2)
1264 dao_set_cell_expr (dao, col + ct, i + 1,
1265 gnm_expr_copy (expr_period));
1266 gnm_expr_free (expr_period);
1268 if (info->number > 1) {
1269 if (!info->row_major) {
1270 y_offset = (offset - 1)/width + 1;
1271 x_offset = offset - (y_offset - 1) * width;
1272 } else {
1273 x_offset = (offset - 1)/height + 1;
1274 y_offset = offset - (x_offset - 1) * height;
1277 expr_period = gnm_expr_new_funcall3
1278 (fd_index, gnm_expr_copy (expr_input),
1279 gnm_expr_new_constant (value_new_int (y_offset)),
1280 gnm_expr_new_constant (value_new_int (x_offset)));
1282 for (ct = 1; ct < info->number; ct += 2)
1283 dao_set_cell_expr (dao, col + ct, i + 1,
1284 gnm_expr_copy (expr_period));
1285 gnm_expr_free (expr_period);
1289 col += info->number;
1290 } else {
1291 GnmExpr const *expr_random;
1292 guint i;
1294 expr_random = gnm_expr_new_funcall1 (fd_randdiscrete,
1295 gnm_expr_copy (expr_input));
1297 for (ct = 0; ct < info->number; ct++, col++)
1298 for (i=0; i < info->size; i++)
1299 dao_set_cell_expr (dao, col, i + 1,
1300 gnm_expr_copy (expr_random));
1301 gnm_expr_free (expr_random);
1304 value_release (val);
1305 gnm_expr_free (expr_input);
1309 if (fd_index != NULL)
1310 gnm_func_dec_usage (fd_index);
1311 if (fd_randdiscrete != NULL)
1312 gnm_func_dec_usage (fd_randdiscrete);
1314 dao_redraw_respan (dao);
1316 return FALSE;
1319 gboolean
1320 analysis_tool_sampling_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1321 analysis_tool_engine_t selector, gpointer result)
1323 analysis_tools_data_sampling_t *info = specs;
1325 switch (selector) {
1326 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1327 return (dao_command_descriptor (dao, _("Sampling (%s)"), result)
1328 == NULL);
1329 case TOOL_ENGINE_UPDATE_DAO:
1331 GSList *l;
1333 prepare_input_range (&info->base.input, info->base.group_by);
1335 if (info->periodic) {
1336 info->size = 1;
1337 for (l = info->base.input; l; l = l->next) {
1338 GnmEvalPos ep;
1339 GnmValue *val = ((GnmValue *)l->data);
1340 gint size;
1341 guint usize;
1342 eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
1343 size = (value_area_get_width (val, &ep) *
1344 value_area_get_height (val, &ep));
1345 usize = (size > 0) ? size : 1;
1347 if (info->offset == 0)
1348 usize = usize/info->period;
1349 else
1350 usize = (usize - info->offset)/info->period + 1;
1351 if (usize > info->size)
1352 info->size = usize;
1356 dao_adjust (dao, info->number * g_slist_length (info->base.input),
1357 1 + info->size);
1358 return FALSE;
1360 case TOOL_ENGINE_CLEAN_UP:
1361 return analysis_tool_generic_clean (specs);
1362 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1363 return FALSE;
1364 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1365 dao_prepare_output (NULL, dao, _("Sample"));
1366 return FALSE;
1367 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1368 return dao_format_output (dao, _("Sample"));
1369 case TOOL_ENGINE_PERFORM_CALC:
1370 default:
1371 return analysis_tool_sampling_engine_run (dao, specs);
1373 return TRUE; /* We shouldn't get here */
/************* z-Test: Two Sample for Means ******************************
 *
 * The results are given in a table which can be printed out in a new
 * sheet, in a new workbook, or simply into an existing sheet.
 *
 **/
1386 static gboolean
1387 analysis_tool_ztest_engine_run (data_analysis_output_t *dao,
1388 analysis_tools_data_ttests_t *info)
1390 GnmValue *val_1;
1391 GnmValue *val_2;
1392 GnmFunc *fd_count;
1393 GnmFunc *fd_mean;
1394 GnmFunc *fd_normsdist;
1395 GnmFunc *fd_normsinv;
1396 GnmFunc *fd_abs;
1397 GnmFunc *fd_sqrt;
1398 GnmExpr const *expr_1;
1399 GnmExpr const *expr_2;
1400 GnmExpr const *expr_mean_1;
1401 GnmExpr const *expr_mean_2;
1402 GnmExpr const *expr_count_1;
1403 GnmExpr const *expr_count_2;
1405 dao_set_italic (dao, 0, 0, 0, 11);
1406 dao_set_italic (dao, 0, 0, 2, 0);
1408 dao_set_cell (dao, 0, 0, "");
1409 set_cell_text_col (dao, 0, 1, _("/Mean"
1410 "/Known Variance"
1411 "/Observations"
1412 "/Hypothesized Mean Difference"
1413 "/Observed Mean Difference"
1414 "/z"
1415 "/P (Z<=z) one-tail"
1416 "/z Critical one-tail"
1417 "/P (Z<=z) two-tail"
1418 "/z Critical two-tail"));
1420 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1421 gnm_func_inc_usage (fd_mean);
1422 fd_normsdist = gnm_func_lookup_or_add_placeholder ("NORMSDIST");
1423 gnm_func_inc_usage (fd_normsdist);
1424 fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1425 gnm_func_inc_usage (fd_abs);
1426 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
1427 gnm_func_inc_usage (fd_sqrt);
1428 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1429 gnm_func_inc_usage (fd_count);
1430 fd_normsinv = gnm_func_lookup_or_add_placeholder ("NORMSINV");
1431 gnm_func_inc_usage (fd_normsinv);
1433 val_1 = value_dup (info->base.range_1);
1434 expr_1 = gnm_expr_new_constant (value_dup (val_1));
1436 val_2 = value_dup (info->base.range_2);
1437 expr_2 = gnm_expr_new_constant (value_dup (val_2));
1439 /* Labels */
1440 analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1441 info->base.labels, 1);
1442 analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1443 info->base.labels, 2);
1446 /* Mean */
1447 expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
1448 dao_set_cell_expr (dao, 1, 1, expr_mean_1);
1449 expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
1450 dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
1452 /* Known Variance */
1453 dao_set_cell_float (dao, 1, 2, info->var1);
1454 dao_set_cell_float (dao, 2, 2, info->var2);
1456 /* Observations */
1457 expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
1458 dao_set_cell_expr (dao, 1, 3, expr_count_1);
1459 expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
1460 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
1462 /* Hypothesized Mean Difference */
1463 dao_set_cell_float (dao, 1, 4, info->mean_diff);
1465 /* Observed Mean Difference */
1466 if (dao_cell_is_visible (dao, 2, 1)) {
1467 gnm_expr_free (expr_mean_2);
1468 expr_mean_2 = make_cellref (1, -4);
1472 dao_set_cell_expr (dao, 1, 5,
1473 gnm_expr_new_binary
1474 (make_cellref (0, -4),
1475 GNM_EXPR_OP_SUB,
1476 expr_mean_2));
1479 /* z */
1481 GnmExpr const *expr_var_1 = make_cellref (0, -4);
1482 GnmExpr const *expr_var_2 = NULL;
1483 GnmExpr const *expr_count_1 = make_cellref (0, -3);
1484 GnmExpr const *expr_a = NULL;
1485 GnmExpr const *expr_b = NULL;
1486 GnmExpr const *expr_count_2_adj = NULL;
1488 if (dao_cell_is_visible (dao, 2, 2)) {
1489 expr_var_2 = make_cellref (1, -4);
1490 } else {
1491 expr_var_2 = gnm_expr_new_constant
1492 (value_new_float (info->var2));
1495 if (dao_cell_is_visible (dao, 2, 3)) {
1496 gnm_expr_free (expr_count_2);
1497 expr_count_2_adj = make_cellref (1, -3);
1498 } else
1499 expr_count_2_adj = expr_count_2;
1501 expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
1502 expr_count_1);
1503 expr_b = gnm_expr_new_binary (expr_var_2, GNM_EXPR_OP_DIV,
1504 expr_count_2_adj);
1506 dao_set_cell_expr (dao, 1, 6,
1507 gnm_expr_new_binary
1508 (gnm_expr_new_binary
1509 (make_cellref (0, -1),
1510 GNM_EXPR_OP_SUB,
1511 make_cellref (0, -2)),
1512 GNM_EXPR_OP_DIV,
1513 gnm_expr_new_funcall1
1514 (fd_sqrt,
1515 gnm_expr_new_binary
1516 (expr_a,
1517 GNM_EXPR_OP_ADD,
1518 expr_b))));
1521 /* P (Z<=z) one-tail */
1522 /* FIXME: 1- looks like a bad idea. */
1523 dao_set_cell_expr
1524 (dao, 1, 7,
1525 gnm_expr_new_binary
1526 (gnm_expr_new_constant (value_new_int (1)),
1527 GNM_EXPR_OP_SUB,
1528 gnm_expr_new_funcall1
1529 (fd_normsdist,
1530 gnm_expr_new_funcall1
1531 (fd_abs,
1532 make_cellref (0, -1)))));
1535 /* Critical Z, one right tail */
1536 dao_set_cell_expr
1537 (dao, 1, 8,
1538 gnm_expr_new_unary
1539 (GNM_EXPR_OP_UNARY_NEG,
1540 gnm_expr_new_funcall1
1541 (fd_normsinv,
1542 gnm_expr_new_constant
1543 (value_new_float (info->base.alpha)))));
1545 /* P (T<=t) two-tail */
1546 dao_set_cell_expr
1547 (dao, 1, 9,
1548 gnm_expr_new_binary
1549 (gnm_expr_new_constant (value_new_int (2)),
1550 GNM_EXPR_OP_MULT,
1551 gnm_expr_new_funcall1
1552 (fd_normsdist,
1553 gnm_expr_new_unary
1554 (GNM_EXPR_OP_UNARY_NEG,
1555 gnm_expr_new_funcall1
1556 (fd_abs,
1557 make_cellref (0, -3))))));
1559 /* Critical Z, two tails */
1560 dao_set_cell_expr
1561 (dao, 1, 10,
1562 gnm_expr_new_unary
1563 (GNM_EXPR_OP_UNARY_NEG,
1564 gnm_expr_new_funcall1
1565 (fd_normsinv,
1566 gnm_expr_new_binary
1567 (gnm_expr_new_constant
1568 (value_new_float (info->base.alpha)),
1569 GNM_EXPR_OP_DIV,
1570 gnm_expr_new_constant (value_new_int (2))))));
1572 gnm_func_dec_usage (fd_mean);
1573 gnm_func_dec_usage (fd_normsdist);
1574 gnm_func_dec_usage (fd_abs);
1575 gnm_func_dec_usage (fd_sqrt);
1576 gnm_func_dec_usage (fd_count);
1577 gnm_func_dec_usage (fd_normsinv);
1579 /* And finish up */
1581 value_release (val_1);
1582 value_release (val_2);
1584 dao_redraw_respan (dao);
1586 return FALSE;
1590 gboolean
1591 analysis_tool_ztest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1592 analysis_tool_engine_t selector, gpointer result)
1594 switch (selector) {
1595 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1596 return (dao_command_descriptor (dao, _("z-Test (%s)"), result)
1597 == NULL);
1598 case TOOL_ENGINE_UPDATE_DAO:
1599 dao_adjust (dao, 3, 11);
1600 return FALSE;
1601 case TOOL_ENGINE_CLEAN_UP:
1602 return analysis_tool_generic_b_clean (specs);
1603 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1604 return FALSE;
1605 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1606 dao_prepare_output (NULL, dao, _("z-Test"));
1607 return FALSE;
1608 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1609 return dao_format_output (dao, _("z-Test"));
1610 case TOOL_ENGINE_PERFORM_CALC:
1611 default:
1612 return analysis_tool_ztest_engine_run (dao, specs);
1614 return TRUE; /* We shouldn't get here */
/************* t-Test Tools ********************************************
 *
 * The t-Test tool set consists of three kinds of tests to test the
 * means of two variables.  The tests are: Student's t-test for paired
 * samples, Student's t-test for two samples assuming equal variances,
 * and the same test assuming unequal variances.  The results are given
 * in a table which can be printed out in a new sheet, in a new
 * workbook, or simply into an existing sheet.
 *
 **/

/* t-Test: Paired Two Sample for Means.
 */
1631 static gboolean
1632 analysis_tool_ttest_paired_engine_run (data_analysis_output_t *dao,
1633 analysis_tools_data_ttests_t *info)
1635 GnmValue *val_1;
1636 GnmValue *val_2;
1638 GnmFunc *fd_count;
1639 GnmFunc *fd_mean;
1640 GnmFunc *fd_var;
1641 GnmFunc *fd_tdist;
1642 GnmFunc *fd_abs;
1643 GnmFunc *fd_tinv;
1644 GnmFunc *fd_correl;
1645 GnmFunc *fd_isodd;
1646 GnmFunc *fd_isnumber;
1647 GnmFunc *fd_if;
1648 GnmFunc *fd_sum;
1650 GnmExpr const *expr_1;
1651 GnmExpr const *expr_2;
1652 GnmExpr const *expr_diff;
1653 GnmExpr const *expr_ifisnumber;
1654 GnmExpr const *expr_ifisoddifisnumber;
1656 dao_set_italic (dao, 0, 0, 0, 13);
1657 dao_set_italic (dao, 0, 0, 2, 0);
1659 dao_set_cell (dao, 0, 0, "");
1660 set_cell_text_col (dao, 0, 1, _("/Mean"
1661 "/Variance"
1662 "/Observations"
1663 "/Pearson Correlation"
1664 "/Hypothesized Mean Difference"
1665 "/Observed Mean Difference"
1666 "/Variance of the Differences"
1667 "/df"
1668 "/t Stat"
1669 "/P (T<=t) one-tail"
1670 "/t Critical one-tail"
1671 "/P (T<=t) two-tail"
1672 "/t Critical two-tail"));
1674 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1675 gnm_func_inc_usage (fd_mean);
1676 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
1677 gnm_func_inc_usage (fd_var);
1678 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1679 gnm_func_inc_usage (fd_count);
1680 fd_correl = gnm_func_lookup_or_add_placeholder ("CORREL");
1681 gnm_func_inc_usage (fd_correl);
1682 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
1683 gnm_func_inc_usage (fd_tinv);
1684 fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
1685 gnm_func_inc_usage (fd_tdist);
1686 fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1687 gnm_func_inc_usage (fd_abs);
1688 fd_isodd = gnm_func_lookup_or_add_placeholder ("ISODD");
1689 gnm_func_inc_usage (fd_isodd);
1690 fd_isnumber = gnm_func_lookup_or_add_placeholder ("ISNUMBER");
1691 gnm_func_inc_usage (fd_isnumber);
1692 fd_if = gnm_func_lookup_or_add_placeholder ("IF");
1693 gnm_func_inc_usage (fd_if);
1694 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
1695 gnm_func_inc_usage (fd_sum);
1697 val_1 = value_dup (info->base.range_1);
1698 val_2 = value_dup (info->base.range_2);
1700 /* Labels */
1701 analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1702 info->base.labels, 1);
1703 analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1704 info->base.labels, 2);
1706 /* Mean */
1708 expr_1 = gnm_expr_new_constant (value_dup (val_1));
1709 dao_set_cell_expr (dao, 1, 1,
1710 gnm_expr_new_funcall1 (fd_mean,
1711 gnm_expr_copy (expr_1)));
1713 expr_2 = gnm_expr_new_constant (value_dup (val_2));
1714 dao_set_cell_expr (dao, 2, 1,
1715 gnm_expr_new_funcall1 (fd_mean,
1716 gnm_expr_copy (expr_2)));
1718 /* Variance */
1719 dao_set_cell_expr (dao, 1, 2,
1720 gnm_expr_new_funcall1 (fd_var,
1721 gnm_expr_copy (expr_1)));
1722 dao_set_cell_expr (dao, 2, 2,
1723 gnm_expr_new_funcall1 (fd_var,
1724 gnm_expr_copy (expr_2)));
1726 /* Observations */
1727 dao_set_cell_expr (dao, 1, 3,
1728 gnm_expr_new_funcall1 (fd_count,
1729 gnm_expr_copy (expr_1)));
1730 dao_set_cell_expr (dao, 2, 3,
1731 gnm_expr_new_funcall1 (fd_count,
1732 gnm_expr_copy (expr_2)));
1734 /* Pearson Correlation */
1735 dao_set_cell_expr (dao, 1, 4,
1736 gnm_expr_new_funcall2 (fd_correl,
1737 gnm_expr_copy (expr_1),
1738 gnm_expr_copy (expr_2)));
1740 /* Hypothesized Mean Difference */
1741 dao_set_cell_float (dao, 1, 5, info->mean_diff);
1743 /* Some useful expressions for the next field */
1745 expr_diff = gnm_expr_new_binary (expr_1, GNM_EXPR_OP_SUB, expr_2);
1747 /* IF (ISNUMBER (area1), 1, 0) * IF (ISNUMBER (area2), 1, 0) */
1748 expr_ifisnumber = gnm_expr_new_binary (gnm_expr_new_funcall3 (
1749 fd_if,
1750 gnm_expr_new_funcall1 (
1751 fd_isnumber,
1752 gnm_expr_copy (expr_1)),
1753 gnm_expr_new_constant (value_new_int (1)),
1754 gnm_expr_new_constant (value_new_int (0))),
1755 GNM_EXPR_OP_MULT,
1756 gnm_expr_new_funcall3 (
1757 fd_if,
1758 gnm_expr_new_funcall1 (
1759 fd_isnumber,
1760 gnm_expr_copy (expr_2)),
1761 gnm_expr_new_constant (value_new_int (1)),
1762 gnm_expr_new_constant (value_new_int (0)))
1764 /* IF (ISODD (expr_ifisnumber), area1-area2, "NA")*/
1765 expr_ifisoddifisnumber = gnm_expr_new_funcall3 (fd_if,
1766 gnm_expr_new_funcall1 (fd_isodd,
1767 gnm_expr_copy (expr_ifisnumber)),
1768 expr_diff,
1769 gnm_expr_new_constant (value_new_string ("NA")));
1771 /* Observed Mean Difference */
1772 dao_set_cell_array_expr (dao, 1, 6,
1773 gnm_expr_new_funcall1 (fd_mean,
1774 gnm_expr_copy (expr_ifisoddifisnumber)));
1776 /* Variance of the Differences */
1777 dao_set_cell_array_expr (dao, 1, 7,
1778 gnm_expr_new_funcall1 (fd_var,
1779 expr_ifisoddifisnumber));
1781 /* df */
1782 dao_set_cell_array_expr (dao, 1, 8,
1783 gnm_expr_new_binary
1784 (gnm_expr_new_funcall1 (
1785 fd_sum,
1786 expr_ifisnumber),
1787 GNM_EXPR_OP_SUB,
1788 gnm_expr_new_constant (value_new_int (1))));
1790 /* t */
1791 /* E24 = (E21-E20)/(E22/(E23+1))^0.5 */
1793 GnmExpr const *expr_num;
1794 GnmExpr const *expr_denom;
1796 expr_num = gnm_expr_new_binary (make_cellref (0, -3),
1797 GNM_EXPR_OP_SUB,
1798 make_cellref (0,-4));
1800 expr_denom = gnm_expr_new_binary
1801 (gnm_expr_new_binary
1802 (make_cellref (0, -2),
1803 GNM_EXPR_OP_DIV,
1804 gnm_expr_new_binary
1805 (make_cellref (0, -1),
1806 GNM_EXPR_OP_ADD,
1807 gnm_expr_new_constant
1808 (value_new_int (1)))),
1809 GNM_EXPR_OP_EXP,
1810 gnm_expr_new_constant
1811 (value_new_float (0.5)));
1813 dao_set_cell_expr (dao, 1, 9,
1814 gnm_expr_new_binary
1815 (expr_num, GNM_EXPR_OP_DIV, expr_denom));
1818 /* P (T<=t) one-tail */
1819 dao_set_cell_expr
1820 (dao, 1, 10,
1821 gnm_expr_new_funcall3
1822 (fd_tdist,
1823 gnm_expr_new_funcall1
1824 (fd_abs,
1825 make_cellref (0, -1)),
1826 make_cellref (0, -2),
1827 gnm_expr_new_constant (value_new_int (1))));
1829 /* t Critical one-tail */
1830 dao_set_cell_expr
1831 (dao, 1, 11,
1832 gnm_expr_new_funcall2
1833 (fd_tinv,
1834 gnm_expr_new_binary
1835 (gnm_expr_new_constant (value_new_int (2)),
1836 GNM_EXPR_OP_MULT,
1837 gnm_expr_new_constant
1838 (value_new_float (info->base.alpha))),
1839 make_cellref (0, -3)));
1841 /* P (T<=t) two-tail */
1842 dao_set_cell_expr
1843 (dao, 1, 12,
1844 gnm_expr_new_funcall3
1845 (fd_tdist,
1846 gnm_expr_new_funcall1 (fd_abs, make_cellref (0, -3)),
1847 make_cellref (0, -4),
1848 gnm_expr_new_constant (value_new_int (2))));
1850 /* t Critical two-tail */
1851 dao_set_cell_expr
1852 (dao, 1, 13,
1853 gnm_expr_new_funcall2
1854 (fd_tinv,
1855 gnm_expr_new_constant
1856 (value_new_float (info->base.alpha)),
1857 make_cellref (0, -5)));
1859 /* And finish up */
1861 value_release (val_1);
1862 value_release (val_2);
1864 gnm_func_dec_usage (fd_count);
1865 gnm_func_dec_usage (fd_correl);
1866 gnm_func_dec_usage (fd_mean);
1867 gnm_func_dec_usage (fd_var);
1868 gnm_func_dec_usage (fd_tinv);
1869 gnm_func_dec_usage (fd_tdist);
1870 gnm_func_dec_usage (fd_abs);
1871 gnm_func_dec_usage (fd_isodd);
1872 gnm_func_dec_usage (fd_isnumber);
1873 gnm_func_dec_usage (fd_if);
1874 gnm_func_dec_usage (fd_sum);
1876 dao_redraw_respan (dao);
1878 return FALSE;
1881 gboolean
1882 analysis_tool_ttest_paired_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
1883 analysis_tool_engine_t selector,
1884 gpointer result)
1886 switch (selector) {
1887 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
1888 return (dao_command_descriptor (dao, _("t-Test, paired (%s)"), result)
1889 == NULL);
1890 case TOOL_ENGINE_UPDATE_DAO:
1891 dao_adjust (dao, 3, 14);
1892 return FALSE;
1893 case TOOL_ENGINE_CLEAN_UP:
1894 return analysis_tool_generic_b_clean (specs);
1895 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
1896 return FALSE;
1897 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
1898 dao_prepare_output (NULL, dao, _("t-Test"));
1899 return FALSE;
1900 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
1901 return dao_format_output (dao, _("t-Test"));
1902 case TOOL_ENGINE_PERFORM_CALC:
1903 default:
1904 return analysis_tool_ttest_paired_engine_run (dao, specs);
1906 return TRUE; /* We shouldn't get here */
/* t-Test: Two-Sample Assuming Equal Variances.
 */
1914 static gboolean
1915 analysis_tool_ttest_eqvar_engine_run (data_analysis_output_t *dao,
1916 analysis_tools_data_ttests_t *info)
1918 GnmValue *val_1;
1919 GnmValue *val_2;
1920 GnmFunc *fd_count;
1921 GnmFunc *fd_mean;
1922 GnmFunc *fd_var;
1923 GnmFunc *fd_tdist;
1924 GnmFunc *fd_abs;
1925 GnmFunc *fd_tinv;
1926 GnmExpr const *expr_1;
1927 GnmExpr const *expr_2;
1928 GnmExpr const *expr_mean_1;
1929 GnmExpr const *expr_mean_2;
1930 GnmExpr const *expr_var_1;
1931 GnmExpr const *expr_var_2;
1932 GnmExpr const *expr_count_1;
1933 GnmExpr const *expr_count_2;
1935 dao_set_italic (dao, 0, 0, 0, 12);
1936 dao_set_italic (dao, 0, 0, 2, 0);
1938 dao_set_cell (dao, 0, 0, "");
1939 set_cell_text_col (dao, 0, 1, _("/Mean"
1940 "/Variance"
1941 "/Observations"
1942 "/Pooled Variance"
1943 "/Hypothesized Mean Difference"
1944 "/Observed Mean Difference"
1945 "/df"
1946 "/t Stat"
1947 "/P (T<=t) one-tail"
1948 "/t Critical one-tail"
1949 "/P (T<=t) two-tail"
1950 "/t Critical two-tail"));
1953 val_1 = value_dup (info->base.range_1);
1954 val_2 = value_dup (info->base.range_2);
1956 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
1957 gnm_func_inc_usage (fd_mean);
1958 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
1959 gnm_func_inc_usage (fd_count);
1960 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
1961 gnm_func_inc_usage (fd_var);
1962 fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
1963 gnm_func_inc_usage (fd_tdist);
1964 fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
1965 gnm_func_inc_usage (fd_abs);
1966 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
1967 gnm_func_inc_usage (fd_tinv);
1969 /* Labels */
1970 analysis_tools_write_label_ftest (val_1, dao, 1, 0,
1971 info->base.labels, 1);
1972 analysis_tools_write_label_ftest (val_2, dao, 2, 0,
1973 info->base.labels, 2);
1976 /* Mean */
1977 expr_1 = gnm_expr_new_constant (value_dup (val_1));
1978 expr_mean_1 = gnm_expr_new_funcall1 (fd_mean,
1979 gnm_expr_copy (expr_1));
1980 dao_set_cell_expr (dao, 1, 1, expr_mean_1);
1981 expr_2 = gnm_expr_new_constant (value_dup (val_2));
1982 expr_mean_2 = gnm_expr_new_funcall1 (fd_mean,
1983 gnm_expr_copy (expr_2));
1984 dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
1986 /* Variance */
1987 expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
1988 dao_set_cell_expr (dao, 1, 2, expr_var_1);
1989 expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
1990 dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));
1992 /* Observations */
1993 expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
1994 dao_set_cell_expr (dao, 1, 3, expr_count_1);
1995 expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
1996 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
1998 /* Pooled Variance */
2000 GnmExpr const *expr_var_2_adj = NULL;
2001 GnmExpr const *expr_count_2_adj = NULL;
2002 GnmExpr const *expr_var_1 = make_cellref (0, -2);
2003 GnmExpr const *expr_count_1 = make_cellref (0, -1);
2004 GnmExpr const *expr_one = gnm_expr_new_constant
2005 (value_new_int (1));
2006 GnmExpr const *expr_count_1_minus_1;
2007 GnmExpr const *expr_count_2_minus_1;
2009 if (dao_cell_is_visible (dao, 2, 2)) {
2010 gnm_expr_free (expr_var_2);
2011 expr_var_2_adj = make_cellref (1, -2);
2012 } else
2013 expr_var_2_adj = expr_var_2;
2015 if (dao_cell_is_visible (dao, 2, 3)) {
2016 expr_count_2_adj = make_cellref (1, -1);
2017 } else
2018 expr_count_2_adj = gnm_expr_copy (expr_count_2);
2020 expr_count_1_minus_1 = gnm_expr_new_binary
2021 (expr_count_1,
2022 GNM_EXPR_OP_SUB,
2023 gnm_expr_copy (expr_one));
2024 expr_count_2_minus_1 = gnm_expr_new_binary
2025 (expr_count_2_adj, GNM_EXPR_OP_SUB, expr_one);
2027 dao_set_cell_expr (dao, 1, 4,
2028 gnm_expr_new_binary
2029 (gnm_expr_new_binary
2030 (gnm_expr_new_binary
2031 (gnm_expr_copy (expr_count_1_minus_1),
2032 GNM_EXPR_OP_MULT,
2033 expr_var_1),
2034 GNM_EXPR_OP_ADD,
2035 gnm_expr_new_binary
2036 (gnm_expr_copy (expr_count_2_minus_1),
2037 GNM_EXPR_OP_MULT,
2038 expr_var_2_adj)),
2039 GNM_EXPR_OP_DIV,
2040 gnm_expr_new_binary
2041 (expr_count_1_minus_1,
2042 GNM_EXPR_OP_ADD,
2043 expr_count_2_minus_1)));
2047 /* Hypothesized Mean Difference */
2048 dao_set_cell_float (dao, 1, 5, info->mean_diff);
2050 /* Observed Mean Difference */
2051 if (dao_cell_is_visible (dao, 2,1)) {
2052 gnm_expr_free (expr_mean_2);
2053 expr_mean_2 = make_cellref (1, -5);
2055 dao_set_cell_expr (dao, 1, 6,
2056 gnm_expr_new_binary
2057 (make_cellref (0, -5),
2058 GNM_EXPR_OP_SUB,
2059 expr_mean_2));
2061 /* df */
2063 GnmExpr const *expr_count_1 = make_cellref (0, -4);
2064 GnmExpr const *expr_count_2_adj;
2065 GnmExpr const *expr_two = gnm_expr_new_constant
2066 (value_new_int (2));
2068 if (dao_cell_is_visible (dao, 2,3)) {
2069 expr_count_2_adj = make_cellref (1, -4);
2070 } else
2071 expr_count_2_adj = gnm_expr_copy (expr_count_2);
2073 dao_set_cell_expr (dao, 1, 7,
2074 gnm_expr_new_binary
2075 (gnm_expr_new_binary
2076 (expr_count_1,
2077 GNM_EXPR_OP_ADD,
2078 expr_count_2_adj),
2079 GNM_EXPR_OP_SUB,
2080 expr_two));
2083 /* t */
2085 GnmExpr const *expr_var = make_cellref (0, -4);
2086 GnmExpr const *expr_count_1 = make_cellref (0, -5);
2087 GnmExpr const *expr_a;
2088 GnmExpr const *expr_b;
2089 GnmExpr const *expr_count_2_adj;
2091 if (dao_cell_is_visible (dao, 2,3)) {
2092 gnm_expr_free (expr_count_2);
2093 expr_count_2_adj = make_cellref (1, -5);
2094 } else
2095 expr_count_2_adj = expr_count_2;
2097 expr_a = gnm_expr_new_binary (gnm_expr_copy (expr_var),
2098 GNM_EXPR_OP_DIV,
2099 expr_count_1);
2100 expr_b = gnm_expr_new_binary (expr_var,
2101 GNM_EXPR_OP_DIV,
2102 expr_count_2_adj);
2104 dao_set_cell_expr (dao, 1, 8,
2105 gnm_expr_new_binary
2106 (gnm_expr_new_binary
2107 (make_cellref (0, -2),
2108 GNM_EXPR_OP_SUB,
2109 make_cellref (0, -3)),
2110 GNM_EXPR_OP_DIV,
2111 gnm_expr_new_binary
2112 (gnm_expr_new_binary
2113 (expr_a,
2114 GNM_EXPR_OP_ADD,
2115 expr_b),
2116 GNM_EXPR_OP_EXP,
2117 gnm_expr_new_constant
2118 (value_new_float (0.5)))));
2122 /* P (T<=t) one-tail */
2123 dao_set_cell_expr
2124 (dao, 1, 9,
2125 gnm_expr_new_funcall3
2126 (fd_tdist,
2127 gnm_expr_new_funcall1
2128 (fd_abs,
2129 make_cellref (0, -1)),
2130 make_cellref (0, -2),
2131 gnm_expr_new_constant (value_new_int (1))));
2133 /* t Critical one-tail */
2134 dao_set_cell_expr
2135 (dao, 1, 10,
2136 gnm_expr_new_funcall2
2137 (fd_tinv,
2138 gnm_expr_new_binary
2139 (gnm_expr_new_constant (value_new_int (2)),
2140 GNM_EXPR_OP_MULT,
2141 gnm_expr_new_constant
2142 (value_new_float (info->base.alpha))),
2143 make_cellref (0, -3)));
2145 /* P (T<=t) two-tail */
2146 dao_set_cell_expr
2147 (dao, 1, 11,
2148 gnm_expr_new_funcall3
2149 (fd_tdist,
2150 gnm_expr_new_funcall1
2151 (fd_abs,
2152 make_cellref (0, -3)),
2153 make_cellref (0, -4),
2154 gnm_expr_new_constant (value_new_int (2))));
2156 /* t Critical two-tail */
2157 dao_set_cell_expr
2158 (dao, 1, 12,
2159 gnm_expr_new_funcall2
2160 (fd_tinv,
2161 gnm_expr_new_constant
2162 (value_new_float (info->base.alpha)),
2163 make_cellref (0, -5)));
2165 /* And finish up */
2167 value_release (val_1);
2168 value_release (val_2);
2170 gnm_func_dec_usage (fd_mean);
2171 gnm_func_dec_usage (fd_var);
2172 gnm_func_dec_usage (fd_count);
2173 gnm_func_dec_usage (fd_tdist);
2174 gnm_func_dec_usage (fd_abs);
2175 gnm_func_dec_usage (fd_tinv);
2177 dao_redraw_respan (dao);
2179 return FALSE;
2182 gboolean
2183 analysis_tool_ttest_eqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2184 analysis_tool_engine_t selector, gpointer result)
2186 switch (selector) {
2187 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2188 return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2189 == NULL);
2190 case TOOL_ENGINE_UPDATE_DAO:
2191 dao_adjust (dao, 3, 13);
2192 return FALSE;
2193 case TOOL_ENGINE_CLEAN_UP:
2194 return analysis_tool_generic_b_clean (specs);
2195 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2196 return FALSE;
2197 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2198 dao_prepare_output (NULL, dao, _("t-Test"));
2199 return FALSE;
2200 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2201 return dao_format_output (dao, _("t-Test"));
2202 case TOOL_ENGINE_PERFORM_CALC:
2203 default:
2204 return analysis_tool_ttest_eqvar_engine_run (dao, specs);
2206 return TRUE; /* We shouldn't get here */
/* t-Test: Two-Sample Assuming Unequal Variances.
 */
2211 static gboolean
2212 analysis_tool_ttest_neqvar_engine_run (data_analysis_output_t *dao,
2213 analysis_tools_data_ttests_t *info)
2215 GnmValue *val_1;
2216 GnmValue *val_2;
2217 GnmFunc *fd_count;
2218 GnmFunc *fd_mean;
2219 GnmFunc *fd_var;
2220 GnmFunc *fd_tdist;
2221 GnmFunc *fd_abs;
2222 GnmFunc *fd_tinv;
2223 GnmExpr const *expr_1;
2224 GnmExpr const *expr_2;
2225 GnmExpr const *expr_mean_1;
2226 GnmExpr const *expr_mean_2;
2227 GnmExpr const *expr_var_1;
2228 GnmExpr const *expr_var_2;
2229 GnmExpr const *expr_count_1;
2230 GnmExpr const *expr_count_2;
2232 dao_set_italic (dao, 0, 0, 0, 11);
2233 dao_set_italic (dao, 0, 0, 2, 0);
2235 dao_set_cell (dao, 0, 0, "");
2236 set_cell_text_col (dao, 0, 1, _("/Mean"
2237 "/Variance"
2238 "/Observations"
2239 "/Hypothesized Mean Difference"
2240 "/Observed Mean Difference"
2241 "/df"
2242 "/t Stat"
2243 "/P (T<=t) one-tail"
2244 "/t Critical one-tail"
2245 "/P (T<=t) two-tail"
2246 "/t Critical two-tail"));
2249 val_1 = value_dup (info->base.range_1);
2250 val_2 = value_dup (info->base.range_2);
2252 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2253 gnm_func_inc_usage (fd_mean);
2254 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2255 gnm_func_inc_usage (fd_var);
2256 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2257 gnm_func_inc_usage (fd_count);
2258 fd_tdist = gnm_func_lookup_or_add_placeholder ("TDIST");
2259 gnm_func_inc_usage (fd_tdist);
2260 fd_abs = gnm_func_lookup_or_add_placeholder ("ABS");
2261 gnm_func_inc_usage (fd_abs);
2262 fd_tinv = gnm_func_lookup_or_add_placeholder ("TINV");
2263 gnm_func_inc_usage (fd_tinv);
2265 /* Labels */
2266 analysis_tools_write_label_ftest (val_1, dao, 1, 0,
2267 info->base.labels, 1);
2268 analysis_tools_write_label_ftest (val_2, dao, 2, 0,
2269 info->base.labels, 2);
2272 /* Mean */
2273 expr_1 = gnm_expr_new_constant (value_dup (val_1));
2274 expr_mean_1 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_1));
2275 dao_set_cell_expr (dao, 1, 1, expr_mean_1);
2276 expr_2 = gnm_expr_new_constant (value_dup (val_2));
2277 expr_mean_2 = gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr_2));
2278 dao_set_cell_expr (dao, 2, 1, gnm_expr_copy (expr_mean_2));
2280 /* Variance */
2281 expr_var_1 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_1));
2282 dao_set_cell_expr (dao, 1, 2, expr_var_1);
2283 expr_var_2 = gnm_expr_new_funcall1 (fd_var, gnm_expr_copy (expr_2));
2284 dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_2));
2286 /* Observations */
2287 expr_count_1 = gnm_expr_new_funcall1 (fd_count, expr_1);
2288 dao_set_cell_expr (dao, 1, 3, expr_count_1);
2289 expr_count_2 = gnm_expr_new_funcall1 (fd_count, expr_2);
2290 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_2));
2292 /* Hypothesized Mean Difference */
2293 dao_set_cell_float (dao, 1, 4, info->mean_diff);
2295 /* Observed Mean Difference */
2296 if (dao_cell_is_visible (dao, 2,1)) {
2297 gnm_expr_free (expr_mean_2);
2298 expr_mean_2 = make_cellref (1, -4);
2300 dao_set_cell_expr (dao, 1, 5,
2301 gnm_expr_new_binary
2302 (make_cellref (0, -4),
2303 GNM_EXPR_OP_SUB,
2304 expr_mean_2));
2306 /* df */
2309 GnmExpr const *expr_var_1 = make_cellref (0, -4);
2310 GnmExpr const *expr_count_1 = make_cellref (0, -3);
2311 GnmExpr const *expr_a;
2312 GnmExpr const *expr_b;
2313 GnmExpr const *expr_var_2_adj;
2314 GnmExpr const *expr_count_2_adj;
2315 GnmExpr const *expr_two = gnm_expr_new_constant
2316 (value_new_int (2));
2317 GnmExpr const *expr_one = gnm_expr_new_constant
2318 (value_new_int (1));
2320 if (dao_cell_is_visible (dao, 2,2)) {
2321 expr_var_2_adj = make_cellref (1, -4);
2322 } else
2323 expr_var_2_adj = gnm_expr_copy (expr_var_2);
2325 if (dao_cell_is_visible (dao, 2,3)) {
2326 expr_count_2_adj = make_cellref (1, -3);
2327 } else
2328 expr_count_2_adj = gnm_expr_copy (expr_count_2);
2330 expr_a = gnm_expr_new_binary (expr_var_1,
2331 GNM_EXPR_OP_DIV,
2332 gnm_expr_copy (expr_count_1));
2333 expr_b = gnm_expr_new_binary (expr_var_2_adj,
2334 GNM_EXPR_OP_DIV,
2335 gnm_expr_copy (expr_count_2_adj));
2337 dao_set_cell_expr (dao, 1, 6,
2338 gnm_expr_new_binary (
2339 gnm_expr_new_binary
2340 (gnm_expr_new_binary
2341 (gnm_expr_copy (expr_a),
2342 GNM_EXPR_OP_ADD,
2343 gnm_expr_copy (expr_b)),
2344 GNM_EXPR_OP_EXP,
2345 gnm_expr_copy (expr_two)),
2346 GNM_EXPR_OP_DIV,
2347 gnm_expr_new_binary
2348 (gnm_expr_new_binary
2349 (gnm_expr_new_binary
2350 (expr_a,
2351 GNM_EXPR_OP_EXP,
2352 gnm_expr_copy (expr_two)),
2353 GNM_EXPR_OP_DIV,
2354 gnm_expr_new_binary
2355 (expr_count_1,
2356 GNM_EXPR_OP_SUB,
2357 gnm_expr_copy (expr_one))),
2358 GNM_EXPR_OP_ADD,
2359 gnm_expr_new_binary
2360 (gnm_expr_new_binary
2361 (expr_b,
2362 GNM_EXPR_OP_EXP,
2363 expr_two),
2364 GNM_EXPR_OP_DIV,
2365 gnm_expr_new_binary
2366 (expr_count_2_adj,
2367 GNM_EXPR_OP_SUB,
2368 expr_one)))));
2371 /* t */
2374 GnmExpr const *expr_var_1 = make_cellref (0, -5);
2375 GnmExpr const *expr_count_1 = make_cellref (0, -4);
2376 GnmExpr const *expr_a;
2377 GnmExpr const *expr_b;
2378 GnmExpr const *expr_var_2_adj;
2379 GnmExpr const *expr_count_2_adj;
2381 if (dao_cell_is_visible (dao, 2,2)) {
2382 gnm_expr_free (expr_var_2);
2383 expr_var_2_adj = make_cellref (1, -5);
2384 } else
2385 expr_var_2_adj = expr_var_2;
2386 if (dao_cell_is_visible (dao, 2,3)) {
2387 gnm_expr_free (expr_count_2);
2388 expr_count_2_adj = make_cellref (1, -4);
2389 } else
2390 expr_count_2_adj = expr_count_2;
2392 expr_a = gnm_expr_new_binary (expr_var_1, GNM_EXPR_OP_DIV,
2393 expr_count_1);
2394 expr_b = gnm_expr_new_binary (expr_var_2_adj, GNM_EXPR_OP_DIV,
2395 expr_count_2_adj);
2397 dao_set_cell_expr (dao, 1, 7,
2398 gnm_expr_new_binary
2399 (gnm_expr_new_binary
2400 (make_cellref (0, -2),
2401 GNM_EXPR_OP_SUB,
2402 make_cellref (0, -3)),
2403 GNM_EXPR_OP_DIV,
2404 gnm_expr_new_binary
2405 (gnm_expr_new_binary
2406 (expr_a,
2407 GNM_EXPR_OP_ADD,
2408 expr_b),
2409 GNM_EXPR_OP_EXP,
2410 gnm_expr_new_constant
2411 (value_new_float (0.5)))));
2415 /* P (T<=t) one-tail */
2416 /* I9: =tdist(abs(Sheet1!I8),Sheet1!I7,1) */
2417 dao_set_cell_expr
2418 (dao, 1, 8,
2419 gnm_expr_new_funcall3
2420 (fd_tdist,
2421 gnm_expr_new_funcall1 (fd_abs,
2422 make_cellref (0, -1)),
2423 make_cellref (0, -2),
2424 gnm_expr_new_constant (value_new_int (1))));
2426 /* t Critical one-tail */
2427 /* H10 = tinv(2*alpha,Sheet1!H7) */
2428 dao_set_cell_expr
2429 (dao, 1, 9,
2430 gnm_expr_new_funcall2
2431 (fd_tinv,
2432 gnm_expr_new_binary
2433 (gnm_expr_new_constant (value_new_int (2)),
2434 GNM_EXPR_OP_MULT,
2435 gnm_expr_new_constant
2436 (value_new_float (info->base.alpha))),
2437 make_cellref (0, -3)));
2439 /* P (T<=t) two-tail */
2440 /* I11: =tdist(abs(Sheet1!I8),Sheet1!I7,2) */
2441 dao_set_cell_expr
2442 (dao, 1, 10,
2443 gnm_expr_new_funcall3
2444 (fd_tdist,
2445 gnm_expr_new_funcall1 (fd_abs,
2446 make_cellref (0, -3)),
2447 make_cellref (0, -4),
2448 gnm_expr_new_constant (value_new_int (2))));
2450 /* t Critical two-tail */
2451 dao_set_cell_expr
2452 (dao, 1, 11,
2453 gnm_expr_new_funcall2
2454 (fd_tinv,
2455 gnm_expr_new_constant
2456 (value_new_float (info->base.alpha)),
2457 make_cellref (0, -5)));
2459 /* And finish up */
2461 gnm_func_dec_usage (fd_mean);
2462 gnm_func_dec_usage (fd_var);
2463 gnm_func_dec_usage (fd_count);
2464 gnm_func_dec_usage (fd_tdist);
2465 gnm_func_dec_usage (fd_abs);
2466 gnm_func_dec_usage (fd_tinv);
2468 value_release (val_1);
2469 value_release (val_2);
2471 dao_redraw_respan (dao);
2472 return FALSE;
2475 gboolean
2476 analysis_tool_ttest_neqvar_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2477 analysis_tool_engine_t selector, gpointer result)
2479 switch (selector) {
2480 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2481 return (dao_command_descriptor (dao, _("t-Test (%s)"), result)
2482 == NULL);
2483 case TOOL_ENGINE_UPDATE_DAO:
2484 dao_adjust (dao, 3, 12);
2485 return FALSE;
2486 case TOOL_ENGINE_CLEAN_UP:
2487 return analysis_tool_generic_b_clean (specs);
2488 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2489 return FALSE;
2490 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2491 dao_prepare_output (NULL, dao, _("t-Test"));
2492 return FALSE;
2493 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2494 return dao_format_output (dao, _("t-Test"));
2495 case TOOL_ENGINE_PERFORM_CALC:
2496 default:
2497 return analysis_tool_ttest_neqvar_engine_run (dao, specs);
2499 return TRUE; /* We shouldn't get here */
/************* F-Test Tool *********************************************
 *
 * The results are given in a table which can be printed out in a new
 * sheet, in a new workbook, or simply into an existing sheet.
 *
 **/


/* F-Test: Two-Sample for Variances
 */
2513 static gboolean
2514 analysis_tool_ftest_engine_run (data_analysis_output_t *dao,
2515 analysis_tools_data_generic_b_t *info)
2517 GnmValue *val_1 = value_dup (info->range_1);
2518 GnmValue *val_2 = value_dup (info->range_2);
2519 GnmExpr const *expr;
2520 GnmExpr const *expr_var_denum;
2521 GnmExpr const *expr_count_denum;
2522 GnmExpr const *expr_df_denum = NULL;
2524 GnmFunc *fd_finv;
2526 fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
2527 gnm_func_inc_usage (fd_finv);
2529 dao_set_italic (dao, 0, 0, 0, 11);
2530 dao_set_cell (dao, 0, 0, _("F-Test"));
2531 set_cell_text_col (dao, 0, 1, _("/Mean"
2532 "/Variance"
2533 "/Observations"
2534 "/df"
2535 "/F"
2536 "/P (F<=f) right-tail"
2537 "/F Critical right-tail"
2538 "/P (f<=F) left-tail"
2539 "/F Critical left-tail"
2540 "/P two-tail"
2541 "/F Critical two-tail"));
2543 /* Label */
2544 dao_set_italic (dao, 0, 0, 2, 0);
2545 analysis_tools_write_label_ftest (val_1, dao, 1, 0, info->labels, 1);
2546 analysis_tools_write_label_ftest (val_2, dao, 2, 0, info->labels, 2);
2548 /* Mean */
2550 GnmFunc *fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
2551 gnm_func_inc_usage (fd_mean);
2553 dao_set_cell_expr
2554 (dao, 1, 1,
2555 gnm_expr_new_funcall1
2556 (fd_mean,
2557 gnm_expr_new_constant (value_dup (val_1))));
2559 dao_set_cell_expr
2560 (dao, 2, 1,
2561 gnm_expr_new_funcall1
2562 (fd_mean,
2563 gnm_expr_new_constant (value_dup (val_2))));
2565 gnm_func_dec_usage (fd_mean);
2568 /* Variance */
2570 GnmFunc *fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
2571 gnm_func_inc_usage (fd_var);
2573 dao_set_cell_expr
2574 (dao, 1, 2,
2575 gnm_expr_new_funcall1
2576 (fd_var,
2577 gnm_expr_new_constant (value_dup (val_1))));
2579 expr_var_denum = gnm_expr_new_funcall1
2580 (fd_var,
2581 gnm_expr_new_constant (value_dup (val_2)));
2582 dao_set_cell_expr (dao, 2, 2, gnm_expr_copy (expr_var_denum));
2584 gnm_func_dec_usage (fd_var);
2587 /* Count */
2589 GnmFunc *fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
2590 gnm_func_inc_usage (fd_count);
2592 dao_set_cell_expr
2593 (dao, 1, 3,
2594 gnm_expr_new_funcall1
2595 (fd_count,
2596 gnm_expr_new_constant (value_dup (val_1))));
2598 expr_count_denum = gnm_expr_new_funcall1
2599 (fd_count,
2600 gnm_expr_new_constant (value_dup (val_2)));
2601 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_count_denum));
2603 gnm_func_dec_usage (fd_count);
2606 /* df */
2608 expr = gnm_expr_new_binary
2609 (make_cellref (0, -1),
2610 GNM_EXPR_OP_SUB,
2611 gnm_expr_new_constant (value_new_int (1)));
2612 dao_set_cell_expr (dao, 1, 4, gnm_expr_copy (expr));
2613 dao_set_cell_expr (dao, 2, 4, expr);
2616 /* F value */
2617 if (dao_cell_is_visible (dao, 2, 2)) {
2618 expr = gnm_expr_new_binary
2619 (make_cellref (0, -3),
2620 GNM_EXPR_OP_DIV,
2621 make_cellref (1, -3));
2622 gnm_expr_free (expr_var_denum);
2623 } else {
2624 expr = gnm_expr_new_binary
2625 (make_cellref (0, -3),
2626 GNM_EXPR_OP_DIV,
2627 expr_var_denum);
2629 dao_set_cell_expr (dao, 1, 5, expr);
2631 /* P right-tail */
2633 GnmFunc *fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
2634 const GnmExpr *arg3;
2636 gnm_func_inc_usage (fd_fdist);
2638 if (dao_cell_is_visible (dao, 2, 2)) {
2639 arg3 = make_cellref (1, -2);
2640 gnm_expr_free (expr_count_denum);
2641 } else {
2642 expr_df_denum = gnm_expr_new_binary
2643 (expr_count_denum,
2644 GNM_EXPR_OP_SUB,
2645 gnm_expr_new_constant (value_new_int (1)));
2646 arg3 = gnm_expr_copy (expr_df_denum);
2649 dao_set_cell_expr
2650 (dao, 1, 6,
2651 gnm_expr_new_funcall3
2652 (fd_fdist,
2653 make_cellref (0, -1),
2654 make_cellref (0, -2),
2655 arg3));
2657 gnm_func_dec_usage (fd_fdist);
2660 /* F critical right-tail */
2662 const GnmExpr *arg3;
2664 if (expr_df_denum == NULL) {
2665 arg3 = make_cellref (1, -3);
2666 } else {
2667 arg3 = gnm_expr_copy (expr_df_denum);
2670 dao_set_cell_expr
2671 (dao, 1, 7,
2672 gnm_expr_new_funcall3
2673 (fd_finv,
2674 gnm_expr_new_constant (value_new_float (info->alpha)),
2675 make_cellref (0, -3),
2676 arg3));
2679 /* P left-tail */
2680 dao_set_cell_expr (dao, 1, 8,
2681 gnm_expr_new_binary
2682 (gnm_expr_new_constant (value_new_int (1)),
2683 GNM_EXPR_OP_SUB,
2684 make_cellref (0, -2)));
2686 /* F critical left-tail */
2688 const GnmExpr *arg3;
2690 if (expr_df_denum == NULL) {
2691 arg3 = make_cellref (1, -5);
2692 } else {
2693 arg3 = gnm_expr_copy (expr_df_denum);
2696 dao_set_cell_expr
2697 (dao, 1, 9,
2698 gnm_expr_new_funcall3
2699 (fd_finv,
2700 gnm_expr_new_constant
2701 (value_new_float (1. - info->alpha)),
2702 make_cellref (0, -5),
2703 arg3));
2706 /* P two-tail */
2708 GnmFunc *fd_min = gnm_func_lookup_or_add_placeholder ("MIN");
2710 gnm_func_inc_usage (fd_min);
2712 dao_set_cell_expr
2713 (dao, 1, 10,
2714 gnm_expr_new_binary
2715 (gnm_expr_new_constant (value_new_int (2)),
2716 GNM_EXPR_OP_MULT,
2717 gnm_expr_new_funcall2
2718 (fd_min,
2719 make_cellref (0, -4),
2720 make_cellref (0, -2))));
2721 gnm_func_dec_usage (fd_min);
2724 /* F critical two-tail (left) */
2726 const GnmExpr *arg3;
2728 if (expr_df_denum == NULL) {
2729 arg3 = make_cellref (1, -7);
2730 } else {
2731 arg3 = expr_df_denum;
2734 dao_set_cell_expr
2735 (dao, 1, 11,
2736 gnm_expr_new_funcall3
2737 (fd_finv,
2738 gnm_expr_new_constant
2739 (value_new_float (1 - info->alpha / 2.)),
2740 make_cellref (0, -7),
2741 arg3));
2744 /* F critical two-tail (right) */
2745 dao_set_cell_expr
2746 (dao, 2, 11,
2747 gnm_expr_new_funcall3
2748 (fd_finv,
2749 gnm_expr_new_constant
2750 (value_new_float (info->alpha / 2.)),
2751 make_cellref (-1, -7),
2752 make_cellref (0, -7)));
2754 value_release (val_1);
2755 value_release (val_2);
2757 gnm_func_dec_usage (fd_finv);
2759 dao_redraw_respan (dao);
2760 return FALSE;
2763 gboolean
2764 analysis_tool_ftest_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
2765 analysis_tool_engine_t selector, gpointer result)
2767 switch (selector) {
2768 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
2769 return (dao_command_descriptor (dao, _("F-Test (%s)"), result)
2770 == NULL);
2771 case TOOL_ENGINE_UPDATE_DAO:
2772 dao_adjust (dao, 3, 12);
2773 return FALSE;
2774 case TOOL_ENGINE_CLEAN_UP:
2775 return analysis_tool_generic_b_clean (specs);
2776 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
2777 return FALSE;
2778 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
2779 dao_prepare_output (NULL, dao, _("F-Test"));
2780 return FALSE;
2781 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
2782 return dao_format_output (dao, _("F-Test"));
2783 case TOOL_ENGINE_PERFORM_CALC:
2784 default:
2785 return analysis_tool_ftest_engine_run (dao, specs);
2787 return TRUE; /* We shouldn't get here */
/************* Regression Tool *********************************************
 *
 * The results are given in a table which can be printed out in a new
 * sheet, in a new workbook, or simply into an existing sheet.
 *
 * Excel Bug 1: (Andrew) I believe that the following is a bug in Excel: When
 * calculating the F-statistic in the no-intercept case, it will use xdim as
 * the numerator df and (n - xdim) as the denominator df, which is as it should
 * be. However, in the regression it will then calculate the significance of the
 * F-statistic using (n - #slope parameters - 1) as the denominator df, which
 * makes sense when you are calculating an intercept, but in this case you are not
 * and the df should be just (n - #slope parameters). Excel is inconsistent,
 * in that it does not use the same df to calculate the significance that it
 * does to calculate the F-stat itself. Inference on regressions
 * without intercepts doesn't really work anyway (because of the way the
 * statistics work, not the code), so this is not a terribly big deal, and
 * those who would actually use the significance of F are not likely to be
 * using interceptless regressions anyway. So while it is easy to mimic Excel
 * in this respect, currently we do not and choose what at least for now seems
 * to be more correct.
 *
 * Excel Bug 2: (Andrew) Also in the no-intercept case: Excel has some weird way of
 * calculating the adjusted R^2 value that makes absolutely no sense to me, so
 * I couldn't mimic it if I tried. Again, what statistical opinion I have found
 * suggests that if you're running interceptless regressions, you won't know what
 * to do with an adjusted R^2 anyway.
 *
 **/
2821 static gint
2822 calculate_xdim (GnmValue *input, group_by_t group_by)
2824 GnmRange r;
2826 g_return_val_if_fail (input != NULL, 0);
2828 if (NULL == range_init_value (&r, input))
2829 return 0;
2831 if (group_by == GROUPED_BY_ROW)
2832 return range_height (&r);
2834 return range_width (&r);
2837 static gint
2838 calculate_n_obs (GnmValue *input, group_by_t group_by)
2840 GnmRange r;
2842 g_return_val_if_fail (input != NULL, 0);
2844 if (NULL == range_init_value (&r, input))
2845 return 0;
2847 if (group_by == GROUPED_BY_ROW)
2848 return range_width (&r);
2850 return range_height (&r);
2854 static gboolean
2855 analysis_tool_regression_engine_run (data_analysis_output_t *dao,
2856 analysis_tools_data_regression_t *info)
2858 gint xdim = calculate_xdim (info->base.range_1, info->group_by);
2859 gint i;
2861 GnmValue *val_1 = value_dup (info->base.range_1);
2862 GnmValue *val_2 = value_dup (info->base.range_2);
2863 GnmValue *val_1_cp = NULL;
2864 GnmValue *val_2_cp = NULL;
2866 GnmExpr const *expr_x;
2867 GnmExpr const *expr_y;
2868 GnmExpr const *expr_linest;
2869 GnmExpr const *expr_intercept;
2870 GnmExpr const *expr_ms;
2871 GnmExpr const *expr_sum;
2872 GnmExpr const *expr_tstat;
2873 GnmExpr const *expr_pvalue;
2874 GnmExpr const *expr_n;
2875 GnmExpr const *expr_df;
2876 GnmExpr const *expr_lower;
2877 GnmExpr const *expr_upper;
2878 GnmExpr const *expr_confidence;
2880 GnmFunc *fd_linest = analysis_tool_get_function ("LINEST", dao);
2881 GnmFunc *fd_index = analysis_tool_get_function ("INDEX", dao);
2882 GnmFunc *fd_fdist = analysis_tool_get_function ("FDIST", dao);
2883 GnmFunc *fd_sum = analysis_tool_get_function ("SUM", dao);
2884 GnmFunc *fd_sqrt = analysis_tool_get_function ("SQRT", dao);
2885 GnmFunc *fd_tdist = analysis_tool_get_function ("TDIST", dao);
2886 GnmFunc *fd_abs = analysis_tool_get_function ("ABS", dao);
2887 GnmFunc *fd_tinv = analysis_tool_get_function ("TINV", dao);
2888 GnmFunc *fd_transpose = analysis_tool_get_function ("TRANSPOSE", dao);
2889 GnmFunc *fd_concatenate = NULL;
2890 GnmFunc *fd_cell = NULL;
2891 GnmFunc *fd_offset = NULL;
2892 GnmFunc *fd_sumproduct = NULL;
2893 GnmFunc *fd_leverage = NULL;
2895 char const *str = ((info->group_by == GROUPED_BY_ROW) ? "row" : "col");
2896 char const *label = ((info->group_by == GROUPED_BY_ROW) ? _("Row")
2897 : _("Column"));
2899 if (!info->base.labels) {
2900 fd_concatenate = analysis_tool_get_function ("CONCATENATE",
2901 dao);
2902 fd_cell = analysis_tool_get_function ("CELL", dao);
2903 fd_offset = analysis_tool_get_function ("OFFSET", dao);
2905 if (info->residual) {
2906 fd_sumproduct = analysis_tool_get_function ("SUMPRODUCT", dao);
2907 fd_leverage = analysis_tool_get_function ("LEVERAGE", dao);
2910 cb_adjust_areas (val_1, NULL);
2911 cb_adjust_areas (val_2, NULL);
2913 dao_set_italic (dao, 0, 0, 0, 16 + xdim);
2914 set_cell_text_col (dao, 0, 0, _("/SUMMARY OUTPUT"
2916 "/Regression Statistics"
2917 "/Multiple R"
2918 "/R^2"
2919 "/Standard Error"
2920 "/Adjusted R^2"
2921 "/Observations"
2923 "/ANOVA"
2925 "/Regression"
2926 "/Residual"
2927 "/Total"
2930 "/Intercept"));
2931 dao_set_merge (dao, 0, 0, 1, 0);
2932 dao_set_italic (dao, 2, 0, 3, 0);
2933 dao_set_cell (dao, 2, 0, _("Response Variable"));
2934 dao_set_merge (dao, 0, 2, 1, 2);
2936 if (info->base.labels) {
2938 dao_set_cell_expr (dao, 3, 0,
2939 gnm_expr_new_funcall1 (fd_index, gnm_expr_new_constant (value_dup (val_2))));
2941 val_1_cp = value_dup (val_1);
2942 val_2_cp = value_dup (val_2);
2943 if (info->group_by == GROUPED_BY_ROW) {
2944 val_1->v_range.cell.a.col++;
2945 val_2->v_range.cell.a.col++;
2946 val_1_cp->v_range.cell.b.col = val_1_cp->v_range.cell.a.col;
2947 dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_constant
2948 (value_dup (val_1_cp)));
2949 } else {
2950 val_1->v_range.cell.a.row++;
2951 val_2->v_range.cell.a.row++;
2952 val_1_cp->v_range.cell.b.row = val_1_cp->v_range.cell.a.row;
2953 dao_set_array_expr (dao, 0, 17, 1, xdim, gnm_expr_new_funcall1
2954 (fd_transpose,
2955 gnm_expr_new_constant (value_dup (val_1_cp))));
2957 } else {
2958 dao_set_cell_expr (dao, 3, 0, gnm_expr_new_funcall3
2959 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
2960 gnm_expr_new_constant (value_new_string (" ")),
2961 gnm_expr_new_funcall2 (fd_cell,
2962 gnm_expr_new_constant (value_new_string (str)),
2963 gnm_expr_new_constant (value_dup (val_2)))));
2966 dao_set_italic (dao, 1, 10, 5, 10);
2967 set_cell_text_row (dao, 1, 10, _("/df"
2968 "/SS"
2969 "/MS"
2970 "/F"
2971 "/Significance of F"));
2973 dao_set_italic (dao, 1, 15, 6, 15);
2974 set_cell_text_row (dao, 1, 15, _("/Coefficients"
2975 "/Standard Error"
2976 "/t-Statistics"
2977 "/p-Value"));
2979 /* xgettext: this is an Excel-style number format. Use "..." quotes and do not translate the 0% */
2980 dao_set_format (dao, 5, 15, 5, 15, _("\"Lower\" 0%"));
2981 /* xgettext: this is an Excel-style number format. Use "..." quotes and do not translate the 0% */
2982 dao_set_format (dao, 6, 15, 6, 15, _("\"Upper\" 0%"));
2983 dao_set_align (dao, 5, 15, 5, 15, GNM_HALIGN_LEFT, GNM_VALIGN_TOP);
2984 dao_set_align (dao, 6, 15, 6, 15, GNM_HALIGN_RIGHT, GNM_VALIGN_TOP);
2986 dao_set_cell_float (dao, 5, 15, 1.0 - info->base.alpha);
2987 dao_set_cell_expr (dao, 6, 15, make_cellref (-1, 0));
2988 expr_confidence = dao_get_cellref (dao, 5, 15);
2990 dao_set_cell_comment (dao, 4, 15,
2991 _("Probability of observing a t-statistic\n"
2992 "whose absolute value is at least as large\n"
2993 "as the absolute value of the actually\n"
2994 "observed t-statistic, assuming the null\n"
2995 "hypothesis is in fact true."));
2996 if (!info->intercept)
2997 dao_set_cell_comment (dao, 0, 4,
2998 _("This value is not the square of R\n"
2999 "but the uncentered version of the\n"
3000 "coefficient of determination; that\n"
3001 "is, the proportion of the sum of\n"
3002 "squares explained by the model."));
3004 expr_x = gnm_expr_new_constant (value_dup (val_1));
3005 expr_y = gnm_expr_new_constant (value_dup (val_2));
3007 expr_intercept = gnm_expr_new_constant (value_new_bool (info->intercept));
3009 expr_linest = gnm_expr_new_funcall4 (fd_linest,
3010 expr_y,
3011 expr_x,
3012 expr_intercept,
3013 gnm_expr_new_constant (value_new_bool (TRUE)));
3016 /* Multiple R */
3017 if (info->intercept) {
3018 if (dao_cell_is_visible (dao, 1, 4))
3019 dao_set_cell_expr (dao, 1, 3, gnm_expr_new_funcall1 (fd_sqrt, make_cellref (0, 1)));
3020 else
3021 dao_set_cell_expr (dao, 1, 3,
3022 gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
3023 (fd_index,
3024 gnm_expr_copy (expr_linest),
3025 gnm_expr_new_constant (value_new_int (3)),
3026 gnm_expr_new_constant (value_new_int (1)))));
3027 } else
3028 dao_set_cell_expr (dao, 1, 3,
3029 gnm_expr_new_funcall1 (fd_sqrt, gnm_expr_new_funcall3
3030 (fd_index,
3031 gnm_expr_new_funcall4
3032 (fd_linest,
3033 gnm_expr_new_constant (value_dup (val_2)),
3034 gnm_expr_new_constant (value_dup (val_1)),
3035 gnm_expr_new_constant (value_new_bool (TRUE)),
3036 gnm_expr_new_constant (value_new_bool (TRUE))),
3037 gnm_expr_new_constant (value_new_int (3)),
3038 gnm_expr_new_constant (value_new_int (1)))));
3041 /* R Square */
3042 dao_set_cell_array_expr (dao, 1, 4,
3043 gnm_expr_new_funcall3 (fd_index,
3044 gnm_expr_copy (expr_linest),
3045 gnm_expr_new_constant (value_new_int (3)),
3046 gnm_expr_new_constant (value_new_int (1))));
3048 /* Standard Error */
3049 dao_set_cell_array_expr (dao, 1, 5,
3050 gnm_expr_new_funcall3 (fd_index,
3051 gnm_expr_copy (expr_linest),
3052 gnm_expr_new_constant (value_new_int (3)),
3053 gnm_expr_new_constant (value_new_int (2))));
3055 /* Adjusted R Square */
3056 if (dao_cell_is_visible (dao, 1, 7))
3057 expr_n = make_cellref (0, 1);
3058 else
3059 expr_n = gnm_expr_new_funcall3 (fd_sum,
3060 gnm_expr_new_constant (value_new_int (xdim)),
3061 gnm_expr_new_funcall3 (fd_index,
3062 gnm_expr_copy (expr_linest),
3063 gnm_expr_new_constant (value_new_int (4)),
3064 gnm_expr_new_constant (value_new_int (2))),
3065 gnm_expr_new_constant (value_new_int (1)));
3067 dao_set_cell_expr (dao, 1, 6, gnm_expr_new_binary
3068 (gnm_expr_new_constant (value_new_int (1)),
3069 GNM_EXPR_OP_SUB,
3070 gnm_expr_new_binary
3071 (gnm_expr_new_binary
3072 (gnm_expr_new_binary
3073 (gnm_expr_copy (expr_n),
3074 GNM_EXPR_OP_SUB,
3075 gnm_expr_new_constant (value_new_int (1))),
3076 GNM_EXPR_OP_DIV,
3077 gnm_expr_new_binary
3078 (expr_n,
3079 GNM_EXPR_OP_SUB,
3080 gnm_expr_new_constant (value_new_int (xdim + (info->intercept?1:0))))),
3081 GNM_EXPR_OP_MULT,
3082 gnm_expr_new_binary
3083 (gnm_expr_new_constant (value_new_int (1)),
3084 GNM_EXPR_OP_SUB,
3085 make_cellref (0, -2)))));
3087 /* Observations */
3089 if (dao_cell_is_visible (dao, 1, 13))
3090 dao_set_cell_expr (dao, 1, 7,
3091 gnm_expr_new_funcall2 (fd_sum,
3092 make_cellref (0, 6),
3093 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3094 else if (dao_cell_is_visible (dao, 1, 12))
3095 dao_set_cell_expr (dao, 1, 7,
3096 gnm_expr_new_funcall3 (fd_sum,
3097 make_cellref (0, 4),
3098 make_cellref (0, 5),
3099 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3100 else
3101 dao_set_cell_expr (dao, 1, 7,
3102 gnm_expr_new_funcall3 (fd_sum,
3103 gnm_expr_new_constant (value_new_int (xdim)),
3104 gnm_expr_new_funcall3 (fd_index,
3105 gnm_expr_copy (expr_linest),
3106 gnm_expr_new_constant (value_new_int (4)),
3107 gnm_expr_new_constant (value_new_int (2))),
3108 gnm_expr_new_constant (value_new_int (info->intercept?1:0))));
3112 /* Regression / df */
3114 dao_set_cell_int (dao, 1, 11, xdim);
3116 /* Residual / df */
3117 dao_set_cell_array_expr (dao, 1, 12,
3118 gnm_expr_new_funcall3 (fd_index,
3119 gnm_expr_copy (expr_linest),
3120 gnm_expr_new_constant (value_new_int (4)),
3121 gnm_expr_new_constant (value_new_int (2))));
3124 /* Total / df */
3125 expr_sum = gnm_expr_new_binary (make_cellref (0, -2),
3126 GNM_EXPR_OP_ADD,
3127 make_cellref (0, -1));
3128 dao_set_cell_expr (dao, 1, 13, gnm_expr_copy (expr_sum));
3130 /* Regression / SS */
3131 dao_set_cell_array_expr (dao, 2, 11,
3132 gnm_expr_new_funcall3 (fd_index,
3133 gnm_expr_copy (expr_linest),
3134 gnm_expr_new_constant (value_new_int (5)),
3135 gnm_expr_new_constant (value_new_int (1))));
3137 /* Residual / SS */
3138 dao_set_cell_array_expr (dao, 2, 12,
3139 gnm_expr_new_funcall3 (fd_index,
3140 gnm_expr_copy (expr_linest),
3141 gnm_expr_new_constant (value_new_int (5)),
3142 gnm_expr_new_constant (value_new_int (2))));
3145 /* Total / SS */
3146 dao_set_cell_expr (dao, 2, 13, expr_sum);
3149 /* Regression / MS */
3150 expr_ms = gnm_expr_new_binary (make_cellref (-1, 0),
3151 GNM_EXPR_OP_DIV,
3152 make_cellref (-2, 0));
3153 dao_set_cell_expr (dao, 3, 11, gnm_expr_copy (expr_ms));
3155 /* Residual / MS */
3156 dao_set_cell_expr (dao, 3, 12, expr_ms);
3159 /* F */
3160 dao_set_cell_array_expr (dao, 4, 11,
3161 gnm_expr_new_funcall3 (fd_index,
3162 gnm_expr_copy (expr_linest),
3163 gnm_expr_new_constant (value_new_int (4)),
3164 gnm_expr_new_constant (value_new_int (1))));
3166 /* Significance of F */
3168 if (dao_cell_is_visible (dao, 1, 12))
3169 dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3170 make_cellref (-1, 0),
3171 make_cellref (-4, 0),
3172 make_cellref (-4, 1)));
3173 else
3174 dao_set_cell_expr (dao, 5, 11, gnm_expr_new_funcall3 (fd_fdist,
3175 make_cellref (-1, 0),
3176 make_cellref (-4, 0),
3177 gnm_expr_new_funcall3
3178 (fd_index,
3179 gnm_expr_copy (expr_linest),
3180 gnm_expr_new_constant (value_new_int (4)),
3181 gnm_expr_new_constant (value_new_int (2)))));
3184 /* Intercept */
3187 expr_tstat = gnm_expr_new_binary (make_cellref (-2, 0),
3188 GNM_EXPR_OP_DIV,
3189 make_cellref (-1, 0));
3190 expr_df = dao_get_cellref (dao, 1, 12);
3191 expr_pvalue = gnm_expr_new_funcall3 (fd_tdist, gnm_expr_new_funcall1 (fd_abs, make_cellref (-1, 0)),
3192 gnm_expr_copy (expr_df),
3193 gnm_expr_new_constant (value_new_int (2)));
3194 expr_lower = gnm_expr_new_binary (make_cellref (-4, 0),
3195 GNM_EXPR_OP_SUB,
3196 gnm_expr_new_binary (make_cellref (-3, 0),
3197 GNM_EXPR_OP_MULT,
3198 gnm_expr_new_funcall2
3199 (fd_tinv,
3200 gnm_expr_new_binary
3201 (gnm_expr_new_constant (value_new_float (1.0)),
3202 GNM_EXPR_OP_SUB,
3203 gnm_expr_copy (expr_confidence)),
3204 gnm_expr_copy (expr_df))));
3205 expr_upper = gnm_expr_new_binary (make_cellref (-5, 0),
3206 GNM_EXPR_OP_ADD,
3207 gnm_expr_new_binary (make_cellref (-4, 0),
3208 GNM_EXPR_OP_MULT,
3209 gnm_expr_new_funcall2
3210 (fd_tinv,
3211 gnm_expr_new_binary
3212 (gnm_expr_new_constant (value_new_float (1.0)),
3213 GNM_EXPR_OP_SUB,
3214 expr_confidence),
3215 expr_df)));
3218 /* Intercept */
3220 if (!info->intercept) {
3221 dao_set_cell_int (dao, 1, 16, 0);
3222 for (i = 2; i <= 6; i++)
3223 dao_set_cell_na (dao, i, 16);
3224 } else {
3225 dao_set_cell_array_expr (dao, 1, 16,
3226 gnm_expr_new_funcall3
3227 (fd_index,
3228 gnm_expr_copy (expr_linest),
3229 gnm_expr_new_constant (value_new_int (1)),
3230 gnm_expr_new_constant (value_new_int (xdim+1))));
3231 dao_set_cell_array_expr (dao, 2, 16,
3232 gnm_expr_new_funcall3
3233 (fd_index,
3234 gnm_expr_copy (expr_linest),
3235 gnm_expr_new_constant (value_new_int (2)),
3236 gnm_expr_new_constant (value_new_int (xdim+1))));
3237 dao_set_cell_expr (dao, 3, 16, gnm_expr_copy (expr_tstat));
3238 dao_set_cell_expr (dao, 4, 16, gnm_expr_copy (expr_pvalue));
3239 dao_set_cell_expr (dao, 5, 16, gnm_expr_copy (expr_lower));
3240 dao_set_cell_expr (dao, 6, 16, gnm_expr_copy (expr_upper));
3243 /* Coefficients */
3245 dao->offset_row += 17;
3247 for (i = 0; i < xdim; i++) {
3248 if (!info->base.labels) {
3249 GnmExpr const *expr_offset;
3251 if (info->group_by == GROUPED_BY_ROW)
3252 expr_offset = gnm_expr_new_funcall3
3253 (fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3254 gnm_expr_new_constant (value_new_int (i)),
3255 gnm_expr_new_constant (value_new_int (0)));
3256 else
3257 expr_offset = gnm_expr_new_funcall3
3258 (fd_offset, gnm_expr_new_constant (value_dup (val_1)),
3259 gnm_expr_new_constant (value_new_int (0)),
3260 gnm_expr_new_constant (value_new_int (i)));
3262 dao_set_cell_expr (dao, 0, i, gnm_expr_new_funcall3
3263 (fd_concatenate, gnm_expr_new_constant (value_new_string (label)),
3264 gnm_expr_new_constant (value_new_string (" ")),
3265 gnm_expr_new_funcall2
3266 (fd_cell,
3267 gnm_expr_new_constant (value_new_string (str)),
3268 expr_offset)));
3271 dao_set_cell_array_expr (dao, 1, i,
3272 gnm_expr_new_funcall3
3273 (fd_index,
3274 gnm_expr_copy (expr_linest),
3275 gnm_expr_new_constant (value_new_int (1)),
3276 gnm_expr_new_constant (value_new_int (xdim - i))));
3277 dao_set_cell_array_expr (dao, 2, i,
3278 gnm_expr_new_funcall3
3279 (fd_index,
3280 gnm_expr_copy (expr_linest),
3281 gnm_expr_new_constant (value_new_int (2)),
3282 gnm_expr_new_constant (value_new_int (xdim - i))));
3283 dao_set_cell_expr (dao, 3, i, gnm_expr_copy (expr_tstat));
3284 dao_set_cell_expr (dao, 4, i, gnm_expr_copy (expr_pvalue));
3285 dao_set_cell_expr (dao, 5, i, gnm_expr_copy (expr_lower));
3286 dao_set_cell_expr (dao, 6, i, gnm_expr_copy (expr_upper));
3290 gnm_expr_free (expr_linest);
3291 gnm_expr_free (expr_tstat);
3292 gnm_expr_free (expr_pvalue);
3293 gnm_expr_free (expr_lower);
3294 gnm_expr_free (expr_upper);
3296 value_release (val_1_cp);
3297 value_release (val_2_cp);
3299 if (info->residual) {
3300 gint n_obs = calculate_n_obs (val_1, info->group_by);
3301 GnmExpr const *expr_diff;
3302 GnmExpr const *expr_prediction;
3304 dao->offset_row += xdim + 1;
3305 dao_set_italic (dao, 0, 0, xdim + 7, 0);
3306 dao_set_cell (dao, 0, 0, _("Constant"));
3307 dao_set_array_expr (dao, 1, 0, xdim, 1,
3308 gnm_expr_new_funcall1
3309 (fd_transpose,
3310 make_rangeref (-1, - xdim - 1, -1, -2)));
3311 set_cell_text_row (dao, xdim + 1, 0, _("/Prediction"
3313 "/Residual"
3314 "/Leverages"
3315 "/Internally studentized"
3316 "/Externally studentized"
3317 "/p-Value"));
3318 dao_set_cell_expr (dao, xdim + 2, 0, make_cellref (1 - xdim, - 18 - xdim));
3319 if (info->group_by == GROUPED_BY_ROW) {
3320 dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3321 gnm_expr_new_funcall1
3322 (fd_transpose,
3323 gnm_expr_new_constant (val_1)));
3324 dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3325 gnm_expr_new_funcall1
3326 (fd_transpose,
3327 gnm_expr_new_constant (val_2)));
3328 } else {
3329 dao_set_array_expr (dao, 1, 1, xdim, n_obs,
3330 gnm_expr_new_constant (val_1));
3331 dao_set_array_expr (dao, xdim + 2, 1, 1, n_obs,
3332 gnm_expr_new_constant (val_2));
3335 expr_prediction = gnm_expr_new_funcall2 (fd_sumproduct,
3336 dao_get_rangeref (dao, 1, - 2 - xdim, 1, - 2),
3337 gnm_expr_new_funcall1
3338 (fd_transpose, make_rangeref
3339 (-1 - xdim, 0, -1, 0)));
3340 expr_diff = gnm_expr_new_binary (make_cellref (-1, 0), GNM_EXPR_OP_SUB, make_cellref (-2, 0));
3342 for (i = 0; i < n_obs; i++) {
3343 dao_set_cell_expr (dao, xdim + 1, i + 1, gnm_expr_copy (expr_prediction));
3344 dao_set_cell_expr (dao, xdim + 3, i + 1, gnm_expr_copy (expr_diff));
3345 dao_set_cell_expr (dao, 0, i + 1, gnm_expr_new_constant (value_new_int (1)));
3347 gnm_expr_free (expr_diff);
3348 gnm_expr_free (expr_prediction);
3350 if (dao_cell_is_visible (dao, xdim + 4, n_obs)) {
3351 GnmExpr const *expr_X = dao_get_rangeref (dao, info->intercept ? 0 : 1, 1, xdim, n_obs);
3352 GnmExpr const *expr_diagonal =
3353 gnm_expr_new_funcall1
3354 (fd_leverage, expr_X);
3355 GnmExpr const *expr_var =
3356 dao_get_cellref (dao, 3, - 6 - xdim);
3357 GnmExpr const *expr_int_stud =
3358 gnm_expr_new_binary
3359 (make_cellref (-2, 0),
3360 GNM_EXPR_OP_DIV,
3361 gnm_expr_new_funcall1
3362 (fd_sqrt,
3363 gnm_expr_new_binary
3364 (expr_var,
3365 GNM_EXPR_OP_MULT,
3366 gnm_expr_new_binary
3367 (gnm_expr_new_constant (value_new_int (1)),
3368 GNM_EXPR_OP_SUB,
3369 make_cellref (-1, 0)))));
3370 GnmExpr const *expr_ext_stud;
3371 GnmExpr const *expr_p_val_res;
3373 expr_var = gnm_expr_new_binary
3374 (gnm_expr_new_binary
3375 (dao_get_cellref (dao, 2, - 6 - xdim),
3376 GNM_EXPR_OP_SUB,
3377 gnm_expr_new_binary
3378 (make_cellref (-3, 0),
3379 GNM_EXPR_OP_EXP,
3380 gnm_expr_new_constant (value_new_int (2)))),
3381 GNM_EXPR_OP_DIV,
3382 gnm_expr_new_binary
3383 (dao_get_cellref (dao, 1, - 6 - xdim),
3384 GNM_EXPR_OP_SUB,
3385 gnm_expr_new_constant (value_new_int (1))));
3386 expr_ext_stud = gnm_expr_new_binary
3387 (make_cellref (-3, 0),
3388 GNM_EXPR_OP_DIV,
3389 gnm_expr_new_funcall1
3390 (fd_sqrt,
3391 gnm_expr_new_binary
3392 (expr_var,
3393 GNM_EXPR_OP_MULT,
3394 gnm_expr_new_binary
3395 (gnm_expr_new_constant (value_new_int (1)),
3396 GNM_EXPR_OP_SUB,
3397 make_cellref (-2, 0)))));
3398 expr_p_val_res = gnm_expr_new_funcall3
3399 (fd_tdist,
3400 gnm_expr_new_funcall1
3401 (fd_abs,
3402 make_cellref (-1, 0)),
3403 gnm_expr_new_binary
3404 (dao_get_cellref (dao, 1, - 6 - xdim),
3405 GNM_EXPR_OP_SUB,
3406 gnm_expr_new_constant (value_new_int (1))),
3407 gnm_expr_new_constant (value_new_int (2)));
3409 dao_set_array_expr (dao, xdim + 4, 1, 1, n_obs, expr_diagonal);
3410 dao_set_format (dao, xdim + 5, 1, xdim + 6, n_obs, "0.0000");
3411 dao_set_percent (dao, xdim + 7, 1, xdim + 7, n_obs);
3412 for (i = 0; i < n_obs; i++){
3413 dao_set_cell_expr (dao, xdim + 5, i + 1, gnm_expr_copy (expr_int_stud));
3414 dao_set_cell_expr (dao, xdim + 6, i + 1, gnm_expr_copy (expr_ext_stud));
3415 dao_set_cell_expr (dao, xdim + 7, i + 1, gnm_expr_copy (expr_p_val_res));
3417 gnm_expr_free (expr_int_stud);
3418 gnm_expr_free (expr_ext_stud);
3419 gnm_expr_free (expr_p_val_res);
3421 } else {
3422 value_release (val_1);
3423 value_release (val_2);
3426 gnm_func_dec_usage (fd_linest);
3427 gnm_func_dec_usage (fd_index);
3428 gnm_func_dec_usage (fd_fdist);
3429 gnm_func_dec_usage (fd_sum);
3430 gnm_func_dec_usage (fd_sqrt);
3431 gnm_func_dec_usage (fd_tdist);
3432 gnm_func_dec_usage (fd_abs);
3433 gnm_func_dec_usage (fd_tinv);
3434 gnm_func_dec_usage (fd_transpose);
3435 if (fd_concatenate != NULL)
3436 gnm_func_dec_usage (fd_concatenate);
3437 if (fd_cell != NULL)
3438 gnm_func_dec_usage (fd_cell);
3439 if (fd_offset != NULL)
3440 gnm_func_dec_usage (fd_offset);
3441 if (fd_sumproduct != NULL)
3442 gnm_func_dec_usage (fd_sumproduct);
3443 if (fd_leverage != NULL)
3444 gnm_func_dec_usage (fd_leverage);
3446 dao_redraw_respan (dao);
3448 return FALSE;
/*
 * analysis_tool_regression_simple_engine_run:
 * @dao: output destination
 * @info: regression settings
 *
 * Writes the "SUMMARY OUTPUT" table for the non-multiple regression case:
 * two header rows followed by one row per entry of info->indep_vars, each
 * row being driven by a LINEST formula that pairs that entry with
 * info->base.range_2.
 *
 * Always returns FALSE.
 */
3451 static gboolean
3452 analysis_tool_regression_simple_engine_run (data_analysis_output_t *dao,
3453 analysis_tools_data_regression_t *info)
3455 GnmFunc *fd_linest = analysis_tool_get_function ("LINEST", dao);
3456 GnmFunc *fd_index = analysis_tool_get_function ("INDEX", dao);
3457 GnmFunc *fd_fdist = analysis_tool_get_function ("FDIST", dao);
3458 GnmFunc *fd_rows = analysis_tool_get_function ("ROWS", dao);
3459 GnmFunc *fd_columns = analysis_tool_get_function ("COLUMNS", dao);
3461 GSList *inputdata;
3462 guint row;
/* Private copy of range_2; it is wrapped in expr_val_dep below and released
 * when that expression is freed at the end of this function. */
3464 GnmValue *val_dep = value_dup (info->base.range_2);
/* Shared boolean constant passed as LINEST's third argument in every row. */
3465 GnmExpr const *expr_intercept
3466 = gnm_expr_new_constant (value_new_bool (info->intercept));
3467 GnmExpr const *expr_observ;
3468 GnmExpr const *expr_val_dep;
/* Header cells in output rows 0 and 2 are set in italics. */
3470 dao_set_italic (dao, 0, 0, 4, 0);
3471 dao_set_italic (dao, 0, 2, 5, 2);
/* The wording of both header rows swaps "Independent"/"Response" depending
 * on info->multiple_y. */
3472 set_cell_text_row (dao, 0, 0, info->multiple_y ?
3473 _("/SUMMARY OUTPUT"
3475 "/Independent Variable"
3477 "/Observations") :
3478 _("/SUMMARY OUTPUT"
3480 "/Response Variable"
3482 "/Observations"));
3483 set_cell_text_row (dao, 0, 2, info->multiple_y ?
3484 _("/Response Variable"
3485 "/R^2"
3486 "/Slope"
3487 "/Intercept"
3488 "/F"
3489 "/Significance of F") :
3490 _("/Independent Variable"
3491 "/R^2"
3492 "/Slope"
3493 "/Intercept"
3494 "/F"
3495 "/Significance of F"));
/* Label for the range_2 copy goes to cell (3, 0).
 * NOTE(review): presumably this call also trims the label cell off val_dep
 * when labels are enabled -- confirm against analysis_tools_write_a_label. */
3496 analysis_tools_write_a_label (val_dep, dao,
3497 info->base.labels, info->group_by,
3498 3, 0);
/* Cell (5, 0) = ROWS(range) * COLUMNS(range) of the range_2 copy;
 * expr_observ is a reference to that cell, reused as the observation count
 * in every FDIST formula below. */
3500 expr_val_dep = gnm_expr_new_constant (val_dep);
3501 dao_set_cell_expr (dao, 5, 0, gnm_expr_new_binary (gnm_expr_new_funcall1 (fd_rows, gnm_expr_copy (expr_val_dep)),
3502 GNM_EXPR_OP_MULT,
3503 gnm_expr_new_funcall1 (fd_columns, gnm_expr_copy (expr_val_dep))));
3504 expr_observ = dao_get_cellref (dao, 5, 0);
/* One output row per entry of info->indep_vars, starting at row 3. */
3506 for (row = 3, inputdata = info->indep_vars; inputdata != NULL;
3507 inputdata = inputdata->next, row++) {
3508 GnmValue *val_indep = value_dup (inputdata->data);
3509 GnmExpr const *expr_linest;
3511 dao_set_italic (dao, 0, row, 0, row);
3512 analysis_tools_write_a_label (val_indep, dao,
3513 info->base.labels, info->group_by,
3514 0, row);
/* The order of LINEST's first two arguments is swapped when
 * info->multiple_y is set: the list entry then comes first and the range_2
 * copy second. */
3515 expr_linest = info->multiple_y ?
3516 gnm_expr_new_funcall4 (fd_linest,
3517 gnm_expr_new_constant (val_indep),
3518 gnm_expr_copy (expr_val_dep),
3519 gnm_expr_copy (expr_intercept),
3520 gnm_expr_new_constant (value_new_bool (TRUE))) :
3521 gnm_expr_new_funcall4 (fd_linest,
3522 gnm_expr_copy (expr_val_dep),
3523 gnm_expr_new_constant (val_indep),
3524 gnm_expr_copy (expr_intercept),
3525 gnm_expr_new_constant (value_new_bool (TRUE)));
/* Column 1 (headed "R^2"): INDEX (LINEST (...), 3, 1). */
3526 dao_set_cell_array_expr (dao, 1, row,
3527 gnm_expr_new_funcall3 (fd_index,
3528 gnm_expr_copy (expr_linest),
3529 gnm_expr_new_constant (value_new_int (3)),
3530 gnm_expr_new_constant (value_new_int (1))));
/* Column 4 (headed "F"): INDEX (LINEST (...), 4, 1). */
3531 dao_set_cell_array_expr (dao, 4, row,
3532 gnm_expr_new_funcall3 (fd_index,
3533 gnm_expr_copy (expr_linest),
3534 gnm_expr_new_constant (value_new_int (4)),
3535 gnm_expr_new_constant (value_new_int (1))));
/* The LINEST expression itself is handed over here (no copy), as an array
 * formula anchored at column 2.
 * NOTE(review): the trailing "2, 1" looks like a 2-wide, 1-high area
 * covering the "Slope"/"Intercept" columns -- confirm dao_set_array_expr's
 * argument order. */
3536 dao_set_array_expr (dao, 2, row, 2, 1, expr_linest);
/* Column 5: FDIST (cell one column to the left, 1, observations - 2). */
3538 dao_set_cell_expr (dao, 5, row, gnm_expr_new_funcall3
3539 (fd_fdist,
3540 make_cellref (-1, 0),
3541 gnm_expr_new_constant (value_new_int (1)),
3542 gnm_expr_new_binary (gnm_expr_copy (expr_observ),
3543 GNM_EXPR_OP_SUB,
3544 gnm_expr_new_constant (value_new_int (2)))));
/* Only copies of these three expressions were placed into cells, so the
 * originals are released here. */
3548 gnm_expr_free (expr_intercept);
3549 gnm_expr_free (expr_observ);
3550 gnm_expr_free (expr_val_dep);
3552 gnm_func_dec_usage (fd_fdist);
3553 gnm_func_dec_usage (fd_linest);
3554 gnm_func_dec_usage (fd_index);
3555 gnm_func_dec_usage (fd_rows);
3556 gnm_func_dec_usage (fd_columns);
3558 dao_redraw_respan (dao);
3560 return FALSE;
3563 gboolean
3564 analysis_tool_regression_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3565 analysis_tool_engine_t selector, gpointer result)
3567 analysis_tools_data_regression_t *info = specs;
3569 switch (selector) {
3570 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3571 return (dao_command_descriptor (dao, _("Regression (%s)"), result)
3572 == NULL);
3573 case TOOL_ENGINE_UPDATE_DAO:
3575 gint xdim = calculate_xdim (info->base.range_1, info->group_by);
3576 gint cols, rows;
3578 if (info->multiple_regression) {
3579 cols = 7;
3580 rows = 17 + xdim;
3581 info->indep_vars = NULL;
3582 if (info->residual) {
3583 gint residual_cols = xdim + 4;
3584 GnmValue *val = info->base.range_1;
3586 rows += 2 + calculate_n_obs (val, info->group_by);
3587 residual_cols += 4;
3588 if (cols < residual_cols)
3589 cols = residual_cols;
3591 } else {
3592 info->indep_vars = g_slist_prepend (NULL, info->base.range_1);
3593 info->base.range_1 = NULL;
3594 prepare_input_range (&info->indep_vars, info->group_by);
3595 cols = 6;
3596 rows = 3 + xdim;
3598 dao_adjust (dao, cols, rows);
3599 return FALSE;
3601 case TOOL_ENGINE_CLEAN_UP:
3602 range_list_destroy (info->indep_vars);
3603 info->indep_vars = NULL;
3604 return analysis_tool_generic_b_clean (specs);
3606 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
3607 return FALSE;
3608 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
3609 dao_prepare_output (NULL, dao, _("Regression"));
3610 return FALSE;
3611 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
3612 return dao_format_output (dao, _("Regression"));
3613 case TOOL_ENGINE_PERFORM_CALC:
3614 default:
3615 if (info->multiple_regression)
3616 return analysis_tool_regression_engine_run (dao, specs);
3617 else
3618 return analysis_tool_regression_simple_engine_run (dao, specs);
3620 return TRUE; /* We shouldn't get here */
3625 /************* Moving Average Tool *****************************************
3627 * The moving average tool calculates moving averages of a given data
3628 * set. The results are given in a table which can be printed out in
3629 * a new sheet, in a new workbook, or simply into an existing sheet.
3633 static GnmExpr const *
3634 analysis_tool_moving_average_funcall5 (GnmFunc *fd, GnmExpr const *ex, int y, int x, int dy, int dx)
3636 GnmExprList *list;
3637 list = gnm_expr_list_prepend (NULL, gnm_expr_new_constant (value_new_int (dx)));
3638 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (dy)));
3639 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (x)));
3640 list = gnm_expr_list_prepend (list, gnm_expr_new_constant (value_new_int (y)));
3641 list = gnm_expr_list_prepend (list, gnm_expr_copy (ex));
3643 return gnm_expr_new_funcall (fd, list);
3646 static GnmExpr const *
3647 analysis_tool_moving_average_weighted_av (GnmFunc *fd_sum, GnmFunc *fd_in, GnmExpr const *ex,
3648 int y, int x, int dy, int dx, int *w)
3650 GnmExprList *list = NULL;
3652 while (*w != 0) {
3653 list = gnm_expr_list_prepend
3654 (list, gnm_expr_new_binary
3655 (gnm_expr_new_constant (value_new_int (*w)),
3656 GNM_EXPR_OP_MULT,
3657 gnm_expr_new_funcall3 (fd_in, gnm_expr_copy (ex),
3658 gnm_expr_new_constant (value_new_int (y)),
3659 gnm_expr_new_constant (value_new_int (x)))));
3660 w++;
3661 x += dx;
3662 y += dy;
3665 return gnm_expr_new_funcall (fd_sum, list);
3668 static gboolean
3669 analysis_tool_moving_average_engine_run (data_analysis_output_t *dao,
3670 analysis_tools_data_moving_average_t *info)
3672 GnmFunc *fd_index = NULL;
3673 GnmFunc *fd_average;
3674 GnmFunc *fd_offset;
3675 GnmFunc *fd_sqrt = NULL;
3676 GnmFunc *fd_sumxmy2 = NULL;
3677 GnmFunc *fd_sum = NULL;
3678 GSList *l;
3679 gint col = 0;
3680 gint source;
3681 SheetObject *so = NULL;
3682 GogPlot *plot = NULL;
3684 if (info->base.labels || info->ma_type == moving_average_type_wma
3685 || info->ma_type== moving_average_type_spencer_ma) {
3686 fd_index = gnm_func_lookup_or_add_placeholder ("INDEX");
3687 gnm_func_inc_usage (fd_index);
3689 if (info->std_error_flag) {
3690 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
3691 gnm_func_inc_usage (fd_sqrt);
3692 fd_sumxmy2 = gnm_func_lookup_or_add_placeholder ("SUMXMY2");
3693 gnm_func_inc_usage (fd_sumxmy2);
3695 if (moving_average_type_wma == info->ma_type || moving_average_type_spencer_ma == info->ma_type) {
3696 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
3697 gnm_func_inc_usage (fd_sum);
3699 fd_average = gnm_func_lookup_or_add_placeholder ("AVERAGE");
3700 gnm_func_inc_usage (fd_average);
3701 fd_offset = gnm_func_lookup_or_add_placeholder ("OFFSET");
3702 gnm_func_inc_usage (fd_offset);
3704 if (info->show_graph) {
3705 GogGraph *graph;
3706 GogChart *chart;
3708 graph = g_object_new (GOG_TYPE_GRAPH, NULL);
3709 chart = GOG_CHART (gog_object_add_by_name (GOG_OBJECT (graph), "Chart", NULL));
3710 plot = gog_plot_new_by_name ("GogLinePlot");
3711 gog_object_add_by_name (GOG_OBJECT (chart), "Plot", GOG_OBJECT (plot));
3712 so = sheet_object_graph_new (graph);
3713 g_object_unref (graph);
3716 for (l = info->base.input, source = 1; l; l = l->next, col++, source++) {
3717 GnmValue *val = value_dup ((GnmValue *)l->data);
3718 GnmValue *val_c = NULL;
3719 GnmExpr const *expr_title = NULL;
3720 GnmExpr const *expr_input = NULL;
3721 char const *format = NULL;
3722 gint height;
3723 gint x = 0;
3724 gint y = 0;
3725 gint *mover;
3726 guint *delta_mover;
3727 guint delta_x = 1;
3728 guint delta_y = 1;
3729 gint row, base;
3730 Sheet *sheet;
3731 GnmEvalPos ep;
3733 eval_pos_init_sheet (&ep, val->v_range.cell.a.sheet);
3735 if (info->base.labels) {
3736 val_c = value_dup (val);
3737 switch (info->base.group_by) {
3738 case GROUPED_BY_ROW:
3739 val->v_range.cell.a.col++;
3740 break;
3741 default:
3742 val->v_range.cell.a.row++;
3743 break;
3745 expr_title = gnm_expr_new_funcall1 (fd_index,
3746 gnm_expr_new_constant (val_c));
3748 dao_set_italic (dao, col, 0, col, 0);
3749 dao_set_cell_expr (dao, col, 0, expr_title);
3750 } else {
3751 switch (info->base.group_by) {
3752 case GROUPED_BY_ROW:
3753 format = _("Row %d");
3754 break;
3755 default:
3756 format = _("Column %d");
3757 break;
3759 dao_set_cell_printf (dao, col, 0, format, source);
3762 switch (info->base.group_by) {
3763 case GROUPED_BY_ROW:
3764 height = value_area_get_width (val, &ep);
3765 mover = &x;
3766 delta_mover = &delta_x;
3767 break;
3768 default:
3769 height = value_area_get_height (val, &ep);
3770 mover = &y;
3771 delta_mover = &delta_y;
3772 break;
3775 sheet = val->v_range.cell.a.sheet;
3776 expr_input = gnm_expr_new_constant (val);
3778 if (plot != NULL) {
3779 GogSeries *series;
3781 series = gog_plot_new_series (plot);
3782 gog_series_set_dim (series, 1,
3783 gnm_go_data_vector_new_expr (sheet,
3784 gnm_expr_top_new (gnm_expr_copy (expr_input))),
3785 NULL);
3787 series = gog_plot_new_series (plot);
3788 gog_series_set_dim (series, 1,
3789 dao_go_data_vector (dao, col, 1, col, height),
3790 NULL);
3793 switch (info->ma_type) {
3794 case moving_average_type_central_sma:
3796 GnmExpr const *expr_offset_last = NULL;
3797 GnmExpr const *expr_offset = NULL;
3798 *delta_mover = info->interval;
3799 (*mover) = 1 - info->interval + info->offset;
3800 for (row = 1; row <= height; row++, (*mover)++) {
3801 expr_offset_last = expr_offset;
3802 expr_offset = NULL;
3803 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3804 expr_offset = gnm_expr_new_funcall1
3805 (fd_average, analysis_tool_moving_average_funcall5
3806 (fd_offset,expr_input, y, x, delta_y, delta_x));
3808 if (expr_offset_last == NULL)
3809 dao_set_cell_na (dao, col, row);
3810 else
3811 dao_set_cell_expr (dao, col, row,
3812 gnm_expr_new_funcall2 (fd_average, expr_offset_last,
3813 gnm_expr_copy (expr_offset)));
3814 } else {
3815 if (expr_offset_last != NULL) {
3816 gnm_expr_free (expr_offset_last);
3817 expr_offset_last = NULL;
3819 dao_set_cell_na (dao, col, row);
3822 base = info->interval - info->offset;
3824 break;
3825 case moving_average_type_cma:
3826 for (row = 1; row <= height; row++) {
3827 GnmExpr const *expr_offset;
3829 *delta_mover = row;
3831 expr_offset = analysis_tool_moving_average_funcall5
3832 (fd_offset, expr_input, y, x, delta_y, delta_x);
3834 dao_set_cell_expr (dao, col, row,
3835 gnm_expr_new_funcall1 (fd_average, expr_offset));
3837 base = 0;
3838 break;
3839 case moving_average_type_wma:
3841 GnmExpr const *expr_divisor = gnm_expr_new_constant
3842 (value_new_int((info->interval * (info->interval + 1))/2));
3843 int *w = g_new (int, (info->interval + 1));
3844 int i;
3846 for (i = 0; i < info->interval; i++)
3847 w[i] = i+1;
3848 w[info->interval] = 0;
3850 delta_x = 0;
3851 delta_y= 0;
3852 (*delta_mover) = 1;
3853 (*mover) = 1 - info->interval;
3854 for (row = 1; row <= height; row++, (*mover)++) {
3855 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3856 GnmExpr const *expr_sum;
3858 expr_sum = analysis_tool_moving_average_weighted_av
3859 (fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3861 dao_set_cell_expr (dao, col, row,
3862 gnm_expr_new_binary
3863 (expr_sum,
3864 GNM_EXPR_OP_DIV,
3865 gnm_expr_copy (expr_divisor)));
3866 } else
3867 dao_set_cell_na (dao, col, row);
3869 g_free (w);
3870 gnm_expr_free (expr_divisor);
3871 base = info->interval - 1;
3872 delta_x = 1;
3873 delta_y= 1;
3875 break;
3876 case moving_average_type_spencer_ma:
3878 GnmExpr const *expr_divisor = gnm_expr_new_constant
3879 (value_new_int(-3-6-5+3+21+45+67+74+67+46+21+3-5-6-3));
3880 int w[] = {-3, -6, -5, 3, 21, 45, 67, 74, 67, 46, 21, 3, -5, -6, -3, 0};
3882 delta_x = 0;
3883 delta_y= 0;
3884 (*delta_mover) = 1;
3885 (*mover) = 1 - info->interval + info->offset;
3886 for (row = 1; row <= height; row++, (*mover)++) {
3887 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3888 GnmExpr const *expr_sum;
3890 expr_sum = analysis_tool_moving_average_weighted_av
3891 (fd_sum, fd_index, expr_input, y+1, x+1, delta_y, delta_x, w);
3893 dao_set_cell_expr (dao, col, row,
3894 gnm_expr_new_binary
3895 (expr_sum,
3896 GNM_EXPR_OP_DIV,
3897 gnm_expr_copy (expr_divisor)));
3898 } else
3899 dao_set_cell_na (dao, col, row);
3901 gnm_expr_free (expr_divisor);
3902 base = info->interval - info->offset - 1;
3903 delta_x = 1;
3904 delta_y= 1;
3906 break;
3907 default:
3908 (*delta_mover) = info->interval;
3909 (*mover) = 1 - info->interval + info->offset;
3910 for (row = 1; row <= height; row++, (*mover)++) {
3911 if ((*mover >= 0) && (*mover < height - info->interval + 1)) {
3912 GnmExpr const *expr_offset;
3914 expr_offset = analysis_tool_moving_average_funcall5
3915 (fd_offset, expr_input, y, x, delta_y, delta_x);
3916 dao_set_cell_expr (dao, col, row,
3917 gnm_expr_new_funcall1 (fd_average, expr_offset));
3918 } else
3919 dao_set_cell_na (dao, col, row);
3921 base = info->interval - info->offset - 1;
3922 break;
3925 if (info->std_error_flag) {
3926 col++;
3927 dao_set_italic (dao, col, 0, col, 0);
3928 dao_set_cell (dao, col, 0, _("Standard Error"));
3930 (*mover) = base;
3931 for (row = 1; row <= height; row++) {
3932 if (row > base && row <= height - info->offset && (row - base - info->df) > 0) {
3933 GnmExpr const *expr_offset;
3935 if (info->base.group_by == GROUPED_BY_ROW)
3936 delta_x = row - base;
3937 else
3938 delta_y = row - base;
3940 expr_offset = analysis_tool_moving_average_funcall5
3941 (fd_offset, expr_input, y, x, delta_y, delta_x);
3942 dao_set_cell_expr (dao, col, row,
3943 gnm_expr_new_funcall1
3944 (fd_sqrt,
3945 gnm_expr_new_binary
3946 (gnm_expr_new_funcall2
3947 (fd_sumxmy2,
3948 expr_offset,
3949 make_rangeref (-1, - row + base + 1, -1, 0)),
3950 GNM_EXPR_OP_DIV,
3951 gnm_expr_new_constant (value_new_int
3952 (row - base - info->df)))));
3953 } else
3954 dao_set_cell_na (dao, col, row);
3958 gnm_expr_free (expr_input);
3961 if (so != NULL)
3962 dao_set_sheet_object (dao, 0, 1, so);
3964 if (fd_index != NULL)
3965 gnm_func_dec_usage (fd_index);
3966 if (fd_sqrt != NULL)
3967 gnm_func_dec_usage (fd_sqrt);
3968 if (fd_sumxmy2 != NULL)
3969 gnm_func_dec_usage (fd_sumxmy2);
3970 if (fd_sum != NULL)
3971 gnm_func_dec_usage (fd_sum);
3972 gnm_func_dec_usage (fd_average);
3973 gnm_func_dec_usage (fd_offset);
3975 dao_redraw_respan (dao);
3977 return FALSE;
3981 gboolean
3982 analysis_tool_moving_average_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
3983 analysis_tool_engine_t selector, gpointer result)
3985 analysis_tools_data_moving_average_t *info = specs;
3987 switch (selector) {
3988 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
3989 return (dao_command_descriptor (dao, _("Moving Average (%s)"), result)
3990 == NULL);
3991 case TOOL_ENGINE_UPDATE_DAO:
3992 prepare_input_range (&info->base.input, info->base.group_by);
3993 dao_adjust (dao, (info->std_error_flag ? 2 : 1) *
3994 g_slist_length (info->base.input),
3995 1 + analysis_tool_calc_length (specs));
3996 return FALSE;
3997 case TOOL_ENGINE_CLEAN_UP:
3998 return analysis_tool_generic_clean (specs);
3999 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4000 return FALSE;
4001 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4002 dao_prepare_output (NULL, dao, _("Moving Average"));
4003 return FALSE;
4004 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4005 return dao_format_output (dao, _("Moving Average"));
4006 case TOOL_ENGINE_PERFORM_CALC:
4007 default:
4008 return analysis_tool_moving_average_engine_run (dao, specs);
4010 return TRUE; /* We shouldn't get here */
4014 /************* Rank and Percentile Tool ************************************
4016 * The results are given in a table which can be printed out in a new
4017 * sheet, in a new workbook, or simply into an existing sheet.
4021 static gboolean
4022 analysis_tool_ranking_engine_run (data_analysis_output_t *dao,
4023 analysis_tools_data_ranking_t *info)
4025 GSList *data = info->base.input;
4026 int col = 0;
4028 GnmFunc *fd_large;
4029 GnmFunc *fd_row;
4030 GnmFunc *fd_rank;
4031 GnmFunc *fd_match;
4032 GnmFunc *fd_percentrank;
4034 fd_large = gnm_func_lookup_or_add_placeholder ("LARGE");
4035 gnm_func_inc_usage (fd_large);
4036 fd_row = gnm_func_lookup_or_add_placeholder ("ROW");
4037 gnm_func_inc_usage (fd_row);
4038 fd_rank = gnm_func_lookup_or_add_placeholder ("RANK");
4039 gnm_func_inc_usage (fd_rank);
4040 fd_match = gnm_func_lookup_or_add_placeholder ("MATCH");
4041 gnm_func_inc_usage (fd_match);
4042 fd_percentrank = gnm_func_lookup_or_add_placeholder ("PERCENTRANK");
4043 gnm_func_inc_usage (fd_percentrank);
4045 dao_set_merge (dao, 0, 0, 1, 0);
4046 dao_set_italic (dao, 0, 0, 0, 0);
4047 dao_set_cell (dao, 0, 0, _("Ranks & Percentiles"));
4049 for (; data; data = data->next, col++) {
4050 GnmValue *val_org = value_dup (data->data);
4051 GnmExpr const *expr_large;
4052 GnmExpr const *expr_rank;
4053 GnmExpr const *expr_position;
4054 GnmExpr const *expr_percentile;
4055 int rows, i;
4057 dao_set_italic (dao, 0, 1, 3, 1);
4058 dao_set_cell (dao, 0, 1, _("Point"));
4059 dao_set_cell (dao, 2, 1, _("Rank"));
4060 dao_set_cell (dao, 3, 1, _("Percentile Rank"));
4061 analysis_tools_write_label (val_org, dao, &info->base, 1, 1, col + 1);
4063 rows = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4064 (val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4066 expr_large = gnm_expr_new_funcall2
4067 (fd_large, gnm_expr_new_constant (value_dup (val_org)),
4068 gnm_expr_new_binary (gnm_expr_new_binary
4069 (gnm_expr_new_funcall (fd_row, NULL),
4070 GNM_EXPR_OP_SUB,
4071 gnm_expr_new_funcall1
4072 (fd_row, dao_get_cellref (dao, 1, 2))),
4073 GNM_EXPR_OP_ADD,
4074 gnm_expr_new_constant (value_new_int (1))));
4075 dao_set_array_expr (dao, 1, 2, 1, rows, gnm_expr_copy (expr_large));
4077 /* If there are ties the following will only give us the first occurrence... */
4078 expr_position = gnm_expr_new_funcall3 (fd_match, expr_large,
4079 gnm_expr_new_constant (value_dup (val_org)),
4080 gnm_expr_new_constant (value_new_int (0)));
4082 dao_set_array_expr (dao, 0, 2, 1, rows, expr_position);
4084 expr_rank = gnm_expr_new_funcall2 (fd_rank,
4085 make_cellref (-1,0),
4086 gnm_expr_new_constant (value_dup (val_org)));
4087 if (info->av_ties) {
4088 GnmExpr const *expr_rank_lower;
4089 GnmExpr const *expr_rows_p_one;
4090 GnmExpr const *expr_rows;
4091 GnmFunc *fd_count;
4092 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4093 gnm_func_inc_usage (fd_count);
4095 expr_rows = gnm_expr_new_funcall1
4096 (fd_count, gnm_expr_new_constant (value_dup (val_org)));
4097 expr_rows_p_one = gnm_expr_new_binary
4098 (expr_rows,
4099 GNM_EXPR_OP_ADD,
4100 gnm_expr_new_constant (value_new_int (1)));
4101 expr_rank_lower = gnm_expr_new_funcall3
4102 (fd_rank,
4103 make_cellref (-1,0),
4104 gnm_expr_new_constant (value_dup (val_org)),
4105 gnm_expr_new_constant (value_new_int (1)));
4106 expr_rank = gnm_expr_new_binary
4107 (gnm_expr_new_binary
4108 (gnm_expr_new_binary (expr_rank, GNM_EXPR_OP_SUB, expr_rank_lower),
4109 GNM_EXPR_OP_ADD, expr_rows_p_one),
4110 GNM_EXPR_OP_DIV,
4111 gnm_expr_new_constant (value_new_int (2)));
4113 gnm_func_dec_usage (fd_count);
4115 expr_percentile = gnm_expr_new_funcall3 (fd_percentrank,
4116 gnm_expr_new_constant (value_dup (val_org)),
4117 make_cellref (-2,0),
4118 gnm_expr_new_constant (value_new_int (10)));
4120 dao_set_percent (dao, 3, 2, 3, 1 + rows);
4121 for (i = 2; i < rows + 2; i++) {
4122 dao_set_cell_expr ( dao, 2, i, gnm_expr_copy (expr_rank));
4123 dao_set_cell_expr ( dao, 3, i, gnm_expr_copy (expr_percentile));
4127 dao->offset_col += 4;
4128 value_release (val_org);
4129 gnm_expr_free (expr_rank);
4130 gnm_expr_free (expr_percentile);
4133 gnm_func_dec_usage (fd_large);
4134 gnm_func_dec_usage (fd_row);
4135 gnm_func_dec_usage (fd_rank);
4136 gnm_func_dec_usage (fd_match);
4137 gnm_func_dec_usage (fd_percentrank);
4139 dao_redraw_respan (dao);
4141 return FALSE;
4144 gboolean
4145 analysis_tool_ranking_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4146 analysis_tool_engine_t selector, gpointer result)
4148 analysis_tools_data_ranking_t *info = specs;
4150 switch (selector) {
4151 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4152 return (dao_command_descriptor (dao, _("Ranks (%s)"), result)
4153 == NULL);
4154 case TOOL_ENGINE_UPDATE_DAO:
4155 prepare_input_range (&info->base.input, info->base.group_by);
4156 dao_adjust (dao, 4 * g_slist_length (info->base.input),
4157 2 + analysis_tool_calc_length (specs));
4158 return FALSE;
4159 case TOOL_ENGINE_CLEAN_UP:
4160 return analysis_tool_generic_clean (specs);
4161 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4162 return FALSE;
4163 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4164 dao_prepare_output (NULL, dao, _("Ranks"));
4165 return FALSE;
4166 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4167 return dao_format_output (dao, _("Ranks"));
4168 case TOOL_ENGINE_PERFORM_CALC:
4169 default:
4170 return analysis_tool_ranking_engine_run (dao, specs);
4172 return TRUE; /* We shouldn't get here */
4178 /************* Anova: Single Factor Tool **********************************
4180 * The results are given in a table which can be printed out in a new
4181 * sheet, in a new workbook, or simply into an existing sheet.
/*
 * analysis_tool_anova_single_engine_run:
 * @dao: output destination
 * @specs: an analysis_tools_data_anova_single_t
 *
 * Writes the single factor ANOVA report: a title, a SUMMARY table with one
 * row per input range (count, sum, average, variance), and an ANOVA table
 * (SS, df, MS, F, P-value, F critical).  All results are written as
 * formulas.  dao->offset_row is moved while the sections are written, and
 * both offsets are reset to 0 before returning.
 *
 * Always returns FALSE.
 */
4185 static gboolean
4186 analysis_tool_anova_single_engine_run (data_analysis_output_t *dao, gpointer specs)
4188 analysis_tools_data_anova_single_t *info = specs;
4189 GSList *inputdata = info->base.input;
4190 GnmFunc *fd_sum;
4191 GnmFunc *fd_count;
4192 GnmFunc *fd_mean;
4193 GnmFunc *fd_var;
4194 GnmFunc *fd_devsq;
4196 guint index;
4198 dao_set_italic (dao, 0, 0, 0, 2);
4199 dao_set_cell (dao, 0, 0, _("Anova: Single Factor"));
4200 dao_set_cell (dao, 0, 2, _("SUMMARY"));
4202 dao_set_italic (dao, 0, 3, 4, 3);
4203 set_cell_text_row (dao, 0, 3, _("/Groups"
4204 "/Count"
4205 "/Sum"
4206 "/Average"
4207 "/Variance"));
4209 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
4210 gnm_func_inc_usage (fd_mean);
4211 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
4212 gnm_func_inc_usage (fd_var);
4213 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
4214 gnm_func_inc_usage (fd_sum);
4215 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
4216 gnm_func_inc_usage (fd_count);
4217 fd_devsq = gnm_func_lookup_or_add_placeholder ("DEVSQ");
4218 gnm_func_inc_usage (fd_devsq);
/* From here on coordinates are relative to the first SUMMARY data row.
 * Stop early if the output area has no rows left for it. */
4220 dao->offset_row += 4;
4221 if (dao->rows <= dao->offset_row)
4222 goto finish_anova_single_factor_tool;
4224 /* SUMMARY */
4226 for (index = 0; inputdata != NULL;
4227 inputdata = inputdata->next, index++) {
/* Private copy of the input range: the first three formulas below get
 * duplicates, the last one (variance) takes over val_org itself. */
4228 GnmValue *val_org = value_dup (inputdata->data);
4230 /* Label */
4231 dao_set_italic (dao, 0, index, 0, index);
4232 analysis_tools_write_label (val_org, dao, &info->base,
4233 0, index, index + 1);
4235 /* Count */
4236 dao_set_cell_expr
4237 (dao, 1, index,
4238 gnm_expr_new_funcall1
4239 (fd_count,
4240 gnm_expr_new_constant (value_dup (val_org))));
4242 /* Sum */
4243 dao_set_cell_expr
4244 (dao, 2, index,
4245 gnm_expr_new_funcall1
4246 (fd_sum,
4247 gnm_expr_new_constant (value_dup (val_org))));
4249 /* Average */
4250 dao_set_cell_expr
4251 (dao, 3, index,
4252 gnm_expr_new_funcall1
4253 (fd_mean,
4254 gnm_expr_new_constant (value_dup (val_org))));
4256 /* Variance */
4257 dao_set_cell_expr
4258 (dao, 4, index,
4259 gnm_expr_new_funcall1
4260 (fd_var,
4261 gnm_expr_new_constant (val_org)));
/* Skip past the SUMMARY rows plus two more; coordinates below are relative
 * to the "ANOVA" heading.  Stop early if no rows are left. */
4265 dao->offset_row += index + 2;
4266 if (dao->rows <= dao->offset_row)
4267 goto finish_anova_single_factor_tool;
4270 dao_set_italic (dao, 0, 0, 0, 4);
4271 set_cell_text_col (dao, 0, 0, _("/ANOVA"
4272 "/Source of Variation"
4273 "/Between Groups"
4274 "/Within Groups"
4275 "/Total"));
4276 dao_set_italic (dao, 1, 1, 6, 1);
4277 set_cell_text_row (dao, 1, 1, _("/SS"
4278 "/df"
4279 "/MS"
4280 "/F"
4281 "/P-value"
4282 "/F critical"));
4284 /* ANOVA */
4286 GnmExprList *sum_wdof_args = NULL;
4287 GnmExprList *sum_tdof_args = NULL;
4288 GnmExprList *arg_ss_total = NULL;
4289 GnmExprList *arg_ss_within = NULL;
4291 GnmExpr const *expr_wdof = NULL;
4292 GnmExpr const *expr_ss_total = NULL;
4293 GnmExpr const *expr_ss_within = NULL;
/* Collect, per input range (with the label removed):
 *   arg_ss_total  : the range itself
 *   arg_ss_within : DEVSQ (range)
 *   sum_wdof_args : COUNT (range) - 1
 *   sum_tdof_args : COUNT (range)                                        */
4295 for (inputdata = info->base.input; inputdata != NULL;
4296 inputdata = inputdata->next) {
4297 GnmValue *val_org = value_dup (inputdata->data);
4298 GnmExpr const *expr_one;
4299 GnmExpr const *expr_count_one;
4301 analysis_tools_remove_label (val_org,
4302 info->base.labels,
4303 info->base.group_by);
4304 expr_one = gnm_expr_new_constant (value_dup (val_org));
4306 arg_ss_total = gnm_expr_list_append
4307 (arg_ss_total,
4308 gnm_expr_new_constant (val_org));
4310 arg_ss_within = gnm_expr_list_append
4311 (arg_ss_within,
4312 gnm_expr_new_funcall1
4313 (fd_devsq, gnm_expr_copy (expr_one)));
/* expr_one itself is consumed by the COUNT call; the DEVSQ call above got
 * a copy. */
4315 expr_count_one =
4316 gnm_expr_new_funcall1 (fd_count, expr_one);
4318 sum_wdof_args = gnm_expr_list_append
4319 (sum_wdof_args,
4320 gnm_expr_new_binary(
4321 gnm_expr_copy (expr_count_one),
4322 GNM_EXPR_OP_SUB,
4323 gnm_expr_new_constant
4324 (value_new_int (1))));
4325 sum_tdof_args = gnm_expr_list_append
4326 (sum_tdof_args,
4327 expr_count_one);
/* SS total = DEVSQ over all ranges; SS within = SUM of per-range DEVSQ. */
4330 expr_ss_total = gnm_expr_new_funcall
4331 (fd_devsq, arg_ss_total);
4332 expr_ss_within = gnm_expr_new_funcall
4333 (fd_sum, arg_ss_within);
4336 /* SS between groups */
4337 GnmExpr const *expr_ss_between;
/* When the "Total" row is visible, refer to the cells two and one rows
 * below (total - within); otherwise spell both formulas out inline. */
4339 if (dao_cell_is_visible (dao, 1,4)) {
4340 expr_ss_between = gnm_expr_new_binary
4341 (make_cellref (0, 2),
4342 GNM_EXPR_OP_SUB,
4343 make_cellref (0, 1));
4345 } else {
4346 expr_ss_between = gnm_expr_new_binary
4347 (gnm_expr_copy (expr_ss_total),
4348 GNM_EXPR_OP_SUB,
4349 gnm_expr_copy (expr_ss_within));
4351 dao_set_cell_expr (dao, 1, 2, expr_ss_between);
4354 /* SS within groups */
4355 dao_set_cell_expr (dao, 1, 3, gnm_expr_copy (expr_ss_within));
4358 /* SS total groups */
4359 dao_set_cell_expr (dao, 1, 4, expr_ss_total);
4362 /* Between groups degrees of freedom */
4363 dao_set_cell_int (dao, 2, 2,
4364 g_slist_length (info->base.input) - 1);
4367 /* Within groups degrees of freedom */
4368 expr_wdof = gnm_expr_new_funcall (fd_sum, sum_wdof_args);
4369 dao_set_cell_expr (dao, 2, 3, gnm_expr_copy (expr_wdof));
4372 /* Total degrees of freedom */
4373 GnmExpr const *expr_tdof =
4374 gnm_expr_new_binary
4375 (gnm_expr_new_funcall (fd_sum, sum_tdof_args),
4376 GNM_EXPR_OP_SUB,
4377 gnm_expr_new_constant (value_new_int (1)));
4378 dao_set_cell_expr (dao, 2, 4, expr_tdof);
4381 /* MS values */
/* MS = SS / df, both taken from the two cells to the left. */
4382 GnmExpr const *expr_ms =
4383 gnm_expr_new_binary
4384 (make_cellref (-2, 0),
4385 GNM_EXPR_OP_DIV,
4386 make_cellref (-1, 0));
4387 dao_set_cell_expr (dao, 3, 2, gnm_expr_copy (expr_ms));
4388 dao_set_cell_expr (dao, 3, 3, expr_ms);
4391 /* Observed F */
4392 GnmExpr const *expr_denom;
4393 GnmExpr const *expr_f;
/* This is the last use of expr_ss_within: it is either freed (when the
 * within-groups MS cell can be referenced) or consumed by the inline
 * SS within / df within quotient. */
4395 if (dao_cell_is_visible (dao, 3, 3)) {
4396 expr_denom = make_cellref (-1, 1);
4397 gnm_expr_free (expr_ss_within);
4398 } else {
4399 expr_denom = gnm_expr_new_binary
4400 (expr_ss_within,
4401 GNM_EXPR_OP_DIV,
4402 gnm_expr_copy (expr_wdof));
4405 expr_f = gnm_expr_new_binary
4406 (make_cellref (-1, 0),
4407 GNM_EXPR_OP_DIV,
4408 expr_denom);
4409 dao_set_cell_expr(dao, 4, 2, expr_f);
4412 /* P value */
4413 GnmFunc *fd_fdist;
4414 const GnmExpr *arg1;
4415 const GnmExpr *arg2;
4416 const GnmExpr *arg3;
/* FDIST (F, df between, df within): F is one column to the left, df
 * between three columns to the left. */
4418 arg1 = make_cellref (-1, 0);
4419 arg2 = make_cellref (-3, 0);
4421 if (dao_cell_is_visible (dao, 2, 3)) {
4422 arg3 = make_cellref (-3, 1);
4423 } else {
4424 arg3 = gnm_expr_copy (expr_wdof);
4427 fd_fdist = gnm_func_lookup_or_add_placeholder ("FDIST");
4428 gnm_func_inc_usage (fd_fdist);
4430 dao_set_cell_expr
4431 (dao, 5, 2,
4432 gnm_expr_new_funcall3
4433 (fd_fdist,
4434 arg1, arg2, arg3));
4435 if (fd_fdist)
4436 gnm_func_dec_usage (fd_fdist);
4439 /* Critical F*/
4440 GnmFunc *fd_finv;
4441 const GnmExpr *arg3;
/* This is the last use of expr_wdof: it is either freed (when the df cell
 * can be referenced) or consumed as FINV's third argument. */
4443 if (dao_cell_is_visible (dao, 2, 3)) {
4444 arg3 = make_cellref (-4, 1);
4445 gnm_expr_free (expr_wdof);
4446 } else
4447 arg3 = expr_wdof;
4449 fd_finv = gnm_func_lookup_or_add_placeholder ("FINV");
4450 gnm_func_inc_usage (fd_finv);
4452 dao_set_cell_expr
4453 (dao, 6, 2,
4454 gnm_expr_new_funcall3
4455 (fd_finv,
4456 gnm_expr_new_constant
4457 (value_new_float (info->alpha)),
4458 make_cellref (-4, 0),
4459 arg3));
4460 gnm_func_dec_usage (fd_finv);
/* Common exit, also reached by the early-out jumps above: drop the
 * function references and undo the offset changes. */
4464 finish_anova_single_factor_tool:
4466 gnm_func_dec_usage (fd_mean);
4467 gnm_func_dec_usage (fd_var);
4468 gnm_func_dec_usage (fd_sum);
4469 gnm_func_dec_usage (fd_count);
4470 gnm_func_dec_usage (fd_devsq);
4472 dao->offset_row = 0;
4473 dao->offset_col = 0;
4475 dao_redraw_respan (dao);
4476 return FALSE;
4481 gboolean
4482 analysis_tool_anova_single_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4483 analysis_tool_engine_t selector, gpointer result)
4485 analysis_tools_data_anova_single_t *info = specs;
4487 switch (selector) {
4488 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4489 return (dao_command_descriptor (dao, _("Single Factor ANOVA (%s)"), result)
4490 == NULL);
4491 case TOOL_ENGINE_UPDATE_DAO:
4492 prepare_input_range (&info->base.input, info->base.group_by);
4493 dao_adjust (dao, 7, 11 + g_slist_length (info->base.input));
4494 return FALSE;
4495 case TOOL_ENGINE_CLEAN_UP:
4496 return analysis_tool_generic_clean (specs);
4497 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4498 return FALSE;
4499 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4500 dao_prepare_output (NULL, dao, _("Anova"));
4501 return FALSE;
4502 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4503 return dao_format_output (dao, _("Single Factor ANOVA"));
4504 case TOOL_ENGINE_PERFORM_CALC:
4505 default:
4506 return analysis_tool_anova_single_engine_run (dao, specs);
4508 return TRUE; /* We shouldn't get here */
4512 /************* Fourier Analysis Tool **************************************
4514 * This tool performs a fast Fourier transform, calculating the Fourier
4515 * transform as defined in Weaver: Theory of Discrete and Continuous Fourier Analysis
4521 static gboolean
4522 analysis_tool_fourier_engine_run (data_analysis_output_t *dao,
4523 analysis_tools_data_fourier_t *info)
4525 GSList *data = info->base.input;
4526 int col = 0;
4528 GnmFunc *fd_fourier;
4530 fd_fourier = gnm_func_lookup_or_add_placeholder ("FOURIER");
4531 gnm_func_inc_usage (fd_fourier);
4533 dao_set_merge (dao, 0, 0, 1, 0);
4534 dao_set_italic (dao, 0, 0, 0, 0);
4535 dao_set_cell (dao, 0, 0, info->inverse ? _("Inverse Fourier Transform")
4536 : _("Fourier Transform"));
4538 for (; data; data = data->next, col++) {
4539 GnmValue *val_org = value_dup (data->data);
4540 GnmExpr const *expr_fourier;
4541 int rows, n;
4543 dao_set_italic (dao, 0, 1, 1, 2);
4544 set_cell_text_row (dao, 0, 2, _("/Real"
4545 "/Imaginary"));
4546 dao_set_merge (dao, 0, 1, 1, 1);
4547 analysis_tools_write_label (val_org, dao, &info->base, 0, 1, col + 1);
4549 n = (val_org->v_range.cell.b.row - val_org->v_range.cell.a.row + 1) *
4550 (val_org->v_range.cell.b.col - val_org->v_range.cell.a.col + 1);
4551 rows = 1;
4552 while (rows < n)
4553 rows *= 2;
4555 expr_fourier = gnm_expr_new_funcall3
4556 (fd_fourier,
4557 gnm_expr_new_constant (val_org),
4558 gnm_expr_new_constant (value_new_bool (info->inverse)),
4559 gnm_expr_new_constant (value_new_bool (TRUE)));
4561 dao_set_array_expr (dao, 0, 3, 2, rows, expr_fourier);
4563 dao->offset_col += 2;
4566 gnm_func_dec_usage (fd_fourier);
4568 dao_redraw_respan (dao);
4570 return FALSE;
4573 static int
4574 analysis_tool_fourier_calc_length (analysis_tools_data_fourier_t *info)
4576 int m = 1, n = analysis_tool_calc_length (&info->base);
4578 while (m < n)
4579 m *= 2;
4580 return m;
4584 gboolean
4585 analysis_tool_fourier_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
4586 analysis_tool_engine_t selector, gpointer result)
4588 analysis_tools_data_fourier_t *info = specs;
4590 switch (selector) {
4591 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
4592 return (dao_command_descriptor (dao, _("Fourier Series (%s)"), result)
4593 == NULL);
4594 case TOOL_ENGINE_UPDATE_DAO:
4595 prepare_input_range (&info->base.input, info->base.group_by);
4596 dao_adjust (dao, 2 * g_slist_length (info->base.input),
4597 3 + analysis_tool_fourier_calc_length (specs));
4598 return FALSE;
4599 case TOOL_ENGINE_CLEAN_UP:
4600 return analysis_tool_generic_clean (specs);
4601 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
4602 return FALSE;
4603 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
4604 dao_prepare_output (NULL, dao, _("Fourier Series"));
4605 return FALSE;
4606 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
4607 return dao_format_output (dao, _("Fourier Series"));
4608 case TOOL_ENGINE_PERFORM_CALC:
4609 default:
4610 return analysis_tool_fourier_engine_run (dao, specs);
4612 return TRUE; /* We shouldn't get here */