GnmFunc: make this a GObject.
[gnumeric.git] / src / tools / analysis-principal-components.c
blobb5ab82571a68d50f7bdcd30e2c8b56945b2f1625
1 /*
2 * analysis-principal-components.c:
4 * Author:
5 * Andreas J. Guelzow <aguelzow@pyrshep.ca>
7 * (C) Copyright 2009 by Andreas J. Guelzow <aguelzow@pyrshep.ca>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, see <https://www.gnu.org/licenses/>.
24 #include <gnumeric-config.h>
25 #include <glib/gi18n-lib.h>
26 #include <gnumeric.h>
27 #include <tools/analysis-principal-components.h>
28 #include <tools/analysis-tools.h>
29 #include <value.h>
30 #include <ranges.h>
31 #include <expr.h>
32 #include <func.h>
33 #include <numbers.h>
35 static gboolean
36 analysis_tool_principal_components_engine_run (data_analysis_output_t *dao,
37 analysis_tools_data_generic_t *info)
39 int l = g_slist_length (info->input), i;
40 GSList *inputdata;
42 GnmFunc *fd_mean;
43 GnmFunc *fd_var;
44 GnmFunc *fd_eigen;
45 GnmFunc *fd_mmult;
46 GnmFunc *fd_munit;
47 GnmFunc *fd_sqrt;
48 GnmFunc *fd_count;
49 GnmFunc *fd_sum;
50 GnmFunc *fd_and;
51 GnmFunc *fd_if;
53 GnmExpr const *expr;
54 GnmExpr const *expr_count;
55 GnmExpr const *expr_munit;
56 GnmExpr const *expr_and;
58 int data_points;
59 GnmExprList *and_args = NULL;
60 GnmEvalPos ep;
62 if (!dao_cell_is_visible (dao, l, 9 + 3 * l)) {
63 dao_set_bold (dao, 0, 0, 0, 0);
64 dao_set_italic (dao, 0, 0, 0, 0);
65 dao_set_cell (dao, 0, 0,
66 _("Principal components analysis has "
67 "insufficient space."));
68 return 0;
71 fd_mean = gnm_func_lookup_or_add_placeholder ("AVERAGE");
72 gnm_func_inc_usage (fd_mean);
73 fd_var = gnm_func_lookup_or_add_placeholder ("VAR");
74 gnm_func_inc_usage (fd_var);
75 fd_eigen = gnm_func_lookup_or_add_placeholder ("EIGEN");
76 gnm_func_inc_usage (fd_eigen);
77 fd_mmult = gnm_func_lookup_or_add_placeholder ("MMULT");
78 gnm_func_inc_usage (fd_mmult);
79 fd_munit = gnm_func_lookup_or_add_placeholder ("MUNIT");
80 gnm_func_inc_usage (fd_munit);
81 fd_sqrt = gnm_func_lookup_or_add_placeholder ("SQRT");
82 gnm_func_inc_usage (fd_sqrt);
83 fd_count = gnm_func_lookup_or_add_placeholder ("COUNT");
84 gnm_func_inc_usage (fd_count);
85 fd_sum = gnm_func_lookup_or_add_placeholder ("SUM");
86 gnm_func_inc_usage (fd_sum);
87 fd_and = gnm_func_lookup_or_add_placeholder ("AND");
88 gnm_func_inc_usage (fd_and);
89 fd_if = gnm_func_lookup_or_add_placeholder ("IF");
90 gnm_func_inc_usage (fd_if);
92 dao_set_bold (dao, 0, 0, 0, 0);
93 dao_set_italic (dao, 0, 0, 0, 11 + 3 * l);
94 dao_set_format (dao, 0, 0, 0, 0,
95 /* translator info: The quotation marks in the next strings need to */
96 /* remain since these are Excel-style format strings */
97 _("\"Principal Components Analysis\";"
98 "[Red]\"Principal Components Analysis is invalid.\""));
99 dao_set_align (dao, 0, 0, 0, 0,
100 GNM_HALIGN_LEFT, GNM_VALIGN_BOTTOM);
102 dao->offset_row++;
103 analysis_tool_table (dao, info, _("Covariances"), "COVAR", TRUE);
104 dao->offset_row--;
106 for (i = 1, inputdata = info->input; inputdata != NULL; i++, inputdata = inputdata->next)
107 analysis_tools_write_label (inputdata->data, dao, info, 0, 9 + 2 * l + i, i);
109 eval_pos_init_sheet (&ep,
110 ((GnmValue *)(info->input->data))->v_range.cell.a.sheet);
111 data_points = value_area_get_width (info->input->data, &ep) *
112 value_area_get_height (info->input->data, &ep);
114 for (i = 0; i < l; i++)
115 and_args = gnm_expr_list_prepend
116 (and_args,
117 gnm_expr_new_binary
118 (gnm_expr_new_constant (value_new_int (data_points)),
119 GNM_EXPR_OP_EQUAL,
120 make_cellref (1 + i, 3 + l)));
121 expr_and = gnm_expr_new_funcall (fd_and, and_args);
122 dao_set_cell_expr (dao, 0, 0,
123 gnm_expr_new_funcall3
124 (fd_if,
125 expr_and,
126 gnm_expr_new_constant (value_new_int (1)),
127 gnm_expr_new_constant (value_new_int (-1))));
128 dao_set_merge (dao,0,0,2,0);
129 set_cell_text_col (dao, 0, 3 + l,
130 _("/Count"
131 "/Mean"
132 "/Variance"
133 "//Eigenvalues"
134 "/Eigenvectors"));
135 dao_set_cell (dao, 0, 11 + 3 * l, _("Percent of Trace"));
136 dao_set_italic (dao, 0, 9 + 2 * l, 1 + l, 9 + 2 * l);
137 dao_set_percent (dao, 1, 11 + 3 * l, 1 + l, 11 + 3 * l);
139 for (i = 1, inputdata = info->input; inputdata != NULL; i++, inputdata = inputdata->next) {
140 expr = gnm_expr_new_constant (value_dup (inputdata->data));
142 dao_set_cell_expr (dao, i, 3 + l,
143 gnm_expr_new_funcall1 (fd_count, gnm_expr_copy (expr)));
144 dao_set_cell_expr (dao, i, 4 + l,
145 gnm_expr_new_funcall1 (fd_mean, gnm_expr_copy (expr)));
146 dao_set_cell_expr (dao, i, 5 + l,
147 gnm_expr_new_funcall1 (fd_var, expr));
150 expr_count = gnm_expr_new_binary (make_cellref (0,-4), GNM_EXPR_OP_DIV,
151 gnm_expr_new_binary (make_cellref (0,-4), GNM_EXPR_OP_SUB,
152 gnm_expr_new_constant (value_new_int (1))));
153 expr = gnm_expr_new_funcall1
154 (fd_eigen, gnm_expr_new_binary
155 (expr_count, GNM_EXPR_OP_MULT, make_rangeref (0, - (5 + l), l - 1, - 6)));
156 dao_set_array_expr (dao, 1, 7 + l, l, l + 1, expr);
158 for (i = 1; i <= l; i++) {
159 dao_set_align (dao, i, 9 + 2 * l, i, 9 + 2 * l,
160 GNM_HALIGN_CENTER, GNM_VALIGN_BOTTOM);
161 dao_set_cell_printf (dao, i, 9 + 2 * l, "\xce\xbe%i", i);
162 dao_set_cell_expr (dao, i, 11 + 3 * l,
163 gnm_expr_new_binary (make_cellref (0,- 4 - 2 * l),
164 GNM_EXPR_OP_DIV,
165 gnm_expr_new_funcall1
166 (fd_sum,
167 dao_get_rangeref (dao, 1, 7 + l, l, 7 + l))));
170 expr_munit = gnm_expr_new_funcall1 (fd_munit, gnm_expr_new_constant (value_new_int (l)));
171 expr = gnm_expr_new_funcall2 (fd_mmult,
172 gnm_expr_new_binary
173 (gnm_expr_new_funcall1
174 (fd_sqrt, gnm_expr_new_binary
175 (gnm_expr_new_constant (value_new_int (1)),
176 GNM_EXPR_OP_DIV,
177 make_rangeref (0, - 5 - l, l - 1, - 5 - l))),
178 GNM_EXPR_OP_MULT,
179 gnm_expr_copy (expr_munit)),
180 make_rangeref (0, - 2 - l, l - 1, - 3));
181 expr = gnm_expr_new_funcall2 (fd_mmult, expr,
182 gnm_expr_new_binary
183 (gnm_expr_new_funcall1
184 (fd_sqrt, make_rangeref (0, - 3 - l, l - 1, - 3 - l)),
185 GNM_EXPR_OP_MULT,
186 expr_munit));
187 dao_set_array_expr (dao, 1, 10 + 2 * l, l, l, expr);
189 gnm_func_dec_usage (fd_mean);
190 gnm_func_dec_usage (fd_var);
191 gnm_func_dec_usage (fd_eigen);
192 gnm_func_dec_usage (fd_mmult);
193 gnm_func_dec_usage (fd_munit);
194 gnm_func_dec_usage (fd_sqrt);
195 gnm_func_dec_usage (fd_count);
196 gnm_func_dec_usage (fd_sum);
197 gnm_func_dec_usage (fd_and);
198 gnm_func_dec_usage (fd_if);
200 dao_redraw_respan (dao);
201 return 0;
204 gboolean
205 analysis_tool_principal_components_engine (G_GNUC_UNUSED GOCmdContext *gcc, data_analysis_output_t *dao, gpointer specs,
206 analysis_tool_engine_t selector, gpointer result)
208 analysis_tools_data_generic_t *info = specs;
210 switch (selector) {
211 case TOOL_ENGINE_UPDATE_DESCRIPTOR:
212 return (dao_command_descriptor
213 (dao, _("Principal Components Analysis (%s)"), result)
214 == NULL);
215 case TOOL_ENGINE_UPDATE_DAO:
216 prepare_input_range (&info->input, info->group_by);
217 dao_adjust (dao, 1 + g_slist_length (info->input),
218 12 + 3 * g_slist_length (info->input));
219 return FALSE;
220 case TOOL_ENGINE_CLEAN_UP:
221 return analysis_tool_generic_clean (specs);
222 case TOOL_ENGINE_LAST_VALIDITY_CHECK:
223 return FALSE;
224 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE:
225 dao_prepare_output (NULL, dao, _("Principal Components Analysis"));
226 return FALSE;
227 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE:
228 return dao_format_output (dao, _("Principal Components Analysis"));
229 case TOOL_ENGINE_PERFORM_CALC:
230 default:
231 return analysis_tool_principal_components_engine_run (dao, specs);
233 return TRUE; /* We shouldn't get here */