Update Spanish translation
[gnumeric.git] / src / rangefunc.c
blob97a699fa8ffdf1b7f740515f4a2f1caacc3398c6
1 /*
2 * rangefunc.c: Functions on ranges (data sets).
4 * Authors:
5 * Copyright (C) 2007-2009 Morten Welinder (terra@gnome.org)
6 * Andreas J. Guelzow <aguelzow@taliesin.ca>
7 */
9 #include <gnumeric-config.h>
10 #include <gnumeric.h>
11 #include <rangefunc.h>
13 #include <mathfunc.h>
14 #include <sf-gamma.h>
15 #include <gutils.h>
16 #include <math.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <tools/analysis-tools.h>
21 int
22 gnm_range_count (G_GNUC_UNUSED gnm_float const *xs, int n, gnm_float *res)
24 *res = n;
25 return 0;
28 int
29 gnm_range_hypot (gnm_float const *xs, int n, gnm_float *res)
31 /* Drop outside zeros because the n<=2 cases are more accurate. */
32 while (n > 0 && xs[0] == 0)
33 xs++, n--;
34 while (n > 0 && xs[n - 1] == 0)
35 n--;
37 switch (n) {
38 case 0: *res = 0; return 0;
39 case 1: *res = gnm_abs (xs[0]); return 0;
40 case 2: *res = gnm_hypot (xs[0], xs[1]); return 0;
41 default:
42 if (gnm_range_sumsq (xs, n, res))
43 return 1;
44 *res = gnm_sqrt (*res);
45 return 0;
49 /* Average absolute deviation from mean. */
50 int
51 gnm_range_avedev (gnm_float const *xs, int n, gnm_float *res)
53 if (n > 0) {
54 gnm_float m, s = 0;
55 int i;
57 gnm_range_average (xs, n, &m);
58 for (i = 0; i < n; i++)
59 s += gnm_abs (xs[i] - m);
60 *res = s / n;
61 return 0;
62 } else
63 return 1;
66 /* Variance with weight N. */
67 int
68 gnm_range_var_pop (gnm_float const *xs, int n, gnm_float *res)
70 if (n > 0) {
71 gnm_float q;
73 gnm_range_devsq (xs, n, &q);
74 *res = q / n;
75 return 0;
76 } else
77 return 1;
80 /* Variance with weight N-1. */
81 int
82 gnm_range_var_est (gnm_float const *xs, int n, gnm_float *res)
84 if (n > 1) {
85 gnm_float q;
87 gnm_range_devsq (xs, n, &q);
88 *res = q / (n - 1);
89 return 0;
90 } else
91 return 1;
94 /* Standard deviation with weight N. */
95 int
96 gnm_range_stddev_pop (gnm_float const *xs, int n, gnm_float *res)
98 if (gnm_range_var_pop (xs, n, res))
99 return 1;
100 else {
101 *res = gnm_sqrt (*res);
102 return 0;
106 /* Standard deviation with weight N-1. */
108 gnm_range_stddev_est (gnm_float const *xs, int n, gnm_float *res)
110 if (gnm_range_var_est (xs, n, res))
111 return 1;
112 else {
113 *res = gnm_sqrt (*res);
114 return 0;
118 /* Population skew. */
120 gnm_range_skew_pop (gnm_float const *xs, int n, gnm_float *res)
122 gnm_float m, s, dxn, x3 = 0;
123 int i;
125 if (n < 1 || gnm_range_average (xs, n, &m) || gnm_range_stddev_pop (xs, n, &s))
126 return 1;
127 if (s == 0)
128 return 1;
130 for (i = 0; i < n; i++) {
131 dxn = (xs[i] - m) / s;
132 x3 += dxn * dxn *dxn;
135 *res = x3 / n;
136 return 0;
139 /* Maximum-likelyhood estimator for skew. */
141 gnm_range_skew_est (gnm_float const *xs, int n, gnm_float *res)
143 gnm_float m, s, dxn, x3 = 0;
144 int i;
146 if (n < 3 || gnm_range_average (xs, n, &m) || gnm_range_stddev_est (xs, n, &s))
147 return 1;
148 if (s == 0)
149 return 1;
151 for (i = 0; i < n; i++) {
152 dxn = (xs[i] - m) / s;
153 x3 += dxn * dxn *dxn;
156 *res = ((x3 * n) / (n - 1)) / (n - 2);
157 return 0;
160 /* Population kurtosis (with offset 3). */
162 gnm_range_kurtosis_m3_pop (gnm_float const *xs, int n, gnm_float *res)
164 gnm_float m, s, dxn, x4 = 0;
165 int i;
167 if (n < 1 || gnm_range_average (xs, n, &m) || gnm_range_stddev_pop (xs, n, &s))
168 return 1;
169 if (s == 0)
170 return 1;
172 for (i = 0; i < n; i++) {
173 dxn = (xs[i] - m) / s;
174 x4 += (dxn * dxn) * (dxn * dxn);
177 *res = x4 / n - 3;
178 return 0;
181 /* Unbiased, I hope, estimator for kurtosis (with offset 3). */
183 gnm_range_kurtosis_m3_est (gnm_float const *xs, int n, gnm_float *res)
185 gnm_float m, s, dxn, x4 = 0;
186 gnm_float common_den, nth, three;
187 int i;
189 if (n < 4 || gnm_range_average (xs, n, &m) || gnm_range_stddev_est (xs, n, &s))
190 return 1;
191 if (s == 0)
192 return 1;
194 for (i = 0; i < n; i++) {
195 dxn = (xs[i] - m) / s;
196 x4 += (dxn * dxn) * (dxn * dxn);
199 common_den = (gnm_float)(n - 2) * (n - 3);
200 nth = (gnm_float)n * (n + 1) / ((n - 1) * common_den);
201 three = 3.0 * (n - 1) * (n - 1) / common_den;
203 *res = x4 * nth - three;
204 return 0;
207 /* Harmonic mean of positive numbers. */
209 gnm_range_harmonic_mean (gnm_float const *xs, int n, gnm_float *res)
211 if (n > 0) {
212 gnm_float invsum = 0;
213 int i;
215 for (i = 0; i < n; i++) {
216 if (xs[i] <= 0)
217 return 1;
218 invsum += 1 / xs[i];
220 *res = n / invsum;
221 return 0;
222 } else
223 return 1;
226 static void
227 product_helper (gnm_float const *xs, int n,
228 gnm_float *res, int *exp2,
229 gboolean *zerop, gboolean *anynegp)
231 gnm_float x0 = xs[0];
232 *zerop = (x0 == 0);
233 *anynegp = (x0 < 0);
235 if (n == 1 || *zerop) {
236 *res = x0;
237 *exp2 = 0;
238 } else {
239 int e;
240 gnm_float mant = gnm_frexp (x0, &e);
241 int i;
243 for (i = 1; i < n; i++) {
244 int thise;
245 gnm_float x = xs[i];
247 if (x == 0) {
248 *zerop = TRUE;
249 *res = 0;
250 *exp2 = 0;
251 return;
253 if (x < 0) *anynegp = TRUE;
255 mant *= gnm_frexp (x, &thise);
256 e += thise;
258 /* Keep 0.5 < |mant| <= 1. */
259 if (gnm_abs (mant) <= 0.5) {
260 mant *= 2;
261 e--;
265 *exp2 = e;
266 *res = mant;
271 /* Geometric mean of positive numbers. */
273 gnm_range_geometric_mean (gnm_float const *xs, int n, gnm_float *res)
275 int exp2;
276 gboolean zerop, anynegp;
278 if (n < 1)
279 return 1;
281 product_helper (xs, n, res, &exp2, &zerop, &anynegp);
282 if (zerop || anynegp)
283 return 1;
285 /* Now compute (res * 2^exp2) ^ (1/n). */
286 if (exp2 >= 0)
287 *res = gnm_pow (*res * gnm_pow2 (exp2 % n), 1.0 / n) * gnm_pow2 (exp2 / n);
288 else
289 *res = gnm_pow (*res / gnm_pow2 ((-exp2) % n), 1.0 / n) / gnm_pow2 ((-exp2) / n);
291 return 0;
295 /* Product. */
297 gnm_range_product (gnm_float const *xs, int n, gnm_float *res)
299 if (n == 0) {
300 *res = 1;
301 } else {
302 int exp2;
303 gboolean zerop, anynegp;
305 product_helper (xs, n, res, &exp2, &zerop, &anynegp);
306 if (exp2)
307 *res = gnm_ldexp (*res, exp2);
310 return 0;
314 gnm_range_multinomial (gnm_float const *xs, int n, gnm_float *res)
316 gnm_float result = 1;
317 int sum = 0;
318 int i;
320 for (i = 0; i < n; i++) {
321 gnm_float x = xs[i];
322 int xi;
324 if (x < 0 || x > INT_MAX)
325 return 1;
327 xi = (int)x;
328 if (sum == 0 || xi == 0)
329 ; /* Nothing. */
330 else if (xi < 20) {
331 int j;
332 int f = sum + xi;
334 result *= f--;
335 for (j = 2; j <= xi; j++)
336 result = result * f-- / j;
337 } else {
338 /* Same as above, only faster. */
339 result *= combin (sum + xi, xi);
342 sum += xi;
345 *res = result;
346 return 0;
349 /* Population co-variance. */
351 gnm_range_covar_pop (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
353 gnm_float ux, uy, s = 0;
354 int i;
356 if (n <= 0 || gnm_range_average (xs, n, &ux) || gnm_range_average (ys, n, &uy))
357 return 1;
359 for (i = 0; i < n; i++)
360 s += (xs[i] - ux) * (ys[i] - uy);
361 *res = s / n;
362 return 0;
365 /* Estimation co-variance. */
367 gnm_range_covar_est (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
369 gnm_float ux, uy, s = 0;
370 int i;
372 if (n <= 1 || gnm_range_average (xs, n, &ux) || gnm_range_average (ys, n, &uy))
373 return 1;
375 for (i = 0; i < n; i++)
376 s += (xs[i] - ux) * (ys[i] - uy);
377 *res = s / (n - 1);
378 return 0;
381 /* Population correlation coefficient. */
383 gnm_range_correl_pop (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
385 gnm_float sx, sy, vxy, c;
387 if (gnm_range_stddev_pop (xs, n, &sx) || sx == 0 ||
388 gnm_range_stddev_pop (ys, n, &sy) || sy == 0 ||
389 gnm_range_covar_pop (xs, ys, n, &vxy))
390 return 1;
392 c = vxy / (sx * sy);
394 // Rounding errors can push us beyond [-1,+1]. Avoid that.
395 // This isn't a great solution, but it'll have to do until
396 // someone comes up with a better approach.
397 c = CLAMP (c, -1.0, +1.0);
399 *res = c;
400 return 0;
403 /* Population R-squared. */
405 gnm_range_rsq_pop (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
407 if (gnm_range_correl_pop (xs, ys, n, res))
408 return 1;
410 *res *= *res;
411 return 0;
414 /* Most-common element. (The one whose first occurrence comes first in
415 case of several equally common. */
417 gnm_range_mode (gnm_float const *xs, int n, gnm_float *res)
419 GHashTable *h;
420 int i;
421 gnm_float mode = 0;
422 gconstpointer mode_key = NULL;
423 int dups = 0;
425 if (n <= 1) return 1;
427 h = g_hash_table_new_full ((GHashFunc)gnm_float_hash,
428 (GCompareFunc)gnm_float_equal,
429 NULL,
430 (GDestroyNotify)g_free);
431 for (i = 0; i < n; i++) {
432 gpointer rkey, rval;
433 gboolean found = g_hash_table_lookup_extended (h, &xs[i], &rkey, &rval);
434 int *pdups;
436 if (found) {
437 pdups = (int *)rval;
438 (*pdups)++;
439 if (*pdups == dups && rkey < mode_key) {
440 mode = xs[i];
441 mode_key = rkey;
443 } else {
444 pdups = g_new (int, 1);
445 *pdups = 1;
446 rkey = (gpointer)(xs + i);
447 g_hash_table_insert (h, rkey, pdups);
450 if (*pdups > dups) {
451 dups = *pdups;
452 mode = xs[i];
453 mode_key = rkey;
456 g_hash_table_destroy (h);
458 if (dups <= 1)
459 return 1;
461 *res = mode;
462 return 0;
466 gnm_range_adtest (gnm_float const *xs, int n, gnm_float *pvalue,
467 gnm_float *statistics)
469 gnm_float mu = 0.;
470 gnm_float sigma = 1.;
472 if ((n < 8) || gnm_range_average (xs, n, &mu)
473 || gnm_range_stddev_est (xs, n, &sigma))
474 return 1;
475 else {
476 int i;
477 gnm_float total = 0.;
478 gnm_float p;
479 gnm_float *ys;
481 ys = range_sort (xs, n);
483 for (i = 0; i < n; i++) {
484 gnm_float val = (pnorm (ys[i], mu, sigma, TRUE, TRUE) +
485 pnorm (ys[n - i - 1],
486 mu, sigma, FALSE, TRUE));
487 total += ((2*i+1)* val);
490 total = - n - total/n;
491 g_free (ys);
493 total *= (1 + 0.75 / n + 2.25 / (n * n));
494 if (total < 0.2)
495 p = 1. - gnm_exp (-13.436 + 101.14 * total - 223.73 * total * total);
496 else if (total < 0.34)
497 p = 1. - gnm_exp (-8.318 + 42.796 * total - 59.938 * total * total);
498 else if (total < 0.6)
499 p = gnm_exp (0.9177 - 4.279 * total - 1.38 * total * total);
500 else
501 p = gnm_exp (1.2937 - 5.709 * total + 0.0186 * total * total);
502 if (statistics)
503 *statistics = total;
504 if (pvalue)
505 *pvalue = p;
506 return 0;