2 * rangefunc.c: Functions on ranges (data sets).
5 * Copyright (C) 2007-2009 Morten Welinder (terra@gnome.org)
6 * Andreas J. Guelzow <aguelzow@taliesin.ca>
9 #include <gnumeric-config.h>
11 #include <rangefunc.h>
19 #include <tools/analysis-tools.h>
22 gnm_range_count (G_GNUC_UNUSED gnm_float
const *xs
, int n
, gnm_float
*res
)
29 gnm_range_hypot (gnm_float
const *xs
, int n
, gnm_float
*res
)
31 /* Drop outside zeros because the n<=2 cases are more accurate. */
32 while (n
> 0 && xs
[0] == 0)
34 while (n
> 0 && xs
[n
- 1] == 0)
38 case 0: *res
= 0; return 0;
39 case 1: *res
= gnm_abs (xs
[0]); return 0;
40 case 2: *res
= gnm_hypot (xs
[0], xs
[1]); return 0;
42 if (gnm_range_sumsq (xs
, n
, res
))
44 *res
= gnm_sqrt (*res
);
49 /* Average absolute deviation from mean. */
51 gnm_range_avedev (gnm_float
const *xs
, int n
, gnm_float
*res
)
57 gnm_range_average (xs
, n
, &m
);
58 for (i
= 0; i
< n
; i
++)
59 s
+= gnm_abs (xs
[i
] - m
);
66 /* Variance with weight N. */
68 gnm_range_var_pop (gnm_float
const *xs
, int n
, gnm_float
*res
)
73 gnm_range_devsq (xs
, n
, &q
);
80 /* Variance with weight N-1. */
82 gnm_range_var_est (gnm_float
const *xs
, int n
, gnm_float
*res
)
87 gnm_range_devsq (xs
, n
, &q
);
94 /* Standard deviation with weight N. */
96 gnm_range_stddev_pop (gnm_float
const *xs
, int n
, gnm_float
*res
)
98 if (gnm_range_var_pop (xs
, n
, res
))
101 *res
= gnm_sqrt (*res
);
106 /* Standard deviation with weight N-1. */
108 gnm_range_stddev_est (gnm_float
const *xs
, int n
, gnm_float
*res
)
110 if (gnm_range_var_est (xs
, n
, res
))
113 *res
= gnm_sqrt (*res
);
118 /* Population skew. */
120 gnm_range_skew_pop (gnm_float
const *xs
, int n
, gnm_float
*res
)
122 gnm_float m
, s
, dxn
, x3
= 0;
125 if (n
< 1 || gnm_range_average (xs
, n
, &m
) || gnm_range_stddev_pop (xs
, n
, &s
))
130 for (i
= 0; i
< n
; i
++) {
131 dxn
= (xs
[i
] - m
) / s
;
132 x3
+= dxn
* dxn
*dxn
;
139 /* Maximum-likelyhood estimator for skew. */
141 gnm_range_skew_est (gnm_float
const *xs
, int n
, gnm_float
*res
)
143 gnm_float m
, s
, dxn
, x3
= 0;
146 if (n
< 3 || gnm_range_average (xs
, n
, &m
) || gnm_range_stddev_est (xs
, n
, &s
))
151 for (i
= 0; i
< n
; i
++) {
152 dxn
= (xs
[i
] - m
) / s
;
153 x3
+= dxn
* dxn
*dxn
;
156 *res
= ((x3
* n
) / (n
- 1)) / (n
- 2);
160 /* Population kurtosis (with offset 3). */
162 gnm_range_kurtosis_m3_pop (gnm_float
const *xs
, int n
, gnm_float
*res
)
164 gnm_float m
, s
, dxn
, x4
= 0;
167 if (n
< 1 || gnm_range_average (xs
, n
, &m
) || gnm_range_stddev_pop (xs
, n
, &s
))
172 for (i
= 0; i
< n
; i
++) {
173 dxn
= (xs
[i
] - m
) / s
;
174 x4
+= (dxn
* dxn
) * (dxn
* dxn
);
181 /* Unbiased, I hope, estimator for kurtosis (with offset 3). */
183 gnm_range_kurtosis_m3_est (gnm_float
const *xs
, int n
, gnm_float
*res
)
185 gnm_float m
, s
, dxn
, x4
= 0;
186 gnm_float common_den
, nth
, three
;
189 if (n
< 4 || gnm_range_average (xs
, n
, &m
) || gnm_range_stddev_est (xs
, n
, &s
))
194 for (i
= 0; i
< n
; i
++) {
195 dxn
= (xs
[i
] - m
) / s
;
196 x4
+= (dxn
* dxn
) * (dxn
* dxn
);
199 common_den
= (gnm_float
)(n
- 2) * (n
- 3);
200 nth
= (gnm_float
)n
* (n
+ 1) / ((n
- 1) * common_den
);
201 three
= 3.0 * (n
- 1) * (n
- 1) / common_den
;
203 *res
= x4
* nth
- three
;
207 /* Harmonic mean of positive numbers. */
209 gnm_range_harmonic_mean (gnm_float
const *xs
, int n
, gnm_float
*res
)
212 gnm_float invsum
= 0;
215 for (i
= 0; i
< n
; i
++) {
227 product_helper (gnm_float
const *xs
, int n
,
228 gnm_float
*res
, int *exp2
,
229 gboolean
*zerop
, gboolean
*anynegp
)
231 gnm_float x0
= xs
[0];
235 if (n
== 1 || *zerop
) {
240 gnm_float mant
= gnm_frexp (x0
, &e
);
243 for (i
= 1; i
< n
; i
++) {
253 if (x
< 0) *anynegp
= TRUE
;
255 mant
*= gnm_frexp (x
, &thise
);
258 /* Keep 0.5 < |mant| <= 1. */
259 if (gnm_abs (mant
) <= 0.5) {
271 /* Geometric mean of positive numbers. */
273 gnm_range_geometric_mean (gnm_float
const *xs
, int n
, gnm_float
*res
)
276 gboolean zerop
, anynegp
;
281 product_helper (xs
, n
, res
, &exp2
, &zerop
, &anynegp
);
282 if (zerop
|| anynegp
)
285 /* Now compute (res * 2^exp2) ^ (1/n). */
287 *res
= gnm_pow (*res
* gnm_pow2 (exp2
% n
), 1.0 / n
) * gnm_pow2 (exp2
/ n
);
289 *res
= gnm_pow (*res
/ gnm_pow2 ((-exp2
) % n
), 1.0 / n
) / gnm_pow2 ((-exp2
) / n
);
297 gnm_range_product (gnm_float
const *xs
, int n
, gnm_float
*res
)
303 gboolean zerop
, anynegp
;
305 product_helper (xs
, n
, res
, &exp2
, &zerop
, &anynegp
);
307 *res
= gnm_ldexp (*res
, exp2
);
314 gnm_range_multinomial (gnm_float
const *xs
, int n
, gnm_float
*res
)
316 gnm_float result
= 1;
320 for (i
= 0; i
< n
; i
++) {
324 if (x
< 0 || x
> INT_MAX
)
328 if (sum
== 0 || xi
== 0)
335 for (j
= 2; j
<= xi
; j
++)
336 result
= result
* f
-- / j
;
338 /* Same as above, only faster. */
339 result
*= combin (sum
+ xi
, xi
);
349 /* Population co-variance. */
351 gnm_range_covar_pop (gnm_float
const *xs
, const gnm_float
*ys
, int n
, gnm_float
*res
)
353 gnm_float ux
, uy
, s
= 0;
356 if (n
<= 0 || gnm_range_average (xs
, n
, &ux
) || gnm_range_average (ys
, n
, &uy
))
359 for (i
= 0; i
< n
; i
++)
360 s
+= (xs
[i
] - ux
) * (ys
[i
] - uy
);
365 /* Estimation co-variance. */
367 gnm_range_covar_est (gnm_float
const *xs
, const gnm_float
*ys
, int n
, gnm_float
*res
)
369 gnm_float ux
, uy
, s
= 0;
372 if (n
<= 1 || gnm_range_average (xs
, n
, &ux
) || gnm_range_average (ys
, n
, &uy
))
375 for (i
= 0; i
< n
; i
++)
376 s
+= (xs
[i
] - ux
) * (ys
[i
] - uy
);
381 /* Population correlation coefficient. */
383 gnm_range_correl_pop (gnm_float
const *xs
, const gnm_float
*ys
, int n
, gnm_float
*res
)
385 gnm_float sx
, sy
, vxy
, c
;
387 if (gnm_range_stddev_pop (xs
, n
, &sx
) || sx
== 0 ||
388 gnm_range_stddev_pop (ys
, n
, &sy
) || sy
== 0 ||
389 gnm_range_covar_pop (xs
, ys
, n
, &vxy
))
394 // Rounding errors can push us beyond [-1,+1]. Avoid that.
395 // This isn't a great solution, but it'll have to do until
396 // someone comes up with a better approach.
397 c
= CLAMP (c
, -1.0, +1.0);
403 /* Population R-squared. */
405 gnm_range_rsq_pop (gnm_float
const *xs
, const gnm_float
*ys
, int n
, gnm_float
*res
)
407 if (gnm_range_correl_pop (xs
, ys
, n
, res
))
414 /* Most-common element. (The one whose first occurrence comes first in
415 case of several equally common. */
417 gnm_range_mode (gnm_float
const *xs
, int n
, gnm_float
*res
)
422 gconstpointer mode_key
= NULL
;
425 if (n
<= 1) return 1;
427 h
= g_hash_table_new_full ((GHashFunc
)gnm_float_hash
,
428 (GCompareFunc
)gnm_float_equal
,
430 (GDestroyNotify
)g_free
);
431 for (i
= 0; i
< n
; i
++) {
433 gboolean found
= g_hash_table_lookup_extended (h
, &xs
[i
], &rkey
, &rval
);
439 if (*pdups
== dups
&& rkey
< mode_key
) {
444 pdups
= g_new (int, 1);
446 rkey
= (gpointer
)(xs
+ i
);
447 g_hash_table_insert (h
, rkey
, pdups
);
456 g_hash_table_destroy (h
);
466 gnm_range_adtest (gnm_float
const *xs
, int n
, gnm_float
*pvalue
,
467 gnm_float
*statistics
)
470 gnm_float sigma
= 1.;
472 if ((n
< 8) || gnm_range_average (xs
, n
, &mu
)
473 || gnm_range_stddev_est (xs
, n
, &sigma
))
477 gnm_float total
= 0.;
481 ys
= range_sort (xs
, n
);
483 for (i
= 0; i
< n
; i
++) {
484 gnm_float val
= (pnorm (ys
[i
], mu
, sigma
, TRUE
, TRUE
) +
485 pnorm (ys
[n
- i
- 1],
486 mu
, sigma
, FALSE
, TRUE
));
487 total
+= ((2*i
+1)* val
);
490 total
= - n
- total
/n
;
493 total
*= (1 + 0.75 / n
+ 2.25 / (n
* n
));
495 p
= 1. - gnm_exp (-13.436 + 101.14 * total
- 223.73 * total
* total
);
496 else if (total
< 0.34)
497 p
= 1. - gnm_exp (-8.318 + 42.796 * total
- 59.938 * total
* total
);
498 else if (total
< 0.6)
499 p
= gnm_exp (0.9177 - 4.279 * total
- 1.38 * total
* total
);
501 p
= gnm_exp (1.2937 - 5.709 * total
+ 0.0186 * total
* total
);