expressions: Merge all the little generator programs into generate.pl.
[pspp.git] / src / math / histogram.c
blob8aaa16f9e0a684bca8333fee51d6c6874319a69b
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
22 #include <math.h>
24 #include "data/settings.h"
25 #include "libpspp/message.h"
26 #include "libpspp/assertion.h"
27 #include "libpspp/cast.h"
28 #include "math/chart-geometry.h"
30 #include "gettext.h"
31 #define _(msgid) gettext (msgid)
32 #define N_(msgid) msgid
35 #include "gl/xalloc.h"
37 void
38 histogram_add (struct histogram *h, double y, double c)
40 struct statistic *stat = &h->parent;
41 stat->accumulate (stat, NULL, c, 0, y);
44 static void
45 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
47 struct histogram *hist = UP_CAST (s, struct histogram, parent);
49 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 static void
53 destroy (struct statistic *s)
55 struct histogram *h = UP_CAST (s, struct histogram, parent);
56 gsl_histogram_free (h->gsl_hist);
57 free (s);
61 /* Find a bin width which is adapted to the scaling of the x axis
62 In the example here, the binwidth is half of the tick interval.
64 binwidth
65 > <
66 |....+....+....+. .+....|
67 LOWER 1 2 3 N_TICKS
68 ^LOWDBL ^HIGHDBL
70 This only works, when the min and max value for the histogram are adapted
71 such that (max-min) is a multiple of the binwidth. Then the location of the
72 first bin has to be aligned to the ticks.
74 static int
75 hist_find_pretty_no_of_bins(double bin_width_in, double min, double max,
76 double *adjusted_min, double *adjusted_max)
78 double lower, interval;
79 int n_ticks;
80 double binwidth;
81 int nbins;
83 chart_get_scale (max, min, &lower, &interval, &n_ticks);
85 if (bin_width_in >= 2 * interval)
87 binwidth = floor(bin_width_in/interval) * interval;
88 *adjusted_min = lower;
90 else if (bin_width_in >= 1.5 * interval)
92 binwidth = 1.5 * interval;
93 if (min < (lower + 0.5 * interval))
94 *adjusted_min = lower;
95 else
96 *adjusted_min = lower + 0.5 * interval;
98 else if (bin_width_in >= interval)
100 binwidth = interval;
101 *adjusted_min = lower;
103 else if (bin_width_in >= (2.0/3.0 * interval))
105 binwidth = (2.0/3.0 * interval);
106 if (min >= lower + binwidth)
107 *adjusted_min = lower + binwidth;
108 else
109 *adjusted_min = lower;
111 else
113 int i;
114 for(i = 2; bin_width_in < interval/i; i++);
115 binwidth = interval/i;
116 *adjusted_min = floor((min - lower)/binwidth)*binwidth + lower;
119 nbins = ceil((max-*adjusted_min)/binwidth);
120 *adjusted_max = nbins*binwidth + *adjusted_min;
122 /* adjusted_max should never be smaller than max but if it is equal
123 then the gsl_histogram will not add the cases which have max value */
124 if (*adjusted_max <= max)
126 *adjusted_max += binwidth;
127 nbins++;
129 assert (*adjusted_min <= min);
131 return nbins;
135 struct histogram *
136 histogram_create (double bin_width_in, double min, double max)
138 struct histogram *h;
139 struct statistic *stat;
140 int bins;
141 double adjusted_min, adjusted_max;
143 if (max == min)
145 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
146 return NULL;
149 assert (bin_width_in > 0);
151 bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max);
153 h = xmalloc (sizeof *h);
155 h->gsl_hist = gsl_histogram_alloc (bins);
157 /* The bin ranges could be computed with gsl_histogram_set_ranges_uniform,
158 but the number of bins is adapted to the ticks of the axis such that for example
159 data ranging from 1.0 to 7.0 with 6 bins will have bin limits at
160 2.0,3.0,4.0 and 5.0. Due to numerical accuracy the computed bin limits might
161 be 4.99999999 for a value which is expected to be 5.0. But the limits of
162 the histogram bins should be that what is expected from the displayed ticks.
163 Therefore the bin limits are computed from the rounded values which is similar
164 to the procedure at the chart_get_ticks_format. Actual bin limits should be
165 exactly what is displayed at the ticks.
166 But... I cannot reproduce the problem that I see with gsl_histogram_set_ranges_uniform
167 with the code below without rounding...
170 double *ranges = xmalloc (sizeof (double) * (bins + 1));
171 double interval = (adjusted_max - adjusted_min) / bins;
172 for (int i = 0; i < bins; i++)
173 ranges[i] = adjusted_min + interval * i;
174 ranges[bins] = adjusted_max;
175 gsl_histogram_set_ranges (h->gsl_hist, ranges, bins + 1);
176 free (ranges);
179 stat = &h->parent;
180 stat->accumulate = acc;
181 stat->destroy = destroy;
183 return h;