From a032ca4d86997d19a8a84cf515236766e154b0c3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 27 May 2023 19:52:01 -0700 Subject: [PATCH] histogram: Improve coding style. --- src/math/chart-geometry.c | 67 ++++++++++++++++++++++++---------------- src/math/histogram.c | 79 +++++++++++++++++++++++------------------------ src/math/histogram.h | 8 ----- 3 files changed, 80 insertions(+), 74 deletions(-) diff --git a/src/math/chart-geometry.c b/src/math/chart-geometry.c index eaa2c0159..21176f85d 100644 --- a/src/math/chart-geometry.c +++ b/src/math/chart-geometry.c @@ -29,8 +29,6 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -static const double standard_tick[] = {1, 2, 5, 10}; - /* Find a set {LOWER, INTERVAL, N_TICKS} such that: @@ -57,35 +55,52 @@ chart_get_scale (double high, double low, double *lower, double *interval, int *n_ticks) { - double fitness = DBL_MAX; - double logrange; - *n_ticks = 0; - assert (high >= low); + if ((high - low) < 10 * DBL_MIN) + { + *n_ticks = 0; + *lower = low; + *interval = 0.0; + return; + } - if ((high - low) < 10 * DBL_MIN) { - *n_ticks = 0; - *lower = low; - *interval = 0.0; - return; - } - - logrange = floor(log10(high-low)); + /* Round down the difference between HIGH and LOW to a power of 10, then + divide by 10. If HIGH - LOW is a power of 10, then BINTERVAL will be + (HIGH - LOW) / 10; otherwise, it will be less than (HIGH - LOW) / 10 but + more than (HIGH - LOW) / 100. + + for a range of [5.5,9.2], binterval = 0.1; + For a range of [0,10], binterval = 1; + for a range of [55,92], binterval = 1; + for a range of [0,100], binterval = 10; + for a range of [555,922], binterval = 10; + and so on. */ + double binterval = pow (10.0, floor (log10 (high - low)) - 1); + double fitness = DBL_MAX; - /* Find the most pleasing interval */ + /* Find the most pleasing interval. */ + static const double standard_tick[] = {1, 2, 5, 10}; for (int i = 0; i < sizeof standard_tick / sizeof *standard_tick; i++) { - double cinterval = standard_tick[i] * pow(10.0,logrange-1); - double clower = floor(low/cinterval) * cinterval; - int cnticks = ceil((high - clower) / cinterval)-1; - double cfitness = fabs(7.5 - cnticks); - - if (cfitness < fitness) { - fitness = cfitness; - *lower = clower; - *interval = cinterval; - *n_ticks = cnticks; - } + /* Take a multiple of the basic interval. */ + double cinterval = standard_tick[i] * binterval; + + /* Make a range by rounding LOW down to the next multiple of CINTERVAL, + and HIGH up to the next multiple of CINTERVAL. */ + double clower = floor (low / cinterval) * cinterval; + int cnticks = ceil ((high - clower) / cinterval) - 1; + + /* Compute a score based on considering 7.5 ticks to be ideal. */ + double cfitness = fabs (7.5 - cnticks); + + /* Choose the lowest score. */ + if (cfitness < fitness) + { + fitness = cfitness; + *lower = clower; + *interval = cinterval; + *n_ticks = cnticks; + } } } diff --git a/src/math/histogram.c b/src/math/histogram.c index f054f824b..1a4da905f 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -63,16 +63,16 @@ such that (max-min) is a multiple of the binwidth. Then the location of the first bin has to be aligned to the ticks. */ static int -hist_find_pretty_no_of_bins(double bin_width_in, double min, double max, - double *adjusted_min, double *adjusted_max) +hist_find_pretty_no_of_bins (double bin_width_in, double min, double max, + double *adjusted_min, double *adjusted_max) { + /* Choose an initial chart scale, without considering bin width. */ double lower, interval; int n_ticks; - double binwidth; - int nbins; - chart_get_scale (max, min, &lower, &interval, &n_ticks); + /* Then adjust things. */ + double binwidth; if (bin_width_in >= 2 * interval) { binwidth = floor(bin_width_in/interval) * interval; @@ -111,7 +111,7 @@ hist_find_pretty_no_of_bins(double bin_width_in, double min, double max, if (*adjusted_min > min) *adjusted_min = min; - nbins = ceil((max-*adjusted_min)/binwidth); + int nbins = ceil((max-*adjusted_min)/binwidth); *adjusted_max = nbins*binwidth + *adjusted_min; /* adjusted_max should never be smaller than max but if it is equal @@ -126,54 +126,53 @@ hist_find_pretty_no_of_bins(double bin_width_in, double min, double max, return nbins; } +/* Creates and returns a new histogram for data that ranges from MIN to MAX + with a suggested bin width of BIN_WIDTH_IN. The implementation will adjust + the bin width. The caller should add each data point to the histogram with + histogram_add(), and eventually destroy it with statistic_destroy(). + Returns NULL if the histogram cannot be created. */ struct histogram * histogram_create (double bin_width_in, double min, double max) { - struct histogram *h; - struct statistic *stat; - int bins; - double adjusted_min, adjusted_max; - if (max == min) { - msg (MW, _("Not creating histogram because the data contains less than 2 distinct values")); + msg (MW, _("Not creating histogram because the data contains less than 2 " + "distinct values")); return NULL; - } + } assert (bin_width_in > 0); - bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max); - - h = xmalloc (sizeof *h); + double adjusted_min, adjusted_max; + int bins = hist_find_pretty_no_of_bins (bin_width_in, min, max, &adjusted_min, + &adjusted_max); - h->gsl_hist = gsl_histogram_alloc (bins); + struct histogram *h = xmalloc (sizeof *h); + *h = (struct histogram) { + .parent.destroy = destroy, + .gsl_hist = gsl_histogram_alloc (bins), + }; /* The bin ranges could be computed with gsl_histogram_set_ranges_uniform, - but the number of bins is adapted to the ticks of the axis such that for example - data ranging from 1.0 to 7.0 with 6 bins will have bin limits at - 2.0,3.0,4.0 and 5.0. Due to numerical accuracy the computed bin limits might - be 4.99999999 for a value which is expected to be 5.0. But the limits of - the histogram bins should be that what is expected from the displayed ticks. - Therefore the bin limits are computed from the rounded values which is similar - to the procedure at the chart_get_ticks_format. Actual bin limits should be - exactly what is displayed at the ticks. - But... I cannot reproduce the problem that I see with gsl_histogram_set_ranges_uniform - with the code below without rounding... + but the number of bins is adapted to the ticks of the axis such that for + example data ranging from 1.0 to 7.0 with 6 bins will have bin limits at + 2.0,3.0,4.0 and 5.0. Due to numerical accuracy the computed bin limits + might be 4.99999999 for a value which is expected to be 5.0. But the limits + of the histogram bins should be that what is expected from the displayed + ticks. Therefore the bin limits are computed from the rounded values which + is similar to the procedure at the chart_get_ticks_format. Actual bin + limits should be exactly what is displayed at the ticks. But... I cannot + reproduce the problem that I see with gsl_histogram_set_ranges_uniform with + the code below without rounding... */ - { - double *ranges = xmalloc (sizeof (double) * (bins + 1)); - double interval = (adjusted_max - adjusted_min) / bins; - for (int i = 0; i < bins; i++) - ranges[i] = adjusted_min + interval * i; - ranges[bins] = adjusted_max; - gsl_histogram_set_ranges (h->gsl_hist, ranges, bins + 1); - free (ranges); - } - - stat = &h->parent; - stat->destroy = destroy; + double *ranges = xmalloc (sizeof *ranges * (bins + 1)); + double interval = (adjusted_max - adjusted_min) / bins; + for (int i = 0; i < bins; i++) + ranges[i] = adjusted_min + interval * i; + ranges[bins] = adjusted_max; + gsl_histogram_set_ranges (h->gsl_hist, ranges, bins + 1); + free (ranges); return h; } - diff --git a/src/math/histogram.h b/src/math/histogram.h index 89d4f5e86..ce4669a6c 100644 --- a/src/math/histogram.h +++ b/src/math/histogram.h @@ -30,15 +30,7 @@ struct histogram gsl_histogram *gsl_hist; }; -/* - Prepare a histogram for data which lies in the range [min, max) - bin_width is a nominal figure only. It is a hint about what might be - an good approximate bin width, but the implementation will adjust it - as it thinks fit. - */ struct histogram * histogram_create (double bin_width, double min, double max); - void histogram_add (struct histogram *h, double y, double c); - #endif -- 2.11.4.GIT