treewide: Replace <name>_cnt by n_<name>s and <name>_cap by allocated_<name>.
[pspp.git] / src / language / stats / examine.c
blobbb77e9010a3564dfa0982ac129558b1606607db9
1 /*
2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #include <config.h>
21 #include <math.h>
22 #include <gsl/gsl_cdf.h>
24 #include "libpspp/assertion.h"
25 #include "libpspp/message.h"
26 #include "libpspp/pool.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/casegrouper.h"
32 #include "data/casereader.h"
33 #include "data/casewriter.h"
34 #include "data/caseproto.h"
35 #include "data/subcase.h"
38 #include "data/format.h"
40 #include "math/interaction.h"
41 #include "math/box-whisker.h"
42 #include "math/categoricals.h"
43 #include "math/chart-geometry.h"
44 #include "math/histogram.h"
45 #include "math/moments.h"
46 #include "math/np.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "math/percentiles.h"
50 #include "math/shapiro-wilk.h"
51 #include "math/tukey-hinges.h"
52 #include "math/trimmed-mean.h"
54 #include "output/charts/boxplot.h"
55 #include "output/charts/np-plot.h"
56 #include "output/charts/spreadlevel-plot.h"
57 #include "output/charts/plot-hist.h"
59 #include "language/command.h"
60 #include "language/lexer/lexer.h"
61 #include "language/lexer/value-parser.h"
62 #include "language/lexer/variable-parser.h"
64 #include "output/pivot-table.h"
66 #include "gettext.h"
67 #define _(msgid) gettext (msgid)
68 #define N_(msgid) msgid
70 static void
71 append_value_name (const struct variable *var, const union value *val, struct string *str)
73 var_append_value_name (var, val, str);
74 if (var_is_value_missing (var, val, MV_ANY))
75 ds_put_cstr (str, _(" (missing)"));
78 enum bp_mode
80 BP_GROUPS,
81 BP_VARIABLES
85 /* Indices for the ex_proto member (below) */
86 enum
88 EX_VAL, /* value */
89 EX_ID, /* identity */
90 EX_WT /* weight */
94 #define PLOT_HISTOGRAM 0x1
95 #define PLOT_BOXPLOT 0x2
96 #define PLOT_NPPLOT 0x4
97 #define PLOT_SPREADLEVEL 0x8
99 struct examine
101 struct pool *pool;
103 /* A caseproto used to contain the data subsets under examination,
104 see (enum above) */
105 struct caseproto *ex_proto;
107 size_t n_dep_vars;
108 const struct variable **dep_vars;
110 size_t n_iacts;
111 struct interaction **iacts;
113 enum mv_class dep_excl;
114 enum mv_class fctr_excl;
116 const struct dictionary *dict;
118 struct categoricals *cats;
120 /* how many extremities to display */
121 int disp_extremes;
122 int calc_extremes;
123 bool descriptives;
125 double conf;
127 bool missing_pw;
129 /* The case index of the ID value (or -1) if not applicable */
130 size_t id_idx;
131 int id_width;
133 enum pc_alg pc_alg;
134 double *ptiles;
135 size_t n_percentiles;
137 unsigned int plot;
138 float sl_power;
140 enum bp_mode boxplot_mode;
142 const struct variable *id_var;
144 const struct variable *wv;
147 struct extremity
149 /* The value of this extremity */
150 double val;
152 /* Either the casenumber or the value of the variable specified
153 by the /ID subcommand which corresponds to this extremity */
154 union value identity;
157 struct exploratory_stats
159 double missing;
160 double non_missing;
162 struct moments *mom;
164 /* Most operations need a sorted reader/writer */
165 struct casewriter *sorted_writer;
166 struct casereader *sorted_reader;
168 struct extremity *minima;
169 struct extremity *maxima;
172 Minimum should alway equal mimima[0].val.
173 Likewise, maximum should alway equal maxima[0].val.
174 This redundancy exists as an optimisation effort.
175 Some statistics (eg histogram) require early calculation
176 of the min and max
178 double minimum;
179 double maximum;
181 struct trimmed_mean *trimmed_mean;
182 struct percentile *quartiles[3];
183 struct percentile **percentiles;
184 struct shapiro_wilk *shapiro_wilk;
186 struct tukey_hinges *hinges;
188 /* The data for the NP Plots */
189 struct np *np;
191 struct histogram *histogram;
193 /* The data for the box plots */
194 struct box_whisker *box_whisker;
196 /* Total weight */
197 double cc;
199 /* The minimum weight */
200 double cmin;
203 static void
204 show_boxplot_grouped (const struct examine *cmd, int iact_idx)
206 int v;
208 const struct interaction *iact = cmd->iacts[iact_idx];
209 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
211 for (v = 0; v < cmd->n_dep_vars; ++v)
213 double y_min = DBL_MAX;
214 double y_max = -DBL_MAX;
215 int grp;
216 struct boxplot *boxplot;
217 struct string title;
218 ds_init_empty (&title);
220 if (iact->n_vars > 0)
222 struct string istr;
223 ds_init_empty (&istr);
224 interaction_to_string (iact, &istr);
225 ds_put_format (&title, _("Boxplot of %s vs. %s"),
226 var_to_string (cmd->dep_vars[v]),
227 ds_cstr (&istr));
228 ds_destroy (&istr);
230 else
231 ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v]));
233 for (grp = 0; grp < n_cats; ++grp)
235 const struct exploratory_stats *es =
236 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
238 if (y_min > es[v].minimum)
239 y_min = es[v].minimum;
241 if (y_max < es[v].maximum)
242 y_max = es[v].maximum;
245 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
247 ds_destroy (&title);
249 for (grp = 0; grp < n_cats; ++grp)
251 int ivar_idx;
252 struct string label;
254 const struct ccase *c =
255 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
257 struct exploratory_stats *es =
258 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
260 ds_init_empty (&label);
261 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
263 struct string l;
264 const struct variable *ivar = iact->vars[ivar_idx];
265 const union value *val = case_data (c, ivar);
266 ds_init_empty (&l);
268 append_value_name (ivar, val, &l);
269 ds_ltrim (&l, ss_cstr (" "));
271 ds_put_substring (&label, l.ss);
272 if (ivar_idx < iact->n_vars - 1)
273 ds_put_cstr (&label, "; ");
275 ds_destroy (&l);
278 boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
279 es[v].box_whisker = NULL;
281 ds_destroy (&label);
284 boxplot_submit (boxplot);
288 static void
289 show_boxplot_variabled (const struct examine *cmd, int iact_idx)
291 int grp;
292 const struct interaction *iact = cmd->iacts[iact_idx];
293 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
295 for (grp = 0; grp < n_cats; ++grp)
297 struct boxplot *boxplot;
298 int v;
299 double y_min = DBL_MAX;
300 double y_max = -DBL_MAX;
302 const struct ccase *c =
303 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
305 struct string title;
306 ds_init_empty (&title);
308 for (v = 0; v < cmd->n_dep_vars; ++v)
310 const struct exploratory_stats *es =
311 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
313 if (y_min > es[v].minimum)
314 y_min = es[v].minimum;
316 if (y_max < es[v].maximum)
317 y_max = es[v].maximum;
320 if (iact->n_vars == 0)
321 ds_put_format (&title, _("Boxplot"));
322 else
324 int ivar_idx;
325 struct string label;
326 ds_init_empty (&label);
327 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
329 const struct variable *ivar = iact->vars[ivar_idx];
330 const union value *val = case_data (c, ivar);
332 ds_put_cstr (&label, var_to_string (ivar));
333 ds_put_cstr (&label, " = ");
334 append_value_name (ivar, val, &label);
335 ds_put_cstr (&label, "; ");
338 ds_put_format (&title, _("Boxplot of %s"),
339 ds_cstr (&label));
341 ds_destroy (&label);
344 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
346 ds_destroy (&title);
348 for (v = 0; v < cmd->n_dep_vars; ++v)
350 struct exploratory_stats *es =
351 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
353 boxplot_add_box (boxplot, es[v].box_whisker,
354 var_to_string (cmd->dep_vars[v]));
355 es[v].box_whisker = NULL;
358 boxplot_submit (boxplot);
363 static void
364 show_npplot (const struct examine *cmd, int iact_idx)
366 const struct interaction *iact = cmd->iacts[iact_idx];
367 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
369 int v;
371 for (v = 0; v < cmd->n_dep_vars; ++v)
373 int grp;
374 for (grp = 0; grp < n_cats; ++grp)
376 struct chart *npp, *dnpp;
377 struct casereader *reader;
378 struct np *np;
380 int ivar_idx;
381 const struct ccase *c =
382 categoricals_get_case_by_category_real (cmd->cats,
383 iact_idx, grp);
385 const struct exploratory_stats *es =
386 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
388 struct string label;
389 ds_init_cstr (&label,
390 var_to_string (cmd->dep_vars[v]));
392 if (iact->n_vars > 0)
394 ds_put_cstr (&label, " (");
395 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
397 const struct variable *ivar = iact->vars[ivar_idx];
398 const union value *val = case_data (c, ivar);
400 ds_put_cstr (&label, var_to_string (ivar));
401 ds_put_cstr (&label, " = ");
402 append_value_name (ivar, val, &label);
403 ds_put_cstr (&label, "; ");
406 ds_put_cstr (&label, ")");
409 np = es[v].np;
410 reader = casewriter_make_reader (np->writer);
411 np->writer = NULL;
413 npp = np_plot_create (np, reader, ds_cstr (&label));
414 dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
416 if (npp == NULL || dnpp == NULL)
418 msg (MW, _("Not creating NP plot because data set is empty."));
419 chart_unref (npp);
420 chart_unref (dnpp);
422 else
424 chart_submit (npp);
425 chart_submit (dnpp);
427 casereader_destroy (reader);
429 ds_destroy (&label);
434 static void
435 show_spreadlevel (const struct examine *cmd, int iact_idx)
437 const struct interaction *iact = cmd->iacts[iact_idx];
438 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
440 int v;
442 /* Spreadlevel when there are no levels is not useful */
443 if (iact->n_vars == 0)
444 return;
446 for (v = 0; v < cmd->n_dep_vars; ++v)
448 int grp;
449 struct chart *sl;
451 struct string label;
452 ds_init_cstr (&label,
453 var_to_string (cmd->dep_vars[v]));
455 if (iact->n_vars > 0)
457 ds_put_cstr (&label, " (");
458 interaction_to_string (iact, &label);
459 ds_put_cstr (&label, ")");
462 sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power);
464 for (grp = 0; grp < n_cats; ++grp)
466 const struct exploratory_stats *es =
467 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
469 double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg);
471 double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
472 percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
474 spreadlevel_plot_add (sl, iqr, median);
477 if (sl == NULL)
478 msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
479 else
480 chart_submit (sl);
482 ds_destroy (&label);
487 static void
488 show_histogram (const struct examine *cmd, int iact_idx)
490 const struct interaction *iact = cmd->iacts[iact_idx];
491 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
493 int v;
495 for (v = 0; v < cmd->n_dep_vars; ++v)
497 int grp;
498 for (grp = 0; grp < n_cats; ++grp)
500 double n, mean, var;
501 int ivar_idx;
502 const struct ccase *c =
503 categoricals_get_case_by_category_real (cmd->cats,
504 iact_idx, grp);
506 const struct exploratory_stats *es =
507 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
509 struct string label;
511 if (es[v].histogram == NULL)
512 continue;
514 ds_init_cstr (&label,
515 var_to_string (cmd->dep_vars[v]));
517 if (iact->n_vars > 0)
519 ds_put_cstr (&label, " (");
520 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
522 const struct variable *ivar = iact->vars[ivar_idx];
523 const union value *val = case_data (c, ivar);
525 ds_put_cstr (&label, var_to_string (ivar));
526 ds_put_cstr (&label, " = ");
527 append_value_name (ivar, val, &label);
528 ds_put_cstr (&label, "; ");
531 ds_put_cstr (&label, ")");
535 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
537 chart_submit
538 (histogram_chart_create (es[v].histogram->gsl_hist,
539 ds_cstr (&label), n, mean,
540 sqrt (var), false));
543 ds_destroy (&label);
548 static struct pivot_value *
549 new_value_with_missing_footnote (const struct variable *var,
550 const union value *value,
551 struct pivot_footnote *missing_footnote)
553 struct pivot_value *pv = pivot_value_new_var_value (var, value);
554 if (var_is_value_missing (var, value, MV_USER))
555 pivot_value_add_footnote (pv, missing_footnote);
556 return pv;
559 static void
560 create_interaction_dimensions (struct pivot_table *table,
561 const struct categoricals *cats,
562 const struct interaction *iact,
563 struct pivot_footnote *missing_footnote)
565 for (size_t i = iact->n_vars; i-- > 0;)
567 const struct variable *var = iact->vars[i];
568 struct pivot_dimension *d = pivot_dimension_create__ (
569 table, PIVOT_AXIS_ROW, pivot_value_new_variable (var));
570 d->root->show_label = true;
572 size_t n;
573 union value *values = categoricals_get_var_values (cats, var, &n);
574 for (size_t j = 0; j < n; j++)
575 pivot_category_create_leaf (
576 d->root, new_value_with_missing_footnote (var, &values[j],
577 missing_footnote));
581 static struct pivot_footnote *
582 create_missing_footnote (struct pivot_table *table)
584 return pivot_table_create_footnote (
585 table, pivot_value_new_text (N_("User-missing value.")));
588 static void
589 percentiles_report (const struct examine *cmd, int iact_idx)
591 struct pivot_table *table = pivot_table_create (N_("Percentiles"));
593 struct pivot_dimension *percentiles = pivot_dimension_create (
594 table, PIVOT_AXIS_COLUMN, N_("Percentiles"));
595 percentiles->root->show_label = true;
596 for (int i = 0; i < cmd->n_percentiles; ++i)
597 pivot_category_create_leaf (
598 percentiles->root,
599 pivot_value_new_user_text_nocopy (xasprintf ("%g", cmd->ptiles[i])));
601 pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"),
602 N_("Weighted Average"), N_("Tukey's Hinges"));
604 const struct interaction *iact = cmd->iacts[iact_idx];
605 struct pivot_footnote *missing_footnote = create_missing_footnote (table);
606 create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
608 struct pivot_dimension *dep_dim = pivot_dimension_create (
609 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
611 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
613 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
614 for (size_t v = 0; v < cmd->n_dep_vars; ++v)
616 indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
617 dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
619 for (size_t i = 0; i < n_cats; ++i)
621 for (size_t j = 0; j < iact->n_vars; j++)
623 int idx = categoricals_get_value_index_by_category_real (
624 cmd->cats, iact_idx, i, j);
625 indexes[table->n_dimensions - 2 - j] = idx;
628 const struct exploratory_stats *ess
629 = categoricals_get_user_data_by_category_real (
630 cmd->cats, iact_idx, i);
631 const struct exploratory_stats *es = ess + v;
633 double hinges[3];
634 tukey_hinges_calculate (es->hinges, hinges);
636 for (size_t pc_idx = 0; pc_idx < cmd->n_percentiles; ++pc_idx)
638 indexes[0] = pc_idx;
640 indexes[1] = 0;
641 double value = percentile_calculate (es->percentiles[pc_idx],
642 cmd->pc_alg);
643 pivot_table_put (table, indexes, table->n_dimensions,
644 pivot_value_new_number (value));
646 double hinge = (cmd->ptiles[pc_idx] == 25.0 ? hinges[0]
647 : cmd->ptiles[pc_idx] == 50.0 ? hinges[1]
648 : cmd->ptiles[pc_idx] == 75.0 ? hinges[2]
649 : SYSMIS);
650 if (hinge != SYSMIS)
652 indexes[1] = 1;
653 pivot_table_put (table, indexes, table->n_dimensions,
654 pivot_value_new_number (hinge));
660 free (indexes);
662 pivot_table_submit (table);
665 static void
666 normality_report (const struct examine *cmd, int iact_idx)
668 struct pivot_table *table = pivot_table_create (N_("Tests of Normality"));
670 struct pivot_dimension *test =
671 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"),
672 N_("Statistic"),
673 N_("df"), PIVOT_RC_COUNT,
674 N_("Sig."));
676 test->root->show_label = true;
678 const struct interaction *iact = cmd->iacts[iact_idx];
679 struct pivot_footnote *missing_footnote = create_missing_footnote (table);
680 create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
682 struct pivot_dimension *dep_dim = pivot_dimension_create (
683 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
685 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
687 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
688 for (size_t v = 0; v < cmd->n_dep_vars; ++v)
690 indexes[table->n_dimensions - 1] =
691 pivot_category_create_leaf (dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
693 for (size_t i = 0; i < n_cats; ++i)
695 indexes[1] = i;
697 const struct exploratory_stats *es
698 = categoricals_get_user_data_by_category_real (
699 cmd->cats, iact_idx, i);
701 struct shapiro_wilk *sw = es[v].shapiro_wilk;
703 if (sw == NULL)
704 continue;
706 double w = shapiro_wilk_calculate (sw);
708 int j = 0;
709 indexes[0] = j;
711 pivot_table_put (table, indexes, table->n_dimensions,
712 pivot_value_new_number (w));
714 indexes[0] = ++j;
715 pivot_table_put (table, indexes, table->n_dimensions,
716 pivot_value_new_number (sw->n));
718 indexes[0] = ++j;
719 pivot_table_put (table, indexes, table->n_dimensions,
720 pivot_value_new_number (shapiro_wilk_significance (sw->n, w)));
724 free (indexes);
726 pivot_table_submit (table);
730 static void
731 descriptives_report (const struct examine *cmd, int iact_idx)
733 struct pivot_table *table = pivot_table_create (N_("Descriptives"));
735 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Aspect"),
736 N_("Statistic"), N_("Std. Error"));
738 struct pivot_dimension *statistics = pivot_dimension_create (
739 table, PIVOT_AXIS_ROW, N_("Statistics"), N_("Mean"));
740 struct pivot_category *interval = pivot_category_create_group__ (
741 statistics->root,
742 pivot_value_new_text_format (N_("%g%% Confidence Interval for Mean"),
743 cmd->conf * 100.0));
744 pivot_category_create_leaves (interval, N_("Lower Bound"),
745 N_("Upper Bound"));
746 pivot_category_create_leaves (
747 statistics->root, N_("5% Trimmed Mean"), N_("Median"), N_("Variance"),
748 N_("Std. Deviation"), N_("Minimum"), N_("Maximum"), N_("Range"),
749 N_("Interquartile Range"), N_("Skewness"), N_("Kurtosis"));
751 const struct interaction *iact = cmd->iacts[iact_idx];
752 struct pivot_footnote *missing_footnote = create_missing_footnote (table);
753 create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
755 struct pivot_dimension *dep_dim = pivot_dimension_create (
756 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
758 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
760 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
761 for (size_t v = 0; v < cmd->n_dep_vars; ++v)
763 indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
764 dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
766 for (size_t i = 0; i < n_cats; ++i)
768 for (size_t j = 0; j < iact->n_vars; j++)
770 int idx = categoricals_get_value_index_by_category_real (
771 cmd->cats, iact_idx, i, j);
772 indexes[table->n_dimensions - 2 - j] = idx;
775 const struct exploratory_stats *ess
776 = categoricals_get_user_data_by_category_real (cmd->cats,
777 iact_idx, i);
778 const struct exploratory_stats *es = ess + v;
780 double m0, m1, m2, m3, m4;
781 moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4);
782 double tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0);
784 struct entry
786 int stat_idx;
787 int aspect_idx;
788 double x;
790 entries[] = {
791 { 0, 0, m1 },
792 { 0, 1, calc_semean (m2, m0) },
793 { 1, 0, m1 - tval * calc_semean (m2, m0) },
794 { 2, 0, m1 + tval * calc_semean (m2, m0) },
795 { 3, 0, trimmed_mean_calculate (es->trimmed_mean) },
796 { 4, 0, percentile_calculate (es->quartiles[1], cmd->pc_alg) },
797 { 5, 0, m2 },
798 { 6, 0, sqrt (m2) },
799 { 7, 0, es->minima[0].val },
800 { 8, 0, es->maxima[0].val },
801 { 9, 0, es->maxima[0].val - es->minima[0].val },
802 { 10, 0, (percentile_calculate (es->quartiles[2], cmd->pc_alg) -
803 percentile_calculate (es->quartiles[0], cmd->pc_alg)) },
804 { 11, 0, m3 },
805 { 11, 1, calc_seskew (m0) },
806 { 12, 0, m4 },
807 { 12, 1, calc_sekurt (m0) },
809 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
811 const struct entry *e = &entries[j];
812 indexes[0] = e->aspect_idx;
813 indexes[1] = e->stat_idx;
814 pivot_table_put (table, indexes, table->n_dimensions,
815 pivot_value_new_number (e->x));
820 free (indexes);
822 pivot_table_submit (table);
826 static void
827 extremes_report (const struct examine *cmd, int iact_idx)
829 struct pivot_table *table = pivot_table_create (N_("Extreme Values"));
831 struct pivot_dimension *statistics = pivot_dimension_create (
832 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
833 pivot_category_create_leaf (statistics->root,
834 (cmd->id_var
835 ? pivot_value_new_variable (cmd->id_var)
836 : pivot_value_new_text (N_("Case Number"))));
837 pivot_category_create_leaves (statistics->root, N_("Value"));
839 struct pivot_dimension *order = pivot_dimension_create (
840 table, PIVOT_AXIS_ROW, N_("Order"));
841 for (size_t i = 0; i < cmd->disp_extremes; i++)
842 pivot_category_create_leaf (order->root, pivot_value_new_integer (i + 1));
844 pivot_dimension_create (table, PIVOT_AXIS_ROW,
845 /* TRANSLATORS: This is a noun, not an adjective. */
846 N_("Extreme"),
847 N_("Highest"), N_("Lowest"));
849 const struct interaction *iact = cmd->iacts[iact_idx];
850 struct pivot_footnote *missing_footnote = create_missing_footnote (table);
851 create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
853 struct pivot_dimension *dep_dim = pivot_dimension_create (
854 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
856 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
858 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
859 for (size_t v = 0; v < cmd->n_dep_vars; ++v)
861 indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
862 dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
864 for (size_t i = 0; i < n_cats; ++i)
866 for (size_t j = 0; j < iact->n_vars; j++)
868 int idx = categoricals_get_value_index_by_category_real (
869 cmd->cats, iact_idx, i, j);
870 indexes[table->n_dimensions - 2 - j] = idx;
873 const struct exploratory_stats *ess
874 = categoricals_get_user_data_by_category_real (cmd->cats,
875 iact_idx, i);
876 const struct exploratory_stats *es = ess + v;
878 for (int e = 0 ; e < cmd->disp_extremes; ++e)
880 indexes[1] = e;
882 for (size_t j = 0; j < 2; j++)
884 const struct extremity *extremity
885 = j ? &es->minima[e] : &es->maxima[e];
886 indexes[2] = j;
888 indexes[0] = 0;
889 pivot_table_put (
890 table, indexes, table->n_dimensions,
891 (cmd->id_var
892 ? new_value_with_missing_footnote (cmd->id_var,
893 &extremity->identity,
894 missing_footnote)
895 : pivot_value_new_integer (extremity->identity.f)));
897 indexes[0] = 1;
898 union value val = { .f = extremity->val };
899 pivot_table_put (
900 table, indexes, table->n_dimensions,
901 new_value_with_missing_footnote (cmd->dep_vars[v], &val,
902 missing_footnote));
907 free (indexes);
909 pivot_table_submit (table);
913 static void
914 summary_report (const struct examine *cmd, int iact_idx)
916 struct pivot_table *table = pivot_table_create (
917 N_("Case Processing Summary"));
918 pivot_table_set_weight_var (table, dict_get_weight (cmd->dict));
920 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
921 N_("N"), PIVOT_RC_COUNT,
922 N_("Percent"), PIVOT_RC_PERCENT);
923 struct pivot_dimension *cases = pivot_dimension_create (
924 table, PIVOT_AXIS_COLUMN, N_("Cases"), N_("Valid"), N_("Missing"),
925 N_("Total"));
926 cases->root->show_label = true;
928 const struct interaction *iact = cmd->iacts[iact_idx];
929 struct pivot_footnote *missing_footnote = create_missing_footnote (table);
930 create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
932 struct pivot_dimension *dep_dim = pivot_dimension_create (
933 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
935 size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
937 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
938 for (size_t v = 0; v < cmd->n_dep_vars; ++v)
940 indexes[table->n_dimensions - 1] = pivot_category_create_leaf (
941 dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
943 for (size_t i = 0; i < n_cats; ++i)
945 for (size_t j = 0; j < iact->n_vars; j++)
947 int idx = categoricals_get_value_index_by_category_real (
948 cmd->cats, iact_idx, i, j);
949 indexes[table->n_dimensions - 2 - j] = idx;
952 const struct exploratory_stats *es
953 = categoricals_get_user_data_by_category_real (
954 cmd->cats, iact_idx, i);
956 double total = es[v].missing + es[v].non_missing;
957 struct entry
959 int stat_idx;
960 int case_idx;
961 double x;
963 entries[] = {
964 { 0, 0, es[v].non_missing },
965 { 1, 0, 100.0 * es[v].non_missing / total },
966 { 0, 1, es[v].missing },
967 { 1, 1, 100.0 * es[v].missing / total },
968 { 0, 2, total },
969 { 1, 2, 100.0 },
971 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
973 const struct entry *e = &entries[j];
974 indexes[0] = e->stat_idx;
975 indexes[1] = e->case_idx;
976 pivot_table_put (table, indexes, table->n_dimensions,
977 pivot_value_new_number (e->x));
982 free (indexes);
984 pivot_table_submit (table);
987 /* Attempt to parse an interaction from LEXER */
988 static struct interaction *
989 parse_interaction (struct lexer *lexer, struct examine *ex)
991 const struct variable *v = NULL;
992 struct interaction *iact = NULL;
994 if (lex_match_variable (lexer, ex->dict, &v))
996 iact = interaction_create (v);
998 while (lex_match (lexer, T_BY))
1000 if (!lex_match_variable (lexer, ex->dict, &v))
1002 interaction_destroy (iact);
1003 return NULL;
1005 interaction_add_variable (iact, v);
1007 lex_match (lexer, T_COMMA);
1010 return iact;
1014 static void *
1015 create_n (const void *aux1, void *aux2 UNUSED)
1017 int v;
1019 const struct examine *examine = aux1;
1020 struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es));
1021 struct subcase ordering;
1022 subcase_init (&ordering, 0, 0, SC_ASCEND);
1024 for (v = 0; v < examine->n_dep_vars; v++)
1026 es[v].sorted_writer = sort_create_writer (&ordering, examine->ex_proto);
1027 es[v].sorted_reader = NULL;
1029 es[v].mom = moments_create (MOMENT_KURTOSIS);
1030 es[v].cmin = DBL_MAX;
1032 es[v].maximum = -DBL_MAX;
1033 es[v].minimum = DBL_MAX;
1036 subcase_destroy (&ordering);
1037 return es;
1040 static void
1041 update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
1042 const struct ccase *c, double weight)
1044 int v;
1045 const struct examine *examine = aux1;
1046 struct exploratory_stats *es = user_data;
1048 bool this_case_is_missing = false;
1049 /* LISTWISE missing must be dealt with here */
1050 if (!examine->missing_pw)
1052 for (v = 0; v < examine->n_dep_vars; v++)
1054 const struct variable *var = examine->dep_vars[v];
1056 if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
1058 es[v].missing += weight;
1059 this_case_is_missing = true;
1064 if (this_case_is_missing)
1065 return;
1067 for (v = 0; v < examine->n_dep_vars; v++)
1069 struct ccase *outcase ;
1070 const struct variable *var = examine->dep_vars[v];
1071 const double x = case_num (c, var);
1073 if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
1075 es[v].missing += weight;
1076 continue;
1079 outcase = case_create (examine->ex_proto);
1081 if (x > es[v].maximum)
1082 es[v].maximum = x;
1084 if (x < es[v].minimum)
1085 es[v].minimum = x;
1087 es[v].non_missing += weight;
1089 moments_pass_one (es[v].mom, x, weight);
1091 /* Save the value and the ID to the writer */
1092 assert (examine->id_idx != -1);
1093 *case_num_rw_idx (outcase, EX_VAL) = x;
1094 value_copy (case_data_rw_idx (outcase, EX_ID),
1095 case_data_idx (c, examine->id_idx), examine->id_width);
1097 *case_num_rw_idx (outcase, EX_WT) = weight;
1099 es[v].cc += weight;
1101 if (es[v].cmin > weight)
1102 es[v].cmin = weight;
1104 casewriter_write (es[v].sorted_writer, outcase);
1108 static void
1109 calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
1111 int v;
1112 const struct examine *examine = aux1;
1113 struct exploratory_stats *es = user_data;
1115 for (v = 0; v < examine->n_dep_vars; v++)
1117 int i;
1118 casenumber imin = 0;
1119 casenumber imax;
1120 struct casereader *reader;
1121 struct ccase *c;
1123 if (examine->plot & PLOT_HISTOGRAM && es[v].non_missing > 0)
1125 /* Sturges Rule */
1126 double bin_width = fabs (es[v].minimum - es[v].maximum)
1127 / (1 + log2 (es[v].cc))
1130 es[v].histogram =
1131 histogram_create (bin_width, es[v].minimum, es[v].maximum);
1134 es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
1135 es[v].sorted_writer = NULL;
1137 imax = casereader_get_n_cases (es[v].sorted_reader);
1139 es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
1140 es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
1141 for (i = 0; i < examine->calc_extremes; ++i)
1143 value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
1144 value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
1147 bool warn = true;
1148 for (reader = casereader_clone (es[v].sorted_reader);
1149 (c = casereader_read (reader)) != NULL; case_unref (c))
1151 const double val = case_num_idx (c, EX_VAL);
1152 double wt = case_num_idx (c, EX_WT);
1153 wt = var_force_valid_weight (examine->wv, wt, &warn);
1155 moments_pass_two (es[v].mom, val, wt);
1157 if (es[v].histogram)
1158 histogram_add (es[v].histogram, val, wt);
1160 if (imin < examine->calc_extremes)
1162 int x;
1163 for (x = imin; x < examine->calc_extremes; ++x)
1165 struct extremity *min = &es[v].minima[x];
1166 min->val = val;
1167 value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
1169 imin ++;
1172 imax --;
1173 if (imax < examine->calc_extremes)
1175 int x;
1177 for (x = imax; x < imax + 1; ++x)
1179 struct extremity *max;
1181 if (x >= examine->calc_extremes)
1182 break;
1184 max = &es[v].maxima[x];
1185 max->val = val;
1186 value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
1190 casereader_destroy (reader);
1192 if (examine->calc_extremes > 0 && es[v].non_missing > 0)
1194 assert (es[v].minima[0].val == es[v].minimum);
1195 assert (es[v].maxima[0].val == es[v].maximum);
1199 const int n_os = 5 + examine->n_percentiles;
1200 es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
1202 es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
1203 es[v].shapiro_wilk = NULL;
1205 struct order_stats **os = XCALLOC (n_os, struct order_stats *);
1206 os[0] = &es[v].trimmed_mean->parent;
1208 es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
1209 es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
1210 es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
1212 os[1] = &es[v].quartiles[0]->parent;
1213 os[2] = &es[v].quartiles[1]->parent;
1214 os[3] = &es[v].quartiles[2]->parent;
1216 es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
1217 os[4] = &es[v].hinges->parent;
1219 for (i = 0; i < examine->n_percentiles; ++i)
1221 es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc);
1222 os[5 + i] = &es[v].percentiles[i]->parent;
1225 order_stats_accumulate_idx (os, n_os,
1226 casereader_clone (es[v].sorted_reader),
1227 EX_WT, EX_VAL);
1229 free (os);
1232 if (examine->plot & PLOT_BOXPLOT)
1234 struct order_stats *os;
1236 es[v].box_whisker = box_whisker_create (es[v].hinges,
1237 EX_ID, examine->id_var);
1239 os = &es[v].box_whisker->parent;
1240 order_stats_accumulate_idx (&os, 1,
1241 casereader_clone (es[v].sorted_reader),
1242 EX_WT, EX_VAL);
1245 if (examine->plot)
1247 double mean;
1249 moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL);
1251 es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean);
1253 if (es[v].shapiro_wilk)
1255 struct order_stats *os = &es[v].shapiro_wilk->parent;
1256 order_stats_accumulate_idx (&os, 1,
1257 casereader_clone (es[v].sorted_reader),
1258 EX_WT, EX_VAL);
1262 if (examine->plot & PLOT_NPPLOT)
1264 double n, mean, var;
1265 struct order_stats *os;
1267 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
1269 es[v].np = np_create (n, mean, var);
1271 os = &es[v].np->parent;
1273 order_stats_accumulate_idx (&os, 1,
1274 casereader_clone (es[v].sorted_reader),
1275 EX_WT, EX_VAL);
1281 static void
1282 cleanup_exploratory_stats (struct examine *cmd)
1284 int i;
1285 for (i = 0; i < cmd->n_iacts; ++i)
1287 int v;
1288 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1290 for (v = 0; v < cmd->n_dep_vars; ++v)
1292 int grp;
1293 for (grp = 0; grp < n_cats; ++grp)
1295 int q;
1296 const struct exploratory_stats *es =
1297 categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
1299 struct order_stats *os = &es[v].hinges->parent;
1300 struct statistic *stat = &os->parent;
1301 stat->destroy (stat);
1303 for (q = 0; q < 3 ; q++)
1305 os = &es[v].quartiles[q]->parent;
1306 stat = &os->parent;
1307 stat->destroy (stat);
1310 for (q = 0; q < cmd->n_percentiles ; q++)
1312 os = &es[v].percentiles[q]->parent;
1313 stat = &os->parent;
1314 stat->destroy (stat);
1317 if (es[v].shapiro_wilk)
1319 stat = &es[v].shapiro_wilk->parent.parent;
1320 stat->destroy (stat);
1323 os = &es[v].trimmed_mean->parent;
1324 stat = &os->parent;
1325 stat->destroy (stat);
1327 os = &es[v].np->parent;
1328 if (os)
1330 stat = &os->parent;
1331 stat->destroy (stat);
1334 statistic_destroy (&es[v].histogram->parent);
1335 moments_destroy (es[v].mom);
1337 if (es[v].box_whisker)
1339 stat = &es[v].box_whisker->parent.parent;
1340 stat->destroy (stat);
1343 casereader_destroy (es[v].sorted_reader);
1350 static void
1351 run_examine (struct examine *cmd, struct casereader *input)
1353 int i;
1354 struct ccase *c;
1355 struct casereader *reader;
1357 struct payload payload;
1358 payload.create = create_n;
1359 payload.update = update_n;
1360 payload.calculate = calculate_n;
1361 payload.destroy = NULL;
1363 cmd->wv = dict_get_weight (cmd->dict);
1365 cmd->cats
1366 = categoricals_create (cmd->iacts, cmd->n_iacts, cmd->wv, cmd->fctr_excl);
1368 categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
1370 if (cmd->id_var == NULL)
1372 struct ccase *c = casereader_peek (input, 0);
1374 cmd->id_idx = case_get_n_values (c);
1375 input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
1377 case_unref (c);
1380 for (reader = input;
1381 (c = casereader_read (reader)) != NULL; case_unref (c))
1383 categoricals_update (cmd->cats, c);
1385 casereader_destroy (reader);
1386 categoricals_done (cmd->cats);
1388 for (i = 0; i < cmd->n_iacts; ++i)
1390 summary_report (cmd, i);
1392 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1393 if (n_cats == 0)
1394 continue;
1396 if (cmd->disp_extremes > 0)
1397 extremes_report (cmd, i);
1399 if (cmd->n_percentiles > 0)
1400 percentiles_report (cmd, i);
1402 if (cmd->plot & PLOT_BOXPLOT)
1404 switch (cmd->boxplot_mode)
1406 case BP_GROUPS:
1407 show_boxplot_grouped (cmd, i);
1408 break;
1409 case BP_VARIABLES:
1410 show_boxplot_variabled (cmd, i);
1411 break;
1412 default:
1413 NOT_REACHED ();
1414 break;
1418 if (cmd->plot & PLOT_HISTOGRAM)
1419 show_histogram (cmd, i);
1421 if (cmd->plot & PLOT_NPPLOT)
1422 show_npplot (cmd, i);
1424 if (cmd->plot & PLOT_SPREADLEVEL)
1425 show_spreadlevel (cmd, i);
1427 if (cmd->descriptives)
1428 descriptives_report (cmd, i);
1430 if (cmd->plot)
1431 normality_report (cmd, i);
1434 cleanup_exploratory_stats (cmd);
1435 categoricals_destroy (cmd->cats);
1440 cmd_examine (struct lexer *lexer, struct dataset *ds)
1442 int i;
1443 bool nototals_seen = false;
1444 bool totals_seen = false;
1446 struct interaction **iacts_mem = NULL;
1447 struct examine examine;
1448 bool percentiles_seen = false;
1450 examine.missing_pw = false;
1451 examine.disp_extremes = 0;
1452 examine.calc_extremes = 0;
1453 examine.descriptives = false;
1454 examine.conf = 0.95;
1455 examine.pc_alg = PC_HAVERAGE;
1456 examine.ptiles = NULL;
1457 examine.n_percentiles = 0;
1458 examine.id_idx = -1;
1459 examine.id_width = 0;
1460 examine.id_var = NULL;
1461 examine.boxplot_mode = BP_GROUPS;
1463 examine.ex_proto = caseproto_create ();
1465 examine.pool = pool_create ();
1467 /* Allocate space for the first interaction.
1468 This is interaction is an empty one (for the totals).
1469 If no totals are requested, we will simply ignore this
1470 interaction.
1472 examine.n_iacts = 1;
1473 examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
1474 examine.iacts[0] = interaction_create (NULL);
1476 examine.dep_excl = MV_ANY;
1477 examine.fctr_excl = MV_ANY;
1478 examine.plot = 0;
1479 examine.sl_power = 0;
1480 examine.dep_vars = NULL;
1481 examine.n_dep_vars = 0;
1482 examine.dict = dataset_dict (ds);
1484 /* Accept an optional, completely pointless "/VARIABLES=" */
1485 lex_match (lexer, T_SLASH);
1486 if (lex_match_id (lexer, "VARIABLES"))
1488 if (! lex_force_match (lexer, T_EQUALS))
1489 goto error;
1492 if (!parse_variables_const (lexer, examine.dict,
1493 &examine.dep_vars, &examine.n_dep_vars,
1494 PV_NO_DUPLICATE | PV_NUMERIC))
1495 goto error;
1497 if (lex_match (lexer, T_BY))
1499 struct interaction *iact = NULL;
1502 iact = parse_interaction (lexer, &examine);
1503 if (iact)
1505 examine.n_iacts++;
1506 iacts_mem =
1507 pool_nrealloc (examine.pool, iacts_mem,
1508 examine.n_iacts,
1509 sizeof (*iacts_mem));
1511 iacts_mem[examine.n_iacts - 1] = iact;
1514 while (iact);
1518 while (lex_token (lexer) != T_ENDCMD)
1520 lex_match (lexer, T_SLASH);
1522 if (lex_match_id (lexer, "STATISTICS"))
1524 lex_match (lexer, T_EQUALS);
1526 while (lex_token (lexer) != T_ENDCMD
1527 && lex_token (lexer) != T_SLASH)
1529 if (lex_match_id (lexer, "DESCRIPTIVES"))
1531 examine.descriptives = true;
1533 else if (lex_match_id (lexer, "EXTREME"))
1535 int extr = 5;
1536 if (lex_match (lexer, T_LPAREN))
1538 if (!lex_force_int_range (lexer, "EXTREME", 0, INT_MAX))
1539 goto error;
1540 extr = lex_integer (lexer);
1542 lex_get (lexer);
1543 if (! lex_force_match (lexer, T_RPAREN))
1544 goto error;
1546 examine.disp_extremes = extr;
1548 else if (lex_match_id (lexer, "NONE"))
1551 else if (lex_match (lexer, T_ALL))
1553 if (examine.disp_extremes == 0)
1554 examine.disp_extremes = 5;
1556 else
1558 lex_error (lexer, NULL);
1559 goto error;
1563 else if (lex_match_id (lexer, "PERCENTILES"))
1565 percentiles_seen = true;
1566 if (lex_match (lexer, T_LPAREN))
1568 while (lex_is_number (lexer))
1570 double p = lex_number (lexer);
1572 if (p <= 0 || p >= 100.0)
1574 lex_error (lexer,
1575 _("Percentiles must lie in the range (0, 100)"));
1576 goto error;
1579 examine.n_percentiles++;
1580 examine.ptiles =
1581 xrealloc (examine.ptiles,
1582 sizeof (*examine.ptiles) *
1583 examine.n_percentiles);
1585 examine.ptiles[examine.n_percentiles - 1] = p;
1587 lex_get (lexer);
1588 lex_match (lexer, T_COMMA);
1590 if (!lex_force_match (lexer, T_RPAREN))
1591 goto error;
1594 lex_match (lexer, T_EQUALS);
1596 while (lex_token (lexer) != T_ENDCMD
1597 && lex_token (lexer) != T_SLASH)
1599 if (lex_match_id (lexer, "HAVERAGE"))
1601 examine.pc_alg = PC_HAVERAGE;
1603 else if (lex_match_id (lexer, "WAVERAGE"))
1605 examine.pc_alg = PC_WAVERAGE;
1607 else if (lex_match_id (lexer, "ROUND"))
1609 examine.pc_alg = PC_ROUND;
1611 else if (lex_match_id (lexer, "EMPIRICAL"))
1613 examine.pc_alg = PC_EMPIRICAL;
1615 else if (lex_match_id (lexer, "AEMPIRICAL"))
1617 examine.pc_alg = PC_AEMPIRICAL;
1619 else if (lex_match_id (lexer, "NONE"))
1621 examine.pc_alg = PC_NONE;
1623 else
1625 lex_error (lexer, NULL);
1626 goto error;
1630 else if (lex_match_id (lexer, "TOTAL"))
1632 totals_seen = true;
1634 else if (lex_match_id (lexer, "NOTOTAL"))
1636 nototals_seen = true;
1638 else if (lex_match_id (lexer, "MISSING"))
1640 lex_match (lexer, T_EQUALS);
1642 while (lex_token (lexer) != T_ENDCMD
1643 && lex_token (lexer) != T_SLASH)
1645 if (lex_match_id (lexer, "LISTWISE"))
1647 examine.missing_pw = false;
1649 else if (lex_match_id (lexer, "PAIRWISE"))
1651 examine.missing_pw = true;
1653 else if (lex_match_id (lexer, "EXCLUDE"))
1655 examine.dep_excl = MV_ANY;
1657 else if (lex_match_id (lexer, "INCLUDE"))
1659 examine.dep_excl = MV_SYSTEM;
1661 else if (lex_match_id (lexer, "REPORT"))
1663 examine.fctr_excl = MV_NEVER;
1665 else if (lex_match_id (lexer, "NOREPORT"))
1667 examine.fctr_excl = MV_ANY;
1669 else
1671 lex_error (lexer, NULL);
1672 goto error;
1676 else if (lex_match_id (lexer, "COMPARE"))
1678 lex_match (lexer, T_EQUALS);
1679 if (lex_match_id (lexer, "VARIABLES"))
1681 examine.boxplot_mode = BP_VARIABLES;
1683 else if (lex_match_id (lexer, "GROUPS"))
1685 examine.boxplot_mode = BP_GROUPS;
1687 else
1689 lex_error (lexer, NULL);
1690 goto error;
1693 else if (lex_match_id (lexer, "PLOT"))
1695 lex_match (lexer, T_EQUALS);
1697 while (lex_token (lexer) != T_ENDCMD
1698 && lex_token (lexer) != T_SLASH)
1700 if (lex_match_id (lexer, "BOXPLOT"))
1702 examine.plot |= PLOT_BOXPLOT;
1704 else if (lex_match_id (lexer, "NPPLOT"))
1706 examine.plot |= PLOT_NPPLOT;
1708 else if (lex_match_id (lexer, "HISTOGRAM"))
1710 examine.plot |= PLOT_HISTOGRAM;
1712 else if (lex_match_id (lexer, "SPREADLEVEL"))
1714 examine.plot |= PLOT_SPREADLEVEL;
1715 examine.sl_power = 0;
1716 if (lex_match (lexer, T_LPAREN) && lex_force_num (lexer))
1718 examine.sl_power = lex_number (lexer);
1720 lex_get (lexer);
1721 if (! lex_force_match (lexer, T_RPAREN))
1722 goto error;
1725 else if (lex_match_id (lexer, "NONE"))
1727 examine.plot = 0;
1729 else if (lex_match (lexer, T_ALL))
1731 examine.plot = ~0;
1733 else
1735 lex_error (lexer, NULL);
1736 goto error;
1738 lex_match (lexer, T_COMMA);
1741 else if (lex_match_id (lexer, "CINTERVAL"))
1743 if (!lex_force_num (lexer))
1744 goto error;
1746 examine.conf = lex_number (lexer);
1747 lex_get (lexer);
1749 else if (lex_match_id (lexer, "ID"))
1751 lex_match (lexer, T_EQUALS);
1753 examine.id_var = parse_variable_const (lexer, examine.dict);
1755 else
1757 lex_error (lexer, NULL);
1758 goto error;
1763 if (totals_seen && nototals_seen)
1765 msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
1766 goto error;
1769 /* If totals have been requested or if there are no factors
1770 in this analysis, then the totals need to be included. */
1771 if (!nototals_seen || examine.n_iacts == 1)
1773 examine.iacts = &iacts_mem[0];
1775 else
1777 examine.n_iacts--;
1778 examine.iacts = &iacts_mem[1];
1779 interaction_destroy (iacts_mem[0]);
1783 if (examine.id_var)
1785 examine.id_idx = var_get_case_index (examine.id_var);
1786 examine.id_width = var_get_width (examine.id_var);
1789 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
1790 examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
1791 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
1794 if (examine.disp_extremes > 0)
1796 examine.calc_extremes = examine.disp_extremes;
1799 if (examine.descriptives && examine.calc_extremes == 0)
1801 /* Descriptives always displays the max and min */
1802 examine.calc_extremes = 1;
1805 if (percentiles_seen && examine.n_percentiles == 0)
1807 examine.n_percentiles = 7;
1808 examine.ptiles = xcalloc (examine.n_percentiles, sizeof (*examine.ptiles));
1810 examine.ptiles[0] = 5;
1811 examine.ptiles[1] = 10;
1812 examine.ptiles[2] = 25;
1813 examine.ptiles[3] = 50;
1814 examine.ptiles[4] = 75;
1815 examine.ptiles[5] = 90;
1816 examine.ptiles[6] = 95;
1819 assert (examine.calc_extremes >= examine.disp_extremes);
1821 struct casegrouper *grouper;
1822 struct casereader *group;
1823 bool ok;
1825 grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
1826 while (casegrouper_get_next_group (grouper, &group))
1827 run_examine (&examine, group);
1828 ok = casegrouper_destroy (grouper);
1829 ok = proc_commit (ds) && ok;
1832 caseproto_unref (examine.ex_proto);
1834 for (i = 0; i < examine.n_iacts; ++i)
1835 interaction_destroy (examine.iacts[i]);
1836 free (examine.ptiles);
1837 free (examine.dep_vars);
1838 pool_destroy (examine.pool);
1840 return CMD_SUCCESS;
1842 error:
1843 caseproto_unref (examine.ex_proto);
1844 examine.iacts = iacts_mem;
1845 for (i = 0; i < examine.n_iacts; ++i)
1846 interaction_destroy (examine.iacts[i]);
1847 free (examine.dep_vars);
1848 free (examine.ptiles);
1849 pool_destroy (examine.pool);
1851 return CMD_FAILURE;