1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - How to calculate significance of some directional measures?
20 - How to calculate ASE for symmetric Somers ' d?
21 - How to calculate ASE for Goodman and Kruskal's tau?
22 - How to calculate approx. T of symmetric uncertainty coefficient?
30 #include <gsl/gsl_cdf.h>
34 #include "data/case.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/data-out.h"
38 #include "data/dataset.h"
39 #include "data/dictionary.h"
40 #include "data/format.h"
41 #include "data/value-labels.h"
42 #include "data/variable.h"
43 #include "language/command.h"
44 #include "language/commands/freq.h"
45 #include "language/commands/split-file.h"
46 #include "language/lexer/lexer.h"
47 #include "language/lexer/variable-parser.h"
48 #include "libpspp/array.h"
49 #include "libpspp/assertion.h"
50 #include "libpspp/compiler.h"
51 #include "libpspp/hash-functions.h"
52 #include "libpspp/hmap.h"
53 #include "libpspp/hmapx.h"
54 #include "libpspp/message.h"
55 #include "libpspp/misc.h"
56 #include "libpspp/pool.h"
57 #include "libpspp/str.h"
58 #include "math/correlation.h"
59 #include "output/pivot-table.h"
60 #include "output/charts/barchart.h"
62 #include "gl/minmax.h"
63 #include "gl/xalloc-oversized.h"
64 #include "gl/xalloc.h"
68 #define _(msgid) gettext (msgid)
69 #define N_(msgid) msgid
71 /* Kinds of cells in the crosstabulation. */
73 C(COUNT, N_("Count"), PIVOT_RC_COUNT) \
74 C(EXPECTED, N_("Expected"), PIVOT_RC_OTHER) \
75 C(ROW, N_("Row %"), PIVOT_RC_PERCENT) \
76 C(COLUMN, N_("Column %"), PIVOT_RC_PERCENT) \
77 C(TOTAL, N_("Total %"), PIVOT_RC_PERCENT) \
78 C(RESIDUAL, N_("Residual"), PIVOT_RC_RESIDUAL) \
79 C(SRESIDUAL, N_("Std. Residual"), PIVOT_RC_RESIDUAL) \
80 C(ASRESIDUAL, N_("Adjusted Residual"), PIVOT_RC_RESIDUAL)
83 #define C(KEYWORD, STRING, RC) CRS_CL_##KEYWORD,
88 #define C(KEYWORD, STRING, RC) + 1
89 CRS_N_CELLS
= CRS_CELLS
92 #define CRS_ALL_CELLS ((1u << CRS_N_CELLS) - 1)
94 /* Kinds of statistics. */
95 #define CRS_STATISTICS \
109 enum crs_statistic_index
{
110 #define S(KEYWORD) CRS_ST_##KEYWORD##_INDEX,
114 enum crs_statistic_bit
{
115 #define S(KEYWORD) CRS_ST_##KEYWORD = 1u << CRS_ST_##KEYWORD##_INDEX,
120 #define S(KEYWORD) + 1
121 CRS_N_STATISTICS
= CRS_STATISTICS
124 #define CRS_ALL_STATISTICS ((1u << CRS_N_STATISTICS) - 1)
126 /* Number of chi-square statistics. */
129 /* Number of symmetric statistics. */
130 #define N_SYMMETRIC 9
132 /* Number of directional statistics. */
133 #define N_DIRECTIONAL 13
135 /* Indexes into the 'vars' member of struct crosstabulation and
136 struct crosstab member. */
139 ROW_VAR
= 0, /* Row variable. */
140 COL_VAR
= 1 /* Column variable. */
141 /* Higher indexes cause multiple tables to be output. */
146 const struct variable
*var
;
151 /* A crosstabulation of 2 or more variables. */
152 struct crosstabulation
154 struct crosstabs_proc
*proc
;
155 struct fmt_spec weight_format
; /* Format for weight variable. */
156 double missing
; /* Weight of missing cases. */
158 /* Variables (2 or more). */
160 struct xtab_var
*vars
;
162 /* Constants (0 or more). */
164 struct xtab_var
*const_vars
;
165 size_t *const_indexes
;
169 struct freq
**entries
;
172 /* Number of statistically interesting columns/rows
173 (columns/rows with data in them). */
174 size_t ns_cols
, ns_rows
;
176 /* Matrix contents. */
177 double *mat
; /* Matrix proper. */
178 double *row_tot
; /* Row totals. */
179 double *col_tot
; /* Column totals. */
180 double total
; /* Grand total. */
187 /* Integer mode variable info. */
190 struct hmap_node hmap_node
; /* In struct crosstabs_proc var_ranges map. */
191 const struct variable
*var
; /* The variable. */
192 int min
; /* Minimum value. */
193 int max
; /* Maximum value + 1. */
194 int count
; /* max - min. */
197 struct crosstabs_proc
199 const struct dictionary
*dict
;
200 enum { INTEGER
, GENERAL
} mode
;
201 enum mv_class exclude
;
204 struct fmt_spec weight_format
;
206 /* Variables specifies on VARIABLES. */
207 const struct variable
**variables
;
209 struct hmap var_ranges
;
212 struct crosstabulation
*pivots
;
216 size_t n_cells
; /* Number of cells requested. */
217 unsigned int cells
; /* Bit k is 1 if cell k is requested. */
218 int a_cells
[CRS_N_CELLS
]; /* 0...n_cells-1 are the requested cells. */
220 /* Rounding of cells. */
221 bool round_case_weights
; /* Round case weights? */
222 bool round_cells
; /* If !round_case_weights, round cells? */
223 bool round_down
; /* Round down? (otherwise to nearest) */
226 unsigned int statistics
; /* Bit k is 1 if statistic k is requested. */
228 bool descending
; /* True if descending sort order is requested. */
231 static bool parse_crosstabs_tables (struct lexer
*, struct dataset
*,
232 struct crosstabs_proc
*);
233 static bool parse_crosstabs_variables (struct lexer
*, struct dataset
*,
234 struct crosstabs_proc
*);
236 static const struct var_range
*get_var_range (const struct crosstabs_proc
*,
237 const struct variable
*);
239 static bool should_tabulate_case (const struct crosstabulation
*,
240 const struct ccase
*, enum mv_class exclude
);
241 static void tabulate_general_case (struct crosstabulation
*, const struct ccase
*,
243 static void tabulate_integer_case (struct crosstabulation
*, const struct ccase
*,
245 static void postcalc (struct crosstabs_proc
*, struct lexer
*);
248 round_weight (const struct crosstabs_proc
*proc
, double weight
)
250 return proc
->round_down
? floor (weight
) : floor (weight
+ 0.5);
253 #define FOR_EACH_POPULATED_COLUMN(C, XT) \
254 for (size_t C = next_populated_column (0, XT); \
255 C < (XT)->vars[COL_VAR].n_values; \
256 C = next_populated_column (C + 1, XT))
258 next_populated_column (size_t c
, const struct crosstabulation
*xt
)
260 size_t n_columns
= xt
->vars
[COL_VAR
].n_values
;
261 for (; c
< n_columns
; c
++)
267 #define FOR_EACH_POPULATED_ROW(R, XT) \
268 for (size_t R = next_populated_row (0, XT); R < (XT)->vars[ROW_VAR].n_values; \
269 R = next_populated_row (R + 1, XT))
271 next_populated_row (size_t r
, const struct crosstabulation
*xt
)
273 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
274 for (; r
< n_rows
; r
++)
280 /* Parses and executes the CROSSTABS procedure. */
282 cmd_crosstabs (struct lexer
*lexer
, struct dataset
*ds
)
284 int result
= CMD_FAILURE
;
286 struct crosstabs_proc proc
= {
287 .dict
= dataset_dict (ds
),
292 .weight_format
= dict_get_weight_format (dataset_dict (ds
)),
296 .var_ranges
= HMAP_INITIALIZER (proc
.var_ranges
),
301 .cells
= 1u << CRS_CL_COUNT
,
302 /* n_cells and a_cells will be filled in later. */
304 .round_case_weights
= false,
305 .round_cells
= false,
312 bool show_tables
= true;
314 lex_match (lexer
, T_SLASH
);
317 if (lex_match_id (lexer
, "VARIABLES"))
319 if (!parse_crosstabs_variables (lexer
, ds
, &proc
))
322 else if (lex_match_id (lexer
, "MISSING"))
324 lex_match (lexer
, T_EQUALS
);
325 exclude_ofs
= lex_ofs (lexer
);
326 if (lex_match_id (lexer
, "TABLE"))
327 proc
.exclude
= MV_ANY
;
328 else if (lex_match_id (lexer
, "INCLUDE"))
329 proc
.exclude
= MV_SYSTEM
;
330 else if (lex_match_id (lexer
, "REPORT"))
334 lex_error_expecting (lexer
, "TABLE", "INCLUDE", "REPORT");
338 else if (lex_match_id (lexer
, "COUNT"))
340 lex_match (lexer
, T_EQUALS
);
342 /* Default is CELL. */
343 proc
.round_case_weights
= false;
344 proc
.round_cells
= true;
346 while (lex_token (lexer
) != T_SLASH
&& lex_token (lexer
) != T_ENDCMD
)
348 if (lex_match_id (lexer
, "ASIS"))
350 proc
.round_case_weights
= false;
351 proc
.round_cells
= false;
353 else if (lex_match_id (lexer
, "CASE"))
355 proc
.round_case_weights
= true;
356 proc
.round_cells
= false;
358 else if (lex_match_id (lexer
, "CELL"))
360 proc
.round_case_weights
= false;
361 proc
.round_cells
= true;
363 else if (lex_match_id (lexer
, "ROUND"))
364 proc
.round_down
= false;
365 else if (lex_match_id (lexer
, "TRUNCATE"))
366 proc
.round_down
= true;
369 lex_error_expecting (lexer
, "ASIS", "CASE", "CELL",
370 "ROUND", "TRUNCATE");
373 lex_match (lexer
, T_COMMA
);
376 else if (lex_match_id (lexer
, "FORMAT"))
378 lex_match (lexer
, T_EQUALS
);
379 while (lex_token (lexer
) != T_SLASH
&& lex_token (lexer
) != T_ENDCMD
)
381 if (lex_match_id (lexer
, "AVALUE"))
382 proc
.descending
= false;
383 else if (lex_match_id (lexer
, "DVALUE"))
384 proc
.descending
= true;
385 else if (lex_match_id (lexer
, "TABLES"))
387 else if (lex_match_id (lexer
, "NOTABLES"))
391 lex_error_expecting (lexer
, "AVALUE", "DVALUE",
392 "TABLES", "NOTABLES");
395 lex_match (lexer
, T_COMMA
);
398 else if (lex_match_id (lexer
, "BARCHART"))
399 proc
.barchart
= true;
400 else if (lex_match_id (lexer
, "CELLS"))
402 lex_match (lexer
, T_EQUALS
);
404 if (lex_match_id (lexer
, "NONE"))
406 else if (lex_match (lexer
, T_ALL
))
407 proc
.cells
= CRS_ALL_CELLS
;
411 while (lex_token (lexer
) != T_SLASH
&& lex_token (lexer
) != T_ENDCMD
)
413 #define C(KEYWORD, STRING, RC) \
414 if (lex_match_id (lexer, #KEYWORD)) \
416 proc.cells |= 1u << CRS_CL_##KEYWORD; \
422 static const char *cells
[] =
424 #define C(KEYWORD, STRING, RC) #KEYWORD,
428 lex_error_expecting_array (lexer
, cells
,
429 sizeof cells
/ sizeof *cells
);
433 proc
.cells
= ((1u << CRS_CL_COUNT
) | (1u << CRS_CL_ROW
)
434 | (1u << CRS_CL_COLUMN
) | (1u << CRS_CL_TOTAL
));
437 else if (lex_match_id (lexer
, "STATISTICS"))
439 lex_match (lexer
, T_EQUALS
);
441 if (lex_match_id (lexer
, "NONE"))
443 else if (lex_match (lexer
, T_ALL
))
444 proc
.statistics
= CRS_ALL_STATISTICS
;
448 while (lex_token (lexer
) != T_SLASH
&& lex_token (lexer
) != T_ENDCMD
)
451 if (lex_match_id (lexer, #KEYWORD)) \
453 proc.statistics |= CRS_ST_##KEYWORD; \
458 static const char *stats
[] =
460 #define S(KEYWORD) #KEYWORD,
464 lex_error_expecting_array (lexer
, stats
,
465 sizeof stats
/ sizeof *stats
);
468 if (!proc
.statistics
)
469 proc
.statistics
= CRS_ST_CHISQ
;
472 else if (!parse_crosstabs_tables (lexer
, ds
, &proc
))
475 if (!lex_match (lexer
, T_SLASH
))
478 if (!lex_end_of_command (lexer
))
483 msg (SE
, _("At least one crosstabulation must be requested (using "
484 "the TABLES subcommand)."));
491 for (size_t i
= 0; i
< CRS_N_CELLS
; i
++)
492 if (proc
.cells
& (1u << i
))
493 proc
.a_cells
[proc
.n_cells
++] = i
;
494 assert (proc
.n_cells
< CRS_N_CELLS
);
496 /* Missing values. */
497 if (proc
.mode
== GENERAL
&& !proc
.exclude
)
499 lex_ofs_msg (lexer
, SW
, exclude_ofs
, exclude_ofs
,
500 _("Missing mode %s not allowed in general mode. "
501 "Assuming %s."), "REPORT", "MISSING=TABLE");
502 proc
.exclude
= MV_ANY
;
505 struct casereader
*input
= casereader_create_filter_weight (proc_open (ds
),
508 struct casegrouper
*grouper
= casegrouper_create_splits (input
, dataset_dict (ds
));
509 struct casereader
*group
;
510 while (casegrouper_get_next_group (grouper
, &group
))
512 output_split_file_values_peek (ds
, group
);
514 /* Initialize hash tables. */
515 for (struct crosstabulation
*xt
= &proc
.pivots
[0];
516 xt
< &proc
.pivots
[proc
.n_pivots
]; xt
++)
517 hmap_init (&xt
->data
);
521 for (; (c
= casereader_read (group
)) != NULL
; case_unref (c
))
522 for (struct crosstabulation
*xt
= &proc
.pivots
[0];
523 xt
< &proc
.pivots
[proc
.n_pivots
]; xt
++)
525 double weight
= dict_get_case_weight (dataset_dict (ds
), c
,
527 if (proc
.round_case_weights
)
529 weight
= round_weight (&proc
, weight
);
533 if (should_tabulate_case (xt
, c
, proc
.exclude
))
535 if (proc
.mode
== GENERAL
)
536 tabulate_general_case (xt
, c
, weight
);
538 tabulate_integer_case (xt
, c
, weight
);
541 xt
->missing
+= weight
;
543 casereader_destroy (group
);
546 postcalc (&proc
, lexer
);
548 bool ok
= casegrouper_destroy (grouper
);
549 ok
= proc_commit (ds
) && ok
;
551 result
= ok
? CMD_SUCCESS
: CMD_FAILURE
;
554 free (proc
.variables
);
556 struct var_range
*range
, *next_range
;
557 HMAP_FOR_EACH_SAFE (range
, next_range
, struct var_range
, hmap_node
,
560 hmap_delete (&proc
.var_ranges
, &range
->hmap_node
);
563 for (struct crosstabulation
*xt
= &proc
.pivots
[0];
564 xt
< &proc
.pivots
[proc
.n_pivots
]; xt
++)
567 free (xt
->const_vars
);
568 free (xt
->const_indexes
);
575 /* Parses the TABLES subcommand. */
577 parse_crosstabs_tables (struct lexer
*lexer
, struct dataset
*ds
,
578 struct crosstabs_proc
*proc
)
580 const struct variable
***by
= NULL
;
581 size_t *by_nvar
= NULL
;
584 /* Ensure that this is a TABLES subcommand. */
585 if (!lex_match_id (lexer
, "TABLES")
586 && (lex_token (lexer
) != T_ID
||
587 dict_lookup_var (dataset_dict (ds
), lex_tokcstr (lexer
)) == NULL
)
588 && lex_token (lexer
) != T_ALL
)
590 lex_error (lexer
, _("Syntax error expecting subcommand name or "
594 lex_match (lexer
, T_EQUALS
);
596 struct const_var_set
*var_set
598 ? const_var_set_create_from_array (proc
->variables
,
600 : const_var_set_create_from_dict (dataset_dict (ds
)));
604 int vars_start
= lex_ofs (lexer
);
605 bool overflow
= false;
608 by
= xnrealloc (by
, n_by
+ 1, sizeof *by
);
609 by_nvar
= xnrealloc (by_nvar
, n_by
+ 1, sizeof *by_nvar
);
610 if (!parse_const_var_set_vars (lexer
, var_set
, &by
[n_by
], &by_nvar
[n_by
],
611 PV_NO_DUPLICATE
| PV_NO_SCRATCH
))
613 size_t n
= by_nvar
[n_by
++];
614 if (xalloc_oversized (nx
, n
))
618 while (lex_match (lexer
, T_BY
));
621 lex_ofs_error (lexer
, vars_start
, lex_ofs (lexer
) - 1,
622 _("Too many cross-tabulation variables or dimensions."));
627 bool unused UNUSED
= lex_force_match (lexer
, T_BY
);
630 int vars_end
= lex_ofs (lexer
) - 1;
632 size_t *by_iter
= XCALLOC (n_by
, size_t);
633 proc
->pivots
= xnrealloc (proc
->pivots
,
634 proc
->n_pivots
+ nx
, sizeof *proc
->pivots
);
635 for (size_t i
= 0; i
< nx
; i
++)
637 struct crosstabulation
*xt
= &proc
->pivots
[proc
->n_pivots
++];
639 *xt
= (struct crosstabulation
) {
641 .weight_format
= proc
->weight_format
,
644 .vars
= xcalloc (n_by
, sizeof *xt
->vars
),
647 .const_indexes
= NULL
,
648 .start_ofs
= vars_start
,
652 for (size_t j
= 0; j
< n_by
; j
++)
653 xt
->vars
[j
].var
= by
[j
][by_iter
[j
]];
655 for (int j
= n_by
- 1; j
>= 0; j
--)
657 if (++by_iter
[j
] < by_nvar
[j
])
666 /* All return paths lead here. */
667 for (size_t i
= 0; i
< n_by
; i
++)
672 const_var_set_destroy (var_set
);
677 /* Parses the VARIABLES subcommand. */
679 parse_crosstabs_variables (struct lexer
*lexer
, struct dataset
*ds
,
680 struct crosstabs_proc
*proc
)
684 lex_next_error (lexer
, -1, -1, _("%s must be specified before %s."),
685 "VARIABLES", "TABLES");
689 lex_match (lexer
, T_EQUALS
);
693 size_t orig_nv
= proc
->n_variables
;
695 if (!parse_variables_const (lexer
, dataset_dict (ds
),
696 &proc
->variables
, &proc
->n_variables
,
697 (PV_APPEND
| PV_NUMERIC
698 | PV_NO_DUPLICATE
| PV_NO_SCRATCH
)))
701 if (!lex_force_match (lexer
, T_LPAREN
))
704 if (!lex_force_int (lexer
))
706 long min
= lex_integer (lexer
);
709 lex_match (lexer
, T_COMMA
);
711 if (!lex_force_int_range (lexer
, NULL
, min
, LONG_MAX
))
713 long max
= lex_integer (lexer
);
716 if (!lex_force_match (lexer
, T_RPAREN
))
719 for (size_t i
= orig_nv
; i
< proc
->n_variables
; i
++)
721 const struct variable
*var
= proc
->variables
[i
];
722 struct var_range
*vr
= xmalloc (sizeof *vr
);
723 *vr
= (struct var_range
) {
727 .count
= max
- min
+ 1,
729 hmap_insert (&proc
->var_ranges
, &vr
->hmap_node
,
730 hash_pointer (var
, 0));
733 if (lex_token (lexer
) == T_SLASH
)
737 proc
->mode
= INTEGER
;
741 free (proc
->variables
);
742 proc
->variables
= NULL
;
743 proc
->n_variables
= 0;
747 /* Data file processing. */
749 static const struct var_range
*
750 get_var_range (const struct crosstabs_proc
*proc
, const struct variable
*var
)
752 if (!hmap_is_empty (&proc
->var_ranges
))
754 const struct var_range
*range
;
756 HMAP_FOR_EACH_IN_BUCKET (range
, struct var_range
, hmap_node
,
757 hash_pointer (var
, 0), &proc
->var_ranges
)
758 if (range
->var
== var
)
766 should_tabulate_case (const struct crosstabulation
*xt
, const struct ccase
*c
,
767 enum mv_class exclude
)
769 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
771 const struct variable
*var
= xt
->vars
[j
].var
;
772 const struct var_range
*range
= get_var_range (xt
->proc
, var
);
774 if (var_is_value_missing (var
, case_data (c
, var
)) & exclude
)
779 double num
= case_num (c
, var
);
780 if (num
< range
->min
|| num
>= range
->max
+ 1.)
788 tabulate_integer_case (struct crosstabulation
*xt
, const struct ccase
*c
,
792 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
794 /* Throw away fractional parts of values. */
795 hash
= hash_int (case_num (c
, xt
->vars
[j
].var
), hash
);
799 HMAP_FOR_EACH_WITH_HASH (te
, struct freq
, node
, hash
, &xt
->data
)
801 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
802 if ((int) case_num (c
, xt
->vars
[j
].var
) != (int) te
->values
[j
].f
)
805 /* Found an existing entry. */
812 /* No existing entry. Create a new one. */
813 te
= xmalloc (table_entry_size (xt
->n_vars
));
815 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
816 te
->values
[j
].f
= (int) case_num (c
, xt
->vars
[j
].var
);
817 hmap_insert (&xt
->data
, &te
->node
, hash
);
821 tabulate_general_case (struct crosstabulation
*xt
, const struct ccase
*c
,
825 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
827 const struct variable
*var
= xt
->vars
[j
].var
;
828 hash
= value_hash (case_data (c
, var
), var_get_width (var
), hash
);
832 HMAP_FOR_EACH_WITH_HASH (te
, struct freq
, node
, hash
, &xt
->data
)
834 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
836 const struct variable
*var
= xt
->vars
[j
].var
;
837 if (!value_equal (case_data (c
, var
), &te
->values
[j
],
838 var_get_width (var
)))
842 /* Found an existing entry. */
849 /* No existing entry. Create a new one. */
850 te
= xmalloc (table_entry_size (xt
->n_vars
));
852 for (size_t j
= 0; j
< xt
->n_vars
; j
++)
854 const struct variable
*var
= xt
->vars
[j
].var
;
855 value_clone (&te
->values
[j
], case_data (c
, var
), var_get_width (var
));
857 hmap_insert (&xt
->data
, &te
->node
, hash
);
860 /* Post-data reading calculations. */
862 static int compare_table_entry_vars_3way (const struct freq
*a
,
863 const struct freq
*b
,
864 const struct crosstabulation
*xt
,
866 static int compare_table_entry_3way (const void *ap_
, const void *bp_
,
868 static int compare_table_entry_3way_inv (const void *ap_
, const void *bp_
,
871 static void enum_var_values (const struct crosstabulation
*, int var_idx
,
873 static void free_var_values (const struct crosstabulation
*, int var_idx
);
874 static void output_crosstabulation (struct crosstabs_proc
*,
875 struct crosstabulation
*,
877 static void make_crosstabulation_subset (struct crosstabulation
*xt
,
878 size_t row0
, size_t row1
,
879 struct crosstabulation
*subset
);
880 static void make_summary_table (struct crosstabs_proc
*);
881 static bool find_crosstab (struct crosstabulation
*, size_t *row0p
,
885 postcalc (struct crosstabs_proc
*proc
, struct lexer
*lexer
)
887 /* Round hash table entries, if requested
889 If this causes any of the cell counts to fall to zero, delete those
891 if (proc
->round_cells
)
892 for (struct crosstabulation
*xt
= proc
->pivots
;
893 xt
< &proc
->pivots
[proc
->n_pivots
]; xt
++)
895 struct freq
*e
, *next
;
896 HMAP_FOR_EACH_SAFE (e
, next
, struct freq
, node
, &xt
->data
)
898 e
->count
= round_weight (proc
, e
->count
);
901 hmap_delete (&xt
->data
, &e
->node
);
907 /* Convert hash tables into sorted arrays of entries. */
908 for (struct crosstabulation
*xt
= proc
->pivots
;
909 xt
< &proc
->pivots
[proc
->n_pivots
]; xt
++)
911 xt
->n_entries
= hmap_count (&xt
->data
);
912 xt
->entries
= xnmalloc (xt
->n_entries
, sizeof *xt
->entries
);
916 HMAP_FOR_EACH (e
, struct freq
, node
, &xt
->data
)
917 xt
->entries
[i
++] = e
;
919 hmap_destroy (&xt
->data
);
921 sort (xt
->entries
, xt
->n_entries
, sizeof *xt
->entries
,
922 proc
->descending
? compare_table_entry_3way_inv
: compare_table_entry_3way
,
926 make_summary_table (proc
);
928 /* Output each pivot table. */
929 for (struct crosstabulation
*xt
= proc
->pivots
;
930 xt
< &proc
->pivots
[proc
->n_pivots
]; xt
++)
932 output_crosstabulation (proc
, xt
, lexer
);
935 int n_vars
= (xt
->n_vars
> 2 ? 2 : xt
->n_vars
);
936 const struct variable
**vars
= XCALLOC (n_vars
, const struct variable
*);
937 for (size_t i
= 0; i
< n_vars
; i
++)
938 vars
[i
] = xt
->vars
[i
].var
;
939 chart_submit (barchart_create (vars
, n_vars
, _("Count"),
941 xt
->entries
, xt
->n_entries
));
946 /* Free output and prepare for next split file. */
947 for (struct crosstabulation
*xt
= proc
->pivots
;
948 xt
< &proc
->pivots
[proc
->n_pivots
]; xt
++)
952 /* Free the members that were allocated in this function(and the values
953 owned by the entries.
955 The other pointer members are either both allocated and destroyed at a
956 lower level (in output_crosstabulation), or both allocated and
957 destroyed at a higher level (in crs_custom_tables and free_proc,
959 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
961 int width
= var_get_width (xt
->vars
[i
].var
);
962 if (value_needs_init (width
))
963 for (size_t j
= 0; j
< xt
->n_entries
; j
++)
964 value_destroy (&xt
->entries
[j
]->values
[i
], width
);
967 for (size_t i
= 0; i
< xt
->n_entries
; i
++)
968 free (xt
->entries
[i
]);
974 make_crosstabulation_subset (struct crosstabulation
*xt
, size_t row0
,
975 size_t row1
, struct crosstabulation
*subset
)
980 assert (xt
->n_consts
== 0);
982 subset
->vars
= xt
->vars
;
984 subset
->n_consts
= xt
->n_vars
- 2;
985 subset
->const_vars
= xt
->vars
+ 2;
986 subset
->const_indexes
= xcalloc (subset
->n_consts
,
987 sizeof *subset
->const_indexes
);
988 for (size_t i
= 0; i
< subset
->n_consts
; i
++)
990 const union value
*value
= &xt
->entries
[row0
]->values
[2 + i
];
992 for (size_t j
= 0; j
< xt
->vars
[2 + i
].n_values
; j
++)
993 if (value_equal (&xt
->vars
[2 + i
].values
[j
], value
,
994 var_get_width (xt
->vars
[2 + i
].var
)))
996 subset
->const_indexes
[i
] = j
;
1003 subset
->entries
= &xt
->entries
[row0
];
1004 subset
->n_entries
= row1
- row0
;
1008 compare_table_entry_var_3way (const struct freq
*a
,
1009 const struct freq
*b
,
1010 const struct crosstabulation
*xt
,
1013 return value_compare_3way (&a
->values
[idx
], &b
->values
[idx
],
1014 var_get_width (xt
->vars
[idx
].var
));
1018 compare_table_entry_vars_3way (const struct freq
*a
,
1019 const struct freq
*b
,
1020 const struct crosstabulation
*xt
,
1023 for (int i
= idx1
- 1; i
>= idx0
; i
--)
1025 int cmp
= compare_table_entry_var_3way (a
, b
, xt
, i
);
1032 /* Compare the struct freq at *AP to the one at *BP and
1033 return a strcmp()-type result. */
1035 compare_table_entry_3way (const void *ap_
, const void *bp_
, const void *xt_
)
1037 const struct freq
*const *ap
= ap_
;
1038 const struct freq
*const *bp
= bp_
;
1039 const struct freq
*a
= *ap
;
1040 const struct freq
*b
= *bp
;
1041 const struct crosstabulation
*xt
= xt_
;
1043 int cmp
= compare_table_entry_vars_3way (a
, b
, xt
, 2, xt
->n_vars
);
1047 cmp
= compare_table_entry_var_3way (a
, b
, xt
, ROW_VAR
);
1051 return compare_table_entry_var_3way (a
, b
, xt
, COL_VAR
);
1054 /* Inverted version of compare_table_entry_3way */
1056 compare_table_entry_3way_inv (const void *ap_
, const void *bp_
, const void *xt_
)
1058 return -compare_table_entry_3way (ap_
, bp_
, xt_
);
1061 /* Output a table summarizing the cases processed. */
1063 make_summary_table (struct crosstabs_proc
*proc
)
1065 struct pivot_table
*table
= pivot_table_create (N_("Summary"));
1066 pivot_table_set_weight_var (table
, dict_get_weight (proc
->dict
));
1068 pivot_dimension_create (table
, PIVOT_AXIS_COLUMN
, N_("Statistics"),
1069 N_("N"), PIVOT_RC_COUNT
,
1070 N_("Percent"), PIVOT_RC_PERCENT
);
1072 struct pivot_dimension
*cases
= pivot_dimension_create (
1073 table
, PIVOT_AXIS_COLUMN
, N_("Cases"),
1074 N_("Valid"), N_("Missing"), N_("Total"));
1075 cases
->root
->show_label
= true;
1077 struct pivot_dimension
*tables
= pivot_dimension_create (
1078 table
, PIVOT_AXIS_ROW
, N_("Crosstabulation"));
1079 for (struct crosstabulation
*xt
= &proc
->pivots
[0];
1080 xt
< &proc
->pivots
[proc
->n_pivots
]; xt
++)
1082 struct string name
= DS_EMPTY_INITIALIZER
;
1083 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1086 ds_put_cstr (&name
, " × ");
1087 ds_put_cstr (&name
, var_to_string (xt
->vars
[i
].var
));
1090 int row
= pivot_category_create_leaf (
1092 pivot_value_new_user_text_nocopy (ds_steal_cstr (&name
)));
1095 for (size_t i
= 0; i
< xt
->n_entries
; i
++)
1096 valid
+= xt
->entries
[i
]->count
;
1102 for (int i
= 0; i
< 3; i
++)
1104 pivot_table_put3 (table
, 0, i
, row
, pivot_value_new_number (n
[i
]));
1105 pivot_table_put3 (table
, 1, i
, row
,
1106 pivot_value_new_number (n
[i
] / n
[2] * 100.0));
1110 pivot_table_submit (table
);
1115 static struct pivot_table
*create_crosstab_table (
1116 struct crosstabs_proc
*, struct crosstabulation
*,
1117 size_t crs_leaves
[CRS_N_CELLS
]);
1118 static struct pivot_table
*create_chisq_table (struct crosstabulation
*);
1119 static struct pivot_table
*create_sym_table (struct crosstabulation
*);
1120 static struct pivot_table
*create_risk_table (
1121 struct crosstabulation
*, struct pivot_dimension
**risk_statistics
);
1122 static struct pivot_table
*create_direct_table (struct crosstabulation
*);
1123 static void display_crosstabulation (struct crosstabs_proc
*,
1124 struct crosstabulation
*,
1125 struct pivot_table
*,
1126 size_t crs_leaves
[CRS_N_CELLS
]);
1127 static void display_chisq (struct crosstabulation
*, struct pivot_table
*);
1128 static void display_symmetric (struct crosstabs_proc
*,
1129 struct crosstabulation
*, struct pivot_table
*);
1130 static void display_risk (struct crosstabulation
*, struct pivot_table
*,
1131 struct pivot_dimension
*risk_statistics
);
1132 static void display_directional (struct crosstabs_proc
*,
1133 struct crosstabulation
*,
1134 struct pivot_table
*);
1135 static void delete_missing (struct crosstabulation
*);
1136 static void build_matrix (struct crosstabulation
*);
1138 /* Output pivot table XT in the context of PROC. */
1140 output_crosstabulation (struct crosstabs_proc
*proc
, struct crosstabulation
*xt
,
1141 struct lexer
*lexer
)
1143 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1144 enum_var_values (xt
, i
, proc
->descending
);
1146 if (xt
->vars
[COL_VAR
].n_values
== 0)
1150 ds_init_cstr (&vars
, var_to_string (xt
->vars
[0].var
));
1151 for (size_t i
= 1; i
< xt
->n_vars
; i
++)
1152 ds_put_format (&vars
, " × %s", var_to_string (xt
->vars
[i
].var
));
1154 /* TRANSLATORS: The %s here describes a crosstabulation. It takes the
1155 form "var1 * var2 * var3 * ...". */
1156 lex_ofs_msg (lexer
, SW
, xt
->start_ofs
, xt
->end_ofs
,
1157 _("Crosstabulation %s contained no non-missing cases."),
1161 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1162 free_var_values (xt
, i
);
1166 size_t crs_leaves
[CRS_N_CELLS
];
1167 struct pivot_table
*table
= (proc
->cells
1168 ? create_crosstab_table (proc
, xt
, crs_leaves
)
1170 struct pivot_table
*chisq
= (proc
->statistics
& CRS_ST_CHISQ
1171 ? create_chisq_table (xt
)
1173 struct pivot_table
*sym
1174 = (proc
->statistics
& (CRS_ST_PHI
| CRS_ST_CC
| CRS_ST_BTAU
| CRS_ST_CTAU
1175 | CRS_ST_GAMMA
| CRS_ST_CORR
| CRS_ST_KAPPA
)
1176 ? create_sym_table (xt
)
1178 struct pivot_dimension
*risk_statistics
= NULL
;
1179 struct pivot_table
*risk
= (proc
->statistics
& CRS_ST_RISK
1180 ? create_risk_table (xt
, &risk_statistics
)
1182 struct pivot_table
*direct
1183 = (proc
->statistics
& (CRS_ST_LAMBDA
| CRS_ST_UC
| CRS_ST_D
| CRS_ST_ETA
)
1184 ? create_direct_table (xt
)
1189 while (find_crosstab (xt
, &row0
, &row1
))
1191 struct crosstabulation x
;
1193 make_crosstabulation_subset (xt
, row0
, row1
, &x
);
1195 size_t n_rows
= x
.vars
[ROW_VAR
].n_values
;
1196 size_t n_cols
= x
.vars
[COL_VAR
].n_values
;
1197 if (size_overflow_p (xtimes (xtimes (n_rows
, n_cols
), sizeof (double))))
1199 x
.row_tot
= xmalloc (n_rows
* sizeof *x
.row_tot
);
1200 x
.col_tot
= xmalloc (n_cols
* sizeof *x
.col_tot
);
1201 x
.mat
= xmalloc (n_rows
* n_cols
* sizeof *x
.mat
);
1205 /* Find the first variable that differs from the last subtable. */
1207 display_crosstabulation (proc
, &x
, table
, crs_leaves
);
1209 if (proc
->exclude
== 0)
1210 delete_missing (&x
);
1213 display_chisq (&x
, chisq
);
1216 display_symmetric (proc
, &x
, sym
);
1218 display_risk (&x
, risk
, risk_statistics
);
1220 display_directional (proc
, &x
, direct
);
1225 free (x
.const_indexes
);
1229 pivot_table_submit (table
);
1232 pivot_table_submit (chisq
);
1235 pivot_table_submit (sym
);
1239 if (!pivot_table_is_empty (risk
))
1240 pivot_table_submit (risk
);
1242 pivot_table_unref (risk
);
1246 pivot_table_submit (direct
);
1248 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1249 free_var_values (xt
, i
);
1253 build_matrix (struct crosstabulation
*x
)
1255 const int col_var_width
= var_get_width (x
->vars
[COL_VAR
].var
);
1256 const int row_var_width
= var_get_width (x
->vars
[ROW_VAR
].var
);
1257 size_t n_rows
= x
->vars
[ROW_VAR
].n_values
;
1258 size_t n_cols
= x
->vars
[COL_VAR
].n_values
;
1260 double *mp
= x
->mat
;
1263 for (struct freq
**p
= x
->entries
; p
< &x
->entries
[x
->n_entries
]; p
++)
1265 const struct freq
*te
= *p
;
1267 while (!value_equal (&x
->vars
[ROW_VAR
].values
[row
],
1268 &te
->values
[ROW_VAR
], row_var_width
))
1270 for (; col
< n_cols
; col
++)
1276 while (!value_equal (&x
->vars
[COL_VAR
].values
[col
],
1277 &te
->values
[COL_VAR
], col_var_width
))
1284 if (++col
>= n_cols
)
1290 while (mp
< &x
->mat
[n_cols
* n_rows
])
1292 assert (mp
== &x
->mat
[n_cols
* n_rows
]);
1294 /* Column totals, row totals, ns_rows. */
1296 for (col
= 0; col
< n_cols
; col
++)
1297 x
->col_tot
[col
] = 0.0;
1298 for (row
= 0; row
< n_rows
; row
++)
1299 x
->row_tot
[row
] = 0.0;
1301 for (row
= 0; row
< n_rows
; row
++)
1303 bool row_is_empty
= true;
1304 for (col
= 0; col
< n_cols
; col
++)
1308 row_is_empty
= false;
1309 x
->col_tot
[col
] += *mp
;
1310 x
->row_tot
[row
] += *mp
;
1317 assert (mp
== &x
->mat
[n_cols
* n_rows
]);
1321 for (col
= 0; col
< n_cols
; col
++)
1322 for (row
= 0; row
< n_rows
; row
++)
1323 if (x
->mat
[col
+ row
* n_cols
] != 0.0)
1331 for (col
= 0; col
< n_cols
; col
++)
1332 x
->total
+= x
->col_tot
[col
];
1336 add_var_dimension (struct pivot_table
*table
, const struct xtab_var
*var
,
1337 enum pivot_axis_type axis_type
, bool total
)
1339 struct pivot_dimension
*d
= pivot_dimension_create__ (
1340 table
, axis_type
, pivot_value_new_variable (var
->var
));
1342 struct pivot_footnote
*missing_footnote
= pivot_table_create_footnote (
1343 table
, pivot_value_new_text (N_("Missing value")));
1345 struct pivot_category
*group
= pivot_category_create_group__ (
1346 d
->root
, pivot_value_new_variable (var
->var
));
1347 for (size_t j
= 0; j
< var
->n_values
; j
++)
1349 struct pivot_value
*value
= pivot_value_new_var_value (
1350 var
->var
, &var
->values
[j
]);
1351 if (var_is_value_missing (var
->var
, &var
->values
[j
]))
1352 pivot_value_add_footnote (value
, missing_footnote
);
1353 pivot_category_create_leaf (group
, value
);
1357 pivot_category_create_leaf (d
->root
, pivot_value_new_text (N_("Total")));
1360 static struct pivot_table
*
1361 create_crosstab_table (struct crosstabs_proc
*proc
, struct crosstabulation
*xt
,
1362 size_t crs_leaves
[CRS_N_CELLS
])
1365 struct string title
= DS_EMPTY_INITIALIZER
;
1366 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1369 ds_put_cstr (&title
, " × ");
1370 ds_put_cstr (&title
, var_to_string (xt
->vars
[i
].var
));
1372 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
1374 const struct variable
*var
= xt
->const_vars
[i
].var
;
1375 const union value
*value
= &xt
->entries
[0]->values
[2 + i
];
1378 ds_put_format (&title
, ", %s=", var_to_string (var
));
1380 /* Insert the formatted value of VAR without any leading spaces. */
1381 s
= data_out (value
, var_get_encoding (var
), var_get_print_format (var
),
1382 settings_get_fmt_settings ());
1383 ds_put_cstr (&title
, s
+ strspn (s
, " "));
1386 struct pivot_table
*table
= pivot_table_create__ (
1387 pivot_value_new_user_text_nocopy (ds_steal_cstr (&title
)),
1389 pivot_table_set_weight_format (table
, proc
->weight_format
);
1391 struct pivot_dimension
*statistics
= pivot_dimension_create (
1392 table
, PIVOT_AXIS_ROW
, N_("Statistics"));
1399 static const struct statistic stats
[CRS_N_CELLS
] =
1401 #define C(KEYWORD, STRING, RC) { STRING, RC },
1405 for (size_t i
= 0; i
< CRS_N_CELLS
; i
++)
1406 if (proc
->cells
& (1u << i
) && stats
[i
].label
)
1407 crs_leaves
[i
] = pivot_category_create_leaf_rc (
1408 statistics
->root
, pivot_value_new_text (stats
[i
].label
),
1411 for (size_t i
= 0; i
< xt
->n_vars
; i
++)
1412 add_var_dimension (table
, &xt
->vars
[i
],
1413 i
== COL_VAR
? PIVOT_AXIS_COLUMN
: PIVOT_AXIS_ROW
,
1419 static struct pivot_table
*
1420 create_chisq_table (struct crosstabulation
*xt
)
1422 struct pivot_table
*chisq
= pivot_table_create (N_("Chi-Square Tests"));
1423 pivot_table_set_weight_format (chisq
, xt
->weight_format
);
1425 pivot_dimension_create (
1426 chisq
, PIVOT_AXIS_ROW
, N_("Statistics"),
1427 N_("Pearson Chi-Square"),
1428 N_("Likelihood Ratio"),
1429 N_("Fisher's Exact Test"),
1430 N_("Continuity Correction"),
1431 N_("Linear-by-Linear Association"),
1432 N_("N of Valid Cases"), PIVOT_RC_COUNT
);
1434 pivot_dimension_create (
1435 chisq
, PIVOT_AXIS_COLUMN
, N_("Statistics"),
1436 N_("Value"), PIVOT_RC_OTHER
,
1437 N_("df"), PIVOT_RC_COUNT
,
1438 N_("Asymptotic Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE
,
1439 N_("Exact Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE
,
1440 N_("Exact Sig. (1-tailed)"), PIVOT_RC_SIGNIFICANCE
);
1442 for (size_t i
= 2; i
< xt
->n_vars
; i
++)
1443 add_var_dimension (chisq
, &xt
->vars
[i
], PIVOT_AXIS_ROW
, false);
1448 /* Symmetric measures. */
1449 static struct pivot_table
*
1450 create_sym_table (struct crosstabulation
*xt
)
1452 struct pivot_table
*sym
= pivot_table_create (N_("Symmetric Measures"));
1453 pivot_table_set_weight_format (sym
, xt
->weight_format
);
1455 pivot_dimension_create (
1456 sym
, PIVOT_AXIS_COLUMN
, N_("Values"),
1457 N_("Value"), PIVOT_RC_OTHER
,
1458 N_("Asymp. Std. Error"), PIVOT_RC_OTHER
,
1459 N_("Approx. T"), PIVOT_RC_OTHER
,
1460 N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE
);
1462 struct pivot_dimension
*statistics
= pivot_dimension_create (
1463 sym
, PIVOT_AXIS_ROW
, N_("Statistics"));
1464 pivot_category_create_group (
1465 statistics
->root
, N_("Nominal by Nominal"),
1466 N_("Phi"), N_("Cramer's V"), N_("Contingency Coefficient"));
1467 pivot_category_create_group (
1468 statistics
->root
, N_("Ordinal by Ordinal"),
1469 N_("Kendall's tau-b"), N_("Kendall's tau-c"),
1470 N_("Gamma"), N_("Spearman Correlation"));
1471 pivot_category_create_group (
1472 statistics
->root
, N_("Interval by Interval"),
1474 pivot_category_create_group (
1475 statistics
->root
, N_("Measure of Agreement"),
1477 pivot_category_create_leaves (statistics
->root
, N_("N of Valid Cases"),
1480 for (size_t i
= 2; i
< xt
->n_vars
; i
++)
1481 add_var_dimension (sym
, &xt
->vars
[i
], PIVOT_AXIS_ROW
, false);
1486 /* Risk estimate. */
1487 static struct pivot_table
*
1488 create_risk_table (struct crosstabulation
*xt
,
1489 struct pivot_dimension
**risk_statistics
)
1491 struct pivot_table
*risk
= pivot_table_create (N_("Risk Estimate"));
1492 pivot_table_set_weight_format (risk
, xt
->weight_format
);
1494 struct pivot_dimension
*values
= pivot_dimension_create (
1495 risk
, PIVOT_AXIS_COLUMN
, N_("Values"),
1496 N_("Value"), PIVOT_RC_OTHER
);
1497 pivot_category_create_group (
1498 /* xgettext:no-c-format */
1499 values
->root
, N_("95% Confidence Interval"),
1500 N_("Lower"), PIVOT_RC_OTHER
,
1501 N_("Upper"), PIVOT_RC_OTHER
);
1503 *risk_statistics
= pivot_dimension_create (
1504 risk
, PIVOT_AXIS_ROW
, N_("Statistics"));
1506 for (size_t i
= 2; i
< xt
->n_vars
; i
++)
1507 add_var_dimension (risk
, &xt
->vars
[i
], PIVOT_AXIS_ROW
, false);
1513 create_direct_stat (struct pivot_category
*parent
,
1514 const struct crosstabulation
*xt
,
1515 const char *name
, bool symmetric
)
1517 struct pivot_category
*group
= pivot_category_create_group (
1520 pivot_category_create_leaf (group
, pivot_value_new_text (N_("Symmetric")));
1522 char *row_label
= xasprintf (_("%s Dependent"),
1523 var_to_string (xt
->vars
[ROW_VAR
].var
));
1524 pivot_category_create_leaf (group
, pivot_value_new_user_text_nocopy (
1527 char *col_label
= xasprintf (_("%s Dependent"),
1528 var_to_string (xt
->vars
[COL_VAR
].var
));
1529 pivot_category_create_leaf (group
, pivot_value_new_user_text_nocopy (
1533 /* Directional measures. */
1534 static struct pivot_table
*
1535 create_direct_table (struct crosstabulation
*xt
)
1537 struct pivot_table
*direct
= pivot_table_create (N_("Directional Measures"));
1538 pivot_table_set_weight_format (direct
, xt
->weight_format
);
1540 pivot_dimension_create (
1541 direct
, PIVOT_AXIS_COLUMN
, N_("Values"),
1542 N_("Value"), PIVOT_RC_OTHER
,
1543 N_("Asymp. Std. Error"), PIVOT_RC_OTHER
,
1544 N_("Approx. T"), PIVOT_RC_OTHER
,
1545 N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE
);
1547 struct pivot_dimension
*statistics
= pivot_dimension_create (
1548 direct
, PIVOT_AXIS_ROW
, N_("Statistics"));
1549 struct pivot_category
*nn
= pivot_category_create_group (
1550 statistics
->root
, N_("Nominal by Nominal"));
1551 create_direct_stat (nn
, xt
, N_("Lambda"), true);
1552 create_direct_stat (nn
, xt
, N_("Goodman and Kruskal tau"), false);
1553 create_direct_stat (nn
, xt
, N_("Uncertainty Coefficient"), true);
1554 struct pivot_category
*oo
= pivot_category_create_group (
1555 statistics
->root
, N_("Ordinal by Ordinal"));
1556 create_direct_stat (oo
, xt
, N_("Somers' d"), true);
1557 struct pivot_category
*ni
= pivot_category_create_group (
1558 statistics
->root
, N_("Nominal by Interval"));
1559 create_direct_stat (ni
, xt
, N_("Eta"), false);
1561 for (size_t i
= 2; i
< xt
->n_vars
; i
++)
1562 add_var_dimension (direct
, &xt
->vars
[i
], PIVOT_AXIS_ROW
, false);
1567 /* Delete missing rows and columns for statistical analysis when
1570 delete_missing (struct crosstabulation
*xt
)
1572 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
1573 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
1575 for (size_t r
= 0; r
< n_rows
; r
++)
1576 if (var_is_num_missing (xt
->vars
[ROW_VAR
].var
,
1577 xt
->vars
[ROW_VAR
].values
[r
].f
) == MV_USER
)
1579 for (size_t c
= 0; c
< n_cols
; c
++)
1580 xt
->mat
[c
+ r
* n_cols
] = 0.;
1585 for (size_t c
= 0; c
< n_cols
; c
++)
1586 if (var_is_num_missing (xt
->vars
[COL_VAR
].var
,
1587 xt
->vars
[COL_VAR
].values
[c
].f
) == MV_USER
)
1589 for (size_t r
= 0; r
< n_rows
; r
++)
1590 xt
->mat
[c
+ r
* n_cols
] = 0.;
1596 find_crosstab (struct crosstabulation
*xt
, size_t *row0p
, size_t *row1p
)
1598 size_t row0
= *row1p
;
1599 if (row0
>= xt
->n_entries
)
1603 for (row1
= row0
+ 1; row1
< xt
->n_entries
; row1
++)
1605 struct freq
*a
= xt
->entries
[row0
];
1606 struct freq
*b
= xt
->entries
[row1
];
1607 if (compare_table_entry_vars_3way (a
, b
, xt
, 2, xt
->n_vars
) != 0)
1615 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1616 result. WIDTH_ points to an int which is either 0 for a
1617 numeric value or a string width for a string value. */
1619 compare_value_3way (const void *a_
, const void *b_
, const void *width_
)
1621 const union value
*a
= a_
;
1622 const union value
*b
= b_
;
1623 const int *width
= width_
;
1625 return value_compare_3way (a
, b
, *width
);
1628 /* Inverted version of the above */
1630 compare_value_3way_inv (const void *a_
, const void *b_
, const void *width_
)
1632 return -compare_value_3way (a_
, b_
, width_
);
1636 /* Given an array of ENTRY_CNT table_entry structures starting at
1637 ENTRIES, creates a sorted list of the values that the variable
1638 with index VAR_IDX takes on. Stores the array of the values in
1639 XT->values and the number of values in XT->n_values. */
1641 enum_var_values (const struct crosstabulation
*xt
, int var_idx
,
1644 struct xtab_var
*xv
= &xt
->vars
[var_idx
];
1645 const struct var_range
*range
= get_var_range (xt
->proc
, xv
->var
);
1649 xv
->values
= xnmalloc (range
->count
, sizeof *xv
->values
);
1650 xv
->n_values
= range
->count
;
1651 for (size_t i
= 0; i
< range
->count
; i
++)
1652 xv
->values
[i
].f
= range
->min
+ i
;
1656 int width
= var_get_width (xv
->var
);
1657 struct hmapx set
= HMAPX_INITIALIZER (set
);
1659 for (size_t i
= 0; i
< xt
->n_entries
; i
++)
1661 const struct freq
*te
= xt
->entries
[i
];
1662 const union value
*value
= &te
->values
[var_idx
];
1663 size_t hash
= value_hash (value
, width
, 0);
1665 const union value
*iter
;
1666 struct hmapx_node
*node
;
1667 HMAPX_FOR_EACH_WITH_HASH (iter
, node
, hash
, &set
)
1668 if (value_equal (iter
, value
, width
))
1671 hmapx_insert (&set
, (union value
*) value
, hash
);
1676 xv
->n_values
= hmapx_count (&set
);
1677 xv
->values
= xnmalloc (xv
->n_values
, sizeof *xv
->values
);
1679 const union value
*iter
;
1680 struct hmapx_node
*node
;
1681 HMAPX_FOR_EACH (iter
, node
, &set
)
1682 xv
->values
[i
++] = *iter
;
1683 hmapx_destroy (&set
);
1685 sort (xv
->values
, xv
->n_values
, sizeof *xv
->values
,
1686 descending
? compare_value_3way_inv
: compare_value_3way
,
1692 free_var_values (const struct crosstabulation
*xt
, int var_idx
)
1694 struct xtab_var
*xv
= &xt
->vars
[var_idx
];
1700 /* Displays the crosstabulation table. */
1702 display_crosstabulation (struct crosstabs_proc
*proc
,
1703 struct crosstabulation
*xt
, struct pivot_table
*table
,
1704 size_t crs_leaves
[CRS_N_CELLS
])
1706 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
1707 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
1709 size_t *indexes
= xnmalloc (table
->n_dimensions
, sizeof *indexes
);
1710 assert (xt
->n_vars
== 2);
1711 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
1712 indexes
[i
+ 3] = xt
->const_indexes
[i
];
1714 /* Put in the actual cells. */
1715 for (size_t r
= 0; r
< n_rows
; r
++)
1717 if (!xt
->row_tot
[r
] && proc
->mode
!= INTEGER
)
1720 indexes
[ROW_VAR
+ 1] = r
;
1721 for (size_t c
= 0; c
< n_cols
; c
++)
1723 if (!xt
->col_tot
[c
] && proc
->mode
!= INTEGER
)
1726 indexes
[COL_VAR
+ 1] = c
;
1728 double *mp
= xt
->mat
+ r
* n_cols
+ c
;
1729 double expected_value
= xt
->row_tot
[r
] * xt
->col_tot
[c
] / xt
->total
;
1730 double residual
= *mp
- expected_value
;
1731 double sresidual
= residual
/ sqrt (expected_value
);
1733 = residual
/ sqrt (expected_value
1734 * (1. - xt
->row_tot
[r
] / xt
->total
)
1735 * (1. - xt
->col_tot
[c
] / xt
->total
));
1736 double entries
[CRS_N_CELLS
] = {
1737 [CRS_CL_COUNT
] = *mp
,
1738 [CRS_CL_ROW
] = *mp
/ xt
->row_tot
[r
] * 100.,
1739 [CRS_CL_COLUMN
] = *mp
/ xt
->col_tot
[c
] * 100.,
1740 [CRS_CL_TOTAL
] = *mp
/ xt
->total
* 100.,
1741 [CRS_CL_EXPECTED
] = expected_value
,
1742 [CRS_CL_RESIDUAL
] = residual
,
1743 [CRS_CL_SRESIDUAL
] = sresidual
,
1744 [CRS_CL_ASRESIDUAL
] = asresidual
,
1746 for (size_t i
= 0; i
< proc
->n_cells
; i
++)
1748 int cell
= proc
->a_cells
[i
];
1749 indexes
[0] = crs_leaves
[cell
];
1750 pivot_table_put (table
, indexes
, table
->n_dimensions
,
1751 pivot_value_new_number (entries
[cell
]));
1757 for (size_t r
= 0; r
< n_rows
; r
++)
1759 if (!xt
->row_tot
[r
] && proc
->mode
!= INTEGER
)
1762 double expected_value
= xt
->row_tot
[r
] / xt
->total
;
1763 double entries
[CRS_N_CELLS
] = {
1764 [CRS_CL_COUNT
] = xt
->row_tot
[r
],
1765 [CRS_CL_ROW
] = 100.0,
1766 [CRS_CL_COLUMN
] = expected_value
* 100.,
1767 [CRS_CL_TOTAL
] = expected_value
* 100.,
1768 [CRS_CL_EXPECTED
] = expected_value
,
1769 [CRS_CL_RESIDUAL
] = SYSMIS
,
1770 [CRS_CL_SRESIDUAL
] = SYSMIS
,
1771 [CRS_CL_ASRESIDUAL
] = SYSMIS
,
1773 for (size_t i
= 0; i
< proc
->n_cells
; i
++)
1775 int cell
= proc
->a_cells
[i
];
1776 double entry
= entries
[cell
];
1777 if (entry
!= SYSMIS
)
1779 indexes
[ROW_VAR
+ 1] = r
;
1780 indexes
[COL_VAR
+ 1] = n_cols
;
1781 indexes
[0] = crs_leaves
[cell
];
1782 pivot_table_put (table
, indexes
, table
->n_dimensions
,
1783 pivot_value_new_number (entry
));
1788 for (size_t c
= 0; c
<= n_cols
; c
++)
1790 if (c
< n_cols
&& !xt
->col_tot
[c
] && proc
->mode
!= INTEGER
)
1793 double ct
= c
< n_cols
? xt
->col_tot
[c
] : xt
->total
;
1794 double expected_value
= ct
/ xt
->total
;
1795 double entries
[CRS_N_CELLS
] = {
1796 [CRS_CL_COUNT
] = ct
,
1797 [CRS_CL_ROW
] = expected_value
* 100.0,
1798 [CRS_CL_COLUMN
] = 100.0,
1799 [CRS_CL_TOTAL
] = expected_value
* 100.,
1800 [CRS_CL_EXPECTED
] = expected_value
,
1801 [CRS_CL_RESIDUAL
] = SYSMIS
,
1802 [CRS_CL_SRESIDUAL
] = SYSMIS
,
1803 [CRS_CL_ASRESIDUAL
] = SYSMIS
,
1805 for (size_t i
= 0; i
< proc
->n_cells
; i
++)
1807 size_t cell
= proc
->a_cells
[i
];
1808 double entry
= entries
[cell
];
1809 if (entry
!= SYSMIS
)
1811 indexes
[ROW_VAR
+ 1] = n_rows
;
1812 indexes
[COL_VAR
+ 1] = c
;
1813 indexes
[0] = crs_leaves
[cell
];
1814 pivot_table_put (table
, indexes
, table
->n_dimensions
,
1815 pivot_value_new_number (entry
));
1823 struct symmetric_statistic
1825 double v
; /* Value. */
1826 double ase
; /* Appropriate standard error. */
1827 double t
; /* Student's t value. */
1828 double sig
; /* Significance. */
1831 static void calc_r (struct crosstabulation
*,
1832 double *XT
, double *Y
, struct symmetric_statistic
*);
1833 static void calc_chisq (struct crosstabulation
*,
1834 double[N_CHISQ
], int[N_CHISQ
], double *, double *);
1836 /* Display chi-square statistics. */
1838 display_chisq (struct crosstabulation
*xt
, struct pivot_table
*chisq
)
1840 double chisq_v
[N_CHISQ
];
1841 double fisher1
, fisher2
;
1843 calc_chisq (xt
, chisq_v
, df
, &fisher1
, &fisher2
);
1845 size_t *indexes
= xnmalloc (chisq
->n_dimensions
, sizeof *indexes
);
1846 assert (xt
->n_vars
== 2);
1847 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
1848 indexes
[i
+ 2] = xt
->const_indexes
[i
];
1849 for (size_t i
= 0; i
< N_CHISQ
; i
++)
1853 double entries
[5] = { SYSMIS
, SYSMIS
, SYSMIS
, SYSMIS
, SYSMIS
};
1856 entries
[3] = fisher2
;
1857 entries
[4] = fisher1
;
1859 else if (chisq_v
[i
] != SYSMIS
)
1861 entries
[0] = chisq_v
[i
];
1863 entries
[2] = gsl_cdf_chisq_Q (chisq_v
[i
], df
[i
]);
1866 for (size_t j
= 0; j
< sizeof entries
/ sizeof *entries
; j
++)
1867 if (entries
[j
] != SYSMIS
)
1870 pivot_table_put (chisq
, indexes
, chisq
->n_dimensions
,
1871 pivot_value_new_number (entries
[j
]));
1877 pivot_table_put (chisq
, indexes
, chisq
->n_dimensions
,
1878 pivot_value_new_number (xt
->total
));
1890 static bool calc_symmetric (struct crosstabs_proc
*, struct crosstabulation
*,
1891 struct symmetric_statistic
[N_SYMMETRIC
],
1892 struct somers_d
[3]);
1894 /* Display symmetric measures. */
1896 display_symmetric (struct crosstabs_proc
*proc
, struct crosstabulation
*xt
,
1897 struct pivot_table
*sym
)
1899 struct symmetric_statistic ss
[N_SYMMETRIC
];
1900 struct somers_d somers_d
[3];
1902 if (!calc_symmetric (proc
, xt
, ss
, somers_d
))
1905 size_t *indexes
= xnmalloc (sym
->n_dimensions
, sizeof *indexes
);
1906 assert (xt
->n_vars
== 2);
1907 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
1908 indexes
[i
+ 2] = xt
->const_indexes
[i
];
1910 for (size_t i
= 0; i
< N_SYMMETRIC
; i
++)
1912 struct symmetric_statistic
*s
= &ss
[i
];
1918 double entries
[] = { s
->v
, s
->ase
, s
->t
, s
->sig
};
1919 for (size_t j
= 0; j
< sizeof entries
/ sizeof *entries
; j
++)
1920 if (entries
[j
] != SYSMIS
)
1923 pivot_table_put (sym
, indexes
, sym
->n_dimensions
,
1924 pivot_value_new_number (entries
[j
]));
1928 indexes
[1] = N_SYMMETRIC
;
1930 struct pivot_value
*total
= pivot_value_new_number (xt
->total
);
1931 pivot_value_set_rc (sym
, total
, PIVOT_RC_COUNT
);
1932 pivot_table_put (sym
, indexes
, sym
->n_dimensions
, total
);
1937 static bool calc_risk (struct crosstabulation
*,
1938 double[], double[], double[], union value
*,
1941 /* Display risk estimate. */
1943 display_risk (struct crosstabulation
*xt
, struct pivot_table
*risk
,
1944 struct pivot_dimension
*risk_statistics
)
1946 double risk_v
[3], lower
[3], upper
[3], n_valid
;
1948 if (!calc_risk (xt
, risk_v
, upper
, lower
, c
, &n_valid
))
1950 assert (risk_statistics
);
1952 size_t *indexes
= xnmalloc (risk
->n_dimensions
, sizeof *indexes
);
1953 assert (xt
->n_vars
== 2);
1954 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
1955 indexes
[i
+ 2] = xt
->const_indexes
[i
];
1957 for (size_t i
= 0; i
< 3; i
++)
1959 const struct variable
*cv
= xt
->vars
[COL_VAR
].var
;
1960 const struct variable
*rv
= xt
->vars
[ROW_VAR
].var
;
1962 if (risk_v
[i
] == SYSMIS
)
1965 struct string label
= DS_EMPTY_INITIALIZER
;
1969 ds_put_format (&label
, _("Odds Ratio for %s"), var_to_string (rv
));
1970 ds_put_cstr (&label
, " (");
1971 var_append_value_name (rv
, &c
[0], &label
);
1972 ds_put_cstr (&label
, " / ");
1973 var_append_value_name (rv
, &c
[1], &label
);
1974 ds_put_cstr (&label
, ")");
1978 ds_put_format (&label
, _("For cohort %s = "), var_to_string (cv
));
1979 var_append_value_name (cv
, &xt
->vars
[ROW_VAR
].values
[i
- 1], &label
);
1983 indexes
[1] = pivot_category_create_leaf (
1984 risk_statistics
->root
,
1985 pivot_value_new_user_text_nocopy (ds_steal_cstr (&label
)));
1987 double entries
[] = { risk_v
[i
], lower
[i
], upper
[i
] };
1988 for (size_t j
= 0; j
< sizeof entries
/ sizeof *entries
; j
++)
1991 pivot_table_put (risk
, indexes
, risk
->n_dimensions
,
1992 pivot_value_new_number (entries
[j
]));
1995 indexes
[1] = pivot_category_create_leaf (
1996 risk_statistics
->root
,
1997 pivot_value_new_text (N_("N of Valid Cases")));
1999 pivot_table_put (risk
, indexes
, risk
->n_dimensions
,
2000 pivot_value_new_number (n_valid
));
2004 static void calc_directional (struct crosstabs_proc
*, struct crosstabulation
*,
2005 double[N_DIRECTIONAL
], double[N_DIRECTIONAL
],
2006 double[N_DIRECTIONAL
], double[N_DIRECTIONAL
]);
2008 /* Display directional measures. */
2010 display_directional (struct crosstabs_proc
*proc
,
2011 struct crosstabulation
*xt
, struct pivot_table
*direct
)
2013 double direct_v
[N_DIRECTIONAL
];
2014 double direct_ase
[N_DIRECTIONAL
];
2015 double direct_t
[N_DIRECTIONAL
];
2016 double sig
[N_DIRECTIONAL
];
2017 calc_directional (proc
, xt
, direct_v
, direct_ase
, direct_t
, sig
);
2019 size_t *indexes
= xnmalloc (direct
->n_dimensions
, sizeof *indexes
);
2020 assert (xt
->n_vars
== 2);
2021 for (size_t i
= 0; i
< xt
->n_consts
; i
++)
2022 indexes
[i
+ 2] = xt
->const_indexes
[i
];
2024 for (size_t i
= 0; i
< N_DIRECTIONAL
; i
++)
2026 if (direct_v
[i
] == SYSMIS
)
2031 double entries
[] = {
2032 direct_v
[i
], direct_ase
[i
], direct_t
[i
], sig
[i
],
2034 for (size_t j
= 0; j
< sizeof entries
/ sizeof *entries
; j
++)
2035 if (entries
[j
] != SYSMIS
)
2038 pivot_table_put (direct
, indexes
, direct
->n_dimensions
,
2039 pivot_value_new_number (entries
[j
]));
2046 /* Statistical calculations. */
2048 /* Returns the value of the logarithm of gamma (factorial) function for an integer
2051 log_gamma_int (double xt
)
2054 for (int i
= 2; i
< xt
; i
++)
2059 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2061 static inline double
2062 Pr (int a
, int b
, int c
, int d
)
2064 return exp (log_gamma_int (a
+ b
+ 1.) - log_gamma_int (a
+ 1.)
2065 + log_gamma_int (c
+ d
+ 1.) - log_gamma_int (b
+ 1.)
2066 + log_gamma_int (a
+ c
+ 1.) - log_gamma_int (c
+ 1.)
2067 + log_gamma_int (b
+ d
+ 1.) - log_gamma_int (d
+ 1.)
2068 - log_gamma_int (a
+ b
+ c
+ d
+ 1.));
2071 /* Swap the contents of A and B. */
2073 swap (int *a
, int *b
)
2080 /* Calculate significance for Fisher's exact test as specified in
2081 _SPSS Statistical Algorithms_, Appendix 5. */
2083 calc_fisher (int a
, int b
, int c
, int d
, double *fisher1
, double *fisher2
)
2085 if (MIN (c
, d
) < MIN (a
, b
))
2086 swap (&a
, &c
), swap (&b
, &d
);
2087 if (MIN (b
, d
) < MIN (a
, c
))
2088 swap (&a
, &b
), swap (&c
, &d
);
2092 swap (&a
, &b
), swap (&c
, &d
);
2094 swap (&a
, &c
), swap (&b
, &d
);
2097 double pn1
= Pr (a
, b
, c
, d
);
2099 for (int xt
= 1; xt
<= a
; xt
++)
2100 *fisher1
+= Pr (a
- xt
, b
+ xt
, c
+ xt
, d
- xt
);
2102 *fisher2
= *fisher1
;
2103 for (int xt
= 1; xt
<= b
; xt
++)
2105 double p
= Pr (a
+ xt
, b
- xt
, c
- xt
, d
+ xt
);
2111 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2112 columns with values COLS and N_ROWS rows with values ROWS. Values
2113 in the matrix sum to xt->total. */
2115 calc_chisq (struct crosstabulation
*xt
,
2116 double chisq
[N_CHISQ
], int df
[N_CHISQ
],
2117 double *fisher1
, double *fisher2
)
2119 chisq
[0] = chisq
[1] = 0.;
2120 chisq
[2] = chisq
[3] = chisq
[4] = SYSMIS
;
2121 *fisher1
= *fisher2
= SYSMIS
;
2123 df
[0] = df
[1] = (xt
->ns_cols
- 1) * (xt
->ns_rows
- 1);
2125 if (xt
->ns_rows
<= 1 || xt
->ns_cols
<= 1)
2127 chisq
[0] = chisq
[1] = SYSMIS
;
2131 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
2132 FOR_EACH_POPULATED_ROW (r
, xt
)
2133 FOR_EACH_POPULATED_COLUMN (c
, xt
)
2135 const double expected
= xt
->row_tot
[r
] * xt
->col_tot
[c
] / xt
->total
;
2136 const double freq
= xt
->mat
[n_cols
* r
+ c
];
2137 const double residual
= freq
- expected
;
2139 chisq
[0] += residual
* residual
/ expected
;
2141 chisq
[1] += freq
* log (expected
/ freq
);
2152 /* Calculate Yates and Fisher exact test. */
2153 if (xt
->ns_cols
== 2 && xt
->ns_rows
== 2)
2158 FOR_EACH_POPULATED_COLUMN (c
, xt
)
2166 double f11
= xt
->mat
[nz_cols
[0]];
2167 double f12
= xt
->mat
[nz_cols
[1]];
2168 double f21
= xt
->mat
[nz_cols
[0] + n_cols
];
2169 double f22
= xt
->mat
[nz_cols
[1] + n_cols
];
2172 const double xt_
= fabs (f11
* f22
- f12
* f21
) - 0.5 * xt
->total
;
2175 chisq
[3] = (xt
->total
* pow2 (xt_
)
2176 / (f11
+ f12
) / (f21
+ f22
)
2177 / (f11
+ f21
) / (f12
+ f22
));
2184 calc_fisher (f11
+ .5, f12
+ .5, f21
+ .5, f22
+ .5, fisher1
, fisher2
);
2187 /* Calculate Mantel-Haenszel. */
2188 if (var_is_numeric (xt
->vars
[ROW_VAR
].var
)
2189 && var_is_numeric (xt
->vars
[COL_VAR
].var
))
2191 struct symmetric_statistic r
;
2192 calc_r (xt
, (double *) xt
->vars
[ROW_VAR
].values
,
2193 (double *) xt
->vars
[COL_VAR
].values
, &r
);
2195 chisq
[4] = (xt
->total
- 1.) * pow2 (r
.v
);
2200 /* Calculate the value of Pearson's r and stores it into *R. The row and
2201 column values must be passed in XT and Y. */
2203 calc_r (struct crosstabulation
*xt
,
2204 double *XT
, double *Y
, struct symmetric_statistic
*r
)
2206 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
2207 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
2210 for (size_t i
= 0; i
< n_rows
; i
++)
2211 for (size_t j
= 0; j
< n_cols
; j
++)
2213 double fij
= xt
->mat
[j
+ i
* n_cols
];
2214 double product
= XT
[i
] * Y
[j
];
2215 double temp
= fij
* product
;
2221 for (size_t i
= 0; i
< n_rows
; i
++)
2223 sum_Xr
+= XT
[i
] * xt
->row_tot
[i
];
2224 sum_X2r
+= pow2 (XT
[i
]) * xt
->row_tot
[i
];
2226 double Xbar
= sum_Xr
/ xt
->total
;
2230 for (size_t i
= 0; i
< n_cols
; i
++)
2232 sum_Yc
+= Y
[i
] * xt
->col_tot
[i
];
2233 sum_Y2c
+= Y
[i
] * Y
[i
] * xt
->col_tot
[i
];
2235 double Ybar
= sum_Yc
/ xt
->total
;
2237 double S
= sum_XYf
- sum_Xr
* sum_Yc
/ xt
->total
;
2238 double SX
= sum_X2r
- pow2 (sum_Xr
) / xt
->total
;
2239 double SY
= sum_Y2c
- pow2 (sum_Yc
) / xt
->total
;
2240 double T
= sqrt (SX
* SY
);
2242 r
->t
= r
->v
/ sqrt (1 - pow2 (r
->v
)) * sqrt (xt
->total
- 2);
2243 r
->sig
= 2 * significance_of_correlation (r
->v
, xt
->total
);
2247 for (size_t i
= 0; i
< n_rows
; i
++)
2248 for (size_t j
= 0; j
< n_cols
; j
++)
2250 double Xresid
= XT
[i
] - Xbar
;
2251 double Yresid
= Y
[j
] - Ybar
;
2252 double temp
= (T
* Xresid
* Yresid
2254 * (Xresid
* Xresid
* SY
+ Yresid
* Yresid
* SX
)));
2255 double y
= xt
->mat
[j
+ i
* n_cols
] * temp
* temp
- c
;
2260 r
->ase
= sqrt (s
) / (T
* T
);
2263 /* Calculate symmetric statistics and their asymptotic standard
2264 errors. Returns false if none could be calculated. */
2266 calc_symmetric (struct crosstabs_proc
*proc
, struct crosstabulation
*xt
,
2267 struct symmetric_statistic sym
[N_SYMMETRIC
],
2268 struct somers_d somers_d
[3])
2270 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
2271 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
2273 size_t q
= MIN (xt
->ns_rows
, xt
->ns_cols
);
2277 for (size_t i
= 0; i
< N_SYMMETRIC
; i
++)
2278 sym
[i
].v
= sym
[i
].ase
= sym
[i
].t
= sym
[i
].sig
= SYSMIS
;
2280 /* Phi, Cramer's V, contingency coefficient. */
2281 if (proc
->statistics
& (CRS_ST_PHI
| CRS_ST_CC
))
2283 double Xp
= 0.; /* Pearson chi-square. */
2285 FOR_EACH_POPULATED_ROW (r
, xt
)
2286 FOR_EACH_POPULATED_COLUMN (c
, xt
)
2288 double expected
= xt
->row_tot
[r
] * xt
->col_tot
[c
] / xt
->total
;
2289 double freq
= xt
->mat
[n_cols
* r
+ c
];
2290 double residual
= freq
- expected
;
2292 Xp
+= residual
* residual
/ expected
;
2295 if (proc
->statistics
& CRS_ST_PHI
)
2297 sym
[0].v
= sqrt (Xp
/ xt
->total
);
2298 sym
[1].v
= sqrt (Xp
/ (xt
->total
* (q
- 1)));
2300 if (proc
->statistics
& CRS_ST_CC
)
2301 sym
[2].v
= sqrt (Xp
/ (Xp
+ xt
->total
));
2304 if (proc
->statistics
& (CRS_ST_BTAU
| CRS_ST_CTAU
2305 | CRS_ST_GAMMA
| CRS_ST_D
))
2307 double Dr
= pow2 (xt
->total
);
2308 for (size_t r
= 0; r
< n_rows
; r
++)
2309 Dr
-= pow2 (xt
->row_tot
[r
]);
2311 double Dc
= pow2 (xt
->total
);
2312 for (size_t c
= 0; c
< n_cols
; c
++)
2313 Dc
-= pow2 (xt
->col_tot
[c
]);
2315 double *cum
= xnmalloc (n_cols
* n_rows
, sizeof *cum
);
2316 for (size_t c
= 0; c
< n_cols
; c
++)
2320 for (size_t r
= 0; r
< n_rows
; r
++)
2321 cum
[c
+ r
* n_cols
] = ct
+= xt
->mat
[c
+ r
* n_cols
];
2327 for (size_t i
= 0; i
< n_rows
; i
++)
2330 for (size_t j
= 1; j
< n_cols
; j
++)
2331 Cij
+= xt
->col_tot
[j
] - cum
[j
+ i
* n_cols
];
2335 for (size_t j
= 1; j
< n_cols
; j
++)
2336 Dij
+= cum
[j
+ (i
- 1) * n_cols
];
2338 for (size_t j
= 0;;)
2340 double fij
= xt
->mat
[j
+ i
* n_cols
];
2347 Cij
-= xt
->col_tot
[j
] - cum
[j
+ i
* n_cols
];
2348 Dij
+= xt
->col_tot
[j
- 1] - cum
[j
- 1 + i
* n_cols
];
2352 Cij
+= cum
[j
- 1 + (i
- 1) * n_cols
];
2353 Dij
-= cum
[j
+ (i
- 1) * n_cols
];
2358 if (proc
->statistics
& CRS_ST_BTAU
)
2359 sym
[3].v
= (P
- Q
) / sqrt (Dr
* Dc
);
2360 if (proc
->statistics
& CRS_ST_CTAU
)
2361 sym
[4].v
= (q
* (P
- Q
)) / (pow2 (xt
->total
) * (q
- 1));
2362 if (proc
->statistics
& CRS_ST_GAMMA
)
2363 sym
[5].v
= (P
- Q
) / (P
+ Q
);
2365 /* ASE for tau-b, tau-c, gamma. Calculations could be
2366 eliminated here, at expense of memory. */
2367 double btau_cum
= 0;
2368 double ctau_cum
= 0;
2369 double gamma_cum
= 0;
2370 double d_yx_cum
= 0;
2371 double d_xy_cum
= 0;
2372 for (size_t i
= 0; i
< n_rows
; i
++)
2375 for (size_t j
= 1; j
< n_cols
; j
++)
2376 Cij
+= xt
->col_tot
[j
] - cum
[j
+ i
* n_cols
];
2380 for (size_t j
= 1; j
< n_cols
; j
++)
2381 Dij
+= cum
[j
+ (i
- 1) * n_cols
];
2383 for (size_t j
= 0;;)
2385 double fij
= xt
->mat
[j
+ i
* n_cols
];
2387 if (proc
->statistics
& CRS_ST_BTAU
)
2388 btau_cum
+= fij
* pow2 (2. * sqrt (Dr
* Dc
) * (Cij
- Dij
)
2389 + sym
[3].v
* (xt
->row_tot
[i
] * Dc
2390 + xt
->col_tot
[j
] * Dr
));
2391 ctau_cum
+= fij
* pow2 (Cij
- Dij
);
2393 if (proc
->statistics
& CRS_ST_GAMMA
)
2394 gamma_cum
+= fij
* pow2 (Q
* Cij
- P
* Dij
);
2396 if (proc
->statistics
& CRS_ST_D
)
2398 d_yx_cum
+= fij
* pow2 (Dr
* (Cij
- Dij
)
2399 - (P
- Q
) * (xt
->total
- xt
->row_tot
[i
]));
2400 d_xy_cum
+= fij
* pow2 (Dc
* (Dij
- Cij
)
2401 - (Q
- P
) * (xt
->total
- xt
->col_tot
[j
]));
2407 Cij
-= xt
->col_tot
[j
] - cum
[j
+ i
* n_cols
];
2408 Dij
+= xt
->col_tot
[j
- 1] - cum
[j
- 1 + i
* n_cols
];
2412 Cij
+= cum
[j
- 1 + (i
- 1) * n_cols
];
2413 Dij
-= cum
[j
+ (i
- 1) * n_cols
];
2418 if (proc
->statistics
& CRS_ST_BTAU
)
2420 double btau_var
= ((btau_cum
2421 - (xt
->total
* pow2 (xt
->total
* (P
- Q
) / sqrt (Dr
* Dc
) * (Dr
+ Dc
))))
2423 sym
[3].ase
= sqrt (btau_var
);
2424 sym
[3].t
= sym
[3].v
/ (2 * sqrt ((ctau_cum
- (P
- Q
) * (P
- Q
) / xt
->total
)
2427 if (proc
->statistics
& CRS_ST_CTAU
)
2429 sym
[4].ase
= ((2 * q
/ ((q
- 1) * pow2 (xt
->total
)))
2430 * sqrt (ctau_cum
- (P
- Q
) * (P
- Q
) / xt
->total
));
2431 sym
[4].t
= sym
[4].v
/ sym
[4].ase
;
2433 if (proc
->statistics
& CRS_ST_GAMMA
)
2435 sym
[5].ase
= ((4. / ((P
+ Q
) * (P
+ Q
))) * sqrt (gamma_cum
));
2436 sym
[5].t
= sym
[5].v
/ (2. / (P
+ Q
)
2437 * sqrt (ctau_cum
- (P
- Q
) * (P
- Q
) / xt
->total
));
2439 if (proc
->statistics
& CRS_ST_D
)
2441 somers_d
[0].v
= (P
- Q
) / (.5 * (Dc
+ Dr
));
2442 somers_d
[0].ase
= SYSMIS
;
2443 somers_d
[0].t
= (somers_d
[0].v
2445 * sqrt (ctau_cum
- pow2 (P
- Q
) / xt
->total
)));
2446 somers_d
[1].v
= (P
- Q
) / Dc
;
2447 somers_d
[1].ase
= 2. / pow2 (Dc
) * sqrt (d_xy_cum
);
2448 somers_d
[1].t
= (somers_d
[1].v
2450 * sqrt (ctau_cum
- pow2 (P
- Q
) / xt
->total
)));
2451 somers_d
[2].v
= (P
- Q
) / Dr
;
2452 somers_d
[2].ase
= 2. / pow2 (Dr
) * sqrt (d_yx_cum
);
2453 somers_d
[2].t
= (somers_d
[2].v
2455 * sqrt (ctau_cum
- pow2 (P
- Q
) / xt
->total
)));
2461 /* Spearman correlation, Pearson's r. */
2462 if (proc
->statistics
& CRS_ST_CORR
)
2464 double *R
= xmalloc (sizeof *R
* n_rows
);
2467 for (size_t i
= 0; i
< n_rows
; i
++)
2469 R
[i
] = s
+ (xt
->row_tot
[i
] + 1.) / 2.;
2470 double y
= xt
->row_tot
[i
] - c
;
2476 double *C
= xmalloc (sizeof *C
* n_cols
);
2478 for (size_t j
= 0; j
< n_cols
; j
++)
2480 C
[j
] = s
+ (xt
->col_tot
[j
] + 1.) / 2;
2481 double y
= xt
->col_tot
[j
] - c
;
2487 calc_r (xt
, R
, C
, &sym
[6]);
2492 calc_r (xt
, (double *) xt
->vars
[ROW_VAR
].values
,
2493 (double *) xt
->vars
[COL_VAR
].values
,
2497 /* Cohen's kappa. */
2498 if (proc
->statistics
& CRS_ST_KAPPA
&& xt
->ns_rows
== xt
->ns_cols
)
2501 double sum_rici
= 0;
2502 double sum_fiiri_ci
= 0;
2503 double sum_riciri_ci
= 0;
2504 for (size_t i
= 0, j
= 0; i
< xt
->ns_rows
; i
++, j
++)
2506 while (xt
->col_tot
[j
] == 0.)
2509 double prod
= xt
->row_tot
[i
] * xt
->col_tot
[j
];
2510 double sum
= xt
->row_tot
[i
] + xt
->col_tot
[j
];
2512 sum_fii
+= xt
->mat
[j
+ i
* n_cols
];
2514 sum_fiiri_ci
+= xt
->mat
[j
+ i
* n_cols
] * sum
;
2515 sum_riciri_ci
+= prod
* sum
;
2518 double sum_fijri_ci2
= 0;
2519 for (size_t i
= 0; i
< xt
->ns_rows
; i
++)
2520 for (size_t j
= 0; j
< xt
->ns_cols
; j
++)
2522 double sum
= xt
->row_tot
[i
] + xt
->col_tot
[j
];
2523 sum_fijri_ci2
+= xt
->mat
[j
+ i
* n_cols
] * sum
* sum
;
2526 sym
[8].v
= (xt
->total
* sum_fii
- sum_rici
) / (pow2 (xt
->total
) - sum_rici
);
2528 double ase_under_h0
= sqrt ((pow2 (xt
->total
) * sum_rici
2529 + sum_rici
* sum_rici
2530 - xt
->total
* sum_riciri_ci
)
2531 / (xt
->total
* (pow2 (xt
->total
) - sum_rici
) * (pow2 (xt
->total
) - sum_rici
)));
2533 sym
[8].ase
= sqrt (xt
->total
* (((sum_fii
* (xt
->total
- sum_fii
))
2534 / pow2 (pow2 (xt
->total
) - sum_rici
))
2535 + ((2. * (xt
->total
- sum_fii
)
2536 * (2. * sum_fii
* sum_rici
2537 - xt
->total
* sum_fiiri_ci
))
2538 / pow3 (pow2 (xt
->total
) - sum_rici
))
2539 + (pow2 (xt
->total
- sum_fii
)
2540 * (xt
->total
* sum_fijri_ci2
- 4.
2541 * sum_rici
* sum_rici
)
2542 / pow4 (pow2 (xt
->total
) - sum_rici
))));
2544 sym
[8].t
= sym
[8].v
/ ase_under_h0
;
2550 /* Calculate risk estimate. */
2552 calc_risk (struct crosstabulation
*xt
,
2553 double *value
, double *upper
, double *lower
, union value
*c
,
2556 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
2558 for (size_t i
= 0; i
< 3; i
++)
2559 value
[i
] = upper
[i
] = lower
[i
] = SYSMIS
;
2561 if (xt
->ns_rows
!= 2 || xt
->ns_cols
!= 2)
2564 /* Find populated columns. */
2567 FOR_EACH_POPULATED_COLUMN (c
, xt
)
2571 /* Find populated rows. */
2574 FOR_EACH_POPULATED_ROW (r
, xt
)
2578 double f11
= xt
->mat
[nz_cols
[0] + n_cols
* nz_rows
[0]];
2579 double f12
= xt
->mat
[nz_cols
[1] + n_cols
* nz_rows
[0]];
2580 double f21
= xt
->mat
[nz_cols
[0] + n_cols
* nz_rows
[1]];
2581 double f22
= xt
->mat
[nz_cols
[1] + n_cols
* nz_rows
[1]];
2582 *n_valid
= f11
+ f12
+ f21
+ f22
;
2584 c
[0] = xt
->vars
[COL_VAR
].values
[nz_cols
[0]];
2585 c
[1] = xt
->vars
[COL_VAR
].values
[nz_cols
[1]];
2587 value
[0] = (f11
* f22
) / (f12
* f21
);
2588 double v
= sqrt (1. / f11
+ 1. / f12
+ 1. / f21
+ 1. / f22
);
2589 lower
[0] = value
[0] * exp (-1.960 * v
);
2590 upper
[0] = value
[0] * exp (1.960 * v
);
2592 value
[1] = (f11
* (f21
+ f22
)) / (f21
* (f11
+ f12
));
2593 v
= sqrt ((f12
/ (f11
* (f11
+ f12
)))
2594 + (f22
/ (f21
* (f21
+ f22
))));
2595 lower
[1] = value
[1] * exp (-1.960 * v
);
2596 upper
[1] = value
[1] * exp (1.960 * v
);
2598 value
[2] = (f12
* (f21
+ f22
)) / (f22
* (f11
+ f12
));
2599 v
= sqrt ((f11
/ (f12
* (f11
+ f12
)))
2600 + (f21
/ (f22
* (f21
+ f22
))));
2601 lower
[2] = value
[2] * exp (-1.960 * v
);
2602 upper
[2] = value
[2] * exp (1.960 * v
);
2607 /* Calculate directional measures. */
2609 calc_directional (struct crosstabs_proc
*proc
, struct crosstabulation
*xt
,
2610 double v
[N_DIRECTIONAL
], double ase
[N_DIRECTIONAL
],
2611 double t
[N_DIRECTIONAL
], double sig
[N_DIRECTIONAL
])
2613 size_t n_rows
= xt
->vars
[ROW_VAR
].n_values
;
2614 size_t n_cols
= xt
->vars
[COL_VAR
].n_values
;
2615 for (size_t i
= 0; i
< N_DIRECTIONAL
; i
++)
2616 v
[i
] = ase
[i
] = t
[i
] = sig
[i
] = SYSMIS
;
2619 if (proc
->statistics
& CRS_ST_LAMBDA
)
2621 /* Find maximum for each row and their sum. */
2622 double *fim
= xnmalloc (n_rows
, sizeof *fim
);
2623 size_t *fim_index
= xnmalloc (n_rows
, sizeof *fim_index
);
2624 double sum_fim
= 0.0;
2625 for (size_t i
= 0; i
< n_rows
; i
++)
2627 double max
= xt
->mat
[i
* n_cols
];
2630 for (size_t j
= 1; j
< n_cols
; j
++)
2631 if (xt
->mat
[j
+ i
* n_cols
] > max
)
2633 max
= xt
->mat
[j
+ i
* n_cols
];
2639 fim_index
[i
] = index
;
2642 /* Find maximum for each column. */
2643 double *fmj
= xnmalloc (n_cols
, sizeof *fmj
);
2644 size_t *fmj_index
= xnmalloc (n_cols
, sizeof *fmj_index
);
2645 double sum_fmj
= 0.0;
2646 for (size_t j
= 0; j
< n_cols
; j
++)
2648 double max
= xt
->mat
[j
];
2651 for (size_t i
= 1; i
< n_rows
; i
++)
2652 if (xt
->mat
[j
+ i
* n_cols
] > max
)
2654 max
= xt
->mat
[j
+ i
* n_cols
];
2660 fmj_index
[j
] = index
;
2663 /* Find maximum row total. */
2664 double rm
= xt
->row_tot
[0];
2665 size_t rm_index
= 0;
2666 for (size_t i
= 1; i
< n_rows
; i
++)
2667 if (xt
->row_tot
[i
] > rm
)
2669 rm
= xt
->row_tot
[i
];
2673 /* Find maximum column total. */
2674 double cm
= xt
->col_tot
[0];
2675 size_t cm_index
= 0;
2676 for (size_t j
= 1; j
< n_cols
; j
++)
2677 if (xt
->col_tot
[j
] > cm
)
2679 cm
= xt
->col_tot
[j
];
2683 v
[0] = (sum_fim
+ sum_fmj
- cm
- rm
) / (2. * xt
->total
- rm
- cm
);
2684 v
[1] = (sum_fmj
- rm
) / (xt
->total
- rm
);
2685 v
[2] = (sum_fim
- cm
) / (xt
->total
- cm
);
2687 /* ASE1 for Y given XT. */
2690 for (size_t i
= 0; i
< n_rows
; i
++)
2691 if (cm_index
== fim_index
[i
])
2693 ase
[2] = sqrt ((xt
->total
- sum_fim
) * (sum_fim
+ cm
- 2. * accum
)
2694 / pow3 (xt
->total
- cm
));
2697 /* ASE0 for Y given XT. */
2700 for (size_t i
= 0; i
< n_rows
; i
++)
2701 if (cm_index
!= fim_index
[i
])
2702 accum
+= (xt
->mat
[i
* n_cols
+ fim_index
[i
]]
2703 + xt
->mat
[i
* n_cols
+ cm_index
]);
2704 t
[2] = v
[2] / (sqrt (accum
- pow2 (sum_fim
- cm
) / xt
->total
) / (xt
->total
- cm
));
2707 /* ASE1 for XT given Y. */
2710 for (size_t j
= 0; j
< n_cols
; j
++)
2711 if (rm_index
== fmj_index
[j
])
2713 ase
[1] = sqrt ((xt
->total
- sum_fmj
) * (sum_fmj
+ rm
- 2. * accum
)
2714 / pow3 (xt
->total
- rm
));
2717 /* ASE0 for XT given Y. */
2720 for (size_t j
= 0; j
< n_cols
; j
++)
2721 if (rm_index
!= fmj_index
[j
])
2722 accum
+= (xt
->mat
[j
+ n_cols
* fmj_index
[j
]]
2723 + xt
->mat
[j
+ n_cols
* rm_index
]);
2724 t
[1] = v
[1] / (sqrt (accum
- pow2 (sum_fmj
- rm
) / xt
->total
) / (xt
->total
- rm
));
2727 /* Symmetric ASE0 and ASE1. */
2729 double accum0
= 0.0;
2730 double accum1
= 0.0;
2731 for (size_t i
= 0; i
< n_rows
; i
++)
2732 for (size_t j
= 0; j
< n_cols
; j
++)
2734 int temp0
= (fmj_index
[j
] == i
) + (fim_index
[i
] == j
);
2735 int temp1
= (i
== rm_index
) + (j
== cm_index
);
2736 accum0
+= xt
->mat
[j
+ i
* n_cols
] * pow2 (temp0
- temp1
);
2737 accum1
+= (xt
->mat
[j
+ i
* n_cols
]
2738 * pow2 (temp0
+ (v
[0] - 1.) * temp1
));
2740 ase
[0] = sqrt (accum1
- 4. * xt
->total
* v
[0] * v
[0]) / (2. * xt
->total
- rm
- cm
);
2741 t
[0] = v
[0] / (sqrt (accum0
- pow2 (sum_fim
+ sum_fmj
- cm
- rm
) / xt
->total
)
2742 / (2. * xt
->total
- rm
- cm
));
2745 for (size_t i
= 0; i
< 3; i
++)
2746 sig
[i
] = 2 * gsl_cdf_ugaussian_Q (t
[i
]);
2754 double sum_fij2_ri
= 0.0;
2755 double sum_fij2_ci
= 0.0;
2756 FOR_EACH_POPULATED_ROW (i
, xt
)
2757 FOR_EACH_POPULATED_COLUMN (j
, xt
)
2759 double temp
= pow2 (xt
->mat
[j
+ i
* n_cols
]);
2760 sum_fij2_ri
+= temp
/ xt
->row_tot
[i
];
2761 sum_fij2_ci
+= temp
/ xt
->col_tot
[j
];
2764 double sum_ri2
= 0.0;
2765 for (size_t i
= 0; i
< n_rows
; i
++)
2766 sum_ri2
+= pow2 (xt
->row_tot
[i
]);
2768 double sum_cj2
= 0.0;
2769 for (size_t j
= 0; j
< n_cols
; j
++)
2770 sum_cj2
+= pow2 (xt
->col_tot
[j
]);
2772 v
[3] = (xt
->total
* sum_fij2_ci
- sum_ri2
) / (pow2 (xt
->total
) - sum_ri2
);
2773 v
[4] = (xt
->total
* sum_fij2_ri
- sum_cj2
) / (pow2 (xt
->total
) - sum_cj2
);
2776 if (proc
->statistics
& CRS_ST_UC
)
2779 FOR_EACH_POPULATED_ROW (i
, xt
)
2780 UX
-= xt
->row_tot
[i
] / xt
->total
* log (xt
->row_tot
[i
] / xt
->total
);
2783 FOR_EACH_POPULATED_COLUMN (j
, xt
)
2784 UY
-= xt
->col_tot
[j
] / xt
->total
* log (xt
->col_tot
[j
] / xt
->total
);
2788 for (size_t i
= 0; i
< n_rows
; i
++)
2789 for (size_t j
= 0; j
< n_cols
; j
++)
2791 double entry
= xt
->mat
[j
+ i
* n_cols
];
2796 P
+= entry
* pow2 (log (xt
->col_tot
[j
] * xt
->row_tot
[i
] / (xt
->total
* entry
)));
2797 UXY
-= entry
/ xt
->total
* log (entry
/ xt
->total
);
2800 double ase1_yx
= 0.0;
2801 double ase1_xy
= 0.0;
2802 double ase1_sym
= 0.0;
2803 for (size_t i
= 0; i
< n_rows
; i
++)
2804 for (size_t j
= 0; j
< n_cols
; j
++)
2806 double entry
= xt
->mat
[j
+ i
* n_cols
];
2811 ase1_yx
+= entry
* pow2 (UY
* log (entry
/ xt
->row_tot
[i
])
2812 + (UX
- UXY
) * log (xt
->col_tot
[j
] / xt
->total
));
2813 ase1_xy
+= entry
* pow2 (UX
* log (entry
/ xt
->col_tot
[j
])
2814 + (UY
- UXY
) * log (xt
->row_tot
[i
] / xt
->total
));
2815 ase1_sym
+= entry
* pow2 ((UXY
2816 * log (xt
->row_tot
[i
] * xt
->col_tot
[j
] / pow2 (xt
->total
)))
2817 - (UX
+ UY
) * log (entry
/ xt
->total
));
2820 v
[5] = 2. * ((UX
+ UY
- UXY
) / (UX
+ UY
));
2821 ase
[5] = (2. / (xt
->total
* pow2 (UX
+ UY
))) * sqrt (ase1_sym
);
2824 v
[6] = (UX
+ UY
- UXY
) / UX
;
2825 ase
[6] = sqrt (ase1_xy
) / (xt
->total
* UX
* UX
);
2826 t
[6] = v
[6] / (sqrt (P
- xt
->total
* pow2 (UX
+ UY
- UXY
)) / (xt
->total
* UX
));
2828 v
[7] = (UX
+ UY
- UXY
) / UY
;
2829 ase
[7] = sqrt (ase1_yx
) / (xt
->total
* UY
* UY
);
2830 t
[7] = v
[7] / (sqrt (P
- xt
->total
* pow2 (UX
+ UY
- UXY
)) / (xt
->total
* UY
));
2834 if (proc
->statistics
& CRS_ST_D
)
2836 struct symmetric_statistic ss
[N_SYMMETRIC
];
2837 struct somers_d somers_d
[3];
2839 if (calc_symmetric (proc
, xt
, ss
, somers_d
))
2841 for (size_t i
= 0; i
< 3; i
++)
2843 v
[8 + i
] = somers_d
[i
].v
;
2844 ase
[8 + i
] = somers_d
[i
].ase
;
2845 t
[8 + i
] = somers_d
[i
].t
;
2846 sig
[8 + i
] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d
[i
].t
));
2852 if (proc
->statistics
& CRS_ST_ETA
)
2855 double sum_Xr
= 0.0;
2856 double sum_X2r
= 0.0;
2857 for (size_t i
= 0; i
< n_rows
; i
++)
2859 sum_Xr
+= xt
->vars
[ROW_VAR
].values
[i
].f
* xt
->row_tot
[i
];
2860 sum_X2r
+= pow2 (xt
->vars
[ROW_VAR
].values
[i
].f
) * xt
->row_tot
[i
];
2862 double SX
= sum_X2r
- pow2 (sum_Xr
) / xt
->total
;
2865 FOR_EACH_POPULATED_COLUMN (j
, xt
)
2869 for (size_t i
= 0; i
< n_rows
; i
++)
2871 SXW
+= (pow2 (xt
->vars
[ROW_VAR
].values
[i
].f
)
2872 * xt
->mat
[j
+ i
* n_cols
]);
2873 cum
+= (xt
->vars
[ROW_VAR
].values
[i
].f
2874 * xt
->mat
[j
+ i
* n_cols
]);
2877 SXW
-= cum
* cum
/ xt
->col_tot
[j
];
2879 v
[11] = sqrt (1. - SXW
/ SX
);
2882 double sum_Yc
= 0.0;
2883 double sum_Y2c
= 0.0;
2884 for (size_t i
= 0; i
< n_cols
; i
++)
2886 sum_Yc
+= xt
->vars
[COL_VAR
].values
[i
].f
* xt
->col_tot
[i
];
2887 sum_Y2c
+= pow2 (xt
->vars
[COL_VAR
].values
[i
].f
) * xt
->col_tot
[i
];
2889 double SY
= sum_Y2c
- pow2 (sum_Yc
) / xt
->total
;
2892 FOR_EACH_POPULATED_ROW (i
, xt
)
2895 for (size_t j
= 0; j
< n_cols
; j
++)
2897 SYW
+= (pow2 (xt
->vars
[COL_VAR
].values
[j
].f
)
2898 * xt
->mat
[j
+ i
* n_cols
]);
2899 cum
+= (xt
->vars
[COL_VAR
].values
[j
].f
2900 * xt
->mat
[j
+ i
* n_cols
]);
2903 SYW
-= cum
* cum
/ xt
->row_tot
[i
];
2905 v
[12] = sqrt (1. - SYW
/ SY
);