pivot-table: Define numeric formats of categories as well as their cells.
[pspp.git] / src / math / categoricals.h
blob5e49b4504ab1b9338c19fe089b861fa8d82a328f
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 #ifndef _CATEGORICALS__
19 #define _CATEGORICALS__
21 #include <stddef.h>
22 #include "data/missing-values.h"
24 struct categoricals;
25 struct ccase;
26 struct interaction;
27 struct variable;
28 union value;
30 /* Categoricals.
32 A categorical variable has a finite and usually small number of possible
33 values. The categoricals data structure organizes an array of interactions
34 among categorical variables, that is, a set of sets of categorical
35 variables. (Both levels of "set" are ordered.)
37 The life cycle of a categoricals object looks like this:
39 1. Create it with categoricals_create(). This fixes the set of interactions
40 and other parameters.
42 2. Pass all of the desired cases through the object with
43 categoricals_update().
45 3. Finalize the object with categoricals_done(). Only at this point may
46 most of the categoricals query functions be called.
48 4. Use the categoricals object as desired.
50 5. Destroy the object with categoricals_destroy().
53 /* Creating and destroying categoricals. */
54 struct categoricals *categoricals_create (struct interaction *const *,
55 size_t n,
56 const struct variable *wv,
57 enum mv_class fctr_excl);
58 void categoricals_destroy (struct categoricals *);
60 /* Updating categoricals. */
61 void categoricals_update (struct categoricals *, const struct ccase *);
62 void categoricals_done (const struct categoricals *);
63 bool categoricals_is_complete (const struct categoricals *);
65 /* Categories.
67 A variable's number of categories is the number of unique values observed in
68 the data passed to categoricals_update().
70 An interaction's number of categories is the number of observed unique
71 values of its variables, which will often be less than the product of its
72 variables' numbers of categories.
74 A categoricals object's number of categories is the sum of its interactions'
75 categories. */
76 size_t categoricals_n_count (const struct categoricals *, size_t idx);
77 size_t categoricals_n_total (const struct categoricals *);
79 union value *categoricals_get_var_values (const struct categoricals *,
80 const struct variable *, size_t *n);
82 /* Degrees of freedom.
84 A categorical variable with N_CATS categories has N_CATS - 1 degrees of
85 freedom.
87 An interaction's degrees of freedom is the product of its variables' degrees
88 of freedom.
90 A categoricals object's degrees of freedom is the sum of its interactions'
91 degrees of freedom. */
92 size_t categoricals_df (const struct categoricals *, size_t idx);
93 size_t categoricals_df_total (const struct categoricals *);
95 /* Sanity. */
96 bool categoricals_sane (const struct categoricals *cat);
98 /* "Short map".
100 These look up an interaction within a categoricals object on the basis of a
101 "subscript". Interaction 0 with DF_0 degrees of freedom is assigned
102 subscripts [0, DF_0 - 1], interaction 1 with DF_1 degrees of freedom is
103 assigned subscripts [DF_0, DF_0 + DF_1 - 1], and so on. The subscripts
104 passed in must be in the range [0, DF_SUM - 1] where DF_SUM is the total
105 number of degrees of freedom for the object, as returned by
106 categoricals_df_total().
108 These functions are intended for covariance matrix routines, where normally
109 1 less than the total number of distinct values of each categorical variable
110 should be considered.
112 These functions may be used on an object only after calling
113 categoricals_done().
115 double categoricals_get_weight_by_subscript (const struct categoricals *,
116 int subscript);
117 const struct interaction *categoricals_get_interaction_by_subscript (
118 const struct categoricals *, int subscript);
119 double categoricals_get_sum_by_subscript (const struct categoricals *,
120 int subscript);
121 double categoricals_get_dummy_code_for_case (const struct categoricals *,
122 int subscript,
123 const struct ccase *);
124 double categoricals_get_effects_code_for_case (const struct categoricals *,
125 int subscript,
126 const struct ccase *);
129 /* "Long map".
131 These look up an interaction within a categoricals object on the basis of a
132 "category index". Interaction 0 in CAT with CAT_0 categories has indexes
133 [0, CAT_0 - 1], interaction 1 with CAT_1 categories has indexes [CAT_0,
134 CAT_0 + CAT_1 - 1], and so on. The indexes passed in must be in the range
135 [0, CAT_TOTAL - 1] where CAT_TOTAL is the total number of categories for the
136 object, as returned by categoricals_n_total().
138 These functions are useful for descriptive statistics.
140 These functions may be used on an object only after calling
141 categoricals_done().
143 const struct ccase *categoricals_get_case_by_category_real (
144 const struct categoricals *, int iact, int n);
145 void *categoricals_get_user_data_by_category_real (
146 const struct categoricals *, int iact, int n);
148 int categoricals_get_value_index_by_category_real (
149 const struct categoricals *, int iact_idx, int cat_idx, int var_idx);
151 void *categoricals_get_user_data_by_category (const struct categoricals *,
152 int category);
153 const struct ccase *categoricals_get_case_by_category (
154 const struct categoricals *cat, int subscript);
156 struct payload
158 void *(*create) (const void *aux1, void *aux2);
159 void (*update) (const void *aux1, void *aux2, void *user_data,
160 const struct ccase *, double weight);
161 void (*calculate) (const void *aux1, void *aux2, void *user_data);
162 void (*destroy) (const void *aux1, void *aux2, void *user_data);
165 void categoricals_set_payload (struct categoricals *, const struct payload *,
166 const void *aux1, void *aux2);
167 bool categoricals_isbalanced (const struct categoricals *);
169 #endif