expressions: Merge all the little generator programs into generate.pl.
[pspp.git] / src / math / linreg.h
blobb08f7d0e431dd61190efbf82fb2850e9c7053792
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2011 Free Software Foundation, Inc. Written by Jason H. Stover.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #ifndef LINREG_H
18 #define LINREG_H
20 #include <gsl/gsl_math.h>
21 #include <gsl/gsl_matrix.h>
22 #include <gsl/gsl_vector.h>
23 #include <stdbool.h>
/* Identifiers for the available parameter-estimation methods.
   NOTE(review): presumably these are the values stored in the `method'
   member of struct linreg_struct -- confirm against the implementation. */
enum
  {
    LINREG_CONDITIONAL_INVERSE,
    LINREG_QR,
    LINREG_SWEEP,
  };
/*
  Options describing what special values should be computed.
 */
struct pspp_linreg_opts_struct
{
  /* NOTE(review): undocumented in the original; by analogy with
     get_indep_mean_std this is presumably a boolean requesting the mean
     and standard deviation of the dependent variable -- confirm. */
  int get_depvar_mean_std;

  /* Array of booleans dictating which independent variables need their
     means and standard deviations computed within pspp_linreg.  This
     array MUST be of length n_indeps.  If element i is 1, pspp_linreg
     will compute the mean and standard deviation of independent
     variable i.  If element i is 0, it will not compute the mean and
     standard deviation, and assume the values are stored.
     cache->indep_mean[i] is the mean and cache->indep_std[i] is the
     sample standard deviation. */
  int *get_indep_mean_std;
};

typedef struct pspp_linreg_opts_struct pspp_linreg_opts;
65 Find the least-squares estimate of b for the linear model:
67 Y = Xb + Z
69 where Y is an n-by-1 column vector, X is an n-by-p matrix of
70 independent variables, b is a p-by-1 vector of regression coefficients,
71 and Z is an n-by-1 normally-distributed random vector with independent
72 identically distributed components with mean 0.
74 This estimate is found via the sweep operator or singular-value
75 decomposition with gsl.
78 References:
80 1. Matrix Computations, third edition. GH Golub and CF Van Loan.
81 The Johns Hopkins University Press. 1996. ISBN 0-8018-5414-8.
83 2. Numerical Analysis for Statisticians. K Lange. Springer. 1999.
84 ISBN 0-387-94979-8.
86 3. Numerical Linear Algebra for Applications in Statistics. JE Gentle.
87 Springer. 1998. ISBN 0-387-98542-5.
91 struct linreg_struct
93 double n_obs; /* Number of observations. */
94 int n_indeps; /* Number of independent variables. */
95 int n_coeffs; /* The intercept is not considered a
96 coefficient here. */
99 Pointers to the variables.
101 const struct variable *depvar;
102 const struct variable **indep_vars;
104 double *coeff;
105 double intercept;
106 int method; /* Method to use to estimate parameters. */
108 Means and standard deviations of the variables.
109 If these pointers are null when pspp_linreg() is
110 called, pspp_linreg() will compute their values.
112 Entry i of indep_means is the mean of independent
113 variable i, whose observations are stored in the ith
114 column of the design matrix.
116 double depvar_mean;
117 gsl_vector *indep_means;
118 gsl_vector *indep_std;
121 Sums of squares.
123 double ssm; /* Sums of squares for the overall model. */
124 double sst; /* Sum of squares total. */
125 double sse; /* Sum of squares error. */
126 double mse; /* Mean squared error. This is just sse /
127 dfe, but since it is the best unbiased
128 estimate of the population variance, it
129 has its own entry here. */
131 Covariance matrix of the parameter estimates.
133 gsl_matrix *cov;
135 Degrees of freedom.
137 double dft;
138 double dfe;
139 double dfm;
141 int dependent_column; /* Column containing the dependent variable. Defaults to last column. */
142 int refcnt;
145 typedef struct linreg_struct linreg;
149 linreg *linreg_alloc (const struct variable *, const struct variable **,
150 double, size_t);
152 void linreg_unref (linreg *);
153 void linreg_ref (linreg *);
156 Fit the linear model via least squares. All pointers passed to pspp_linreg
157 are assumed to be allocated to the correct size and initialized to the
158 values as indicated by opts.
160 void linreg_fit (const gsl_matrix *, linreg *);
162 double linreg_predict (const linreg *, const double *, size_t);
163 double linreg_residual (const linreg *, double, const double *, size_t);
164 const struct variable ** linreg_get_vars (const linreg *);
167 Mean of the independent variable.
169 double linreg_get_indep_variable_mean (const linreg *, size_t);
170 void linreg_set_indep_variable_mean (linreg *, size_t, double);
172 double linreg_mse (const linreg *);
174 double linreg_intercept (const linreg *);
176 const gsl_matrix * linreg_cov (const linreg *);
177 double linreg_coeff (const linreg *, size_t);
178 const struct variable * linreg_indep_var (const linreg *, size_t);
179 size_t linreg_n_coeffs (const linreg *);
180 double linreg_n_obs (const linreg *);
181 double linreg_sse (const linreg *);
182 double linreg_ssreg (const linreg *);
183 double linreg_dfmodel (const linreg *);
184 double linreg_sst (const linreg *);
185 void linreg_set_depvar_mean (linreg *, double);
186 double linreg_get_depvar_mean (const linreg *);
187 #endif