use names, start constructing model generics.
[CommonLispStat.git] / src / stat-models / model.lisp
blobf69c31589b85cc85e596cecfa7cd3b58aa6ff21e
1 ;;; -*- mode: lisp -*-
3 ;;; File: model.lisp
4 ;;; Time-stamp: <2010-01-25 15:52:30 tony>
5 ;;; Creation: <2006-05-17 21:34:07 rossini>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2007-- , AJ Rossini. BSD, LLGPL, or GPLv2, depending
8 ;;; on how it arrives.
9 ;;; Purpose: models as data summarization tools. Work towards an
10 ;;; object system with a comprehensive theory of data
11 ;;; summarization through models. The basic idea is that
12 ;;; models are used to summarize different aspects of a
13 ;;; data generating process, possibly realized by a
14 ;;; dataset.
16 ;;; What is this talk of 'release'? Klingons do not make software
17 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
18 ;;; designers and quality assurance people in its wake.
20 (in-package :lisp-stat-model)
;; Base class for a mathematical model: a name, a formula, and the
;; variable lists (parameter / data / fixed) that the formula refers to,
;; plus bookkeeping slots.
;;
;; MODEL-FORMULA is a reader on the FORM slot only.  It used to be
;; declared as a reader on four further slots as well; each declaration
;; defines a method on the same generic function for the same class, so
;; the last one silently replaced the others and MODEL-FORMULA returned
;; the SOLUTION slot instead of the formula.
(defclass model ()
  ((name
    :initform nil
    :initarg :name
    :accessor name
    :reader model-name
    ;; The default is NIL, so the declared type has to admit it.
    :type (or null string))
   (form
    :initform nil
    :initarg :formula
    :accessor form
    :reader model-formula
    :type list)
   ;; The following might not be all part of the model?!?
   (parameter-vars
    :initform nil
    :initarg :parameter-vars
    :accessor param-vars
    :type list)
   (data-vars
    :initform nil
    :initarg :data-vars
    :accessor data-vars)
   (fixed-vars
    :initform nil
    :initarg :fixed-vars
    :accessor fixed-vars)
   ;; Filled from the :criteriaFunction initarg, read/written through
   ;; CRITFCN-VARS.
   (solution
    :initform nil
    :initarg :criteriaFunction
    :accessor critFcn-vars)
   ;; Read-only from outside (DONE-SETUP?); no initarg, starts as NIL.
   (done-solution? :initform nil :reader done-setup?)
   (current-values :initform nil :accessor current-values)
   ;; NOTE(review): the remaining slots carry test-harness style names;
   ;; confirm they are meant to live on MODEL.
   (log-file :initform nil :initarg :log-file :reader log-file)
   (test-data :initform nil :accessor test-data)
   (expected-failure-p :initform nil :initarg :expected-failure-p
                       :reader expected-failure-p)
   (expected-error-p :initform nil :initarg :expected-error-p
                     :reader expected-error-p)
   (expected-problem-p :initform nil :initarg :expected-problem-p
                       :reader expected-problem-p))
  (:documentation "Mathematical Model"))
;; Mixin holding the outcome of solving a model.  The three slots have
;; no initargs, initforms or accessors: they start out unbound and are
;; reached through SLOT-VALUE.  (The spelling "uncertainity" is the
;; slot's actual name and is kept so existing SLOT-VALUE calls work.)
(defclass result ()
  (param-values
   param-uncertainity
   param-characterization))
76 ;; The following are types of models -- in particular, we can consider
77 ;; that these models
;; Concrete kinds of model.  Each one adds no slots of its own, but
;; DEFCLASS still requires the (empty) slot list after the superclasses.

;; A model together with its solved result.
(defclass statistical-model (model result) ())

(defclass ode-model (model result) ())

;; Regression flavours all specialise STATISTICAL-MODEL (the linear one
;; previously named a non-existent superclass, STATISTICAL-MODE).
(defclass regression-model-linear (statistical-model) ())

(defclass regression-model-generalized-linear (statistical-model) ())

(defclass regression-model-nonlinear-linear (statistical-model) ())
89 ;;;;;;;;;
(defgeneric =model (lhs rhs)
  (:documentation
   "Return a model instance describing the model specified by LHS and RHS.

LHS must be an atom and RHS a list.  When LINEAR-MODEL accepts RHS the
result is a REGRESSION-MODEL-LINEAR whose formula is (LHS RHS);
otherwise it is a GENERAL-MODEL.  Signals a TYPE-ERROR when LHS is not
an atom.")
  ;; ATOM is a type, not a class, so it cannot be used as a specializer;
  ;; dispatch on T and enforce the restriction explicitly instead.
  (:method ((lhs t) (rhs list))
    (check-type lhs atom)
    ;; NOTE(review): LINEAR-MODEL and the GENERAL-MODEL class are not
    ;; defined in this part of the file -- confirm they exist elsewhere.
    (if (linear-model rhs)
        ;; MAKE-INSTANCE takes keyword/value pairs; the two sides are
        ;; stored through the :FORMULA initarg of MODEL.
        ;; NOTE(review): (list lhs rhs) is an assumed representation of
        ;; the formula -- confirm against the intended formula syntax.
        (make-instance 'regression-model-linear
                       :formula (list lhs rhs))
        (make-instance 'general-model))))
100 ;;; Centrality
(defgeneric =mean (lhs rhs)
  (:documentation
   "Centrality specification relating LHS to RHS through a mean.
Presumably the mean of LHS is modelled in terms of RHS; this is inferred
from the name only, and no methods are defined alongside this
declaration."))
(defgeneric =quantile (lhs rhs quantile)
  (:documentation
   "Centrality specification relating LHS to RHS through a quantile.
QUANTILE presumably selects which quantile is modelled; this is inferred
from the names only, and no methods are defined alongside this
declaration."))
104 ;;; Variable
(defgeneric =var (lhs rhs) ; lhs should spec parameter
  (:documentation
   "Variability specification relating LHS to RHS.
LHS is expected to specify the parameter.  No methods are defined
alongside this declaration."))
109 ;;; garbage follows
112 ;; modelType
114 root
115 opt (min / max)
117 diffeqn
119 diffeqn + solve
121 ;; dataType
123 varExtract
124 tableExtract
125 relationExtract
129 ;; mappingType
131 (setf myFirstModel
132 (defineModel normalLikelihood '((* (/ 1 (sqrt (* 2 (pi) sigma)))
133 (exp (/ (- x mu)
134 sigma))))
135 :fixed '()
136 :param '(mu sigma)
137 :data '(x)
138 :critFcnSoln '(:max (one-of bfgs nelder-mead conjugate-gradient))))
140 (defclass model ()
141 ((name )
142 (vars-fixed :initarg nil :arg fixed)
143 (vars-param :initarg nil :arg param)
144 (vars-data :initarg nil :arg data)
145 (critFcn ))
148 (defclass meanModel (model) ... ) ;; a macro to map onto Model
149 (defclass meanVarModel (model) ... )
150 (defclass regressionModel (meanModel) )
151 (defclass mixedModel (regressionModel) ... )
152 (defclass bayesianModel (model) ... )
153 (defclass diffintgleqnModel (model) ) ;;(ODE, PDF, integral equations)
157 (setf modX
158 (solveModel :model myFirstModel
159 :data myVar
160 :mapping '((x . myVar))))
162 ;; result structure
164 (:solution
165 :params
166 :params-characterisation
167 :paradigm '(bayesian frequentist)
168 :dataname
169 :modelname )
173 (with-mapping map :model mymod :data mydata
174 (bootstrap mymod mydata))
177 ;; solution should inherit from model and data (and be recomputable)
178 ;; func args override embedded args
180 ;; (solveModel firstSoln) "=" firstSoln
182 ;; unless stoch approx used.
192 ;;; -*- mode: lisp -*-
193 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
194 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
195 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
197 ;;; File: model-fit.lisp
198 ;;; Author: AJ Rossini <blindglobe@gmail.com>
199 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
200 ;;; on how it arrives.
201 ;;; Purpose: models as data summarization tools.
202 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
203 ;;; Creation: <2006-05-17 21:34:07 rossini>
205 ;;; What is this talk of 'release'? Klingons do not make software
206 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
207 ;;; designers and quality assurance people in its wake.
209 ;;; The model class is used to map a partial or complete model
210 ;;; specification (partial specification made complete by assumptions
211 ;;; inherent in the type of model used, i.e. linear regression assumes
212 ;;; mean-zero and homoskedasticity) and the model-fit class is used to
213 ;;; complete the model specification into how the model instance and
214 ;;; type will then get computed. The approach taken is that we
215 ;;; definitely want to be able to see how we can explicitly try to
216 ;;; characterize the work that we are trying to infer about the data.
218 (in-package :cl-user)
;; Package for the model-fitting layer.  Exports the single symbol FIT.
(defpackage :lisp-stat-model-fit
  (:documentation "Model fitting theory.")
  (:nicknames :ls-model-fit)
  ;; The :USE clause must be closed before the next option starts.
  ;; NOTE(review): confirm that :lisp-stat-matrix and :lisp-stat-linalg
  ;; are really meant to be used here and were not commented out.
  (:use :common-lisp
        :lisp-stat-config
        :lisp-stat-object-system
        :lisp-stat-types
        :lisp-stat-compound-data
        :lisp-stat-matrix
        :lisp-stat-linalg)
  ;; The object system supplies its own versions of these three names;
  ;; shadowing-import makes them win over the COMMON-LISP ones.
  ;; Uninterned designators (#:) avoid interning stray symbols in the
  ;; package that happens to be current when this form is read.
  (:shadowing-import-from :lisp-stat-object-system
                          #:slot-value #:call-method #:call-next-method)
  (:export #:fit))
237 (in-package :lisp-stat-model)
;; Describes how a model is to be fitted: a named criteria function plus
;; the parameter / data / fixed variable lists it works with.
;;
;; Changes from the earlier definition:
;;  * the criteria-function accessor was called FUNCTION, which names a
;;    special operator in the COMMON-LISP package and may not be
;;    redefined; it is now CRITERIA-FUNCTION (FIT-CRITERIA still reads
;;    the same slot);
;;  * MODEL-FORMULA was declared as a reader on three different slots,
;;    so only the last declaration took effect; it now reads the slot
;;    that the :FORMULA initarg fills;
;;  * the slot list is closed before the class options.
(defclass fit ()
  (;; (sic) slot name kept as spelled so existing SLOT-VALUE calls work.
   (criteria-functin-name
    :initform nil
    :initarg :name
    :accessor name
    :reader model-name)
   (criteria-function
    :initform nil
    :initarg :formula
    :accessor criteria-function
    :reader fit-criteria
    :reader model-formula)
   (parameter-vars
    :initform nil
    :initarg :parameter-vars
    :accessor param-vars)
   (data-vars
    :initform nil
    :initarg :data-vars
    :accessor data-vars)
   (fixed-vars
    :initform nil
    :initarg :fixed-vars
    :accessor fixed-vars))
  (:documentation "Mathematical Model Fit approach"))
;; Taxonomy of fitting approaches.  None of these classes adds slots,
;; but DEFCLASS still requires the (empty) slot list.
;;
;; NOTE(review): the two roots used to inherit from MODEL-FIT, which is
;; not defined in the visible part of this file; the fitting base class
;; defined here is FIT, so that is what they inherit from now.  Confirm
;; no separate MODEL-FIT class is intended.

;;; Fits obtained by optimising a criterion.
(defclass optimization (fit) ())

(defclass least-squares (optimization) ())
(defclass weighted-least-squares (least-squares) ())

(defclass maximum-likelihood (optimization) ())
(defclass minimax (optimization) ())
(defclass maximin (optimization) ())

(defclass minimum-entropy (optimization) ())
(defclass lq-norm (optimization) ())

;;; Fits obtained by solving for a root.
(defclass root-finding (fit) ())

(defclass method-of-moments (root-finding) ())
(defclass marginal-models (method-of-moments) ())
(defclass gee (marginal-models) ())
(defclass gee2 (marginal-models) ())
285 ;;; How would this be used?
287 (setf my-regr-model-1
288 (new 'least-squares '(- y (+ (* beta1 x1) (* beta2 x2)))))
290 ;; and there should be an approach which could provide a mapping to this, i.e.
292 (regression-model (list y) (list x1 x2))
294 ;; could map to the above via macros.
297 ;;;;; More misc protos...
;; Container for a model specification in two forms (SPEC-STRING and
;; SPEC-FORM) plus a MODEL-CLASS slot that has no initarg or accessor.
;;
;; The accessors were originally named by keywords (:SPECIFICATION and
;; :SPEC-FORM), which defines global functions on symbols of the KEYWORD
;; package.  Conventionally named accessors SPECIFICATION and SPEC-FORM
;; are provided; the keyword-named ones are retained so that any
;; existing caller keeps working.
(defclass model-specification ()
  ((spec-string
    :initform nil
    :initarg :specification
    :accessor specification
    :accessor :specification)
   (spec-form
    :initform nil
    :initarg :spec-form
    :accessor spec-form
    :accessor :spec-form)
   (model-class
    :initform nil))
  (:documentation "container for mathematical structure"))
;; Extends MODEL-SPECIFICATION with a PRIOR-MODEL-CLASS slot (no
;; initarg, initform or accessor, so it starts unbound).
;;
;; SPEC-STRING and SPEC-FORM are inherited from MODEL-SPECIFICATION with
;; their initargs, NIL defaults and accessors.  The earlier definition
;; repeated both slot declarations with identical options, which added
;; nothing and had to be kept in sync by hand.
(defclass bayesian-model-specification (model-specification)
  ((prior-model-class))
  (:documentation "adds structure holding priors to the model"))
320 ;;; The following should be self-created based on introspection of
321 ;;; available:
322 ;;; ## inferential technologies (bayesian, frequentist, etc),
323 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
324 ;;; minimax, etc)
325 ;;; ## simplification macros, i.e. mapping directly to linear
326 ;;; regression and other applications. fast specialized
327 ;;; algorithms for edge cases and narrow conditions.
328 ;;; ##
;; Hand-maintained table of known model classes.  Each entry is a
;; two-element list; the final entry is an empty list.
(defparameter *model-class-list*
  (quote ((linear-regression frequentist)
          (generalized-linear-regression parametric)
          (linear-regression bayesian)
          nil)))
336 ;;;;; More mischief from a different time
339 ;; regression-model is the old API, but regression as a generic will
340 ;; be the new API. We need to distinguish between APIs which enable
341 ;; the user to do clear activities, and APIs which enable developers
342 ;; to do clear extensions and development, and underlying
343 ;; infrastructure to keep everything straight and enabled.
345 ;; There are conflicting theories for how to structure the
346 ;; specification of mathematical models, along with the statistical
347 ;; inference, along with the data which is instantiating the model.
349 ;; i.e.: mathematical model for the relationships between components,
350 ;; between a component and a summarizing parameter, and between
351 ;; parameters.
353 ;; statistical inference describes the general approach for
354 ;; aggregating into a decision and has implications for the scale up
355 ;; from the model on a single instance to the generalization.
357 ;; The data represents the particular substantive context that is
358 ;; driving the model/inference combination, and about which we hope to
359 ;; generate knowledge.
361 ;; numerical analysis selects appropriate algorithms/implementations
362 ;; for combining the above 3.
364 ;; the end result is input on the decision being made (which could be
365 ;; specific (decision analysis/testing), risk-analysis (interval
366 ;; estimation) , most likely/appropriate selection (point estimation)