restructured talk, show what we have then what we plan.
[CommonLispStat.git] / src / stat-models / model.lisp
blob4e6bfabadef5316e4fef746dfc36737eea8830a0
1 ;;; -*- mode: lisp -*-
3 ;;; File: model.lisp
4 ;;; Time-stamp: <2009-04-12 12:09:16 tony>
5 ;;; Creation: <2006-05-17 21:34:07 rossini>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2007-- , AJ Rossini. BSD, LLGPL, or GPLv2, depending
8 ;;; on how it arrives.
9 ;;; Purpose: models as data summarization tools. Work towards an
10 ;;; object system with a comprehensive theory of data
11 ;;; summarization through models. The basic idea is that
12 ;;; models are used to summarize different aspects of a
13 ;;; data generating process, possibly realized by a
14 ;;; dataset.
16 ;;; What is this talk of 'release'? Klingons do not make software
17 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
18 ;;; designers and quality assurance people in its wake.
20 (in-package :lisp-stat-model)
;; MODEL: a named formula plus the variable lists it is written in, and
;; bookkeeping slots for the solution and its logging.
;;
;; MODEL-FORMULA is a reader for the FORM slot only.  It used to be
;; listed as a reader on five different slots; every such option
;; defines a method on the same generic function with the same
;; specializer, so each one replaced the previous and MODEL-FORMULA did
;; not reliably return the formula.  The other slots keep their own
;; accessors (PARAM-VARS, DATA-VARS, FIXED-VARS, CRITFCN-VARS).
(defclass model ()
  ((name
    :initform nil
    :initarg :name
    :accessor name
    :reader model-name
    ;; The initform is NIL, so the declared type has to admit it.
    :type (or null string))
   (form
    :initform nil
    :initarg :formula
    :accessor form
    :reader model-formula
    :type list)
   ;; The following might not be all part of the model?!?
   (parameter-vars
    :initform nil
    :initarg :parameter-vars
    :accessor param-vars
    :type list)
   (data-vars
    :initform nil
    :initarg :data-vars
    :accessor data-vars)
   (fixed-vars
    :initform nil
    :initarg :fixed-vars
    :accessor fixed-vars)
   ;; Filled from the :criteriaFunction initarg (read by the Lisp reader
   ;; as :CRITERIAFUNCTION); accessed through CRITFCN-VARS.
   (solution
    :initform nil
    :initarg :criteriaFunction
    :accessor critFcn-vars)
   ;; No initarg; starts as NIL and is exposed read-only as DONE-SETUP?.
   (done-solution? :initform nil :reader done-setup?)
   (current-values :initform nil :accessor current-values)
   (log-file :initform nil :initarg :log-file :reader log-file)
   (test-data :initform nil :accessor test-data)
   ;; Read-only flags, settable only at construction time.
   (expected-failure-p :initform nil
                       :initarg :expected-failure-p
                       :reader expected-failure-p)
   (expected-error-p :initform nil
                     :initarg :expected-error-p
                     :reader expected-error-p)
   (expected-problem-p :initform nil
                       :initarg :expected-problem-p
                       :reader expected-problem-p))
  (:documentation "Mathematical Model"))
;; RESULT: container for what a solved model yields.  The three slots
;; have no initform, initarg or accessor, so they start unbound and are
;; reached with SLOT-VALUE.  Slot names are kept exactly as they were
;; (including the spelling PARAM-UNCERTAINITY) because SLOT-VALUE is
;; the only way callers can refer to them.
;;
;; The closing parenthesis of the DEFCLASS form was missing, which made
;; the reader swallow the definitions that follow it.
(defclass result ()
  ((param-values)
   (param-uncertainity)
   (param-characterization)))
76 ;; The following are types of models -- in particular, we can consider
77 ;; that these models
;; Combines MODEL and RESULT in one class and adds no slots of its own.
;; DEFCLASS requires the slot-specifier list even when it is empty.
(defclass statistical-model (model result)
  ())
;; Combines MODEL and RESULT in one class and adds no slots of its own.
;; DEFCLASS requires the slot-specifier list even when it is empty.
(defclass ode-model (model result)
  ())
;; Subclass of STATISTICAL-MODEL with no additional slots.  The
;; superclass was previously spelled STATISTICAL-MODE, a class that is
;; not defined anywhere in this file, and the (required) slot-specifier
;; list was missing.
(defclass linear-regression-model (statistical-model)
  ())
;; Subclass of STATISTICAL-MODEL with no additional slots.  DEFCLASS
;; requires the slot-specifier list even when it is empty.
(defclass generalized-linear-regression-model (statistical-model)
  ())
;; Subclass of STATISTICAL-MODEL with no additional slots.  DEFCLASS
;; requires the slot-specifier list even when it is empty.
(defclass nonlinear-linear-regression-model (statistical-model)
  ())
91 #|
92 ;;; garbage follows
95 ;; modelType
97 root
98 opt (min / max)
100 diffeqn
102 diffeqn + solve
104 ;; dataType
106 varExtract
107 tableExtract
108 relationExtract
112 ;; mappingType
114 (setf myFirstModel
115 (defineModel normalLikelihood '((* (/ 1 (sqrt (* 2 (pi) sigma)))
116 (exp (/ (- x mu)
117 sigma))))
118 :fixed '()
119 :param '(mu sigma)
120 :data '(x)
121 :critFcnSoln '(:max (one-of bfgs nelder-mead conjugate-gradient))))
123 (defclass model ()
124 ((name )
125 (vars-fixed :initarg nil :arg fixed)
126 (vars-param :initarg nil :arg param)
127 (vars-data :initarg nil :arg data)
128 (critFcn ))
131 (defclass meanModel (model) ... ) ;; a macro to map onto Model
132 (defclass meanVarModel (model) ... )
133 (defclass regressionModel (meanModel) )
134 (defclass mixedModel (regressionModel) ... )
135 (defclass bayesianModel (model) ... )
136 (defclass diffintgleqnModel (model) ) ;;(ODE, PDF, integral equations)
140 (setf modX
141 (solveModel :model myFirstModel
142 :data myVar
143 :mapping '((x . myVar))))
145 ;; result structure
147 (:solution
148 :params
149 :params-characterisation
150 :paradigm '(bayesian frequentist)
151 :dataname
152 :modelname )
156 (with-mapping map :model mymod :data mydata
157 (bootstrap mymod mydata))
160 ;; solution should inherit from model and data (and be recomputable)
161 ;; func args override embedded args
163 ;; (solveModel firstSoln) "=" firstSoln
165 ;; unless stoch approx used.
175 ;;; -*- mode: lisp -*-
176 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
177 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
178 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
180 ;;; File: model-fit.lisp
181 ;;; Author: AJ Rossini <blindglobe@gmail.com>
182 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
183 ;;; on how it arrives.
184 ;;; Purpose: models as data summarization tools.
185 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
186 ;;; Creation: <2006-05-17 21:34:07 rossini>
188 ;;; What is this talk of 'release'? Klingons do not make software
189 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
190 ;;; designers and quality assurance people in its wake.
192 ;;; The model class is used to map a partial or complete model
193 ;;; specification (partial specification made complete by assumptions
194 ;;; inherent in the type of model used, i.e. linear regression assumes
195 ;;; mean-zero and homoskedasticity) and the model-fit class is used to
196 ;;; complete the model specification into how the model instance and
197 ;;; type will then get computed. The approach taken is that we
198 ;;; definitely want to be able to see how we can explicitly try to
199 ;;; characterize the work that we are trying to infer about the data.
201 (in-package :cl-user)
;; Package for the model-fitting prototypes; exports the single symbol
;; FIT.  SLOT-VALUE, CALL-METHOD and CALL-NEXT-METHOD are taken from
;; LISP-STAT-OBJECT-SYSTEM, shadowing the COMMON-LISP symbols of the
;; same names.
;;
;; The (:use ...) clause was never closed, so the :shadowing-import-from
;; and :export clauses were read as part of the use list.
(defpackage :lisp-stat-model-fit
  (:documentation "Model fitting theory.")
  (:nicknames :ls-model-fit)
  (:use :common-lisp
        :lisp-stat-config
        :lisp-stat-object-system
        :lisp-stat-types
        :lisp-stat-compound-data
        :lisp-stat-matrix
        :lisp-stat-linalg)
  (:shadowing-import-from :lisp-stat-object-system
                          slot-value call-method call-next-method)
  (:export fit))
220 (in-package :lisp-stat-model)
;; FIT: describes how a model is to be fitted -- a named criteria
;; function plus the parameter, data and fixed variable lists.
;;
;; Changes relative to the earlier prototype:
;;  * The accessor for CRITERIA-FUNCTION was called FUNCTION, which is a
;;    special operator in the COMMON-LISP package and may not be
;;    redefined; it is now CRITERIA-FUNCTION.  The reader FIT-CRITERIA
;;    is unchanged.
;;  * MODEL-FORMULA was a reader on three slots at once; the methods
;;    replaced one another.  It is now attached to one slot only.
;;    NOTE(review): CRITERIA-FUNCTION was chosen because it is the slot
;;    filled by the :formula initarg -- confirm this is the intent.
;;  * The slot list was not closed before the :documentation option.
;; The slot name CRITERIA-FUNCTIN-NAME (sic) is left as it was.
(defclass fit ()
  ((criteria-functin-name
    :initform nil
    :initarg :name
    :accessor name
    :reader model-name)
   (criteria-function
    :initform nil
    :initarg :formula
    :accessor criteria-function
    :reader fit-criteria
    :reader model-formula)
   (parameter-vars
    :initform nil
    :initarg :parameter-vars
    :accessor param-vars)
   (data-vars
    :initform nil
    :initarg :data-vars
    :accessor data-vars)
   (fixed-vars
    :initform nil
    :initarg :fixed-vars
    :accessor fixed-vars))
  (:documentation "Mathematical Model Fit approach"))
;; Taxonomy of fitting strategies.  None of these classes adds slots;
;; each DEFCLASS now carries the empty slot-specifier list that the
;; macro requires.
;;
;; NOTE(review): the root superclass MODEL-FIT is not defined in the
;; visible source -- the class defined just above is named FIT.  Confirm
;; which name is intended before instantiating any of these.

;; Strategies that optimize a criterion.
(defclass optimization (model-fit) ())

(defclass least-squares (optimization) ())
(defclass weighted-least-squares (least-squares) ())

(defclass maximum-likelihood (optimization) ())
(defclass minimax (optimization) ())
(defclass maximin (optimization) ())

(defclass minimum-entropy (optimization) ())
(defclass lq-norm (optimization) ())

;; Strategies that solve for a root.
(defclass root-finding (model-fit) ())

(defclass method-of-moments (root-finding) ())
(defclass marginal-models (method-of-moments) ())
(defclass gee (marginal-models) ())
(defclass gee2 (marginal-models) ())
268 ;;; How would this be used?
270 (setf my-regr-model-1
271 (new 'least-squares '(- y (+ (* beta1 x1) (* beta2 x2)))))
273 ;; and there should be an approach which could provide a mapping to this, i.e.
275 (regression-model (list y) (list x1 x2))
277 ;; could map to the above via macros.
280 ;;;;; More misc protos...
;; MODEL-SPECIFICATION: holds a specification in two slots, SPEC-STRING
;; (initarg :specification) and SPEC-FORM (initarg :spec-form), plus a
;; MODEL-CLASS slot that starts as NIL and has no initarg or accessor.
;;
;; The accessors used to be named by the keywords :SPECIFICATION and
;; :SPEC-FORM, which defines global functions on keyword symbols.
;; Conventional accessors SPECIFICATION and SPEC-FORM are added; the
;; keyword-named ones are kept so existing calls keep working.
(defclass model-specification ()
  ((spec-string
    :initform nil
    :initarg :specification
    :accessor specification
    :accessor :specification)   ; kept for backward compatibility
   (spec-form
    :initform nil
    :initarg :spec-form
    :accessor spec-form
    :accessor :spec-form)       ; kept for backward compatibility
   (model-class
    :initform nil))
  (:documentation "container for mathematical structure"))
;; BAYESIAN-MODEL-SPECIFICATION: a MODEL-SPECIFICATION with one extra
;; slot, PRIOR-MODEL-CLASS (no initform, initarg or accessor, so it
;; starts unbound).
;;
;; SPEC-STRING and SPEC-FORM were repeated here with exactly the same
;; options as in MODEL-SPECIFICATION.  They are inherited -- together
;; with their initargs and accessors -- so the copies are dropped to
;; keep the two definitions from drifting apart.
(defclass bayesian-model-specification (model-specification)
  ((prior-model-class))
  (:documentation "adds structure holding priors to the model"))
303 ;;; The following should be self-created based on introspection of
304 ;;; available:
305 ;;; ## inferential technologies (bayesian, frequentist, etc),
306 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
307 ;;; minimax, etc)
308 ;;; ## simplification macros, i.e. mapping directly to linear
309 ;;; regression and other applications. fast specialized
310 ;;; algorithms for edge cases and narrow conditions.
311 ;;; ##
313 (defparameter *model-class-list*
314 '((linear-regression frequentist)
315 (generalized-linear-regression parametric)
316 (linear-regression bayesian)
317 ()))
319 ;;;;; More mischief from a different time
322 ;; regression-model is the old API, but regression as a generic will
323 ;; be the new API. We need to distinguish between APIs which enable
324 ;; the user to do clear activities, and APIs which enable developers
325 ;; to do clear extensions and development, and underlying
326 ;; infrastructure to keep everything straight and enabled.
328 ;; There are conflicting theories for how to structure the
329 ;; specification of mathematical models, along with the statistical
330 ;; inference, along with the data which is instantiating the model.
332 ;; i.e.: mathematical model for the relationships between components,
333 ;; between a component and a summarizing parameter, and between
334 ;; parameters.
336 ;; statistical inference describes the general approach for
337 ;; aggregating into a decision and has implications for the scale up
338 ;; from the model on a single instance to the generalization.
340 ;; The data represents the particular substantive context that is
341 ;; driving the model/inference combination, and about which we hope to
342 ;; generate knowledge.
344 ;; numerical analysis selects appropriate algorithms/implementations
345 ;; for combining the above 3.
347 ;; the end result is input on the decision being made (which could be
348 ;; specific (decision analysis/testing), risk-analysis (interval
349 ;; estimation) , most likely/appropriate selection (point estimation)