4 ;;; Time-stamp: <2010-01-25 15:52:30 tony>
5 ;;; Creation: <2006-05-17 21:34:07 rossini>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2007-- , AJ Rossini. BSD, LLGPL, or GPLv2, depending
;;; Purpose: models as data summarization tools. Work towards an
10 ;;; object system with a comprehensive theory of data
11 ;;; summarization through models. The basic idea is that
12 ;;; models are used to summarize different aspects of a
13 ;;; data generating process, possibly realized by a
16 ;;; What is this talk of 'release'? Klingons do not make software
17 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
18 ;;; designers and quality assurance people in its wake.
20 (in-package :lisp-stat-model
)
35 ;; The following might not be all part of the model?!?
38 :initarg
:parameter-vars
46 :reader model-formula
)
51 :reader model-formula
)
53 (solution :initform nil
54 :initarg
:criteriaFunction
55 :accessor critFcn-vars
56 :reader model-formula
)
57 (done-solution?
:initform nil
:reader done-setup?
)
59 (current-values :initform nil
:accessor current-values
)
61 (log-file :initform nil
:initarg
:log-file
:reader log-file
)
62 (test-data :initform nil
:accessor test-data
)
63 (expected-failure-p :initform nil
:initarg
:expected-failure-p
64 :reader expected-failure-p
)
65 (expected-error-p :initform nil
:initarg
:expected-error-p
66 :reader expected-error-p
)
67 (expected-problem-p :initform nil
:initarg
:expected-problem-p
68 :reader expected-problem-p
))
69 (:documentation
"Mathematical Model"))
74 (param-characterization )))
76 ;; The following are types of models -- in particular, we can consider
;; One of the model types: has MODEL and RESULT as direct superclasses
;; and declares no slots of its own.
(defclass statistical-model (model result)
  ()) ; explicit (empty) direct-slot list -- DEFCLASS requires this argument
;; One of the model types: has MODEL and RESULT as direct superclasses
;; and declares no slots of its own.
(defclass ode-model (model result)
  ()) ; explicit (empty) direct-slot list -- DEFCLASS requires this argument
;; Linear regression model type.  Specializes STATISTICAL-MODEL (the
;; superclass was previously misspelled STATISTICAL-MODE, which names no
;; class declared in this file) and declares no slots of its own.
(defclass regression-model-linear (statistical-model)
  ()) ; explicit (empty) direct-slot list -- DEFCLASS requires this argument
;; Generalized linear regression model type.  Specializes
;; STATISTICAL-MODEL and declares no slots of its own.
(defclass regression-model-generalized-linear (statistical-model)
  ()) ; explicit (empty) direct-slot list -- DEFCLASS requires this argument
;; Model type named REGRESSION-MODEL-NONLINEAR-LINEAR.  Specializes
;; STATISTICAL-MODEL and declares no slots of its own.
(defclass regression-model-nonlinear-linear (statistical-model)
  ()) ; explicit (empty) direct-slot list -- DEFCLASS requires this argument
;; Generic constructor: given a left-hand side LHS and a right-hand side
;; RHS, return a model instance.  The only method here dispatches on an
;; atom LHS and a list RHS.
;;
;; The two MAKE-INSTANCE calls are now the separate then/else branches of
;; the IF; previously the second call was nested inside the first and the
;; form was never closed.
;; NOTE(review): no initargs are passed to either MAKE-INSTANCE -- confirm
;; whether LHS/RHS should be forwarded to the new instance.
(defgeneric =model (lhs rhs)
  (:documentation "returns a class describing the model specified")
  (:method ((lhs atom) (rhs list))
    ;; LINEAR-MODEL is defined outside this block; its result is used only
    ;; as a generalized boolean to pick the class to instantiate.
    (if (linear-model rhs)
        (make-instance 'regression-model-linear)
        (make-instance 'general-model))))
;; Two-argument generic function (LHS, RHS); declared here with no
;; methods and no options.
(defgeneric =mean (lhs rhs))
;; Three-argument generic function (LHS, RHS, QUANTILE); declared here
;; with no methods and no options.
(defgeneric =quantile (lhs rhs quantile))
;; Two-argument generic function (LHS, RHS); declared here with no
;; methods and no options.
(defgeneric =var (lhs rhs)) ; lhs should spec parameter
132 (defineModel normalLikelihood
'((* (/ 1 (sqrt (* 2 (pi) sigma
)))
138 :critFcnSoln
'(:max
(one-of bfgs nelder-mead conjugate-gradient
))))
142 (vars-fixed :initarg nil
:arg fixed
)
143 (vars-param :initarg nil
:arg param
)
144 (vars-data :initarg nil
:arg data
)
;; Sketch of a model class family.  Every class is declared with an empty
;; direct-slot list: the original used a literal "..." placeholder (which
;; the Lisp reader rejects) or omitted the slot list altogether.
;; NOTE(review): slots are still to be designed for all of these.
(defclass meanModel (model) ()) ;; a macro to map onto Model
(defclass meanVarModel (model) ())
(defclass regressionModel (meanModel) ())
(defclass mixedModel (regressionModel) ())
(defclass bayesianModel (model) ())
(defclass diffintgleqnModel (model) ()) ;;(ODE, PDF, integral equations)
158 (solveModel :model myFirstModel
160 :mapping
'((x . myVar
))))
166 :params-characterisation
167 :paradigm
'(bayesian frequentist
)
;; Usage sketch: WITH-MAPPING receives MAP, the :MODEL and :DATA keyword
;; arguments, and a BOOTSTRAP call on the same model and data.
;; WITH-MAPPING and BOOTSTRAP are not defined in this part of the file.
(with-mapping map :model mymod :data mydata
  (bootstrap mymod mydata))
177 ;; solution should inherit from model and data (and be recomputable)
178 ;; func args override embedded args
180 ;; (solveModel firstSoln) "=" firstSoln
182 ;; unless stoch approx used.
192 ;;; -*- mode: lisp -*-
193 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
194 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
195 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
197 ;;; File: model-fit.lisp
198 ;;; Author: AJ Rossini <blindglobe@gmail.com>
199 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
200 ;;; on how it arrives.
;;; Purpose: models as data summarization tools.
202 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
203 ;;; Creation: <2006-05-17 21:34:07 rossini>
205 ;;; What is this talk of 'release'? Klingons do not make software
206 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
207 ;;; designers and quality assurance people in its wake.
209 ;;; The model class is used to map a partial or complete model
210 ;;; specification (partial specification made complete by assumptions
211 ;;; inherent in the type of model used, i.e. linear regression assumes
212 ;;; mean-zero and homoskedasticity) and the model-fit class is used to
213 ;;; complete the model specification into how the model instance and
;;; type will then get computed. The approach taken is that we
;;; definitely want to be able to see how we can explicitly try to
;;; characterize the work that we are trying to infer about the data.
218 (in-package :cl-user
)
220 (defpackage :lisp-stat-model-fit
221 (:documentation
"Model fitting theory.")
222 (:nicknames
:ls-model-fit
)
225 :lisp-stat-object-system
227 :lisp-stat-compound-data
233 (:shadowing-import-from
:lisp-stat-object-system
234 slot-value call-method call-next-method
)
237 (in-package :lisp-stat-model
)
240 ((criteria-functin-name :initform nil
244 (criteria-function :initform nil
247 :reader fit-criteria
)
248 (parameter-vars :initform nil
249 :initarg
:parameter-vars
251 :reader model-formula
)
252 (data-vars :initform nil
255 :reader model-formula
)
256 (fixed-vars :initform nil
259 :reader model-formula
)
261 (:documentation
"Mathematical Model Fit approach"))
;; Fit approaches based on optimizing a criterion.  OPTIMIZATION
;; specializes MODEL-FIT (defined outside this block); every class below
;; it adds no slots.  Each DEFCLASS now carries the explicit empty
;; direct-slot list that the macro requires.
(defclass optimization (model-fit) ())

(defclass least-squares (optimization) ())
(defclass weighted-least-squares (least-squares) ())

(defclass maximum-likelihood (optimization) ())
(defclass minimax (optimization) ())
(defclass maximin (optimization) ())

(defclass minimum-entropy (optimization) ())
(defclass lq-norm (optimization) ())
;; Fit approaches based on root finding.  ROOT-FINDING specializes
;; MODEL-FIT (defined outside this block); every class below it adds no
;; slots.  Each DEFCLASS now carries the explicit empty direct-slot list
;; that the macro requires.
(defclass root-finding (model-fit) ())

(defclass method-of-moments (root-finding) ())
(defclass marginal-models (method-of-moments) ())
(defclass gee (marginal-models) ())
(defclass gee2 (marginal-models) ())
285 ;;; How would this be used?
;; Usage sketch: assign to MY-REGR-MODEL-1 the result of calling NEW with
;; the symbol LEAST-SQUARES and a quoted expression, y - (beta1*x1 + beta2*x2).
;; NEW is not defined in this part of the file.
(setf my-regr-model-1
      (new 'least-squares
           '(- y (+ (* beta1 x1)
                    (* beta2 x2)))))
290 ;; and there should be an approach which could provide a mapping to this, i.e.
;; Usage sketch: call REGRESSION-MODEL with a one-element list holding Y
;; and a two-element list holding X1 and X2.
(regression-model (list y)
                  (list x1 x2))
294 ;; could map to the above via macros.
297 ;;;;; More misc protos...
;; Container class with three slots, all defaulting to NIL:
;;   SPEC-STRING -- set with the :SPECIFICATION initarg
;;   SPEC-FORM   -- no initarg
;;   MODEL-CLASS -- no initarg and no accessor
;; NOTE(review): both accessors are named by keyword symbols
;; (:SPECIFICATION, :SPEC-FORM), so they are called as
;; (:specification obj) -- confirm this is intended.
(defclass model-specification ()
  ((spec-string
    :accessor :specification
    :initarg :specification
    :initform nil)
   (spec-form
    :accessor :spec-form
    :initform nil)
   (model-class
    :initform nil))
  (:documentation "container for mathematical structure"))
;; Subclass of MODEL-SPECIFICATION.  The slot specifiers are now wrapped
;; in a single direct-slot list; previously its opening parenthesis was
;; missing, so the form closed early and left an unmatched ")" after the
;; :DOCUMENTATION option.
;;
;; Slots declared here, both defaulting to NIL:
;;   SPEC-STRING -- :SPECIFICATION initarg, :SPECIFICATION accessor
;;   SPEC-FORM   -- :SPEC-FORM accessor, no initarg
;; NOTE(review): the documentation string mentions priors, but no
;; prior-holding slot is declared in the visible code -- confirm.
(defclass bayesian-model-specification (model-specification)
  ((spec-string :initform nil
                :initarg :specification
                :accessor :specification)
   (spec-form :initform nil
              :accessor :spec-form))
  (:documentation "adds structure holding priors to the model"))
320 ;;; The following should be self-created based on introspection of
322 ;;; ## inferential technologies (bayesian, frequentist, etc),
323 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
325 ;;; ## simplification macros, i.e. mapping directly to linear
326 ;;; regression and other applications. fast specialized
327 ;;; algorithms for edge cases and narrow conditions.
;; List of two-element entries.  Judging by the comment block preceding
;; this definition, each entry presumably pairs a model kind with an
;; inferential approach -- TODO confirm.
;; The variable name is rejoined on one line (its trailing "*" had been
;; split off, which made DEFPARAMETER read "*" as the initial value) and
;; the quoted list is closed.
(defparameter *model-class-list*
  '((linear-regression frequentist)
    (generalized-linear-regression parametric)
    (linear-regression bayesian)))
336 ;;;;; More mischief from a different time
339 ;; regression-model is the old API, but regression as a generic will
340 ;; be the new API. We need to distinguish between APIs which enable
341 ;; the user to do clear activities, and APIs which enable developers
342 ;; to do clear extensions and development, and underlying
343 ;; infrastructure to keep everything straight and enabled.
345 ;; There are conflicting theories for how to structure the
346 ;; specification of mathematical models, along with the statistical
347 ;; inference, along with the data which is instantiating the model.
349 ;; i.e.: mathematical model for the relationships between components,
350 ;; between a component and a summarizing parameter, and between
353 ;; statistical inference describes the general approach for
;; aggregating into a decision and has implications for the scale up
355 ;; from the model on a single instance to the generalization.
357 ;; The data represents the particular substantive context that is
358 ;; driving the model/inference combination, and about which we hope to
359 ;; generate knowledge.
361 ;; numerical analysis selects appropriate algorithms/implementations
362 ;; for combining the above 3.
364 ;; the end result is input on the decision being made (which could be
365 ;; specific (decision analysis/testing), risk-analysis (interval
366 ;; estimation) , most likely/appropriate selection (point estimation)