From 43c1e5569c2707e3eca68d00a5245ead810bcc81 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Fri, 9 Oct 2009 12:04:00 +0200 Subject: [PATCH] working through the statistical procedures structure. Unlike R, any new statistical procedure will have to be fit into the structure so to provide the additional needed characterizations. Signed-off-by: AJ Rossini --- src/procedures/README.txt | 47 +++++++++++++++++++++ src/procedures/procedures.lisp | 94 ++++++++++++++++++++++++++++++++++++++++++ src/procedures/template.lisp | 64 ++++++++++++++++++++++++++++ src/procedures/ttest.lisp | 74 +++++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+) create mode 100644 src/procedures/README.txt create mode 100644 src/procedures/procedures.lisp create mode 100644 src/procedures/template.lisp create mode 100644 src/procedures/ttest.lisp diff --git a/src/procedures/README.txt b/src/procedures/README.txt new file mode 100644 index 0000000..171c6ce --- /dev/null +++ b/src/procedures/README.txt @@ -0,0 +1,47 @@ + +Procedure components to write: + +criteria-functions +- Least squares +- maximum likelihood + +approximate criteria functions +- FO, FOCE, SAEM +- + +root-functions +- influence-function +- score-function + +functional forms +- linear predictor +- emax +- logistic +- inverse-logistic + +test-procedures (compare a constructed test stat against a distribution) +- t-test +- wald-test +- score-test +- likelihood-ratio-test + +hypothesis-test-philosophies +- neyman-pearson-test +- fisher-test + + +specific-procedures: + +t-test: +- test-procedure ( + +distributional-procedures +- bootstrap + - parametric (model-based) + - nonparametric (empirical) +- jackknife +- martingale resampling + +goodness-of-fit / lack-of-fit + + diff --git a/src/procedures/procedures.lisp b/src/procedures/procedures.lisp new file mode 100644 index 0000000..8c65354 --- /dev/null +++ b/src/procedures/procedures.lisp @@ -0,0 +1,94 @@ +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2009-10-09 12:02:09 
tony>
+;;; Creation: <2009-03-12 17:14:56 tony>
+;;; File: procedures.lisp
+;;; Author: AJ Rossini
+;;; Copyright: (c)2009--, AJ Rossini. Currently licensed under MIT
+;;; license. See file LICENSE.mit in top-level directory
+;;; for information.
+;;; Purpose: Classes for statistical procedures, and generics
+;;; supporting use of such procedures.
+
+;;; What is this talk of 'release'? Klingons do not make software
+;;; 'releases'. Our software 'escapes', leaving a bloody trail of
+;;; designers and quality assurance people in its wake.
+
+;;; This organization and structure is new to the 21st Century
+;;; version. Think, "21st Century Schizoid Man".
+
+(in-package :cls-statproc)
+
+;;; Statistical procedures can consist of one or more of:
+
+;;; - mathematical forms (linear predictors, splines, functional
+;;; forms) which get spliced together.
+
+;;; - criteria functions (likelihoods, sum-of-squares, information), params and data.
+;;; - optimization algorithms/functions
+
+;;; - root functions (score functions, influence functions), params and data
+;;; - zero-finding algorithms
+
+;;; - point estimate: special case of...
+;;; - interval estimation (with uncertainty criteria to support range or point)
+;;; - hypothesis test -- which can be thought of as a point estimator
+;;; for a coarsened problem, or as a simple interval-based decision with
+;;; uncertainty criteria.
+
+;;; Estimators are reasonably straightforward. Either we are
+;;; providing a point estimate, or we provide an interval estimate.
+;;; But shouldn't these be characterized in the same manner?
+
+;;; We are currently building up an ontology (ADG/DAG) or
+;;; knowledge-base whose leaves are described by the path to them.
+
+(defvar *statistical-procedure-components*
+  '(wald-test
+    t-test
+    likelihood-ratio-test
+    score-test
+
+    optimization
+    root-finding
+
+    criteria-functions
+
+    mathematical-forms)
+  "Symbols naming the kinds of components a statistical procedure can be built from.")
+
+
+(defclass statistical-decision () ()
+  (:documentation "instance describing the end result, if it is an
+  interval/range/region, or point estimate, or a conclusion from a
+  test (i.e. hypothesis(es) selected, strength of conclusion)"))
+
+(defclass statistical-dataset (dataframe-like) ()
+  (:documentation "a particular dataset, usually the subset from a
+  larger set, which is used as the input to the procedure."))
+
+(defclass statistical-metadata (dataframe-like) ()
+  (:documentation "the description of the dataset's statistical
+  properties which are required for the procedure to work or meet
+  assumptions."))
+
+(defclass statistical-procedure ()
+  ((ontological-spec :initform nil
+                     :initarg :ontology-def
+                     :type list
+                     :accessor ontology
+                     :documentation "list of symbols describing
+                     ontological classification")
+   (how-to-fit)
+   (how-to-simulate)
+   (instance-data)))
+
+;;; Generic protocol; the per-procedure files specialize these on their own classes.
+(defgeneric proc-consistent-data-p (metadata data)
+  (:documentation "verify that the metadata required for a procedure
+  is present in a particular dataset. The dataset will usually be a
+  subset of the full working dataset."))
+
+(defgeneric process-data (proc data)
+  (:documentation "Run the statistical procedure on the dataset and
+  report the decision."))
\ No newline at end of file
diff --git a/src/procedures/template.lisp b/src/procedures/template.lisp
new file mode 100644
index 0000000..f5a96d9
--- /dev/null
+++ b/src/procedures/template.lisp
@@ -0,0 +1,64 @@
+;;; -*- mode: lisp -*-
+
+;;; Time-stamp: <2009-10-09 12:01:02 tony>
+;;; Creation: <2009-03-12 17:14:56 tony>
+;;; File: template.lisp
+;;; Author: AJ Rossini
+;;; Copyright: (c)2009--, AJ Rossini. Currently licensed under MIT
+;;; license. See file LICENSE.mit in top-level directory
+;;; for information.
+;;; Purpose: Template header file for Statistical Procedures
+
+;;; What is this talk of 'release'? Klingons do not make software
+;;; 'releases'. Our software 'escapes', leaving a bloody trail of
+;;; designers and quality assurance people in its wake.
+
+;;; This organization and structure is new to the 21st Century
+;;; version. Think, "21st Century Schizoid Man".
+
+(in-package :cls-statproc)
+
+;;; Basic Idea: do a grep-and-replace to identify the procedure as a
+;;; new procedure. The name of the procedure should not use "/" as
+;;; part of the identifier. (For example,
+;;; M-x query-replace procedure t-test)
+
+;;; Set up the data and metadata requirements (input components)
+(define-statistical-proc-metadata procedure/metadata ())
+
+;;; Set up the procedure class (processing)
+(define-statistical-proc-class procedure/class ())
+
+;;; Set up the class for the results (output components)
+(define-statistical-proc-results procedure/results ()
+  '((:variables '(var1 var2 ...))
+    (:evaluate '(defun proc (var1 var2 ...) "doc-string" t))
+    (:result-form '())
+    (:documentation "...")))
+
+
+
+
+;;; Define how the generics should work with this (PRINT-OBJECT takes the object and a stream)
+(defmethod print-object ((obj procedure/class) stream) (print-unreadable-object (obj stream :type t :identity t)))
+(defmethod print-object ((obj procedure/results) stream) (print-unreadable-object (obj stream :type t :identity t)))
+(defmethod print-object ((obj procedure/metadata) stream) (print-unreadable-object (obj stream :type t :identity t)))
+
+
+(defmethod proc-consistent-data-p ((metadata procedure/metadata)
+                                   (data dataframe-like)))
+
+(defmethod process-data ((obj procedure/class)
+                         (data dataframe-like)))
+
+
+(defmethod display-results ((results procedure/results)))
+
+(defmethod print-results ((results procedure/results)))
+
+(defmethod visualize-results ((results procedure/results)))
+
+(defmethod simulate-data-from-results ((results procedure/results)
+                                       &key (newdata (obj2 procedure/data))))
+
+(defmethod simulate-data-from-proc ((obj procedure/results)))
diff --git a/src/procedures/ttest.lisp b/src/procedures/ttest.lisp
new file mode 100644
index 0000000..e4e4674
--- /dev/null
+++ b/src/procedures/ttest.lisp
@@ -0,0 +1,74 @@
+;;; -*- mode: lisp -*-
+
+;;; Time-stamp: <2009-10-06 11:17:37 tony>
+;;; Creation: <2009-03-12 17:14:56 tony>
+;;; File: ttest.lisp
+;;; Author: AJ Rossini
+;;; Copyright: (c)2009--, AJ Rossini. Currently licensed under MIT
+;;; license. See file LICENSE.mit in top-level directory
+;;; for information.
+;;; Purpose: Two-sample t-test expressed as a statistical procedure
+
+;;; What is this talk of 'release'? Klingons do not make software
+;;; 'releases'. Our software 'escapes', leaving a bloody trail of
+;;; designers and quality assurance people in its wake.
+
+;;; This organization and structure is new to the 21st Century
+;;; version. Think, "21st Century Schizoid Man".
+
+(in-package :cls-statproc)
+
+;;; Set up the data and metadata requirements
+(define-statistical-proc-metadata t-test/metadata ()
+  '((verify :number-of-variables 2)
+    (verify :variable-exists (discrete :levels 2) 'independent 'group)
+    (verify :variable-exists 'continuous 'dependent)))
+
+;;; Set up the t-test class.
This should provide the code for processing.
+(define-statistical-proc-class t-test/class ())
+
+;;; Set up the class for the results. This should store the processed
+;;; results. i.e. instantiated proc:
+;;; data + analytic proc(s) => instantiated proc
+;;; The :evaluate form computes Welch's t = (m1 - m2) / sqrt(s1^2/n1 + s2^2/n2).
+(define-statistical-proc-results t-test/results ()
+  '((:variables '(group response))
+    (:evaluate '(defun t-test (group response)
+                  "Estimate Welch's two-sample t statistic from data."
+                  (let* ((samples (split response :by group))
+                         (x1 (elt samples 0)) (x2 (elt samples 1))
+                         (n1 (length x1)) (n2 (length x2))
+                         (var1 (expt (standard-deviation x1) 2))
+                         (var2 (expt (standard-deviation x2) 2)))
+                    (/ (- (mean x1) (mean x2))
+                       (sqrt (+ (/ var1 n1) (/ var2 n2)))))))
+    (:result-form '(test-statistic :following t-distribution))
+    (:documentation "...")))
+
+;;; Define how the generics should work with this (PRINT-OBJECT takes the object and a stream)
+(defmethod print-object ((proc t-test/class) stream) (print-unreadable-object (proc stream :type t :identity t)))
+(defmethod print-object ((results t-test/results) stream) (print-unreadable-object (results stream :type t :identity t)))
+(defmethod print-object ((metadata t-test/metadata) stream) (print-unreadable-object (metadata stream :type t :identity t)))
+
+
+(defmethod proc-consistent-data-p ((metadata t-test/metadata)
+                                   (data dataframe-like))
+  ;; verify only 2 variables.
+  ;; verify that one variable has the attributes dependent and continuous (the response).
+  ;; verify that the other variable has the attributes independent, discrete, group (2 levels).
+  )
+
+(defmethod process-data ((obj t-test/class)
+                         (data dataframe-like)))
+
+
+(defmethod display-results ((results t-test/results)))
+
+(defmethod print-results ((results t-test/results)))
+
+(defmethod visualize-results ((results t-test/results)))
+
+(defmethod simulate-data-from-results ((results t-test/results)
+                                       &key (newdata (obj2 t-test/data))))
+
+(defmethod simulate-data-from-proc ((obj t-test/results)))
-- 
2.11.4.GIT