From b294b5d30e8505a0d53fd41ac319e137586dd5a8 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Sun, 20 Sep 2009 18:14:17 +0200 Subject: [PATCH] dataframe refactoring into core and storage-specific components. Signed-off-by: AJ Rossini --- TODO.lisp | 37 +++++++- src/data/dataframe-array.lisp | 75 +++++++++++++++ src/data/dataframe-listoflist.lisp | 106 +++++++++++++++++++++ src/data/dataframe-matrixlike.lisp | 56 +++++++++++ src/data/dataframe.lisp | 189 +------------------------------------ 5 files changed, 274 insertions(+), 189 deletions(-) create mode 100644 src/data/dataframe-array.lisp create mode 100644 src/data/dataframe-listoflist.lisp create mode 100644 src/data/dataframe-matrixlike.lisp diff --git a/TODO.lisp b/TODO.lisp index 1d5b265..5a4cfb9 100644 --- a/TODO.lisp +++ b/TODO.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2009-09-15 06:51:08 tony> +;;; Time-stamp: <2009-09-19 23:08:58 tony> ;;; Creation: <2008-09-08 08:06:30 tony> ;;; File: TODO.lisp ;;; Author: AJ Rossini @@ -38,7 +38,7 @@ (in-package :lisp-stat-unittests) -;; tests = 80, failures = 8, errors = 20 +;; tests = 80, failures = 7, errors = 21 (run-tests :suite 'lisp-stat-ut) (describe (run-tests :suite 'lisp-stat-ut)) @@ -114,5 +114,36 @@ (xref *df-test* 2 1) (xref *df-test* 0 0) (xref *df-test* 1 0) +(xref *df-test* 1 '*) + +;;; Experiments with cl-variates + +(asdf:oos 'asdf:compile-op 'cl-variates :force t) +(asdf:oos 'asdf:compile-op 'cl-variates-test :force t) + +(asdf:oos 'asdf:load-op 'lift) + +(asdf:oos 'asdf:load-op 'cl-variates) +(asdf:oos 'asdf:load-op 'cl-variates-test) + +(in-package :cl-variates-test) +(run-tests :suite 'cl-variates-test) +(describe (run-tests :suite 'cl-variates-test)) + +(in-package :cl-variates-user) + +(defparameter state (make-random-number-generator)) +(setf (random-seed state) 44) +(random-seed state) +(loop for i from 1 to 10 collect + (random-range state 0 10)) + + + + +;;; experiments with LLA +(in-package :cl-user) +(asdf:oos 'asdf:load-op 'lla) + + -(xref *df-test* 1 '*) \ No newline at end of file diff --git a/src/data/dataframe-array.lisp b/src/data/dataframe-array.lisp new file mode 100644 index 0000000..a7f5375 --- /dev/null +++ b/src/data/dataframe-array.lisp @@ -0,0 +1,75 @@ +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2009-09-19 23:13:00 tony> +;;; Creation: <2009-03-12 17:14:56 tony> +;;; File: dataframe-array.lisp +;;; Author: AJ Rossini +;;; Copyright: (c)2009--, AJ Rossini. BSD, LLGPL, or GPLv2, depending +;;; on how it arrives. +;;; Purpose: real dataframe class using lisp-arrays as storage. + +;;; What is this talk of 'release'? Klingons do not make software +;;; 'releases'. Our software 'escapes', leaving a bloody trail of +;;; designers and quality assurance people in its wake. + +;;; This organization and structure is new to the 21st Century +;;; version.. Think, "21st Century Schizoid Man". + + +;;;;; DATAFRAME-ARRAY + +(defclass dataframe-array (dataframe-like) + ((store :initform nil + :initarg :storage + :type (array * *) + :accessor dataset + :documentation "Data storage: typed as array.")) + (:documentation "example implementation of dataframe-like using storage + based on lisp arrays. An obvious alternative could be a + dataframe-matrix-like which uses the lisp-matrix classes.")) + +(defmethod nrows ((df dataframe-array)) + "specializes on inheritance from matrix-like in lisp-matrix." + (array-dimension (dataset df) 0)) + +(defmethod ncols ((df dataframe-array)) + "specializes on inheritance from matrix-like in lisp-matrix." + (array-dimension (dataset df) 1)) + +(defmethod xref ((df dataframe-array) &rest subscripts) + "Returns a scalar in array, in the same vein as aref, mref, vref, etc. +idx1/2 is row/col or case/var." + (assert (>= 2 (length subscripts))) +#| ;; needed? + (assert (typep (elt subscripts 0) integer)) + (assert (typep (elt subscripts 1) integer)) +|# + (aref (dataset df) (elt subscripts 0) (elt subscripts 1))) + +(defmethod (setf xref) (value (df dataframe-array) &rest subscripts) + "set value for df-ar." + ;; (check-type val (elt (var-type df) index2)) + (setf (aref (dataset df) (elt subscripts 0) (elt subscripts 1)) value)) + +(defparameter *default-dataframe-class* 'dataframe-array) + +(defmethod dfselect ((df dataframe-array) + &optional cases vars indices) + "Extract the OR of cases, vars, or have a list of indices to extract" + (if indices (error "Indicies not used yet")) + (let ((newdf (make-instance *default-dataframe-class* + :storage (make-array (list (length cases) (length vars))) + :nrows (length cases) + :ncols (length vars) +#| + :case-labels (select-list caselist (case-labels df)) + :var-labels (select-list varlist (var-labels df)) + :var-types (select-list varlist (vartypes df)) +|# + ))) + (dotimes (i (length cases)) + (dotimes (j (length vars)) + (setf (xref newdf i j) + (xref df + (position (elt cases i) (case-labels df)) + (position (elt vars j) (var-labels df)))))))) diff --git a/src/data/dataframe-listoflist.lisp b/src/data/dataframe-listoflist.lisp new file mode 100644 index 0000000..47e6b9e --- /dev/null +++ b/src/data/dataframe-listoflist.lisp @@ -0,0 +1,106 @@ +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2009-09-19 23:17:25 tony> +;;; Creation: <2009-03-12 17:14:56 tony> +;;; File: dataframe-listoflist.lisp +;;; Author: AJ Rossini +;;; Copyright: (c)2009--, AJ Rossini. BSD, LLGPL, or GPLv2, depending +;;; on how it arrives. +;;; Purpose: Instance of dataframe with the storage done using +;;; LISTOFLIST data storage. + +;;; What is this talk of 'release'? Klingons do not make software +;;; 'releases'. Our software 'escapes', leaving a bloody trail of +;;; designers and quality assurance people in its wake. + +;;; This organization and structure is new to the 21st Century +;;; version.. Think, "21st Century Schizoid Man". + +;;; DATAFRAME-LISTOFLIST +;;; +;;; example/implementatin of using lisp-matrix datastructures for +;;; dataframe storage. + +(defclass dataframe-listoflist (dataframe-like) + ((store :initform nil + :initarg :storage + :type list + :accessor dataset + :documentation "Data storage: typed as matrix-like + (numerical only).")) + (:documentation "example implementation of dataframe-like using storage + based on lisp-matrix structures.")) + +(defmethod nrows ((df dataframe-listoflist)) + "specializes on inheritance from listoflist in lisp-matrix." + (length (dataset df))) + +(defmethod ncols ((df dataframe-listoflist)) + "specializes on inheritance from matrix-like in lisp-matrix." + (length (elt (dataset df) 0))) + +(defmethod xref ((df dataframe-listoflist) &rest subscripts) + "Returns a scalar in array, in the same vein as aref, mref, vref, etc. +idx1/2 is row/col or case/var." + (elt (elt (dataset df) (elt subscripts 0)) (elt subscripts 1))) ;; ?? + +(defmethod (setf xref) (value (df dataframe-listoflist) &rest subscripts) + "Sets a value for df-ml." + ;; NEED TO CHECK TYPE! + ;; (check-type val (elt (vartype df) index2)) + (setf (elt (elt (dataset df) (elt subscripts 1)) (elt subscripts 0)) value)) + +;;;;;; IMPLEMENTATION INDEPENDENT FUNCTIONS AND METHODS +;;;;;; (use only xref, nrows, ncols and similar dataframe-like +;;;;;; components as core). + +(defun xref-var (df index return-type) + "Returns the data in a single variable as type. +type = sequence, vector, vector-like (if valid numeric type) or dataframe." + (ecase return-type + (('list) + (map 'list + #'(lambda (x) (xref df index x)) + (gen-seq (nth 2 (array-dimensions (dataset df)))))) + (('vector) t) + (:vector-like t) + (:matrix-like t) + (:dataframe t))) + +(defun xref-case (df index return-type) + "Returns row as sequence." + (ecase return-type + (:list + (map 'list + #'(lambda (x) (xref df x index)) + (gen-seq (nth 1 (array-dimensions (dataset df)))))) + (:vector t) + (:vector-like t) + (:matrix-like t) + (:dataframe t))) + +;; FIXME +(defun xref-2indexlist (df indexlist1 indexlist2 &key (return-type :array)) + "return an array, row X col dims. FIXME TESTME" + (case return-type + (:array + (let ((my-pre-array (list))) + (dolist (x indexlist1) + (dolist (y indexlist2) + (append my-pre-array (xref df x y)))) + (make-array (list (length indexlist1) + (length indexlist2)) + :initial-contents my-pre-array))) + (:dataframe + (make-instance 'dataframe-array + :storage (make-array + (list (length indexlist1) + (length indexlist2)) + :initial-contents (dataset df)) + ;; ensure copy for this and following + :doc (doc-string df) + ;; the following 2 need to be subseted based on + ;; the values of indexlist1 and indexlist2 + :case-labels (case-labels df) + :var-labels (var-labels df))))) + diff --git a/src/data/dataframe-matrixlike.lisp b/src/data/dataframe-matrixlike.lisp new file mode 100644 index 0000000..2ce5786 --- /dev/null +++ b/src/data/dataframe-matrixlike.lisp @@ -0,0 +1,56 @@ +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2009-09-19 23:21:30 tony> +;;; Creation: <2009-03-12 17:14:56 tony> +;;; File: dataframe-matrixlike.lisp +;;; Author: AJ Rossini +;;; Copyright: (c)2009--, AJ Rossini. BSD, LLGPL, or GPLv2, depending +;;; on how it arrives. +;;; Purpose: Uses the lisp-matrix dataframe instance for storage. +;;; Useful if we only have numerical data and no missing +;;; data, strings, or categorical stuff... + +;;; What is this talk of 'release'? Klingons do not make software +;;; 'releases'. Our software 'escapes', leaving a bloody trail of +;;; designers and quality assurance people in its wake. + +;;; This organization and structure is new to the 21st Century +;;; version.. Think, "21st Century Schizoid Man". +;;; DATAFRAME-MATRIXLIKE +;;; +;;; example/implementatin of using lisp-matrix datastructures for +;;; dataframe storage. + +(defclass dataframe-matrixlike (dataframe-like) + ((store :initform nil + :initarg :storage + :type matrix-like + :accessor dataset + :documentation "Data storage: typed as matrix-like + (numerical only).")) + (:documentation "example implementation of dataframe-like using storage + based on lisp-matrix structures.")) + +(defmethod nrows ((df dataframe-matrixlike)) + "specializes on inheritance from matrix-like in lisp-matrix." + (matrix-dimension (dataset df) 0)) + +(defmethod ncols ((df dataframe-matrixlike)) + "specializes on inheritance from matrix-like in lisp-matrix." + (matrix-dimension (dataset df) 1)) + +;;; *** FIXME: change mref to xref when we establish lisp-matrix +;;; change to use xarray access facility. Need to dummy-proof the +;;; following. +(defmethod xref ((df dataframe-matrixlike) &rest subscripts) + "Returns a scalar in array, in the same vein as aref, mref, vref, etc. +idx1/2 is row/col or case/var." + (mref (dataset df) (elt subscripts 0) (elt subscripts 1))) + +(defmethod (setf xref) (value (df dataframe-matrixlike) &rest subscripts) + "Sets a value for df-ml." + ;; NEED TO CHECK TYPE! + ;; (check-type val (elt (vartype df) index2)) + (setf (mref (dataset df) (elt subscripts 0) (elt subscripts 1)) value)) + + diff --git a/src/data/dataframe.lisp b/src/data/dataframe.lisp index 14e8b76..c636b77 100644 --- a/src/data/dataframe.lisp +++ b/src/data/dataframe.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2009-08-27 08:16:33 tony> +;;; Time-stamp: <2009-09-19 23:17:32 tony> ;;; Creation: <2008-03-12 17:18:42 blindglobe@gmail.com> ;;; File: dataframe.lisp ;;; Author: AJ Rossini @@ -333,191 +333,8 @@ construction of proper DF-array." ;; See also: ;; (documentation 'dataframe-like 'type) -;;;;; DATAFRAME-ARRAY - -(defclass dataframe-array (dataframe-like) - ((store :initform nil - :initarg :storage - :type (array * *) - :accessor dataset - :documentation "Data storage: typed as array.")) - (:documentation "example implementation of dataframe-like using storage - based on lisp arrays. An obvious alternative could be a - dataframe-matrix-like which uses the lisp-matrix classes.")) - -(defmethod nrows ((df dataframe-array)) - "specializes on inheritance from matrix-like in lisp-matrix." - (array-dimension (dataset df) 0)) - -(defmethod ncols ((df dataframe-array)) - "specializes on inheritance from matrix-like in lisp-matrix." - (array-dimension (dataset df) 1)) - -(defmethod xref ((df dataframe-array) &rest subscripts) - "Returns a scalar in array, in the same vein as aref, mref, vref, etc. -idx1/2 is row/col or case/var." - (assert (>= 2 (length subscripts))) -#| ;; needed? - (assert (typep (elt subscripts 0) integer)) - (assert (typep (elt subscripts 1) integer)) -|# - (aref (dataset df) (elt subscripts 0) (elt subscripts 1))) - -(defmethod (setf xref) (value (df dataframe-array) &rest subscripts) - "set value for df-ar." - ;; (check-type val (elt (var-type df) index2)) - (setf (aref (dataset df) (elt subscripts 0) (elt subscripts 1)) value)) - -(defparameter *default-dataframe-class* 'dataframe-array) - -(defmethod dfselect ((df dataframe-array) - &optional cases vars indices) - "Extract the OR of cases, vars, or have a list of indices to extract" - (if indices (error "Indicies not used yet")) - (let ((newdf (make-instance *default-dataframe-class* - :storage (make-array (list (length cases) (length vars))) - :nrows (length cases) - :ncols (length vars) -#| - :case-labels (select-list caselist (case-labels df)) - :var-labels (select-list varlist (var-labels df)) - :var-types (select-list varlist (vartypes df)) -|# - ))) - (dotimes (i (length cases)) - (dotimes (j (length vars)) - (setf (xref newdf i j) - (xref df - (position (elt cases i) (case-labels df)) - (position (elt vars j) (var-labels df)))))))) - -;;; DATAFRAME-MATRIXLIKE -;;; -;;; example/implementatin of using lisp-matrix datastructures for -;;; dataframe storage. - -(defclass dataframe-matrixlike (dataframe-like) - ((store :initform nil - :initarg :storage - :type matrix-like - :accessor dataset - :documentation "Data storage: typed as matrix-like - (numerical only).")) - (:documentation "example implementation of dataframe-like using storage - based on lisp-matrix structures.")) - -(defmethod nrows ((df dataframe-matrixlike)) - "specializes on inheritance from matrix-like in lisp-matrix." - (matrix-dimension (dataset df) 0)) - -(defmethod ncols ((df dataframe-matrixlike)) - "specializes on inheritance from matrix-like in lisp-matrix." - (matrix-dimension (dataset df) 1)) - -;;; *** FIXME: change mref to xref when we establish lisp-matrix -;;; change to use xarray access facility. Need to dummy-proof the -;;; following. -(defmethod xref ((df dataframe-matrixlike) &rest subscripts) - "Returns a scalar in array, in the same vein as aref, mref, vref, etc. -idx1/2 is row/col or case/var." - (mref (dataset df) (elt subscripts 0) (elt subscripts 1))) - -(defmethod (setf xref) (value (df dataframe-matrixlike) &rest subscripts) - "Sets a value for df-ml." - ;; NEED TO CHECK TYPE! - ;; (check-type val (elt (vartype df) index2)) - (setf (mref (dataset df) (elt subscripts 0) (elt subscripts 1)) value)) - - - - -;;; DATAFRAME-LISTOFLIST -;;; -;;; example/implementatin of using lisp-matrix datastructures for -;;; dataframe storage. - -(defclass dataframe-listoflist (dataframe-like) - ((store :initform nil - :initarg :storage - :type list - :accessor dataset - :documentation "Data storage: typed as matrix-like - (numerical only).")) - (:documentation "example implementation of dataframe-like using storage - based on lisp-matrix structures.")) - -(defmethod nrows ((df dataframe-listoflist)) - "specializes on inheritance from listoflist in lisp-matrix." - (length (dataset df))) - -(defmethod ncols ((df dataframe-listoflist)) - "specializes on inheritance from matrix-like in lisp-matrix." - (length (elt (dataset df) 0))) - -(defmethod xref ((df dataframe-listoflist) &rest subscripts) - "Returns a scalar in array, in the same vein as aref, mref, vref, etc. -idx1/2 is row/col or case/var." - (elt (elt (dataset df) (elt subscripts 0)) (elt subscripts 1))) ;; ?? - -(defmethod (setf xref) (value (df dataframe-listoflist) &rest subscripts) - "Sets a value for df-ml." - ;; NEED TO CHECK TYPE! - ;; (check-type val (elt (vartype df) index2)) - (setf (elt (elt (dataset df) (elt subscripts 1)) (elt subscripts 0)) value)) - -;;;;;; IMPLEMENTATION INDEPENDENT FUNCTIONS AND METHODS -;;;;;; (use only xref, nrows, ncols and similar dataframe-like -;;;;;; components as core). - -(defun xref-var (df index return-type) - "Returns the data in a single variable as type. -type = sequence, vector, vector-like (if valid numeric type) or dataframe." - (ecase return-type - (('list) - (map 'list - #'(lambda (x) (xref df index x)) - (gen-seq (nth 2 (array-dimensions (dataset df)))))) - (('vector) t) - (:vector-like t) - (:matrix-like t) - (:dataframe t))) - -(defun xref-case (df index return-type) - "Returns row as sequence." - (ecase return-type - (:list - (map 'list - #'(lambda (x) (xref df x index)) - (gen-seq (nth 1 (array-dimensions (dataset df)))))) - (:vector t) - (:vector-like t) - (:matrix-like t) - (:dataframe t))) - -;; FIXME -(defun xref-2indexlist (df indexlist1 indexlist2 &key (return-type :array)) - "return an array, row X col dims. FIXME TESTME" - (case return-type - (:array - (let ((my-pre-array (list))) - (dolist (x indexlist1) - (dolist (y indexlist2) - (append my-pre-array (xref df x y)))) - (make-array (list (length indexlist1) - (length indexlist2)) - :initial-contents my-pre-array))) - (:dataframe - (make-instance 'dataframe-array - :storage (make-array - (list (length indexlist1) - (length indexlist2)) - :initial-contents (dataset df)) - ;; ensure copy for this and following - :doc (doc-string df) - ;; the following 2 need to be subseted based on - ;; the values of indexlist1 and indexlist2 - :case-labels (case-labels df) - :var-labels (var-labels df))))) + + ;;; Do we establish methods for dataframe-like, which specialize to ;;; particular instances of storage? -- 2.11.4.GIT