;;; -*- mode: lisp -*-
;;; Time-stamp: <2009-03-21 09:24:56 tony>
;;; Creation: <2008-03-12 17:18:42 blindglobe@gmail.com>
;;; File: data-clos.lisp
;;; Author: AJ Rossini

;;; State of the DATAFRAME-LIKE class after commit fa872cd
;;; ("cleanup of dataframe code.  Still lots more work to go.").

(defclass dataframe-like (matrix-like)
  (;; No storage slot is declared at this level on purpose: the storage
   ;; component is the primary differentiator between implementations
   ;; and drives how getting/setting is done.  Concrete subclasses add
   ;; their own storage and the access methods that go with it; see
   ;; DATAFRAME-ARRAY for an example implementation.
   ;;
   ;; Everything below is metadata.  A more flexible, compact way to
   ;; store it would be welcome.
   (case-labels
    :accessor case-labels
    :initarg :case-labels
    :initform nil
    :type list
    :documentation "labels used for describing cases (doc
    metadata), possibly used for merging.")
   (var-labels
    :accessor var-labels
    :initarg :var-labels
    :initform nil
    :type list
    :documentation "Variable names.")
   (var-types
    :accessor var-types
    :initarg :var-types
    :initform nil
    :type list
    :documentation "variable types to ensure fit")
   (documentation-string
    :accessor doc-string
    :initarg :doc
    :initform nil
    :documentation "additional information, potentially
    uncomputable, about dataframe-like instance."))
  (:documentation "Abstract class for standard statistical analysis
  dataset for independent data.  Rows are considered to be
  independent, matching observations.  Columns are considered to be
  type-consistent, match a variable with distribution.  Inherits from
  the lisp-matrix base MATRIX-LIKE class.

  DATAFRAME-LIKE is the basic cases by variables framework.  Need to
  embed this within other structures which allow for generalized
  relations.  Goal is to ensure that relations imply and drive the
  potential for statistical relativeness such as correlation,
  interference, and similar concepts."))
;;; Access and Extraction

(defun dfref (df idx1 idx2 &key (type :scalar))
  "Return the entry of dataframe DF at row/case IDX1 and column/variable
IDX2, in the same vein as AREF, MREF, VREF, etc.

TYPE selects the shape of the result:
  :SCALAR     (default) the bare element, read with AREF from
              (DATASET DF).
  :DATAFRAME  a fresh 1x1 DATAFRAME-ARRAY holding that element,
              carrying DF's doc-string plus the single matching case
              label, variable label and variable type, each wrapped
              in a one-element list.
Any other TYPE falls through CASE and returns NIL."
  (case type
    (:scalar (aref (dataset df) idx1 idx2))
    (:dataframe
     (make-instance 'dataframe-array
                    ;; A 1x1 array needs either nested :INITIAL-CONTENTS
                    ;; ((x)) or an :INITIAL-ELEMENT; a bare scalar passed
                    ;; as :INITIAL-CONTENTS is not a valid specification.
                    :storage (make-array (list 1 1)
                                         :initial-element
                                         (aref (dataset df) idx1 idx2))
                    ;; TODO: ensure copy for this and following.
                    :doc (doc-string df)
                    ;; The label/type slots are declared :TYPE LIST, so
                    ;; the selected entry is wrapped rather than passed
                    ;; as a bare atom.
                    ;; NOTE(review): CASENAMES / VARNAMES are defined
                    ;; elsewhere in this file; presumably they return
                    ;; the label lists -- confirm.
                    :case-labels (list (nth idx1 (caseNames df)))
                    :var-labels (list (nth idx2 (varNames df)))
                    ;; Should the type spec be assumed, as below, or
                    ;; should it inherit from the dataframe we are
                    ;; selecting from?
                    :var-types (list (nth idx2 (var-types df)))))))
(defun gen-seq (n &optional (start 1))
  "Return the list of integers START, START+1, ..., N (inclusive), or
NIL when N is smaller than START.  START defaults to 1.

  (gen-seq 4)   ; => (1 2 3 4)
  (gen-seq 0)   ; => NIL
  (gen-seq 5 3) ; => (3 4 5)

Intended for integer arguments."
  ;; Iterative collection: linear time and no recursion depth, unlike
  ;; repeatedly APPENDing onto the result of a recursive call.
  (loop for i from start to n collect i))

(defun dfref-var (sds index &key (type :list))
  "Return the values of the variable (column) INDEX of dataframe SDS.

TYPE names the container for the result.  Only :LIST is implemented:
it returns a fresh list with one element per case (row), in row
order.  :VECTOR, :VECTOR-LIKE and :DATAFRAME are placeholders that
currently just return T.  Any other TYPE signals an error (ECASE)."
  (ecase type
    (:list
     ;; Cases are the first array dimension and subscripts are 0-based,
     ;; matching how DFREF/AREF address the store.
     (loop for row from 0 below (array-dimension (dataset sds) 0)
           collect (dfref sds row index)))
    (:vector t)
    (:vector-like t)
    (:dataframe t)))

(defun dfref-obsn (sds index)
  "Return the observation (row) INDEX of dataframe SDS as a fresh list
with one element per variable (column), in column order."
  ;; Variables are the second array dimension; subscripts are 0-based.
  (loop for col from 0 below (array-dimension (dataset sds) 1)
        collect (dfref sds index col)))

;; FIXME: EXTRACT-IDX, which sits at this point in data-clos.lisp, is
;; only partially visible in this excerpt and is not reproduced here.

(defgeneric extract (sds whatAndRange)
  (:documentation "data extraction approach"))

;; Testing consistency/coherency.
(defgeneric consistent-dataframe-like-p (ds)
  (:documentation "methods to check for consistency."))

(defmethod consistent-dataframe-like-p ((ds dataframe-like))
  "Test that dataframe-like DS is internally consistent with its metadata.

Returns true only when all of the following hold:
  * the number of columns equals the number of variable labels;
  * the number of rows equals the number of case labels;
  * every stored element in column J satisfies the J-th entry of
    (VAR-TYPES DS).  A missing or NIL entry is treated as type T,
    i.e. the column is unconstrained.

Relies on NROWS, NCOLS and DATASET being applicable to DS."
  (and
   ;; ensure dimensionality
   (= (ncols ds) (length (var-labels ds)))
   (= (nrows ds) (length (case-labels ds)))
   ;; when dims are sane, check the type of every element, column by
   ;; column.  Rows are the first subscript, columns the second.
   (let ((types (var-types ds))
         (store (dataset ds)))
     (loop for col from 0 below (ncols ds)
           for col-type = (or (nth col types) t)
           always (loop for row from 0 below (nrows ds)
                        always (typep (aref store row col) col-type))))))


#|

 (defun ensure-consistent-datatable-type (dt lot)
   "given a datatable and a listoftypes, ensure that the datatable
    variables are consistent."
   (destructuring-bind (n p)
       (array-dimensions dt)
     (dotimes (i n)
       (dotimes (j p)
         (check-type (aref dt i j) (elt lot j))))))
|#

;;; Printing methods and support.
;;; (The definitions of this section are only partially visible in this
;;; excerpt and are not reproduced here.)

(defsetf caseNames set-caseNames)

;;;;;;;;;;;; IMPLEMENTATIONS, with appropriate methods.

;; (documentation 'dataframe-like 'type)

(defclass dataframe-array (dataframe-like)
  ((store :initform nil
          :initarg :storage
          ;; NIL is the initform, so it has to be an admissible value
          ;; alongside two-dimensional arrays.
          :type (or null (array * *))
          :accessor dataset
          :documentation "Data storage: typed as array."))
  (:documentation "example implementation of dataframe-like using storage
  based on lisp arrays.  An obvious alternative could be a
  dataframe-matrix-like which uses the lisp-matrix classes."))

(defmethod nrows ((df dataframe-array))
  "Number of rows (cases): the first dimension of the stored array."
  (array-dimension (dataset df) 0))

(defmethod ncols ((df dataframe-array))
  "Number of columns (variables): the second dimension of the stored array."
  (array-dimension (dataset df) 1))

;;; NEED TO FIGURE OUT HOW TO EXTEND THE MATRIX-LIKE CLASS PRINT
;;; METHOD!

(defmethod print-object ((object dataframe-array) stream)
  "Print OBJECT unreadably: its dimensions, then the variable labels on
one line, then one line per case consisting of the case label followed
by that row's values.  An instance whose store is not an array is
printed as having no storage instead of signalling an error."
  (print-unreadable-object (object stream :type t)
    (if (not (arrayp (dataset object)))
        (format stream " (no storage)")
        (let ((row-names (case-labels object)))
          (format stream " ~d x ~d" (nrows object) (ncols object))
          (terpri stream)
          ;; separate the labels so they do not run together
          (format stream "~{ ~A~}" (var-labels object))
          (dotimes (i (nrows object))
            (terpri stream)
            ;; the case label belongs to the row, so print it once
            (format stream " obs ~A" (nth i row-names))
            (dotimes (j (ncols object))
              (write-char #\space stream)
              (write (dfref object i j) :stream stream)))))))