From 057722dede3879f99b361888303f5d7741897621 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Tue, 24 Mar 2009 08:16:03 +0100 Subject: [PATCH] more clean up for dataframe -- description of general array storage, methods and generics to implement for each new store. Signed-off-by: AJ Rossini --- src/data/data-clos.lisp | 292 ++++++++++++++++++++++++++---------------------- 1 file changed, 161 insertions(+), 131 deletions(-) diff --git a/src/data/data-clos.lisp b/src/data/data-clos.lisp index 15d5971..49a2848 100644 --- a/src/data/data-clos.lisp +++ b/src/data/data-clos.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2009-03-23 17:47:38 tony> +;;; Time-stamp: <2009-03-24 08:14:16 tony> ;;; Creation: <2008-03-12 17:18:42 blindglobe@gmail.com> ;;; File: data-clos.lisp ;;; Author: AJ Rossini @@ -64,8 +64,26 @@ ;;; data information. ;;; - + +;;;; Misc Functions + +(defun gen-seq (n &optional (start 1)) + "There has to be a better way -- I'm sure of it! default count from 1. + (gen-seq 4) ; => (1 2 3 4) + (gen-seq 0) ; => nil + (gen-seq 5 3) ; => 3 4 5 +" + (if (>= n start) + (append (gen-seq (- n 1) start) (list n)))) + +;;;; abstract dataframe class + (defclass dataframe-like (matrix-like) ( + ;; Matrix-like (from lisp-matrix) is basically a rectangular table + ;; without storage. We emulate that, and add storage, row/column + ;; labels, and within-column-typing. + ;; STORE is the storage component. We ignore this in the DATAFRAME-LIKE ;; class, as it is the primary differentiator, driving how access ;; (getting/setting) is done. We create methods depending on the @@ -93,7 +111,8 @@ :initarg :doc :accessor doc-string :documentation "additional information, - potentially uncomputable, about dataframe-like instance.")) + potentially uncomputable, possibly metadata, about dataframe-like + instance.")) (:documentation "Abstract class for standard statistical analysis dataset for independent data. Rows are considered to be independent, matching observations. 
Columns @@ -109,149 +128,49 @@ correlation, interference, and similar concepts.")) -;;; Access and Extraction +;;; Generics specialized above matrix-like, particularly for +;;; dataframe-like objects. Need methods for any storage +;;; implementation. -(defun dfref (df idx1 idx2 &key (type :scalar)) - "Returns a scalar in array, in the same vein as aref, mref, vref, etc. -idx1/2 is row/col or case/var." - (case type - (:scalar (aref (dataset df) idx1 idx2)) - (:dataframe (make-instance 'dataframe-array - :storage (make-array - (list 1 1) - :initial-contents (dfref df idx1 idx2)) - ;; ensure copy for this and following - :doc (doc-string df) - :case-labels (nth idx1 (caseNames df)) - :var-labels (nth idx2 (varNames df)) - ;; shound the type spec assume, as - ;; below, or should it inherit from the - ;; dataframe we are selecting from? - :var-types (nth idx2 (var-types df)))))) +(defgeneric dfref (df index1 index2 &key return-type) + (:documentation "scalar access with selection of possible return + object types.") + (:method ((df dataframe-like) index1 index2 &key return-type) + (error "need a real class with real storage to reference elements."))) +;;; Access and Extraction: implementations needed for any storage +;;; type. But here, just to point out that we've got a specializing +;;; virtual subclass (DATAFRAME-LIKE specializing MATRIX-LIKE). -(defun gen-seq (n &optional (start 1)) - "There has to be a better way -- I'm sure of it! default count from 1. - (gen-seq 4) ; => (1 2 3 4) - (gen-seq 0) ; => nil - (gen-seq 5 3) ; => 3 4 5 -" - (if (>= n start) - (append (gen-seq (- n 1) start) (list n)))) - -(defun dfref-var (df index &key (return-type :list)) - "Returns the data in a single variable as type. -type = sequence, vector, vector-like (if valid numeric type) or dataframe." 
- (ecase return-type - (:list - (map 'list - #'(lambda (x) (dfref df index x)) - (gen-seq (nth 2 (array-dimensions (dataset df)))))) - (:vector t) - (:vector-like t) - (:dataframe t))) - -(defun dfref-obsn (df index) - "Returns row as sequence." - (map 'sequence - #'(lambda (x) (dfref df x index)) - (gen-seq (nth 1 (array-dimensions (dataset df)))))) - -;; FIXME -(defun dfref-2indexlist (df indexlist1 indexlist2 &key (return-type :array)) - "return an array, row X col dims. FIXME TESTME" - (case return-type - (:array - (let ((my-pre-array (list))) - (dolist (x indexlist1) - (dolist (y indexlist2) - (append my-pre-array (dfref df x y)))) - (make-array (list (length indexlist1) - (length indexlist2)) - :initial-contents my-pre-array))) - (:dataframe - (make-instance 'dataframe-array - :storage (make-array - (list (length indexlist1) - (length indexlist2)) - :initial-contents (dataset df)) - ;; ensure copy for this and following - :doc (doc-string df) - ;; the following 2 need to be subseted based on - ;; the values of indexlist1 and indexlist2 - :case-labels (case-labels df) - :var-labels (var-labels df))))) +(defmethod nrows ((df dataframe-like)) + "specializes on inheritance from matrix-like in lisp-matrix." + (error "Need implementation; can't dispatch on virtual class.")) +(defmethod ncols ((df dataframe-like)) + "specializes on inheritance from matrix-like in lisp-matrix." + (error "Need implementation; can't dispatch on virtual class.")) ;; Testing consistency/coherency. -(defgeneric consistent-dataframe-like-p (ds) - (:documentation "methods to check for consistency.")) - -(defmethod consistent-dataframe-like-p ((ds dataframe-like)) - "Test that dataframe-like is internally consistent with metadata. -Ensure that dims of stored data are same as case and var labels. - -Currently checks length of things, but needs to check type of things -as well." 
- (and - ;; ensure dimensionality - (equal (list (ncols ds) (nrows ds)) ; array-dimensions (dataset ds)) - (list (length (var-labels ds)) - (length (case-labels ds)))) - ;; when dims sane, check-type for each variable - (progn - (dolist (i (ncols ds)) - (dotimes (j (nrows ds)) - (typep (aref (dataset ds) i j) (nth i (var-types ds))))) - t))) - +(defgeneric consistent-dataframe-like-p (df) + (:documentation "methods to check for consistency.") + (:method ((df dataframe-like)) + (error "need a real class with real storage to reference elements."))) #| (defun ensure-consistent-datatable-type (dt lot) "given a datatable and a listoftypes, ensure that the datatble variables are consistent." - (destructuring-bind (n p) + (destructuring-bind (n p) ;; why use let when we can be cool? Sigh. (array-dimensions dt) (dotimes (i n) (dotimes (j p) (check-type (aref dt i j) (elt lot j)))))) |# - -;;; Printing methods and support. - -(defun print-as-row (seq) - "Print a sequence formated as a row in a table." - (format t "~{~D~T~}" seq)) - -;; (print-as-row (list 1 2 3)) - -(defun print-structure-table (ds) - "example of what we want the methods to look like. Should be sort -of like a spreadsheet if the storage is a table." - (print-as-row (var-labels ds)) - (let ((j -1)) - (dolist (i (case-labels ds)) - (print-as-row (append (list i) - (dfref-obsn (dataset ds) (incf j))))))) - -#| - (defun print-structure-relational (ds) - "example of what we want the methods to look like. Should be sort -of like a graph of spreadsheets if the storage is a relational -structure." - (dolist (k (relations ds)) - (let ((currentRelationSet (getRelation ds k))) - (print-as-row (var-labels currentRelationSet)) - (let ((j -1)) - (dolist (i (case-labels currentRelationSet)) - (print-as-row - (append (list i) - (dfref-obsn (dataset currentRelationSet) - (incf j))))))))) -|# +;;; change the following to generic functions and dispatch on +;;; array, matrix, and dataframe? Others? 
(defun row-order-as-list (ary) "Pull out data in row order into a list." @@ -271,18 +190,21 @@ structure." (dotimes (j ncols) (append result (aref ary i j)))))) - (defun transpose-array (ary) "map NxM to MxN." (make-array (reverse (array-dimensions ary)) :initial-contents (col-order-as-list ary))) -;;; Variable-name handling for Tables. Needs error checking. +;;;; THE FOLLOWING 2 dual-sets done to provide error checking +;;;; possibilities. Not intended as make-work! + (defun varNames (ds) + "Variable-name handling for DATAFRAME-LIKE. Needs error checking." (var-labels ds)) (defun set-varNames (ds vN) + "Variable-name handling for DATAFRAME-LIKE. Needs error checking." (if (= (length (var-labels ds)) (length vN)) (setf (var-labels ds) vN) @@ -292,9 +214,11 @@ structure." ;;; Case-name handling for Tables. Needs error checking. (defun caseNames (ds) + "Case-name handling for DATAFRAME-LIKE. Needs error checking." (case-labels ds)) (defun set-caseNames (ds vN) + "Case-name handling for DATAFRAME-LIKE. Needs error checking." (if (= (length (case-labels ds)) (length vN)) (setf (case-labels ds) vN) @@ -304,6 +228,7 @@ structure." ;;;;;;;;;;;; IMPLEMENTATIONS, with appropriate methods. +;; See also: ;; (documentation 'dataframe-like 'type) (defclass dataframe-array (dataframe-like) @@ -317,13 +242,104 @@ structure." dataframe-matrix-like which uses the lisp-matrix classes.")) (defmethod nrows ((df dataframe-array)) + "specializes on inheritance from matrix-like in lisp-matrix." (array-dimension (dataset df) 0)) (defmethod ncols ((df dataframe-array)) + "specializes on inheritance from matrix-like in lisp-matrix." (array-dimension (dataset df) 1)) -;;; NEED TO FIGURE OUT HOW TO EXTEND THE MATRIX-LIKE CLASS PRINT -;;; METHOD! +(defmethod consistent-dataframe-like-p ((ds dataframe-array)) + "Test that dataframe-like is internally consistent with metadata. +Ensure that dims of stored data are same as case and var labels. 
+ +Currently checks length of things, but needs to check type of things +as well." + (and + ;; ensure dimensionality + (equal (list (ncols ds) (nrows ds)) ; array-dimensions (dataset ds)) + (list (length (var-labels ds)) + (length (case-labels ds)))) + ;; when dims sane, check-type for each variable + (progn + (dolist (i (ncols ds)) + (dotimes (j (nrows ds)) + (typep (aref (dataset ds) i j) (nth i (var-types ds))))) + t))) + + +(defmethod dfref ((df dataframe-array) index1 index2 &key return-type) + "Returns a scalar in array, in the same vein as aref, mref, vref, etc. +idx1/2 is row/col or case/var." + (ecase return-type + ((scalar) (aref (dataset df) index1 index2)) + ((dataframe) (make-instance 'dataframe-array + :storage (make-array + (list 1 1) + :initial-contents (dfref df index1 index2)) + ;; ensure copy for this and following + :doc (doc-string df) + :case-labels (nth index1 (caseNames df)) + :var-labels (nth index2 (varNames df)) + ;; should the type spec assume, as + ;; below, or should it inherit from the + ;; dataframe we are selecting from? + :var-types (nth index2 (var-types df)))))) + + + +(defun dfref-var (df index return-type) + "Returns the data in a single variable as type. +type = sequence, vector, vector-like (if valid numeric type) or dataframe." + (ecase return-type + (('list) + (map 'list + #'(lambda (x) (dfref df index x)) + (gen-seq (nth 2 (array-dimensions (dataset df)))))) + (('vector) t) + (:vector-like t) + (:matrix-like t) + (:dataframe t))) + +(defun dfref-obsn (df index return-type) + "Returns row as sequence." + (ecase return-type + (:list + (map 'list + #'(lambda (x) (dfref df x index)) + (gen-seq (nth 1 (array-dimensions (dataset df)))))) + (:vector t) + (:vector-like t) + (:matrix-like t) + (:dataframe t))) + +;; FIXME +(defun dfref-2indexlist (df indexlist1 indexlist2 &key (return-type :array)) + "return an array, row X col dims. 
FIXME TESTME" + (case return-type + (:array + (let ((my-pre-array (list))) + (dolist (x indexlist1) + (dolist (y indexlist2) + (append my-pre-array (dfref df x y)))) + (make-array (list (length indexlist1) + (length indexlist2)) + :initial-contents my-pre-array))) + (:dataframe + (make-instance 'dataframe-array + :storage (make-array + (list (length indexlist1) + (length indexlist2)) + :initial-contents (dataset df)) + ;; ensure copy for this and following + :doc (doc-string df) + ;; the following 2 need to be subsetted based on + ;; the values of indexlist1 and indexlist2 + :case-labels (case-labels df) + :var-labels (var-labels df))))) + +;;; Do we establish methods for dataframe-like, which specialize to +;;; particular instances of storage? (defmethod print-object ((object dataframe-array) stream) (print-unreadable-object (object stream :type t) @@ -341,4 +357,18 @@ structure." (write-char #\tab stream) (write (dfref object i j) :stream stream))))) - +#| + (defun print-structure-relational (ds) + "example of what we want the methods to look like. Should be sort +of like a graph of spreadsheets if the storage is a relational +structure." + (dolist (k (relations ds)) + (let ((currentRelationSet (getRelation ds k))) + (print-as-row (var-labels currentRelationSet)) + (let ((j -1)) + (dolist (i (case-labels currentRelationSet)) + (print-as-row + (append (list i) + (dfref-obsn (dataset currentRelationSet) + (incf j))))))))) +|# -- 2.11.4.GIT