2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
7 ;;; Author: AJ Rossini <blindglobe@gmail.com>
8 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
10 ;;; Purpose: data package for lispstat
11 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
12 ;;; Creation: <2006-05-17 21:34:07 rossini>
14 ;;; What is this talk of 'release'? Klingons do not make software
15 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
16 ;;; designers and quality assurance people in its wake.
18 ;;; This organization and structure is new to the 21st Century
;;; Consider that data has 3 genotypic characteristics.  The first
;;; would be form -- scalar, vector, array.  The second would be
;;; datarep type, in particular integer, real, string, symbol.  The last
;;; would be statistical type, augmenting datarep type with use in a
;;; statistical context, i.e. that would include nominal, ordinal,
;;; integer, continuous, interval (orderable subtypes).  Clearly, the
;;; statistical type can be inherited, likewise the numerical type as
;;; well.  The form can be pushed up or simplified as necessary, but
;;; this can be challenging.
33 (defpackage :lisp-stat-data
34 (:documentation
"Data I/O, management, other data technologies.")
39 :lisp-stat-object-system
41 :lisp-stat-compound-data
44 (:shadowing-import-from
:lisp-stat-object-system
45 slot-value call-method call-next-method
)
46 (:export open-file-dialog read-data-file read-data-columns load-data
47 load-example
*variables
* *ask-on-redefine
*
48 def variables savevar undef
))
50 (in-package :lisp-stat-data
)
52 ;;; The purpose of this package is to manage data which will be
;;; processed by LispStat.  In particular, it will be important to
54 ;;; register variables, datasets, relational structures, and other
55 ;;; objects which could be the target for statistical modeling and
(defvar *lisp-stat-data-table* (make-hash-table)
  "Hash table created once at load time; intended as the registry of
data known to this package (key/value layout is not fixed here).")

(defvar *lisp-stat-data-count* 0
  "Number of items currently recorded; starts at zero.")
65 ;;; Data (storage) Types, dt-{.*}
67 ;;; Data types are the representation of data from a computer-science
68 ;;; perspective, i.e. what it is that they contain. These types
;;; include particular forms of compound types (i.e. a dataframe is
;;; array-like, but its types differ, the difference being row-wise,
;;; while an array is a compound of elements of the same type).
75 ;; (defun equidimensional (a)
76 ;; (or (< (array-rank a) 2)
77 ;; (apply #'= (array-dimensions a)))) => EQUIDIMENSIONAL
78 ;; (deftype square-matrix (&optional type size)
79 ;; `(and (array ,type (,size ,size))
80 ;; (satisfies equidimensional))) => SQUARE-MATRIX
82 (defun array-of-equal-dt-scalar-type (x)
83 ;; return dt-scalar-type which fits (more precise that works)
88 (defun array-of-equal-dt-scalar-type-within-column (x)
89 ;; return dt-scalar-type which fits (more precise that works)
(deftype dt-scalar (&optional type)
  "Storage type for a single data value: an integer, a double-float, a
complex number, or a symbol.  The optional TYPE argument is accepted
for interface compatibility but is not used by the expansion."
  (declare (ignore type))
  ;; DOUBLE is not a Common Lisp type specifier; the standard name for
  ;; double-precision floats is DOUBLE-FLOAT.  Other float formats are
  ;; deliberately not added here, to stay with the original intent.
  '(or integer double-float complex symbol))
(deftype dt-array (&optional ndim dimlist)
  "Type of objects accepted by the predicate
ARRAY-OF-EQUAL-DT-SCALAR-TYPE.  NDIM and DIMLIST are accepted but do
not influence the expansion."
  (declare (ignore ndim dimlist))
  (list 'satisfies 'array-of-equal-dt-scalar-type))
(deftype dt-dataframe ()
  "Type of objects accepted by the predicate
ARRAY-OF-EQUAL-DT-SCALAR-TYPE-WITHIN-COLUMN."
  (list 'satisfies 'array-of-equal-dt-scalar-type-within-column))
105 ;(deftype dt-relationaldata ()
106 ; `(satisfies (foreach unit in relationalUnit
107 ; (typep unit 'dt-dataframe))))
110 ;;; Statistical Variable Types, sv-{.*}
;;; Statistical variable types work to represent the statistical
;;; category represented by the variable, i.e. nominal, ordinal,
;;; integral, continuous, ratio.  This metadata can be used to hint at
;;; appropriate analysis methods -- or perhaps more critically, to
;;; define how these methods will fail in the final interpretation.
118 (deftype sv-nominal
(&optional n
)
121 (deftype sv-ordinal
(ordering &optional n
)
(deftype sv-categorical ()
  "Statistical variable type covering both categorical kinds: anything
that is of type SV-NOMINAL or of type SV-ORDINAL."
  ;; SATISFIES accepts only a symbol naming a one-argument predicate, so
  ;; the union has to be written as a plain OR type specifier.
  ;; SV-ORDINAL has a required ORDERING parameter and therefore cannot
  ;; be named bare; `*' is passed as the conventional "unspecified"
  ;; marker.  NOTE(review): confirm SV-ORDINAL's expansion tolerates `*'.
  '(or sv-nominal (sv-ordinal *)))
126 ;;(deftype sv-integer )
127 ;;(deftype sv-real ) ;; precision could be a secondary component of real, rational, complex.
128 ;;(deftype sv-rational )
129 ;;(deftype sv-complex )
130 ;;(deftype sv-continuous (or 'sv-integer 'sv-real 'sv-rational 'sv-complex)) ;; perhaps, call it "mostly contin..."
135 ;; We can read 2 types of data -- those which are pure data, and those
136 ;; which are impure (lisp-enabled, data as program as data thingy's).
138 (defparameter *lisp-stat-data-formats
*
141 ;; (defgeneric data-read (srce frmt)
142 ;; "read data from stream srce, in format frmt.")
;; (defgeneric data-write (srce frmt)
;; "write data to stream srce, in format frmt.")
147 ;; (defmacro with-data (body)
148 ;; "Stream-handling, maintaining I/O through object typing.")
150 ;; design-wise should these be replaced with a "with-data" form?
158 ;; the goal is to have 2 operations which can be used to create new
159 ;; data formats out of old ones.
161 ;; (defgeneric data-subset (ds description)
162 ;; "Take a dataset and make it smaller.")
164 ;; (defgeneric data-relate (ds description)
165 ;; "Take 2 or more datasets, and grow them into a bigger one through
166 ;; relating them (i.e. merge is one example).")
168 ;;; Data tools from "statistics.lsp"
171 ;;;; Data File Reading
174 (defun count-file-columns (fname)
176 Returns the number of lisp items on the first nonblank line of file FNAME."
177 (with-open-file (f fname
)
179 (let ((line (do ((line (read-line f
) (read-line f
)))
180 ((or (null line
) (< 0 (length line
))) line
))))
182 (with-input-from-string (s line
)
183 (do ((n 0 (+ n
1)) (eof (gensym)))
184 ((eq eof
(read s nil eof
)) n
))))))))
#+xlisp
(defvar *xlisptable* *readtable*
  "Value of *READTABLE* captured when this variable was first defined
(only present when the :XLISP feature is active).")
188 (if (not (fboundp 'open-file-dialog
))
190 (defun open-file-dialog () ;; why?(&optional set)
191 (get-string-dialog "Enter a data file name:"))
193 (defun open-file-dialog () ;; why? (&optional set)
194 (error "You must provide a file name explicitly")))
196 (defun read-data-file (&optional
(file (open-file-dialog t
)))
198 Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
199 in which case the symbol'f print name is used."
201 (let ((eof (gensym)))
202 (with-open-file (f file
)
204 (do* ((r (read f nil eof
) (read f nil eof
))
208 (setf (cdr tail
) (list r
))))))))
210 ;;; New definition to avoid stack size limit in apply
211 (defun read-data-columns (&optional
(file (open-file-dialog t
))
213 (count-file-columns file
))))
214 "Args: (&optional file cols)
215 Reads the data in FILE as COLS columns and returns a list of lists representing the columns."
217 (transpose (split-list (read-data-file file
) cols
))))
220 ;;; FIXME:AJR: ALL THE FOLLOWING NEED TO BE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
221 ;;; FIXME:AJR: use either string or pathname.
(defun path-string-to-path (p s)
  "Return the pathname obtained by appending the string S to the
namestring of P.  P may be anything NAMESTRING accepts; the two pieces
are joined textually, with no separator inserted."
  (let* ((prefix (namestring p))
         (joined (concatenate 'string prefix s)))
    (pathname joined)))
226 (defun load-data (file)
227 "Args: (file) as string
228 Read in data file from the data examples library."
229 (if (load (path-string-to-path *lispstat-data-dir
* file
))
231 (load (path-string-to-path *lispstat-examples-dir
* file
))))
233 (defun load-example (file)
234 "Args: (file) as string
235 Read in lisp example file from the examples library."
236 (if (load (path-string-to-path *lispstat-examples-dir
* file
))
238 (load (path-string-to-path *lispstat-data-dir
* file
))))
241 ;;;; Listing and Saving Variables and Functions
(defvar *variables* nil
  "List of the symbols that have been defined through DEF.")

(defvar *ask-on-redefine* nil
  "When non-NIL, DEF asks for confirmation before redefining a
variable that already has a value.")
247 (defmacro def
(symbol value
)
248 "Syntax: (def var form)
249 VAR is not evaluated and must be a symbol. Assigns the value of FORM to
250 VAR and adds VAR to the list *VARIABLES* of def'ed variables. Returns VAR.
251 If VAR is already bound and the global variable *ASK-ON-REDEFINE*
252 is not nil then you are asked if you want to redefine the variable."
253 `(unless (and *ask-on-redefine
*
255 (not (y-or-n-p "Variable has a value. Redefine?")))
256 (if (boundp ',symbol
)
257 (setf ,symbol
,value
)
258 (defvar ,symbol
,value
))
259 (pushnew ',symbol
*variables
*)
(defun variables-list ()
  "Return the DEF'ed variables as a list of symbols in sorted order.
The symbols in *VARIABLES* are converted to their names, the names are
ordered with SORT-DATA, and each name is then interned again in the
current package."
  (let* ((names (mapcar #'string *variables*))
         (ordered (sort-data names)))
    (mapcar #'intern ordered)))
267 Returns a list of the names of all def'ed variables to STREAM"
269 (mapcar #'intern
(sort-data (mapcar #'string
*variables
*)))))
271 (defun savevar (vars file
)
272 "Args: (vars file-name-root)
273 VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
274 whose print name is used) not endinf in .lsp. The VARS and their current values
275 are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
277 (with-open-file (f (concatenate 'string
(namestring file
) ".lsp")
279 (let ((vars (if (consp vars
) vars
(list vars
))))
281 (let ((v (symbol-value x
)))
283 (format f
"(def ~s ~s)~%" x
(send v
:save
))
284 (format f
"(def ~s '~s)~%" x v
)))))
285 (mapcar #'save-one vars
))
290 If V is the symbol of a defined variable the variable it is unbound and
291 removed from the list of defined variables. If V is a list of variable
292 names each is unbound and removed. Returns V."
293 (dolist (s (if (listp v
) v
(list v
)))
294 (when (member s
*variables
*)
295 (setq *variables
* (delete s
*variables
*))