2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
7 ;;; Author: AJ Rossini <blindglobe@gmail.com>
8 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending on how it arrives.
9 ;;; Purpose: data package for lispstat
10 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
11 ;;; Creation: <2006-05-17 21:34:07 rossini>
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
17 ;;; This organization and structure is new to the 21st Century
20 ;;; Consider that data has 3 genotypic characteristics. The first
21 ;;; would be form -- scalar, vector, array. The second would be
22 ;;; datarep type, in particular integer, real, string, symbol. The last
23 ;;; would be statistical type, augmenting datarep type with use in a
24 ;;; statistical context, i.e. that would include nominal, ordinal,
25 ;;; integer, continuous, interval (orderable subtypes).
29 (defpackage :lisp-stat-data
30 (:documentation
"Data I/O, management, other data technologies.")
35 :lisp-stat-object-system
36 :lisp-stat-compound-data
40 (:shadowing-import-from
:lisp-stat-object-system
41 slot-value call-method call-next-method
)
43 ;;; from statistics.lsp
44 open-file-dialog read-data-file read-data-columns load-data
45 load-example
*variables
* *ask-on-redefine
* def variables savevar
;; Everything below is read and defined in the LISP-STAT-DATA package.
(in-package :lisp-stat-data)
50 ;; (deftype dataformtype (list scalar vector sequence list array relation))
51 ;; (deftype datareptype (list integer rational real complex string symbol=
52 ;; (deftype stattzpe (list
57 ;;; The purpose of this package is to manage data which will be
58 ;;; processed by LispStat. In particular, it will be important to
59 ;;; register variables, datasets, relational structures, and other
60 ;;; objects which could be the target for statistical modeling and
(defvar *lisp-stat-data-table* (make-hash-table)
  "Hash table created empty at load time (default EQL test).
NOTE(review): presumably the registry of data objects that LispStat can
process; nothing visible in this part of the file stores into it yet.")
(defvar *lisp-stat-data-count* 0
  "Number of items currently recorded.  Starts at 0.")
71 ;;; Data types are the representation of data from a computer-science
72 ;;; perspective, i.e. what it is that they contain. These types
73 ;;; include particular forms of compound types (i.e. dataframe,
74 ;;; relationdata are compounds of arrays of different types where the
75 ;;; difference is row-wise, while array is a compound of elements of
79 ;; (defun equidimensional (a)
80 ;; (or (< (array-rank a) 2)
81 ;; (apply #'= (array-dimensions a)))) => EQUIDIMENSIONAL
82 ;; (deftype square-matrix (&optional type size)
83 ;; `(and (array ,type (,size ,size))
84 ;; (satisfies equidimensional))) => SQUARE-MATRIX
(deftype dt-scalar (&optional type)
  "Type of scalar data values: an INTEGER, a DOUBLE-FLOAT or a COMPLEX.
The optional TYPE argument is accepted but currently ignored."
  (declare (ignore type))
  ;; DOUBLE is not a Common Lisp type specifier; the standard name for
  ;; double-precision floats is DOUBLE-FLOAT.
  '(or integer double-float complex))
(deftype dt-array (&optional type)
  "Type of every object for which the predicate EQUAL-TYPE returns true.
The optional TYPE argument is accepted but not used in the expansion.
NOTE(review): EQUAL-TYPE is not defined in the visible part of this file;
it must be a globally defined one-argument function before this type is
used with TYPEP."
  (declare (ignorable type))
  (list 'satisfies 'equal-type))
(deftype dt-dataframe ()
  "Type of every object for which EQUAL-TYPE-WITHIN-COLUMN returns true.
NOTE(review): EQUAL-TYPE-WITHIN-COLUMN is not defined in the visible part
of this file; it must be a globally defined one-argument function before
this type is used with TYPEP."
  (list 'satisfies 'equal-type-within-column))
(defun relationdata-units-p (object)
  "Return true when OBJECT is a sequence whose every element is of type
DT-DATAFRAME.  An empty sequence trivially qualifies."
  (and (typep object 'sequence)
       (every (lambda (unit) (typep unit 'dt-dataframe)) object)))

(deftype dt-relationdata ()
  "Type of sequences all of whose elements are of type DT-DATAFRAME.
SATISFIES only accepts a symbol naming a one-argument predicate, so the
element test lives in RELATIONDATA-UNITS-P.
NOTE(review): the original expansion was pseudo-code (\"foreach unit in
relationalUnit\"); reading it as \"every element of the object is a
dataframe\" is an interpretation -- confirm the intended container."
  '(satisfies relationdata-units-p))
102 ;;; Statistical Variable Classes
103 ;;(deftype sv-nominal )
104 ;;(deftype sv-ordinal )
105 ;;(deftype sv-categorical (or 'sv-nominal 'sv-ordinal))
106 ;;(deftype sv-integer )
108 ;;(deftype sv-rational )
109 ;;(deftype sv-complex )
110 ;;(deftype sv-continuous (or 'sv-integer 'sv-real 'sv-rational 'sv-complex))
115 ;; We can read 2 types of data -- those which are pure data, and those
116 ;; which are impure (lisp-enabled).
118 (defparameter *lisp-stat-data-formats
*
121 ;; (defgeneric data-read (srce frmt)
122 ;; "read data from stream srce, in format frmt.")
124 ;; (defgeneric data-write (srce frmt)
125 ;; "write data to stream srce, in format frmt.")
127 ;; (defmacro with-data (body)
128 ;; "Stream-handling, maintaining I/O through object typing.")
130 ;; design-wise should these be replaced with a "with-data" form?
139 ;; the goal is to have 2 operations which can be used to create new
140 ;; data formats out of old ones.
142 ;; (defgeneric data-subset (ds description)
143 ;; "Take a dataset and make it smaller.")
145 ;; (defgeneric data-relate (ds description)
146 ;; "Take 2 or more datasets, and grow them into a bigger one through
147 ;; relating them (i.e. merge is one example).")
149 ;;; Data tools from "statistics.lsp"
152 ;;;; Data File Reading
(defun count-file-columns (fname)
  "Args: (fname)
Returns the number of lisp items on the first nonblank line of file FNAME.
A line is blank when it is empty or holds only whitespace characters.
Returns NIL when the file contains no nonblank line."
  (flet ((blank-p (line)
           (every (lambda (ch)
                    (member ch '(#\Space #\Tab #\Return #\Page)))
                  line)))
    (with-open-file (f fname)
      ;; READ-LINE is called with EOF-ERROR-P = NIL so that reaching the
      ;; end of the file yields NIL instead of signalling END-OF-FILE.
      (let ((line (loop for candidate = (read-line f nil nil)
                        while (and candidate (blank-p candidate))
                        finally (return candidate))))
        (when line
          ;; NOTE(review): READ honours #. unless *READ-EVAL* is NIL;
          ;; only use this on trusted data files.
          (with-input-from-string (s line)
            (loop with eof = (gensym)
                  until (eq eof (read s nil eof))
                  count t)))))))
;; Only when the XLISP feature is present: bind *XLISPTABLE* to the
;; readtable that is current when this form is loaded.
#+xlisp
(defvar *xlisptable* *readtable*)
;; Define OPEN-FILE-DIALOG only when nothing else has defined it already,
;; so an existing (e.g. GUI-provided) definition is never overwritten.
(unless (fboundp 'open-file-dialog)
  (defun open-file-dialog (&optional set)
    "Args: (&optional set)
Asks for a data file name.  When a function named GET-STRING-DIALOG is
defined it is called with the prompt and its result is returned; otherwise
an error is signalled because the name must be passed explicitly.
SET is accepted and ignored."
    (declare (ignore set))
    ;; The dialog function is looked up at call time (FUNCALL on the
    ;; symbol) so this file loads on systems without dialog support.
    (if (fboundp 'get-string-dialog)
        (funcall 'get-string-dialog "Enter a data file name:")
        (error "You must provide a file name explicitly"))))
(defun read-data-file (&optional (file (open-file-dialog t)))
  "Args: (file)
Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
in which case the symbol's print name is used.  Returns NIL when FILE is NIL."
  (when file
    ;; A symbol is not a pathname designator in ANSI Common Lisp, so it is
    ;; converted to its print name before the file is opened.
    ;; NOTE(review): READ honours #. unless *READ-EVAL* is NIL; only use
    ;; this on trusted data files.
    (with-open-file (f (if (symbolp file) (symbol-name file) file))
      (loop with eof = (gensym)
            for item = (read f nil eof)
            until (eq item eof)
            collect item))))
191 ;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog t))
                                    (cols (when file
                                            (count-file-columns file))))
  "Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists representing the columns.
COLS defaults to the value of COUNT-FILE-COLUMNS for FILE.  Returns NIL
when FILE or COLS is NIL."
  (when (and file cols)
    ;; All objects are read in file order, cut into groups of COLS items by
    ;; SPLIT-LIST, and the result is passed through TRANSPOSE.
    (transpose (split-list (read-data-file file) cols))))
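201 ;;; FIXME:AJR: ALL THE FOLLOWING ARE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
;;; NOTE(review): LOAD-DATA and LOAD-EXAMPLE are each defined three times
;;; below, building paths with "/", ":" and "\" separators respectively.
;;; Nothing visible here selects between the variants, so as written the
;;; last pair of definitions replaces the earlier ones.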
204 (defun load-data (file)
206 Read in data file from the data examples library."
207 (if (load (format nil
"~aData/~a" *default-path
* file
))
209 (load (format nil
"~aExamples/~a" *default-path
* file
))))
212 (defun load-example (file)
214 Read in lisp example file from the examples library."
215 (if (load (format nil
"~aExamples/~a" *default-path
* file
))
217 (load (format nil
"~aData/~a" *default-path
* file
))))
219 (defun load-data (s) (require s
(concatenate 'string
":Data:" s
)))
221 (defun load-example (s) (require s
(concatenate 'string
":Examples:" s
)))
224 (defun load-data (file)
226 Read in data file from the data examples library."
227 (load (format nil
"~aData\\~a" *default-path
* file
)))
230 (defun load-example (file)
232 Read in lisp example file from the examples library."
233 (load (format nil
"~aExamples\\~a" *default-path
* file
)))
236 ;;;; Listing and Saving Variables and Functions
(defvar *variables* nil
  "List of the symbols that have been given a value with DEF.")
(defvar *ask-on-redefine* nil
  "When not NIL, DEF asks for confirmation before giving a new value to a
variable that is already bound.")
(defmacro def (symbol value)
  "Syntax: (def var form)
VAR is not evaluated and must be a symbol.  Assigns the value of FORM to
VAR and adds VAR to the list *VARIABLES* of def'ed variables.  Returns VAR.
If VAR is already bound and the global variable *ASK-ON-REDEFINE*
is not nil then you are asked if you want to redefine the variable.
When the redefinition is declined nothing is changed and NIL is returned."
  ;; Reject a non-symbol at macroexpansion time, as the contract requires.
  (unless (symbolp symbol)
    (error "DEF: ~s is not a symbol" symbol))
  `(unless (and *ask-on-redefine*
                ;; Only an already-bound variable triggers the question.
                (boundp ',symbol)
                (not (y-or-n-p "Variable has a value. Redefine?")))
     (pushnew ',symbol *variables*)
     (setf ,symbol ,value)
     ',symbol))
(defun variables-list ()
  "Returns the entries of *VARIABLES* as symbols, in the order produced by
SORT-DATA applied to their names.  Each name is passed through INTERN, so
the resulting symbols belong to the package current at call time."
  (let* ((names (mapcar #'string *variables*))
         (ordered (sort-data names)))
    (mapcar #'intern ordered)))
260 Returns a list of the names of all def'ed variables to STREAM"
262 (mapcar #'intern
(sort-data (mapcar #'string
*variables
*)))))
264 (defun savevar (vars file
)
265 "Args: (vars file-name-root)
266 VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
267 whose print name is used) not endinf in .lsp. The VARS and their current values
268 are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
270 (with-open-file (f (concatenate 'string
(namestring file
) ".lsp")
272 (let ((vars (if (consp vars
) vars
(list vars
))))
274 (let ((v (symbol-value x
)))
276 (format f
"(def ~s ~s)~%" x
(send v
:save
))
277 (format f
"(def ~s '~s)~%" x v
)))))
278 (mapcar #'save-one vars
))
283 If V is the symbol of a defined variable the variable it is unbound and
284 removed from the list of defined variables. If V is a list of variable
285 names each is unbound and removed. Returns V."
286 (dolist (s (if (listp v
) v
(list v
)))
287 (when (member s
*variables
*)
288 (setq *variables
* (delete s
*variables
*))
293 ;;;; Miscellaneous Routines
(defun split-list (x n)
  "Args: (list cols)
Returns a list of freshly consed sublists of LIST, each holding COLS
consecutive elements, in order.  Signals an error unless COLS is a positive
integer that divides the length of LIST.
Example: (split-list '(1 2 3 4 5 6) 2) returns ((1 2) (3 4) (5 6))"
  ;; Reject zero or negative group sizes up front rather than failing with
  ;; a division by zero in REM below.
  (unless (typep n '(integer 1))
    (error "column count must be a positive integer, not ~s" n))
  (if (/= (rem (length x) n) 0) (error "length not divisible by ~a" n))
  ;; Step through LIST N conses at a time, copying each group of N.
  (loop for tail on x by (lambda (cell) (nthcdr n cell))
        collect (subseq tail 0 n)))