reduce/apply/values corrections
[CommonLispStat.git] / data.lisp
blobee45b5e8b8ef2f2fed4411c2b14cb7edc6ce9811
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; File: data.lisp
7 ;;; Author: AJ Rossini <blindglobe@gmail.com>
8 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending on how it arrives.
9 ;;; Purpose: data package for lispstat
10 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
11 ;;; Creation: <2006-05-17 21:34:07 rossini>
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
17 ;;; This organization and structure is new to the 21st Century
18 ;;; version.
(in-package :cl-user)

;;; Package for LispStat data I/O and data management.
;;;
;;; Symbol names below are written as uninterned symbols (#:name).  Bare
;;; symbols would be interned into CL-USER when this form is read, and a
;;; later (use-package :lisp-stat-data) from CL-USER would then hit name
;;; conflicts on e.g. DEF or VARIABLES.
(defpackage :lisp-stat-data
  (:documentation "Data I/O, management, other data technologies.")
  (:nicknames :ls-data)
  (:use :common-lisp
        :cxml
        :lisp-stat-object-system
        :lisp-stat-compound-data
        :lisp-stat-matrix
        :lisp-stat-linalg
        :lisp-stat-sequence)
  (:shadowing-import-from :lisp-stat-object-system
                          #:slot-value #:call-method #:call-next-method)
  (:export
   ;; from statistics.lsp
   #:open-file-dialog #:read-data-file #:read-data-columns #:load-data
   #:load-example #:*variables* #:*ask-on-redefine* #:def #:variables #:savevar
   #:undef))

(in-package :lisp-stat-data)
43 ;;; The purpose of this package is to manage data which will be
44 ;;; processed by LispStat. In particular, it will be important to
45 ;;; register variables, datasets, relational structures, and other
46 ;;; objects which could be the target for statistical modeling and
47 ;;; inference.
49 (defvar *lisp-stat-data-table* (make-hash-table)
50 "Hash table marking up the data that could be used by LispStat.")
52 (defvar *lisp-stat-data-count* 0
53 "Number of items currently recorded.")
55 ;;; Data Types:
56 ;;;
57 ;;; Data types are the representation of data from a computer-science
58 ;;; perspective, i.e. what it is that they contain. These types
59 ;;; include particular forms of compound types (i.e. dataframe,
60 ;;; relationdata are compounds of arrays of different types where the
61 ;;; difference is row-wise, while array is a compound of elements of
62 ;;; the same type).
64 ;;Examples:
65 ;; (defun equidimensional (a)
66 ;; (or (< (array-rank a) 2)
67 ;; (apply #'= (array-dimensions a)))) => EQUIDIMENSIONAL
68 ;; (deftype square-matrix (&optional type size)
69 ;; `(and (array ,type (,size ,size))
70 ;; (satisfies equidimensional))) => SQUARE-MATRIX
(deftype dt-scalar (&optional type)
  "Scalar data: an integer, a double-float, or a complex number.
TYPE is accepted so existing type specifiers keep working, but it is
currently ignored."
  (declare (ignore type))
  ;; DOUBLE is not a Common Lisp type specifier; DOUBLE-FLOAT is.
  '(or integer double-float complex))
(deftype dt-array (&optional type)
  "Array data: any object accepted by the predicate EQUAL-TYPE.
TYPE is part of the lambda list but is not used by the expansion."
  (declare (ignore type))
  ;; NOTE(review): EQUAL-TYPE is not defined in this file; it must be
  ;; available before TYPEP is called with this type.
  '(satisfies equal-type))
(deftype dt-dataframe ()
  "Dataframe data: any object accepted by the predicate
EQUAL-TYPE-WITHIN-COLUMN."
  ;; NOTE(review): EQUAL-TYPE-WITHIN-COLUMN is not defined in this file;
  ;; it must be available before TYPEP is called with this type.
  '(satisfies equal-type-within-column))
(defun dt-relationdata-p (object)
  "Returns true when OBJECT is a sequence whose every element is of type
DT-DATAFRAME."
  ;; NOTE(review): assumes relational data is represented as a sequence
  ;; of units, following the original pseudo-code
  ;; (foreach unit in relationalUnit (typep unit 'dt-dataframe)) -- confirm.
  (and (typep object 'sequence)
       (every (lambda (unit) (typep unit 'dt-dataframe)) object)))

(deftype dt-relationdata ()
  "Relational data: a collection of units, each of type DT-DATAFRAME."
  ;; SATISFIES only accepts a symbol naming a one-argument predicate, so
  ;; the check lives in DT-RELATIONDATA-P rather than inline.
  '(satisfies dt-relationdata-p))
88 ;;; Statistical Variable Classes
89 ;;(deftype sv-nominal )
90 ;;(deftype sv-ordinal )
91 ;;(deftype sv-categorical (or 'sv-nominal 'sv-ordinal))
92 ;;(deftype sv-integer )
93 ;;(deftype sv-real )
94 ;;(deftype sv-rational )
95 ;;(deftype sv-complex )
96 ;;(deftype sv-continuous (or 'sv-integer 'sv-real 'sv-rational 'sv-complex))
99 ;;; Data I/O
101 ;; We can read 2 types of data -- those which are pure data, and those
102 ;; which are impure (lisp-enabled).
;; List of symbols naming data file formats (CSV and TSV).  Nothing in
;; this file reads it yet.
104 (defparameter *lisp-stat-data-formats*
105 '(csv tsv))
107 ;; (defgeneric data-read (srce frmt)
108 ;; "read data from stream srce, in format frmt.")
110 ;; (defgeneric data-write (srce frmt)
111 ;; "read data from stream srce, in format frmt.")
113 ;; (defmacro with-data (body)
114 ;; "Stream-handling, maintaining I/O through object typing.")
116 ;; design-wise should these be replaced with a "with-data" form?
119 ;; DSV processing
121 ;; XML processing
123 ;;; Data Management
125 ;; the goal is to have 2 operations which can be used to create new
126 ;; data formats out of old ones.
128 ;; (defgeneric data-subset (ds description)
129 ;; "Take a dataset and make it smaller.")
131 ;; (defgeneric data-relate (ds description)
132 ;; "Take 2 or more datasets, and grow them into a bigger one through
133 ;; relating them (i.e. merge is one example).")
135 ;;; Data tools from "statistics.lsp"
137 ;;;;
138 ;;;; Data File Reading
139 ;;;;
(defun count-file-columns (fname)
  "Args: (fname)
Returns the number of lisp items on the first non-empty line of file FNAME.
Returns NIL when the file is empty or contains only empty lines."
  (with-open-file (f fname)
    ;; READ-LINE is asked to return NIL at end of file instead of
    ;; signalling, so a file without any non-empty line yields NIL.
    (let ((line (loop for candidate = (read-line f nil nil)
                      while (and candidate (zerop (length candidate)))
                      finally (return candidate))))
      (when line
        (with-input-from-string (s line)
          ;; A fresh uninterned symbol is the end-of-input marker, so no
          ;; object read from the line can be mistaken for it.
          (let ((eof (gensym)))
            (loop until (eq eof (read s nil eof))
                  count t)))))))
;; Under XLISP only: keep the readtable that is current at load time in
;; *XLISPTABLE*.
153 #+xlisp (defvar *xlisptable* *readtable*)
;;; Provide a default OPEN-FILE-DIALOG only when no definition exists yet.
;;; With the DIALOGS feature the user is prompted for a name; without it
;;; the function signals an error.
(unless (fboundp 'open-file-dialog)
  #+dialogs
  (defun open-file-dialog (&optional set)
    "Prompts for a data file name via GET-STRING-DIALOG.  SET is ignored."
    (declare (ignore set))
    (get-string-dialog "Enter a data file name:"))
  #-dialogs
  (defun open-file-dialog (&optional set)
    "Signals an error: no dialog support is available.  SET is ignored."
    (declare (ignore set))
    (error "You must provide a file name explicitly")))
(defun read-data-file (&optional (file (open-file-dialog t)))
  "Args: (file)
Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
in which case the symbol's print name is used. Returns NIL when FILE is NIL."
  (when file
    ;; NOTE(review): objects are read with READ, which honours *READ-EVAL*;
    ;; bind *READ-EVAL* to NIL around calls that read untrusted files.
    (let ((eof (gensym))
          ;; OPEN does not accept symbols, so use the print name as the
          ;; docstring promises.
          (path (if (symbolp file) (string file) file)))
      (with-open-file (f path)
        (loop for item = (read f nil eof)
              until (eq item eof)
              collect item)))))
177 ;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog t))
                                    (cols (if file
                                              (count-file-columns file))))
  "Args: (&optional file cols)
Reads the objects in FILE, groups them COLS at a time with SPLIT-LIST, and
returns the TRANSPOSE of that grouping, i.e. a list of lists representing
the columns. Returns NIL when FILE or COLS is NIL."
  (when (and file cols)
    (let* ((items (read-data-file file))
           (rows (split-list items cols)))
      (transpose rows))))
187 ;;; FIXME:AJR: ALL THE FOLLOWING ARE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
#+unix
(defun load-data (file)
  "Args: (file)
Read in data file from the data examples library.
FILE is looked up under Data/ in *DEFAULT-PATH* first; only when it does not
exist there is it loaded from Examples/ instead."
  ;; :IF-DOES-NOT-EXIST NIL makes the first LOAD return NIL for a missing
  ;; file instead of signalling, so OR can fall through to Examples/.
  ;; The fallback LOAD keeps the default and signals if the file is missing
  ;; in both places.
  (or (load (format nil "~aData/~a" *default-path* file)
            :if-does-not-exist nil)
      (load (format nil "~aExamples/~a" *default-path* file))))
#+unix
(defun load-example (file)
  "Args: (file)
Read in lisp example file from the examples library.
FILE is looked up under Examples/ in *DEFAULT-PATH* first; only when it does
not exist there is it loaded from Data/ instead."
  ;; :IF-DOES-NOT-EXIST NIL makes the first LOAD return NIL for a missing
  ;; file instead of signalling, so OR can fall through to Data/.
  ;; The fallback LOAD keeps the default and signals if the file is missing
  ;; in both places.
  (or (load (format nil "~aExamples/~a" *default-path* file)
            :if-does-not-exist nil)
      (load (format nil "~aData/~a" *default-path* file))))
#+macintosh
(defun load-data (s)
  "Args: (s)
REQUIREs module S from the path formed by prefixing S with \":Data:\"."
  (let ((module-path (concatenate 'string ":Data:" s)))
    (require s module-path)))
#+macintosh
(defun load-example (s)
  "Args: (s)
REQUIREs module S from the path formed by prefixing S with \":Examples:\"."
  (let ((module-path (concatenate 'string ":Examples:" s)))
    (require s module-path)))
#+msdos
(defun load-data (file)
  "Args: (file)
Loads FILE from the Data directory under *DEFAULT-PATH* and returns the
result of LOAD."
  (let ((path (format nil "~aData\\~a" *default-path* file)))
    (load path)))
#+msdos
(defun load-example (file)
  "Args: (file)
Loads FILE from the Examples directory under *DEFAULT-PATH* and returns the
result of LOAD."
  (let ((path (format nil "~aExamples\\~a" *default-path* file)))
    (load path)))
221 ;;;;
222 ;;;; Listing and Saving Variables and Functions
223 ;;;;
;; List of the symbols registered through DEF; UNDEF removes entries and
;; VARIABLES reports them.
225 (defvar *variables* nil)
;; When non-NIL, DEF asks for confirmation before assigning to a symbol
;; that is already bound.
226 (defvar *ask-on-redefine* nil)
(defmacro def (symbol value)
  "Syntax: (def var form)
VAR is not evaluated and must be a symbol. FORM is evaluated and its value
is stored in VAR, and VAR is recorded in the list *VARIABLES*. The form then
evaluates to VAR.
When *ASK-ON-REDEFINE* is non-nil and VAR already has a value, the user is
asked to confirm first; if the answer is no, nothing is changed and the form
evaluates to NIL."
  (let ((name (list 'quote symbol)))
    `(if (and *ask-on-redefine*
              (boundp ,name)
              (not (y-or-n-p "Variable has a value. Redefine?")))
         nil
         (progn
           (pushnew ,name *variables*)
           (setf ,symbol ,value)
           ,name))))
(defun variables-list ()
  "Args: ()
Converts the entries of *VARIABLES* to strings, passes them through
SORT-DATA, and returns the result with each name interned as a symbol
again."
  (let* ((names (mapcar #'string *variables*))
         (ordered (sort-data names)))
    (mapcar #'intern ordered)))
(defun variables ()
  "Args: ()
Returns a list of the names of all def'ed variables: the entries of
*VARIABLES* converted to strings, passed through SORT-DATA, and interned as
symbols again. Returns NIL when no variables have been def'ed."
  (when *variables*
    (loop for name in (sort-data (loop for var in *variables*
                                       collect (string var)))
          collect (intern name))))
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
whose print name is used) not ending in .lsp. The VARS and their current values
are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
load command. An existing file of that name is replaced. Returns the list of
saved variables."
  ;; LISTP (not CONSP) so that NIL means "no variables" rather than a
  ;; request to save the symbol NIL.
  (let ((vars (if (listp vars) vars (list vars))))
    ;; :IF-EXISTS :SUPERSEDE lets a save be repeated; without it OPEN may
    ;; signal an error when the file already exists.
    (with-open-file (f (concatenate 'string (string file) ".lsp")
                       :direction :output
                       :if-exists :supersede)
      (dolist (x vars)
        (let ((v (symbol-value x)))
          (if (objectp v)
              ;; Objects supply their own re-creation form via :SAVE.
              (format f "(def ~s ~s)~%" x (send v :save))
              (format f "(def ~s '~s)~%" x v)))))
    vars))
(defun undef (v)
  "Args: (v)
If V is the symbol of a defined variable, the variable is unbound and
removed from the list of defined variables. If V is a list of variable
names each is unbound and removed. Symbols that are not in *VARIABLES*
are left untouched. Returns V."
  (dolist (s (if (listp v) v (list v)))
    (when (member s *variables*)
      (setq *variables* (delete s *variables*))
      (makunbound s)))
  ;; DOLIST itself returns NIL; return the argument as documented.
  v)
277 ;;;;
278 ;;;; Miscellaneous Routines
279 ;;;;
(defun split-list (x n)
  "Args: (list cols)
Splits LIST into consecutive sublists of COLS elements each and returns the
list of those sublists, in order. Signals an error when the length of LIST is
not divisible by COLS. LIST itself is not modified.
Example: (split-list '(1 2 3 4 5 6) 2) returns ((1 2) (3 4) (5 6))"
  (check-one-fixnum n)
  (if (/= (rem (length x) n) 0) (error "length not divisible by ~a" n))
  ;; The REPEAT count is evaluated once, before POP starts consuming the
  ;; local variable X.  POP only rebinds X; the caller's list is untouched.
  (loop repeat (/ (length x) n)
        collect (loop repeat n
                      collect (pop x))))