Updated documentation and "expected results" within the demonstration.
[CommonLispStat.git] / data.lisp
bloba42c0147d7285083f5754c8b7d2ebffa28df6932
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; File: data.lisp
7 ;;; Author: AJ Rossini <blindglobe@gmail.com>
8 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending on how it arrives.
9 ;;; Purpose: data package for lispstat
10 ;;; Time-stamp: <2006-05-19 12:33:41 rossini>
11 ;;; Creation: <2006-05-17 21:34:07 rossini>
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
17 ;;; This organization and structure is new to the 21st Century
18 ;;; version.
20 ;;; Consider that data has 3 genotypic characteristics. The first
21 ;;; would be form -- scalar, vector, array. The second would be
22 ;;; datarep type, in particular integer, real, string, symbol. The last
23 ;;; would be statistical type, augmenting datarep type with use in a
24 ;;; statistical context, i.e. that would include nominal, ordinal,
25 ;;; integer, continuous, interval (orderable subtypes).
27 (in-package :cl-user)
29 (defpackage :lisp-stat-data
30 (:documentation "Data I/O, management, other data technologies.")
31 (:nicknames :ls-data)
32 (:use :common-lisp
33 ;;:cxml
34 :lisp-stat-config
35 :lisp-stat-object-system
36 :lisp-stat-types
37 :lisp-stat-compound-data
38 :lisp-stat-matrix
39 :lisp-stat-linalg)
40 (:shadowing-import-from :lisp-stat-object-system
41 slot-value call-method call-next-method)
42 (:export open-file-dialog read-data-file read-data-columns load-data
43 load-example *variables* *ask-on-redefine*
44 def variables savevar undef))
46 (in-package :lisp-stat-data)
48 ;;; The purpose of this package is to manage data which will be
49 ;;; processed by LispStat. In particular, it will be important to
50 ;;; register variables, datasets, relational structures, and other
51 ;;; objects which could be the target for statistical modeling and
52 ;;; inference.
(defvar *lisp-stat-data-table*
  (make-hash-table :test 'eql)
  "Hash table (EQL test) reserved for registering the data that LispStat
could use.  NOTE(review): key/value layout is not established here -- confirm
against the code that fills it.")

(defvar *lisp-stat-data-count*
  0
  "Number of items currently recorded.")
61 ;;; Data (storage) Types, dt-{.*}
62 ;;;
63 ;;; Data types are the representation of data from a computer-science
64 ;;; perspective, i.e. what it is that they contain. These types
65 ;;; include particular forms of compound types (e.g. a dataframe is
66 ;;; array-like but its types may differ, the difference being row-wise,
67 ;;; while an array is a compound of elements of the same type).
68 ;;;
70 ;;Examples:
71 ;; (defun equidimensional (a)
72 ;; (or (< (array-rank a) 2)
73 ;; (apply #'= (array-dimensions a)))) => EQUIDIMENSIONAL
74 ;; (deftype square-matrix (&optional type size)
75 ;; `(and (array ,type (,size ,size))
76 ;; (satisfies equidimensional))) => SQUARE-MATRIX
(defun array-of-equal-dt-scalar-type (x)
  "Placeholder predicate behind the DT-ARRAY type.
Returns the symbol INTEGER for any non-NIL X and NIL otherwise; it does not
inspect the elements of X yet."
  ;; Intended to return the (most precise) dt-scalar-type that fits X.
  (and x 'integer))
(defun array-of-equal-dt-scalar-type-within-column (x)
  "Placeholder predicate behind the DT-DATAFRAME type.
Returns the symbol INTEGER for any non-NIL X and NIL otherwise; it does not
inspect the columns of X yet."
  ;; Intended to return the (most precise) dt-scalar-type that fits each column.
  (and x 'integer))
(deftype dt-scalar (&optional type)
  "Scalar storage type: an integer, double-float, complex number or symbol.
The optional TYPE argument is accepted for interface compatibility but is
not used by the expansion."
  (declare (ignore type))
  ;; DOUBLE is not a Common Lisp type specifier; the standard name for
  ;; double-precision floats is DOUBLE-FLOAT.
  '(or integer double-float complex symbol))
(deftype dt-array (&optional ndim dimlist)
  "Array storage type, decided by the predicate ARRAY-OF-EQUAL-DT-SCALAR-TYPE.
NDIM and DIMLIST are accepted but do not influence the expansion."
  (declare (ignore ndim dimlist))
  (list 'satisfies 'array-of-equal-dt-scalar-type))
(deftype dt-dataframe ()
  "Dataframe storage type, decided by the predicate
ARRAY-OF-EQUAL-DT-SCALAR-TYPE-WITHIN-COLUMN."
  (list 'satisfies 'array-of-equal-dt-scalar-type-within-column))
101 ;(deftype dt-relationaldata ()
102 ; `(satisfies (foreach unit in relationalUnit
103 ; (typep unit 'dt-dataframe))))
106 ;;; Statistical Variable Types, sv-{.*}
107 ;;;
108 ;;; Statistical variable types work to represent the statistical
109 ;;; category represented by the variable, i.e. nominal, ordinal,
110 ;;; integral, continuous, ratio. This metadata can be used to hint at
111 ;;; appropriate analysis methods -- or perhaps more critically, to
112 ;;; define how these methods will fail in the final interpretation.
114 (deftype sv-nominal (&optional n)
117 (deftype sv-ordinal (ordering &optional n)
(deftype sv-categorical ()
  "Categorical statistical variable: either SV-NOMINAL or SV-ORDINAL."
  ;; SATISFIES only accepts a symbol naming a one-argument predicate, so the
  ;; union has to be spelled as a plain OR type specifier.
  ;; NOTE(review): SV-ORDINAL has a required ORDERING parameter; `*' is passed
  ;; here as the conventional "unspecified" marker -- confirm SV-ORDINAL
  ;; accepts it once its body is filled in.
  '(or sv-nominal (sv-ordinal *)))
122 ;;(deftype sv-integer )
123 ;;(deftype sv-real ) ;; precision could be a secondary component of real, rational, complex.
124 ;;(deftype sv-rational )
125 ;;(deftype sv-complex )
126 ;;(deftype sv-continuous (or 'sv-integer 'sv-real 'sv-rational 'sv-complex)) ;; perhaps, call it "mostly contin..."
129 ;;; Data I/O
131 ;; We can read 2 types of data -- those which are pure data, and those
132 ;; which are impure (lisp-enabled, data as program as data thingy's).
(defparameter *lisp-stat-data-formats*
  (list 'csv 'tsv)
  "List of symbols naming the data formats: CSV and TSV.")
137 ;; (defgeneric data-read (srce frmt)
138 ;; "read data from stream srce, in format frmt.")
140 ;; (defgeneric data-write (srce frmt)
141 ;; "read data from stream srce, in format frmt.")
143 ;; (defmacro with-data (body)
144 ;; "Stream-handling, maintaining I/O through object typing.")
146 ;; design-wise should these be replaced with a "with-data" form?
148 ;; DSV processing
150 ;; XML processing
152 ;;; Data Management
154 ;; the goal is to have 2 operations which can be used to create new
155 ;; data formats out of old ones.
157 ;; (defgeneric data-subset (ds description)
158 ;; "Take a dataset and make it smaller.")
160 ;; (defgeneric data-relate (ds description)
161 ;; "Take 2 or more datasets, and grow them into a bigger one through
162 ;; relating them (i.e. merge is one example).")
164 ;;; Data tools from "statistics.lsp"
166 ;;;;
167 ;;;; Data File Reading
168 ;;;;
(defun count-file-columns (fname)
  "Args: (fname)
Returns the number of lisp items on the first nonblank line of file FNAME.
Lines that are empty or contain only whitespace are skipped.  Returns NIL
when the file has no nonblank line at all.  Signals a FILE-ERROR (from OPEN)
if FNAME does not exist."
  (flet ((blank-p (line)
           (every (lambda (ch)
                    (member ch '(#\Space #\Tab #\Return #\Linefeed #\Page)))
                  line)))
    (with-open-file (f fname)
      ;; READ-LINE is told to return NIL at end of file; with the default
      ;; EOF-ERROR-P it would signal END-OF-FILE and the NIL check below
      ;; could never be reached.
      (let ((line (loop for l = (read-line f nil nil)
                        while (and l (blank-p l))
                        finally (return l))))
        (when line
          ;; NOTE(review): READ honours *READ-EVAL*; bind it to NIL in the
          ;; caller when FNAME comes from an untrusted source.
          (with-input-from-string (s line)
            (loop with eof = (gensym)
                  until (eq eof (read s nil eof))
                  count t)))))))
#+xlisp (defvar *xlisptable* *readtable*)

;; Fallback definition of OPEN-FILE-DIALOG, installed only when no other
;; definition exists.  The optional SET argument is accepted (and ignored)
;; because callers in this file invoke (open-file-dialog t); without it every
;; such call fails with an argument-count error instead of the intended
;; behaviour.
(unless (fboundp 'open-file-dialog)
  #+dialogs
  (defun open-file-dialog (&optional set)
    "Ask the user for a data file name via GET-STRING-DIALOG.  SET is ignored."
    (declare (ignore set))
    (get-string-dialog "Enter a data file name:"))
  #-dialogs
  (defun open-file-dialog (&optional set)
    "Without dialog support a file name cannot be requested interactively:
always signals an error asking for an explicit file name.  SET is ignored."
    (declare (ignore set))
    (error "You must provide a file name explicitly")))
(defun read-data-file (&optional (file (open-file-dialog)))
  "Args: (file)
Returns a list of all lisp objects in FILE. FILE can be a string, a pathname
or a symbol, in which case the symbol's print name is used.  Returns NIL when
FILE is NIL.  When FILE is omitted, OPEN-FILE-DIALOG supplies it."
  ;; OPEN-FILE-DIALOG is called without arguments: as defined in this file it
  ;; takes none, so passing one signalled an argument-count error.
  (when file
    ;; OPEN does not accept symbols as pathname designators, so convert the
    ;; symbol to its print name first, as the documentation promises.
    (with-open-file (f (if (symbolp file) (string file) file))
      ;; NOTE(review): READ honours *READ-EVAL*; bind it to NIL in the caller
      ;; when FILE comes from an untrusted source.
      (loop with eof = (gensym)
            for item = (read f nil eof)
            until (eq item eof)
            collect item))))
206 ;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog))
                                    (cols (when file
                                            (count-file-columns file))))
  "Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists representing the columns.
FILE defaults to the result of OPEN-FILE-DIALOG and COLS to the result of
COUNT-FILE-COLUMNS on FILE.  Returns NIL when FILE or COLS is NIL."
  ;; OPEN-FILE-DIALOG is called without arguments: as defined in this file it
  ;; takes none, so passing one signalled an argument-count error.
  (when (and file cols)
    ;; The items are read as one flat list, cut into rows of COLS items, and
    ;; the rows are then transposed into columns.
    (transpose (split-list (read-data-file file) cols))))
216 ;;; FIXME:AJR: ALL THE FOLLOWING NEED TO BE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
217 ;;; FIXME:AJR: use either string or pathname.
(defun path-string-to-path (p s)
  "Return the pathname obtained by appending the string S to the namestring
of the pathname designator P.  No directory separator is inserted."
  (let* ((base (namestring p))
         (joined (concatenate 'string base s)))
    (pathname joined)))
(defun load-data (file)
  "Args: (file) as string
Read in data file from the data examples library.
FILE is looked up in *LISPSTAT-DATA-DIR* first; if it does not exist there,
it is loaded from *LISPSTAT-EXAMPLES-DIR* instead.  Returns the value of the
LOAD that succeeded; signals an error if FILE is in neither directory."
  ;; LOAD signals an error for a missing file unless :IF-DOES-NOT-EXIST NIL is
  ;; given, so without it the second directory could never act as a fallback.
  ;; OR also makes sure the file is loaded from one directory only.
  (or (load (path-string-to-path *lispstat-data-dir* file)
            :if-does-not-exist nil)
      (load (path-string-to-path *lispstat-examples-dir* file))))
(defun load-example (file)
  "Args: (file) as string
Read in lisp example file from the examples library.
FILE is looked up in *LISPSTAT-EXAMPLES-DIR* first; if it does not exist
there, it is loaded from *LISPSTAT-DATA-DIR* instead.  Returns the value of
the LOAD that succeeded; signals an error if FILE is in neither directory."
  ;; LOAD signals an error for a missing file unless :IF-DOES-NOT-EXIST NIL is
  ;; given, so without it the second directory could never act as a fallback.
  ;; OR also makes sure the file is loaded from one directory only.
  (or (load (path-string-to-path *lispstat-examples-dir* file)
            :if-does-not-exist nil)
      (load (path-string-to-path *lispstat-data-dir* file))))
236 ;;;;
237 ;;;; Listing and Saving Variables and Functions
238 ;;;;
(defvar *variables* '()
  "List of the symbols that have been defined through the DEF macro.")

(defvar *ask-on-redefine* nil
  "When non-NIL, DEF asks for confirmation before giving a new value to a
variable that is already bound.")
(defmacro def (symbol value)
  "Syntax: (def var form)
VAR is not evaluated and must be a symbol.  The value of FORM is assigned to
VAR, VAR is added to the list *VARIABLES* of def'ed variables, and VAR is
returned.  When VAR is already bound and *ASK-ON-REDEFINE* is not nil, the
user is asked whether to redefine it; if the answer is no, nothing is changed
and NIL is returned."
  `(when (or (not *ask-on-redefine*)
             (not (boundp ',symbol))
             (y-or-n-p "Variable has a value. Redefine?"))
     ;; An already bound variable is simply assigned; a new one is
     ;; established with DEFVAR.
     (cond ((boundp ',symbol) (setf ,symbol ,value))
           (t (defvar ,symbol ,value)))
     (pushnew ',symbol *variables*)
     ',symbol))
(defun variables-list ()
  "Return the def'ed variables as symbols: the names of the symbols in
*VARIABLES* are passed through SORT-DATA and each resulting name is interned
in the current package."
  (let* ((names (mapcar #'string *variables*))
         (ordered (sort-data names)))
    (mapcar #'intern ordered)))
(defun variables ()
  "Args:()
Returns a list of the names of all def'ed variables, as produced by
VARIABLES-LIST, or NIL when no variable has been def'ed."
  (when *variables*
    (variables-list)))
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
whose print name is used) not ending in .lsp. The VARS and their current values
are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
load command.  An existing file of that name is replaced.  Returns the list
of saved variables."
  (let ((vars (if (consp vars) vars (list vars)))
        ;; NAMESTRING does not accept symbols, so a symbol is converted to
        ;; its print name explicitly, as documented.
        (path (concatenate 'string
                           (if (symbolp file) (string file) (namestring file))
                           ".lsp")))
    ;; Without :IF-EXISTS the ANSI default may signal an error when the file
    ;; already exists, which made saving twice to the same name fail.
    (with-open-file (f path :direction :output :if-exists :supersede)
      (dolist (x vars)
        (let ((v (symbol-value x)))
          ;; Objects are written through their :SAVE message; every other
          ;; value is written as a quoted literal.
          (if (objectp v)
              (format f "(def ~s ~s)~%" x (send v :save))
              (format f "(def ~s '~s)~%" x v)))))
    vars))
284 (defun undef (v)
285 "Args: (v)
286 If V is the symbol of a defined variable the variable it is unbound and
287 removed from the list of defined variables. If V is a list of variable
288 names each is unbound and removed. Returns V."
289 (dolist (s (if (listp v) v (list v)))
290 (when (member s *variables*)
291 (setq *variables* (delete s *variables*))
292 (makunbound s)))