filename->dataframe becomes filename.dsv->dataframe and is moved into the API. Not...
[CommonLispStat.git] / src / data / import.lisp
blobd56a38e317d6dc3d862f888af2ba190d2a39fae6
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2008, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; Time-stamp: <2009-12-20 22:27:42 tony>
7 ;;; Creation: <2008-09-03 08:10:00 tony>
8 ;;; File: import.lisp
9 ;;; Author: AJ Rossini <blindglobe@gmail.com>
10 ;;; Copyright: (c)2007--2009, AJ Rossini. GPLv2
11 ;;; Purpose: base structures for importing data into CLS
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
17 (in-package :lisp-stat-data)
19 ;;; Data I/O
21 ;; We can read 2 types of data -- those which are non-lisp-native
22 ;; data, and those which are lisp-native (lisp-enabled, an extension
23 ;; of lisp-serialized, i.e. data as program as data thingy's).
25 ;; of the non-native, there could be raw sources (ascii file formats),
26 ;; xml sources (xml -> lisp, possibly with some preprocessing).
28 ;;; Reading from DSV files:
30 ;;; consider either the cybertyggyr-dsv package, or the rsm.string
31 ;;; package. The latter seems to actually work a bit at what we need
32 ;;; to accomplish, but the former is a challenge to get right when we
33 ;;; need to think about what it is that we need to get done. The
34 ;;; latter is also better licensed, i.e. BSD-style. The latter is
35 ;;; implemented through filename.dsv->dataframe
(defparameter *lisp-stat-data-external-source-formats*
  '(csv tsv xml   ; examples of text-based (UTF, ASCII, or similar) formats
    sql           ; example of an RDBMS call
    fcs affy)     ; examples of binary formats
  "Symbols naming external source formats: text-based, RDBMS and
binary examples.")
(defparameter *lisp-stat-data-import-referencing-type*
  '(lisp-data-structure
    reference
    lisp-function)
  "Symbols naming the kinds of result an import can be referenced by: a
lisp data structure, a reference, or a lisp function.")
(defgeneric data-import (source source-format referencing-type)
  (:documentation
   "Read data from SOURCE, which is in format SOURCE-FORMAT.
Return a value of the kind named by REFERENCING-TYPE, which could be a
lisp-data-structure, a reference to such, or a lisp function which can
be evaluated to generate either."))
(defgeneric data-export (data target-format target-referencing-type)
  (:documentation
   "Write DATA out in format TARGET-FORMAT.
Return a value of the kind named by TARGET-REFERENCING-TYPE, which
could be a lisp-data-structure, a reference to such, or a lisp function
which can be evaluated to generate either."))
63 ;;; Potentially useful functions
65 ;; the following belongs here if we are working externally, but might
66 ;; belong with data if we are working internally
68 ;; (defmacro with-data (body)
69 ;; "Stream-handling, maintaining I/O through object typing.")
72 ;;;
73 ;;; Related to data file reading
74 ;;;
(defun count-file-columns (fname)
  "Args: (fname)
Returns the number of lisp items on the first nonblank line of file FNAME.
Lines that are empty or contain only spaces, tabs or carriage returns are
skipped.  Returns NIL when the file has no nonblank line at all."
  (with-open-file (f fname)
    ;; READ-LINE is called with EOF-ERROR-P = NIL so that reaching the end
    ;; of an empty or all-blank file yields NIL instead of signalling
    ;; END-OF-FILE; that NIL is what terminates the search below.
    (let ((line (loop for candidate = (read-line f nil nil)
                      while candidate
                      unless (zerop (length (string-trim
                                             '(#\Space #\Tab #\Return)
                                             candidate)))
                        return candidate)))
      (when line
        (with-input-from-string (s line)
          ;; NOTE(review): READ honours *READ-EVAL*, so a `#.' form in the
          ;; file is evaluated; bind *READ-EVAL* to NIL around the call if
          ;; FNAME can come from an untrusted source.
          (let ((eof (gensym)))
            ;; Count successful READs until the unique EOF marker comes back.
            (loop until (eq eof (read s nil eof))
                  count t)))))))
;; Provide a fallback OPEN-FILE-DIALOG only when none is defined yet.
(unless (fboundp 'open-file-dialog)
  #+dialogs
  (defun open-file-dialog () ;; why?(&optional set)
    ;; With the DIALOGS feature: ask for the name through GET-STRING-DIALOG.
    (get-string-dialog "Enter a data file name:"))
  #-dialogs
  (defun open-file-dialog () ;; why? (&optional set)
    ;; Without the DIALOGS feature there is no way to ask, so signal an error.
    (error "You must provide a file name explicitly")))
(defun read-data-file (&optional (file (open-file-dialog)))
  "Args: (file)
Returns a list, in file order, of all lisp objects READ from FILE.
FILE is handed to WITH-OPEN-FILE; when FILE is NIL the result is NIL."
  (when file
    (with-open-file (in file)
      (when in
        ;; A fresh uninterned symbol serves as the end-of-file marker, so no
        ;; object that can be read from the file is mistaken for it.
        (loop with end-marker = (gensym)
              for object = (read in nil end-marker)
              until (eq object end-marker)
              collect object)))))
110 ;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog))
                                    (cols (and file
                                               (count-file-columns file))))
  "Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists
representing the columns.  Returns NIL when FILE or COLS is NIL."
  (when (and file cols)
    ;; Read every object, cut the flat list into pieces with SPLIT-LIST
    ;; using COLS, then TRANSPOSE the result.
    (let ((rows (split-list (read-data-file file) cols)))
      (transpose rows))))
121 ;;; FIXME:AJR: ALL THE FOLLOWING NEED TO BE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
122 ;;; FIXME:AJR: use either string or pathname.
(defun path-string-to-path (p s)
  "Return the pathname whose namestring is the namestring of P with S
appended."
  (let ((full-name (concatenate 'string (namestring p) s)))
    (pathname full-name)))
(defun load-data (file)
  "Args: (file) as string
Read in data file from the data examples library.
FILE is appended to *CLS-DATA-DIR* and the resulting pathname is loaded.
Returns the value of LOAD."
  ;; Load exactly once.  The previous form, (if (load X) (load X)), loaded
  ;; the file a second time whenever the first LOAD succeeded, running all
  ;; of its side effects twice.
  (load (path-string-to-path *cls-data-dir* file)))
(defun load-example (file)
  "Args: (file) as string
Read in lisp example file from the examples library.
FILE is appended to *CLS-EXAMPLES-DIR* and the resulting pathname is
loaded.  Returns the value of LOAD."
  ;; Load exactly once.  The previous form, (if (load X) (load X)), loaded
  ;; the file a second time whenever the first LOAD succeeded, running all
  ;; of its side effects twice.
  (load (path-string-to-path *cls-examples-dir* file)))
142 ;;; Saving Variables and Functions
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols.  FILE-NAME-ROOT is the name of the
output file without its .lsp ending; it is passed through NAMESTRING.
Each symbol in VARS and its current value are written to the file
FILE-NAME-ROOT.lsp as a (def ...) form suitable for use with the load
command.  Returns the list of symbols written."
  (let ((symbols (if (consp vars) vars (list vars)))
        (target (concatenate 'string (namestring file) ".lsp")))
    (with-open-file (out target :direction :output)
      (dolist (sym symbols)
        (let ((value (symbol-value sym)))
          (if (objectp value)
              ;; Objects are asked for their own saving form via :SAVE.
              (format out "(def ~s ~s)~%" sym (send value :save))
              ;; Any other value is written as quoted data.
              (format out "(def ~s '~s)~%" sym value))))
      symbols)))
164 ;;; General modification approaches.
(defgeneric importData (source featureList)
  (:documentation
   "Command to get data into CLS.  Specific methods will need to handle
pathnames, internal data structures, and external services such as
DBMS's.  We would like to be able to do things like:

  (importData MyPathName '(:formattype 'csvString))
  (importData '(sqlConnection :server host.domain.net :port 666)
              '(:formattype 'table))

and so on."))
(defun pathname-example (name)
  "Parse NAME as a namestring and return two values: its name component
in :COMMON case and its name component in :LOCAL case."
  (let ((parsed (parse-namestring name)))
    (flet ((name-in (case-convention)
             (pathname-name parsed :case case-convention)))
      (values (name-in :common)
              (name-in :local)))))
(defvar sourceTypes
  (list 'csv 'lisp 'tsv 'special)
  "List of possible symbols.

These are used to specify source formats that might be supported for
input.  CSV and TSV are standard, LISP refers to forms, and SPECIAL
refers to a FUNCTION which parses as appropriate.")
190 ;;; WRONG LOGIC.
(defmethod importData ((fileHandle pathname)
                       (fmt list)) ;sourceTypes))
  "File-based input for data.
Usually used by:

  (importData (parse-namestring 'path/to/file')
              (list :format 'csv))

  (importData myPathName (list :format 'lisp))

FMT is a property list; its :FORMAT entry selects the source format and,
for the SPECIAL format, its :SPECIAL-PARSER entry names the parser.
Signals an error when :FORMAT is not one of CSV, TSV, LISP or SPECIAL.
FIXME: still a stub -- every recognised format currently returns NIL."
  (let* ((fmtType (getf fmt :format))
         (newData (getDataAsLists fileHandle fmtType)))
    ;; NEWDATA is fetched but not yet consumed by any branch.
    (declare (ignorable newData))
    ;; CASE keys are not evaluated, so they must be written unquoted: a
    ;; clause headed by 'csv would match the symbols QUOTE and CSV.
    (case fmtType
      (csv nil)
      (tsv nil)
      (lisp nil)
      (special (let ((parserFcn (getf fmt :special-parser)))
                 ;; TODO: apply PARSERFCN once the import logic exists.
                 (declare (ignorable parserFcn))
                 nil))
      ;; OTHERWISE is the fall-through clause; a :default key would only
      ;; match the keyword :DEFAULT itself and never act as a default.
      (otherwise (error "no standard default importData format")))))
(defmethod importData ((ds array)
                       (fmt list))
  ;; Stub for mapping arrays into CLS data.  The string is the only form
  ;; in the body, so it is the method's return value, not a docstring.
  "mapping arrays into CLS data.")
(defmethod importData ((dsSpec DBMSandSQLextract) (fmt mappingTypes))
  ;; Stub for mapping DBMS extracts into CLS data.  The string is the only
  ;; form in the body, so it is the method's return value, not a docstring.
  ;; NOTE(review): the classes DBMSandSQLextract and mappingTypes are not
  ;; defined in this part of the file -- confirm they exist before loading.
  "mapping DBMS into CLS data.")
(defun filename.dsv->dataframe (filename &optional
                                         (delimchar ",")
                                         (varnameheader t)
                                         (docstring "This is an amusing dataframe array")
                                         (arraystorage-object 'dataframe-array))
  "Reads the DSV file FILENAME and returns a dataframe-array object.

DELIMCHAR is the delimiter handed to the reader; by default it is a ','.
VARNAMEHEADER, when true (the default), means the first row of the file
holds the variable names; when NIL, labels are generated with MAKE-LABELS
and every row of the file is kept as data.
DOCSTRING becomes the :DOC of the result.
ARRAYSTORAGE-OBJECT is the class to instantiate; it must accept the
:STORAGE, :VAR-LABELS and :DOC initargs.

FIXME: could read first 2 lines, and logically guess if the first is variable name or not."
  ;; NOTE(review): presumably FILE->NUMBER-TABLE returns a list of rows,
  ;; each row a list of fields -- confirm against rsm.string.
  (let* ((rows (rsm.string:file->number-table filename :delims delimchar))
         (var-name-list (if varnameheader
                            (first rows)
                            (make-labels "V" (length (first rows)))))
         ;; Drop the first row only when it really is a header.  The old
         ;; code always took the CDR, which silently discarded the first
         ;; data row whenever VARNAMEHEADER was NIL.
         (data-rows (if varnameheader
                        (rest rows)
                        rows)))
    (make-instance arraystorage-object ; 'dataframe-array, but all DF-likes have the following attrs
                   :storage (listoflist:listoflist->array data-rows)
                   :var-labels var-name-list
                   :doc docstring)))