CSV reader task entered
[CommonLispStat.git] / src / data / import.lisp
blob27569312b2002e524eb73954ca9b25173b8fd0a5
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2008, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; Time-stamp: <2009-03-16 14:47:15 tony>
7 ;;; Creation: <2008-09-03 08:10:00 tony>
8 ;;; File: import.lisp
9 ;;; Author: AJ Rossini <blindglobe@gmail.com>
10 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
11 ;;; on how it arrives.
12 ;;; Purpose: base structures for importing data into CLS
14 ;;; What is this talk of 'release'? Klingons do not make software
15 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
16 ;;; designers and quality assurance people in its wake.
;; Define the package from CL-USER, then switch into it for the rest of the file.
(in-package #:cl-user)

(defpackage #:lisp-stat-data-import
  (:documentation "Data I/O and similar import technologies.")
  (:nicknames #:ls-data-import)
  (:use #:common-lisp
        #:lisp-stat-object-system
        #:lisp-stat-data)
  ;; Both COMMON-LISP and LISP-STAT-OBJECT-SYSTEM are used; for these three
  ;; names the object system's symbols are the ones made accessible here.
  (:shadowing-import-from #:lisp-stat-object-system
                          #:slot-value #:call-method #:call-next-method)
  (:export #:data-import #:data-export))

(in-package #:lisp-stat-data-import)
33 ;;; The purpose of this package is to provide the basic structures for
34 ;;; importing data, to be further processed by LispStat.
38 ;;; Data I/O
40 ;; We can read 2 types of data -- those which are non-lisp-native
41 ;; data, and those which are lisp-native (lisp-enabled, an extension
42 ;; of lisp-serialized, i.e. data as program as data thingy's).
44 ;; Of the non-native, there could be raw sources (ASCII file formats) and
45 ;; XML sources (xml -> lisp, possibly with some preprocessing).
(defparameter *lisp-stat-data-external-source-formats*
  '(;; examples of text-based (UTF, ASCII, or similar) formats
    csv tsv xml
    ;; example of an RDBMS call
    sql
    ;; examples of binary formats
    fcs affy)
  "Symbols naming example external (non-lisp-native) data source formats.")
(defparameter *lisp-stat-data-import-referencing-type*
  '(lisp-data-structure
    reference
    lisp-function)
  "Symbols naming the ways imported data can be referred to.
Presumably these are the values intended for the referencing-type
argument of DATA-IMPORT -- confirm against its methods.")
(defgeneric data-import (source source-format referencing-type)
  (:documentation
   "Read data from SOURCE, which is in format SOURCE-FORMAT.
Return the data as indicated by REFERENCING-TYPE, which could be a
lisp-data-structure, a reference to such, or a lisp function which
can be evaluated to generate either."))
(defgeneric data-export (data target-format target-referencing-type)
  (:documentation
   "Write DATA out in format TARGET-FORMAT.
TARGET-REFERENCING-TYPE presumably mirrors the referencing-type argument
of DATA-IMPORT (a lisp-data-structure, a reference to such, or a lisp
function which can be evaluated to generate either) and selects how the
exported result is handed back -- no method is defined in this file, so
confirm against the implementing methods."))
74 ;;; Potentially useful functions
76 ;; the following belongs here if we are working externally, but might
77 ;; belong with data if we are working internally
79 ;; (defmacro with-data (body)
80 ;; "Stream-handling, maintaining I/O through object typing.")
83 ;;;
84 ;;; Related to data file reading
85 ;;;
(defun count-file-columns (fname)
  "Args: (fname)
Returns the number of lisp items on the first nonblank line of file FNAME,
or NIL if the file has no nonblank line.  A line counts as blank when it is
empty or contains only spaces, tabs or carriage returns.  Signals an error
if FNAME cannot be opened."
  (flet ((blank-p (line)
           (zerop (length (string-trim '(#\Space #\Tab #\Return) line)))))
    (with-open-file (f fname)
      ;; Ask READ-LINE to return NIL at end of file instead of signalling
      ;; END-OF-FILE, so an empty or all-blank file yields NIL.
      (let ((line (loop for candidate = (read-line f nil nil)
                        while (and candidate (blank-p candidate))
                        finally (return candidate))))
        (when line
          (with-input-from-string (s line)
            ;; NOTE(review): READ honours *READ-EVAL*; bind it to NIL in the
            ;; caller if FNAME may come from an untrusted source.
            (loop with eof = (gensym)
                  until (eq eof (read s nil eof))
                  count t)))))))
;; Provide a fallback OPEN-FILE-DIALOG only when none is already defined.
;; The callers in this file invoke it as (open-file-dialog t), so the
;; fallback must accept one optional argument; it is not used here.
(unless (fboundp 'open-file-dialog)
  #+dialogs
  (defun open-file-dialog (&optional set)
    "Prompt for a data file name via GET-STRING-DIALOG and return its result.
SET is accepted for call compatibility and ignored."
    (declare (ignore set))
    (get-string-dialog "Enter a data file name:"))
  #-dialogs
  (defun open-file-dialog (&optional set)
    "Without dialog support there is no way to ask for a file name, so
signal an error.  SET is accepted for call compatibility and ignored."
    (declare (ignore set))
    (error "You must provide a file name explicitly")))
(defun read-data-file (&optional (file (open-file-dialog t)))
  "Args: (file)
Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
in which case the symbol's print name is used.  Returns NIL when FILE is NIL.
Signals an error if the file cannot be opened."
  (when file
    (let ((eof (gensym))
          ;; OPEN does not accept symbols, so convert to the print name
          ;; as the documentation promises.
          (path (if (symbolp file) (symbol-name file) file)))
      (with-open-file (f path)
        ;; NOTE(review): READ honours *READ-EVAL*; bind it to NIL in the
        ;; caller if FILE may come from an untrusted source.
        (loop for item = (read f nil eof)
              until (eq item eof)
              collect item)))))
121 ;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog t))
                                    (cols (when file
                                            (count-file-columns file))))
  "Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists
representing the columns.  COLS defaults to the value of
COUNT-FILE-COLUMNS for FILE.  Returns NIL when FILE or COLS is NIL."
  (when (and file cols)
    ;; The flat list of objects is cut up by SPLIT-LIST using COLS and the
    ;; result is handed to TRANSPOSE.
    (let ((pieces (split-list (read-data-file file) cols)))
      (transpose pieces))))
131 ;;; FIXME:AJR: ALL THE FOLLOWING NEED TO BE SOLVED BY PLATFORM-INDEP PATHNAME WORK!
132 ;;; FIXME:AJR: use either string or pathname.
(defun path-string-to-path (p s)
  "Return the pathname whose namestring is the namestring of P followed
directly by the string S.  No separator is inserted between them."
  (let* ((prefix (namestring p))
         (full-name (concatenate 'string prefix s)))
    (pathname full-name)))
(defun load-data (file)
  "Args: (file) as string
Read in data file from the data examples library.
FILE is appended to *LISPSTAT-DATA-DIR* to form the path, which is loaded
exactly once; the value of LOAD is returned."
  ;; The previous version loaded the file a second time whenever the first
  ;; LOAD succeeded, repeating every side effect in the file.
  (load (path-string-to-path *lispstat-data-dir* file)))
(defun load-example (file)
  "Args: (file) as string
Read in lisp example file from the examples library.
FILE is appended to *LISPSTAT-EXAMPLES-DIR* to form the path, which is
loaded exactly once; the value of LOAD is returned."
  ;; The previous version loaded the file a second time whenever the first
  ;; LOAD succeeded, repeating every side effect in the file.
  (load (path-string-to-path *lispstat-examples-dir* file)))
152 ;;; Saving Variables and Functions
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
whose print name is used) not ending in .lsp. The VARS and their current values
are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
load command.  An existing file of that name is replaced.  Returns the list of
variables written."
  (let ((vars (if (consp vars) vars (list vars)))
        ;; NAMESTRING does not accept symbols, so take the print name
        ;; explicitly as the documentation promises.
        (root (if (symbolp file) (symbol-name file) (namestring file))))
    (with-open-file (f (concatenate 'string root ".lsp")
                       :direction :output
                       ;; make re-saving to the same root work on every
                       ;; implementation instead of depending on the default
                       :if-exists :supersede)
      (dolist (x vars)
        (let ((v (symbol-value x)))
          (if (objectp v)
              ;; objects supply their own re-creation form via :save
              (format f "(def ~s ~s)~%" x (send v :save))
              ;; any other value is written as a quoted literal
              (format f "(def ~s '~s)~%" x v)))))
    vars))
174 ;;; General modification approaches.
(defgeneric importData (source featureList)
  (:documentation "Command to get data into CLS.  Specific methods
will need to handle pathnames, internal data structures, and
external services such as DBMS's.  FEATURELIST is a property list; the
pathname method in this file reads its :format entry.  We would like to
be able to do things like:
   (importData MyPathName (list :format 'csv))
   (importData '(sqlConnection :server host.domain.net :port 666)
               (list :format 'table))
and so on."))
(defun pathname-example (name)
  "Parse NAME as a namestring and return two values: the name component of
the resulting pathname in :COMMON case, then the same component in :LOCAL
case."
  (let* ((parsed (parse-namestring name))
         (common-name (pathname-name parsed :case :common))
         (local-name (pathname-name parsed :case :local)))
    (values common-name local-name)))
(defvar sourceTypes
  ;; COPY-LIST keeps the value a freshly allocated list.
  (copy-list '(csv lisp tsv special))
  "List of possible symbols.

These are used to specify source formats that might be supported for
input.  CSV and TSV are standard, LISP refers to forms, and SPECIAL
refers to a FUNCTION which parses as appropriate.")
199 ;;; WRONG LOGIC.
(defmethod importData ((fileHandle pathname)
                       (fmt list)) ;sourceTypes))
  "File-based input for data.
Usually used by:
   (importData (parse-namestring \"path/to/file\")
               (list :format 'csv))
or
   (importData myPathName (list :format 'lisp))

FMT is a property list.  Its :format entry selects the source format
(one of CSV, TSV, LISP or SPECIAL); for SPECIAL the :special-parser
entry is looked up as well.  Any other format signals an error.
The per-format processing is not implemented yet, so every supported
format currently returns NIL."
  (let* ((fmtType (getf fmt :format))
         (newData (getDataAsLists fileHandle fmtType)))
    ;; NEWDATA is fetched but not yet consumed by any branch.
    (declare (ignorable newData))
    ;; CASE keys are not evaluated, so they must be bare symbols: a key
    ;; written as 'csv is the list (QUOTE CSV) and would also match QUOTE.
    (case fmtType
      ((csv tsv lisp)
       ;; TODO: convert NEWDATA into CLS data.
       nil)
      (special
       (let ((parserFcn (getf fmt :special-parser)))
         ;; TODO: apply PARSERFCN; for now it is only looked up.
         (declare (ignorable parserFcn))
         nil))
      ;; OTHERWISE is the real default clause; a (:default ...) clause only
      ;; matches the keyword :DEFAULT.
      (otherwise (error "no standard default importData format")))))
(defmethod importData ((ds array) (fmt list))
  ;; Placeholder method for array sources.  The string below is the only
  ;; body form, so it is the method's return value rather than a docstring.
  (declare (ignorable ds fmt))
  "mapping arrays into CLS data.")
(defmethod importData ((dsSpec DBMSandSQLextract)
                       (fmt mappingTypes))
  ;; Placeholder method for DBMS sources.  The string below is the only
  ;; body form, so it is the method's return value rather than a docstring.
  ;; The specializer classes DBMSandSQLextract and mappingTypes are not
  ;; defined in this file.
  (declare (ignorable dsSpec fmt))
  "mapping DBMS into CLS data.")
227 ;;(defmacro with-dataframe (env &rest progn)
228 ;; "Compute using variable names with with.data.frame type semantics.")