better docs.
[CommonLispStat.git] / src / data / import.lisp
blob1917dfeed308b89779207b66183257c8141bed65
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2008, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; Time-stamp: <2010-01-25 17:15:35 tony>
7 ;;; Creation: <2008-09-03 08:10:00 tony>
8 ;;; File: import.lisp
9 ;;; Author: AJ Rossini <blindglobe@gmail.com>
10 ;;; Copyright: (c)2007--2009, AJ Rossini. GPLv2
11 ;;; Purpose: base structures for importing data into CLS
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
17 (in-package :cls-dataio)
19 ;;; Data I/O
21 ;; We can read 2 types of data -- those which are non-lisp-native
22 ;; data, and those which are lisp-native (lisp-enabled, an extension
23 ;; of lisp-serialized, i.e. data as program as data thingy's).
25 ;; Of the non-native kind, there could be raw sources (ascii file formats)
26 ;; and xml sources (xml -> lisp, possibly with some preprocessing).
28 ;;; Reading from DSV files:
30 ;;; consider either the cybertyggyr-dsv package, or the rsm.string
31 ;;; package. Decision: RSM.STRING
32 ;;; The latter seems to actually work a bit at what we need to
33 ;;; accomplish, is better licensed (i.e. BSD-style) and is now
34 ;;; implemented through filename.dsv->dataframe
36 (defparameter *lisp-stat-data-external-source-formats*
37 '(dsv xml ;; ex of text-based (UTF, ASCII, or similar) formats
38 sql ;; ex of RDBMS call
39 fcs affy)) ;; ex of binary formats
41 (defparameter *lisp-stat-data-import-referencing-type*
42 '(lisp-data-structure reference lisp-function))
44 (defgeneric data-import (source source-format referencing-type)
45 (:documentation "read data from stream srce, in format srce-frmt;
46 return a reftype, which could be a
47 lisp-data-structure, a reference to such, or a lisp
48 function which can be evaluated to generate
49 either."))
51 (defgeneric data-export (data target-format target-referencing-type)
52 (:documentation "write data from stream srce, in format srce-frmt;
53 return a reftype, which could be a
54 lisp-data-structure, a reference to such, or a lisp
55 function which can be evaluated to generate
56 either."))
58 ;;;
59 ;;; Related to data file reading
60 ;;;
62 (defun count-file-columns (fname)
63 "Args: (fname)
64 Returns the number of lisp items on the first nonblank line of file FNAME."
65 (with-open-file (f fname)
66 (if f
67 (let ((line (do ((line (read-line f) (read-line f)))
68 ((or (null line) (< 0 (length line))) line))))
69 (if line
70 (with-input-from-string (s line)
71 (do ((n 0 (+ n 1)) (eof (gensym)))
72 ((eq eof (read s nil eof)) n))))))))
74 (if (not (fboundp 'open-file-dialog))
75 #+dialogs
76 (defun open-file-dialog () ;; why?(&optional set)
77 (get-string-dialog "Enter a data file name:"))
78 #-dialogs
79 (defun open-file-dialog () ;; why? (&optional set)
80 (error "You must provide a file name explicitly")))
83 ;;; General modification approaches.
85 (defgeneric importData (source featureList)
86 (:documentation "command to get data into CLS. Specific methods
87 will need to handle pathnames, internal data structures, and
88 external services such as DBMS's. We would like to be able to do
89 thinks like:
90 (importData MyPathName '(:formattype 'csvString))
91 (importData '(sqlConnection :server host.domain.net :port 666)
92 '(:formattype 'table
93 and so on."))
96 (defun pathname-example (name)
97 (let ((my-path (parse-namestring name)))
98 (values (pathname-name my-path :case :common)
99 (pathname-name my-path :case :local))))
101 (defvar sourceTypes (list 'csv 'lisp 'tsv 'special)
102 "list of possible symbols.
104 Thsees are used to specify source formats that might be supported for
105 input. CSV and TSV are standard, LISP refers to forms, and SPECIAL
106 refers to a FUNCTION which parses as appropriately.")
109 ;;; WRONG LOGIC.
110 (defmethod importData ((fileHandle pathname)
111 (fmt list)) ;sourceTypes))
112 "File-based input for data.
113 Usually used by:
114 (importData (parse-namestring 'path/to/file')
115 (list :format 'csv))
117 (importData myPathName (list :format 'lisp))
119 (let* ((fmtType (getf fmt :format))
120 (newData (getDataAsLists fileHandle fmtType)))
121 (case fmtType
122 ('csv ( ))
123 ('tsv ( ))
124 ('lisp ( ))
125 ('special (let ((parserFcn (getf fmt :special-parser)))))
126 (:default (error "no standard default importData format")))))
129 (defmethod importData ((ds array) (fmt list))
130 "mapping arrays into CLS data.")
133 (defmethod importData ((dsSpec DBMSandSQLextract)
134 (fmt mappingTypes))
135 "mapping DBMS into CLS data.")
139 ;; Support functions
141 (defun filename.dsv->dataframe (filename &optional
142 (delimchar ",")
143 (varnameheader 't)
144 (docstring "This is an amusing dataframe array")
145 (arraystorage-object 'dataframe-array))
146 "Reads the DSV file FILENAME and returns a dataframe-array object.
147 By default, the delimiter is a ',' which can be changed. FIXME: could
148 read first 2 lines, and logically guess if the first is variable name
149 or not. If so, we'd probably like to return what was guessed and how
150 to use it next time if wanted."
151 (let ((csv-file-data (rsm.string:file->number-table
152 filename
153 :delims delimchar)))
154 (let ((var-name-list (if varnameheader
155 (car csv-file-data)
156 (make-labels "V" (length (car csv-file-data)))))
157 (data-list (listoflist:listoflist->array (cdr csv-file-data))))
158 (make-instance arraystorage-object ; 'dataframe-array, but all DF-likes have the following attrs
159 :storage data-list
160 :var-labels var-name-list
161 :doc docstring))))