data import, using RSM strings.
[CommonLispStat.git] / src / data / dsv-examples.lisp
blob908dd7798adafc4568a185b28dc55f61d88b739c
2 (progn ;; FIXME: read data from CSV file. To do.
5 ;; challenge is to ensure that we get mixed arrays when we want them,
6 ;; and single-type (simple) arrays in other cases.
;; Load the comma-separated numeric example and bind the result to *csv-num*.
;; The pathname is an absolute, machine-specific location.
;; NOTE(review): the effect of :trace T is defined inside cybertiggyr-dsv and
;; is not visible here -- confirm before relying on it.
9 (defparameter *csv-num*
10 (cybertiggyr-dsv::load-escaped
11 #p"/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv"
12 :field-separator #\,
13 :trace T))
;; Probe: first element of the first element of the loaded data
;; (presumably the first field of the first record -- TODO confirm).
15 (nth 0 (nth 0 *csv-num*))
;; Rebind *csv-num* from the second example file, which uses #\: as the
;; field separator; this replaces the value loaded above.
17 (defparameter *csv-num*
18 (cybertiggyr-dsv::load-escaped
19 #p"/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric2.dsv"
20 :field-separator #\:))
;; Same probe as above, now against the colon-separated data.
22 (nth 0 (nth 0 *csv-num*))
25 ;; The handling of these types should be comparable to what we do for
26 ;; matrices, but without the numerical processing. i.e. mref, bind2,
27 ;; make-dataframe, and the class structure should be similar.
29 ;; With numerical data, there should be a straightforward mapping from
30 ;; the data.frame to a matrix. With categorical data (including
31 ;; dense categories such as doc-strings, as well as sparse categories
32 ;; such as binary data), we need to include metadata about ordering,
33 ;; coding, and such. So the structures should probably consider
35 ;; Using the CSV file:
(defun parse-number (s)
  "Read a number from the string S using the Lisp reader.

Returns the number when the first object read from S is a number
(whitespace around it is skipped by the reader), otherwise NIL.

Empty or whitespace-only strings, and text the reader cannot parse
(an unbalanced paren, a #. form, an unknown package prefix such as
foo:bar), also yield NIL instead of signalling, so the function can be
used as a per-field filter on messy delimiter-separated data.

A non-string S still signals a TYPE-ERROR, as before."
  (let* ((*read-eval* nil)          ; never evaluate #. forms found in data
         (n (handler-case
                ;; EOF-ERROR-P = NIL: an empty field reads as NIL
                ;; rather than signalling END-OF-FILE.
                (read-from-string s nil nil)
              ;; Reader failures (READER-ERROR, END-OF-FILE inside an
              ;; object, unknown package) simply mean "not a number".
              ((or stream-error parse-error package-error) () nil))))
    (if (numberp n) n)))
;; Ad-hoc checks of parse-number on integer text, with and without
;; leading/trailing spaces.
42 (parse-number "34")
43 (parse-number "34 ")
44 (parse-number " 34")
45 (parse-number " 34 ")
;; Decimal text; the additions only succeed if parse-number hands back a
;; number rather than a string.
47 (+ (parse-number "3.4") 3)
48 (parse-number "3.4 ")
49 (parse-number " 3.4")
50 (+ (parse-number " 3.4 ") 3)
;; Non-numeric text: the reader produces a symbol, which is not a number,
;; so parse-number yields NIL.
52 (parse-number "a")
54 ;; (coerce "2.3" 'number) => ERROR
55 ;; (coerce "2" 'float) => ERROR
;; Reload the comma-separated example, this time passing parse-number as
;; the :filter argument.
;; NOTE(review): presumably load-escaped applies the filter to each field so
;; that numeric text arrives as numbers -- confirm against cybertiggyr-dsv.
57 (defparameter *csv-num*
58 (cybertiggyr-dsv::load-escaped
59 #p"/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv"
60 :field-separator #\,
61 :filter #'parse-number
62 :trace T))
;; Probe: first element of the first element of the filtered data.
64 (nth 0 (nth 0 *csv-num*))
;; Same filtered load for the colon-separated example file; rebinds *csv-num*.
66 (defparameter *csv-num*
67 (cybertiggyr-dsv::load-escaped
68 #p"/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric2.dsv"
69 :field-separator #\:
70 :filter #'parse-number))
;; Probe: first element of the first element of the filtered data.
72 (nth 0 (nth 0 *csv-num*))
74 ;; now we've got the DSV code in the codebase, auto-loaded I hope:
;; Evaluate the package's *field-separator* variable; the single-colon
;; reference only reads successfully if the symbol is exported.
75 cybertiggyr-dsv:*field-separator*
;; Load the comma-separated example through a relative pathname (resolved
;; against the current default directory) with no :filter, and keep the
;; result in *example-numeric.csv*.
76 (defparameter *example-numeric.csv*
77 (cybertiggyr-dsv:load-escaped "Data/example-numeric.csv"
78 :field-separator #\,))
;; Evaluate the variable to inspect what was loaded.
79 *example-numeric.csv*
81 ;; The following fails because we still have some string conversion
82 ;; to do. Two options: #1 modify the dsv package (but that means
83 ;; breaking its encapsulation); #2 add a coercion tool (better, but
84 ;; potentially inefficient).
;; Disabled via #+nil: the reader skips this form unless :NIL is on
;; *features*. Kept as a record of the attempted direct COERCE of a loaded
;; field to DOUBLE-FLOAT (presumably the field is still a string at this
;; point -- confirm).
85 #+nil(coerce (nth 3 (nth 3 *example-numeric.csv*)) 'double-float)
87 ;; cases, simple to not so
;; Three spellings of the same decimal text: bare, with a leading space,
;; and with both leading and trailing spaces.
88 (defparameter *test-string1* "1.2")
89 (defparameter *test-string2* " 1.2")
90 (defparameter *test-string3* " 1.2 ")