factored duplicate structure/code from data into import, and made import work again...
[CommonLispStat.git] / src / data / data.lisp
blob78720ed84eb03956e4492bbe0d7aac29b3954010
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2009-08-26 13:54:50 tony>
4 ;;; Creation: <2005-08-xx 21:34:07 rossini>
5 ;;; File: data.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2005--2009, AJ Rossini. GPLv2
8 ;;; Purpose: data package for lispstat
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This organization and structure is new to the 21st Century
15 ;;; version.
17 (in-package :lisp-stat-data)
19 ;;; consider that data has 3 genotypic characteristics. The first
20 ;;; is storage form -- scalar, vector, array. The second would be datarep
21 ;;; ("computer science simplistic data" type), in particular integer,
22 ;;; real, string, symbol. The last would be statistical type
23 ;;; ("usually handled by computer science approaches via metadata"),
24 ;;; augmenting datarep type with use in a statistical context,
25 ;;; i.e. that would include nominal, ordinal, integer, continuous,
26 ;;; interval (orderable subtypes). Clearly, the statistical type can
27 ;;; be inherited, likewise the numerical type as well. The form can
28 ;;; be pushed up or simplified as necessary, but this can be
29 ;;; challenging.
31 ;;; The first approach considered is for CLS to handle this as
32 ;;; lisp-only structures. When we realize an "abstract" model, the
33 ;;; data should be pushed into an appropriate form (either "en masse",
34 ;;; or "on-demand") into a linear algebra framework.
36 ;;; There is some excellent material on this by John Chambers in one
37 ;;; of his earlier books. Reference is being ignored to encourage
38 ;;; people to read them all. With all due respect to John, they've
39 ;;; lasted quite well, but need to be updated.
41 ;;; The purpose of this package is to manage data which will be
42 ;;; processed by LispStat. In particular, it will be important to
43 ;;; register variables, datasets, relational structures, and other
44 ;;; objects which could be the target for statistical modeling and
45 ;;; inference.
;; Package-level table, created with MAKE-HASH-TABLE's default test.
;; Nothing in this section of the file reads or writes it.
(defvar *lisp-stat-data-table* (make-hash-table)
  "Hash table meant to record the data that could be used by LispStat.
NOTE(review): key and value conventions are not visible here -- confirm
against the code that populates the table.")

;; Starts at 0; presumably tracks the number of entries in
;; *LISP-STAT-DATA-TABLE* -- confirm against the code that updates it.
(defvar *lisp-stat-data-count* 0
  "number of items currently recorded.")
53 ;;; Data (storage) Types, dt-{.*}
54 ;;;
55 ;;; Data types are the representation of data from a computer-science
56 ;;; perspective, i.e. what it is that they contain, in the sense of
57 ;;; scalars, arrays, networks, but not the actual values or
58 ;;; statistical behaviour of the values. These types include
59 ;;; particular forms of compound types (i.e. dataframe is array-like,
60 ;;; but types differ, difference is row-wise, while array is a
61 ;;; compound of elements of the same type).
62 ;;;
63 ;;; This is completely subject to change, AND HAS. We use a class
64 ;;; hierarchy to generate the types.
67 ;;; Statistical Variable Types, sv-{.*} or statistical-variable-{.*}
68 ;;;
69 ;;; Statistical variable types work to represent the statistical
70 ;;; category represented by the variable, i.e. nominal, ordinal,
71 ;;; integral, continuous, ratio. This metadata can be used to hint at
72 ;;; appropriate analysis methods -- or perhaps more critically, to
73 ;;; define how these methods will fail in the final interpretation.
75 ;;; originally, these were considered to be types, but now, we
76 ;;; consider this in terms of abstract classes and mix-ins.
80 ;;;;
81 ;;;; Listing and Saving Variables and Functions
82 ;;;;
;; Symbols registered through the DEF macro.  DEF adds to the front with
;; PUSHNEW, so each symbol appears at most once.
(defvar *variables* nil)
;; When non-NIL, DEF asks for confirmation (Y-OR-N-P) before giving a new
;; value to a symbol that is already bound.
(defvar *ask-on-redefine* nil)
(defmacro def (symbol value)
  "Syntax: (def var form)
Bind the symbol VAR (which is not evaluated) to the value of FORM and
record VAR in *VARIABLES*, the list of def'ed variables.  The expansion
returns VAR.
When VAR already has a value and *ASK-ON-REDEFINE* is non-NIL the user is
asked first; answering no keeps the old value, but VAR is still recorded
in *VARIABLES* and returned."
  (let ((name `(quote ,symbol)))
    `(progn
       ;; Skip the rebinding only when all three hold: asking is enabled,
       ;; the variable is bound, and the user answers no.
       (when (or (not *ask-on-redefine*)
                 (not (boundp ,name))
                 (y-or-n-p "Variable has a value. Redefine?"))
         (defparameter ,symbol ,value))
       (pushnew ,name *variables*)
       ,name)))
(defun variables-list ()
  "Return the def'ed variables as a lisp list of symbols.
The names of the symbols in *VARIABLES* are put in the order produced by
SORT-DATA and each name is then turned back into a symbol with INTERN."
  (let* ((names (mapcar #'string *variables*))
         (ordered (sort-data names)))
    ;; INTERN looks each name up in the package current at call time.
    (loop for name in ordered
          collect (intern name))))
(defun variables ()
  "Args: ()
Returns a list of symbols naming all def'ed variables, in the order
SORT-DATA gives their names, or NIL when nothing has been def'ed."
  (when *variables*
    (let ((sorted-names (sort-data (mapcar #'string *variables*))))
      ;; Names are interned in the package current at call time.
      (mapcar #'intern sorted-names))))
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols.  FILE-NAME-ROOT is a string, a
pathname, or a symbol whose print name is used; it should not end in .lsp.
One form per variable is written to the file FILE-NAME-ROOT.lsp in a form
suitable for use with the load command: (def VAR 'VALUE) for ordinary
values, or (def VAR FORM) when the value satisfies OBJECTP, where FORM is
whatever the object returns for the :SAVE message.  An existing file with
that name is replaced.  Returns the list of variables that were written."
  (let ((vars (if (listp vars) vars (list vars)))
        ;; NAMESTRING does not accept symbols, so take a symbol's print
        ;; name directly, as the documentation promises.
        (root (if (symbolp file) (symbol-name file) (namestring file))))
    (with-open-file (f (concatenate 'string root ".lsp")
                       :direction :output
                       ;; OPEN's default for an existing file is normally
                       ;; to signal an error, which would make saving to
                       ;; the same root twice fail.
                       :if-exists :supersede)
      ;; DOLIST rather than MAPCAR: only the writes matter here.
      (dolist (x vars)
        (let ((v (symbol-value x)))
          (if (objectp v)
              (format f "(def ~s ~s)~%" x (send v :save))
              (format f "(def ~s '~s)~%" x v)))))
    vars))
128 (defun undef (v)
129 "Args: (v)
130 If V is the symbol of a defined variable the variable it is unbound and
131 removed from the list of defined variables. If V is a list of variable
132 names each is unbound and removed. Returns V."
133 (dolist (s (if (listp v) v (list v)))
134 (when (member s *variables*)
135 (setq *variables* (delete s *variables*))
136 (makunbound s)))