2 ;;; $Header: /home/gene/library/website/docsrc/dsv/RCS/dsv.lisp,v 395.1 2008/04/20 17:25:46 gene Exp $
4 ;;; Copyright (c) 2005 Gene Michael Stover. All rights reserved.
6 ;;; This program is free software; you can redistribute it and/or modify
7 ;;; it under the terms of the GNU Lesser General Public License as
8 ;;; published by the Free Software Foundation; either version 2 of the
9 ;;; License, or (at your option) any later version.
11 ;;; This program is distributed in the hope that it will be useful,
12 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU Lesser General Public License for more details.
16 ;;; You should have received a copy of the GNU Lesser General Public
17 ;;; License along with this program; if not, write to the Free Software
18 ;;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 ;;; This package provides tools for reading a DSV file on a stream and
22 ;;; returning a list of lists of strings, in row-major form ("list of
23 ;;; rows" format). One needs to convert these strings to something
24 ;;; useful at the end if strings are not useful.
26 (defpackage :cybertiggyr-dsv
28 (:import-from
:cybertiggyr-test
"DEFTEST")
29 (:export
"*END-OF-RECORD*"
36 (in-package :cybertiggyr-dsv
)
39 ;;; UNEXPORTED HELPER FUNCTIONS & STOOF
43 "Return the next character without consuming it, or return STRM on
44 end-of-input or other error."
45 (peek-char nil strm nil strm
))
47 (defun consume-leading-crap (strm crap
)
48 "Read (consume) newlines until the next character is not a newline or
49 there is no next character (end-of-input, which isn't an error)."
50 (loop while
(eql (xpeek strm
) crap
) do
(read-char strm
))
51 'consume-leading-crap
)
53 (defun read-escaped-field (strm terminators escape
)
54 "Return the next field as a string. Return STRM if there is no next
55 field, which is when the stream is already at its end. Assumes caller
56 has already consumed white-space crap that might preceed the field.
57 Consumes the character which ends the field. TERMINATORS is a list
58 of characters & the stream which could terminate the field."
59 (if (eq (xpeek strm
) strm
)
60 strm
; already at end-of-input
61 ;; else, Consume & collect characters until we find a terminator (field
62 ;; terminator, record terminator, or end-of-input). Do not collect
66 until
(member (xpeek strm
) terminators
)
67 collect
(if (eql (xpeek strm
) escape
)
68 ;; It's an escape, so discard it & use the next
69 ;; character, verbatim.
73 ;; else, Use this character.
81 (defvar *field-separator
* #\
:
82 "The default field separator character. It defaults to colon (:).")
84 (defvar *end-of-record
* #\Newline
85 "The end-of-record character. Defaults to Newline.")
88 "The default escape character for unix-style DSV files. It uses a single
89 escape character to allow the field separator character to occur
90 within fields. The escape character can be used to allow an end-of-line
91 character or an escape character to occur in fields, too.
92 Defaults to backslash (\\). You can change it with SETQ. If you do not
93 want to allow separator characters at all, bind it to NIL.")
95 (defun read-escaped (strm &key
(field-separator *field-separator
*)
96 (end-of-record *end-of-record
*)
98 "Read (consume) & return the next DSV record from STRM. The record
99 will be a list of fields. The fields will be strings. Field separator
100 & end-of-record characters may not occur within fields unless escaped.
101 If you don't want to allow any kind of escape, use NIL for the escape
102 character. Since NIL is not a character, it will never be equal to a
103 character read from STRM, so there will be no possible escape character.
104 In fact, you could use any non-character to disable the escape
105 character. Ignors empty lines. On end-of-input, returns STRM. It is
106 an error if an escape character is followed by end-of-input.
108 <example broken=TRUE>
109 (with-open-stream (s (make-stream-from-file \"test.csv\"))
110 (loop (read-escaped s) collecting result))
113 (consume-leading-crap strm end-of-record
)
114 (if (eq (xpeek strm
) strm
)
115 strm
; normal end-of-input
116 ;; else, Let's collect fields until we have read an entire record.
118 (loop until
(member (xpeek strm
) (list strm end-of-record
))
120 (read-escaped-field strm
121 (list strm field-separator
124 (when (eql (xpeek strm
) field-separator
)
125 ;; Consume the character which ended the field.
126 ;; Notice that we do not consume end-of-record
129 (consume-leading-crap strm end-of-record
))))
131 (defun load-escaped (pathname &key
(field-separator *field-separator
*)
132 (end-of-record *end-of-record
*)
136 "Return the entire contents of an escaped DSV file as a list of
137 records. Each record is a list."
138 (with-open-file (strm pathname
:direction
:input
)
139 (labels ((is-good (x) (funcall filter x
))
140 (xread () (read-escaped strm
:field-separator field-separator
141 :end-of-record end-of-record
143 (do ((lst () (if (is-good x
) (cons x lst
) lst
)) ;second (is-good x) was x
147 (when (and trace
(zerop (mod i
1000)))
148 (format trace
"~&~A: [~D] ~S" 'load-escaped i x
))))))
151 ;;; todo: new new new. Document me!!!
153 (defmacro do-escaped
((var pathname
) &body body
)
154 (let ((strm (gensym)))
155 `(with-open-file (,strm
,pathname
:element-type
'character
156 :direction
:input
:if-does-not-exist
:error
)
157 (loop for
,var
= (read-escaped ,strm
)
158 while
(not (eq ,var
,strm
))
160 (truename ,pathname
))))
167 "Null test. Always succeeds."
171 "Test that XPEEK returns the correct character from a stream, does
172 not consume the character. The character is NOT the last in the stream."
173 (with-input-from-string (strm "abc")
174 (and (eql (xpeek strm
) #\a)
175 (eql (read-char strm
) #\a))))
178 "Like TEST0011 except that it tests XPEEK on the last character in the
179 stream. In other words, tests that XPEEK returns the correct value &
180 does not consume it, & that character is the last in the stream."
181 (with-input-from-string (strm "c")
182 (and (eql (xpeek strm
) #\c
)
183 (eql (read-char strm
) #\c
))))
186 "Test XPEEK on an empty stream."
187 (with-input-from-string (strm "")
188 (and (eq (xpeek strm
) strm
)
189 (eq (read-char strm nil strm
) strm
))))
192 "Test CONSUME-LEADING-CRAP on a stream that contains nothing but leading
194 (with-input-from-string (strm (format nil
"~%~%~%"))
195 (and (eql (xpeek strm
) #\Newline
) ; not at end
196 (consume-leading-crap strm
#\Newline
) ; doesn't matter what it returns
197 (eq (read-char strm nil strm
) strm
)))) ; now we're at end
200 "Test CONSUME-LEADING-CRAP on a streeam that starts with leading crap,
201 then has some non-crap."
202 (with-input-from-string (strm (format nil
"~%~%~%a"))
203 (and (eql (xpeek strm
) #\Newline
) ; not at end
204 (consume-leading-crap strm
#\Newline
) ; doesn't matter what it returns
205 (eql (read-char strm
) #\a))))
208 "Test CONSUME-LEADING-CRAP on a stream that starts with non-crap, then
209 has some crap. CONSUME-LEADING-CRAP should not consume the leading
211 (with-input-from-string (strm (format nil
"a~%"))
212 (and (eql (xpeek strm
) #\a) ; not at end
213 (consume-leading-crap strm
#\Newline
) ; doesn't matter what it returns
214 (eql (read-char strm
) #\a)))) ; the "a" char should remain
217 "Test READ-ESCAPED-FIELD on a stream that contains a single field
218 followed by end-of-input. Uses the default field separator, end-of-record
219 character, & escape character. Just test that the field is read, not that
220 the next READ-ESCAPED-FIELD indicates end-of-input."
221 (with-input-from-string (strm "abc")
222 (equal (read-escaped-field strm
223 (list strm
*field-separator
* *end-of-record
*)
228 "Like TEST0020, but also checks that another call to READ-ESCAPED-FIELD
229 indicates end-of-input by returning STRM."
230 (with-input-from-string (strm "abc")
231 (let* ((a (read-escaped-field strm
232 (list strm
*field-separator
* *end-of-record
*)
234 (b (read-escaped-field strm
235 (list strm
*field-separator
* *end-of-record
*)
237 (unless (equal a
"abc")
238 (format t
"~&~A: First read should have returned" 'test0021
)
239 (format t
" ~S, but it returned ~S" "abc" a
))
241 (format t
"~&~A: Second read should have returned" 'test0021
)
242 (format t
" ~S, but it returned ~S" strm b
))
243 (and (equal a
"abc") (eq b strm
)))))
246 "Test that READ-ESCAPED-FIELD works on two consecutive fields."
247 (let ((a "abc") (b "xyz"))
248 (with-input-from-string (strm (format nil
"~A~A~A" a
*field-separator
* b
))
249 (let* ((terminators (list strm
*field-separator
* *end-of-record
*))
250 (xa (read-escaped-field strm terminators
*escape
*))
251 (xseparator (read-char strm
))
252 (xb (read-escaped-field strm terminators
*escape
*))
253 (xstrm (xpeek strm
)))
254 (and (equal xa a
) (eql xseparator
*field-separator
*) (equal xb b
)
258 "Test that READ-ESCAPED-FIELD works on two records of two fields each.
259 The second record does not end with an end-of-record character. It
260 ends with end-of-input on the stream."
261 (let* ((a "abc") (b "123") ; first record
262 (c "def") (d "456") ; second record
263 (string (format nil
"~A~A~A~A~A~A~A" a
*field-separator
* b
264 *end-of-record
* c
*field-separator
* d
)))
265 (with-input-from-string (strm string
)
266 (let* ((terminators (list strm
*field-separator
* *end-of-record
*))
267 (xa (read-escaped-field strm terminators
*escape
*))
268 (xseparator0 (read-char strm
))
269 (xb (read-escaped-field strm terminators
*escape
*))
270 (xend-of-record0 (read-char strm
))
271 (xc (read-escaped-field strm terminators
*escape
*))
272 (xseparator1 (read-char strm
))
273 (xd (read-escaped-field strm terminators
*escape
*))
274 (xstrm (xpeek strm
)))
276 (eql xseparator0
*field-separator
*)
278 (eql xend-of-record0
*end-of-record
*)
280 (eql xseparator1
*field-separator
*)
285 "Like TEST0026 except that the second record ends with an end-of-
287 (let* ((a "abc") (b "123") ; first record
288 (c "def") (d "456") ; second record
289 (string (format nil
"~A~A~A~A~A~A~A~A" a
*field-separator
* b
290 *end-of-record
* c
*field-separator
* d
292 (with-input-from-string (strm string
)
293 (let* ((terminators (list strm
*field-separator
* *end-of-record
*))
294 (xa (read-escaped-field strm terminators
*escape
*))
295 (xseparator0 (read-char strm
))
296 (xb (read-escaped-field strm terminators
*escape
*))
297 (xend-of-record0 (read-char strm
))
298 (xc (read-escaped-field strm terminators
*escape
*))
299 (xseparator1 (read-char strm
))
300 (xd (read-escaped-field strm terminators
*escape
*))
301 (xend-of-record1 (read-char strm
))
302 (xstrm (xpeek strm
)))
304 (eql xseparator0
*field-separator
*)
306 (eql xend-of-record0
*end-of-record
*)
308 (eql xseparator1
*field-separator
*)
310 (eql xend-of-record1
*end-of-record
*)
314 "Test READ-ESCAPED on an input stream containing a single record of a
316 (let* ((record (list "abc"))
317 (string (format nil
"~A" (first record
))))
318 (with-input-from-string (strm string
)
319 (let* ((xrecord (read-escaped strm
))
320 (xstrm (xpeek strm
)))
321 (and (equal xrecord record
)
325 "Test READ-ESCAPED on an input stream containing a single record of two
327 (let* ((record (list "abc" "123"))
328 (string (format nil
"~A~A~A" (first record
) *field-separator
*
330 (with-input-from-string (strm string
)
331 (let* ((xrecord (read-escaped strm
))
332 (xstrm (xpeek strm
)))
333 (and (equal xrecord record
)
337 "Test READ-ESCAPED. After reading the single record of two fields,
338 the stream should be at its end. The record is followed by several
339 end-of-record characters, & the stream should be at its end after
340 reading the record because no records follow the record terminators."
341 (let* ((record (list "abc" "123"))
342 (string (format nil
"~A~A~A~A~A~A" (first record
) *field-separator
*
343 (second record
) *end-of-record
* *end-of-record
*
345 (with-input-from-string (strm string
)
346 (let* ((xrecord (read-escaped strm
))
347 (xstrm (xpeek strm
)))
348 (and (equal xrecord record
)
352 "Test READ-ESCAPED on an input of two, two-field records. The second
353 record is followed by one end-of-record character."
354 (let ((record0 '("aaa" "111"))
355 (record1 '("bbb" "222"))
356 (string (format nil
"aaa~A111~Abbb~A222~A"
357 *field-separator
* *end-of-record
*
358 *field-separator
* *end-of-record
*)))
359 (with-input-from-string (strm string
)
360 (let* ((xrecord0 (read-escaped strm
))
361 (xrecord1 (read-escaped strm
)))
362 (unless (equal xrecord0 record0
)
363 (format t
"~&First record is ~S. Expected ~S." xrecord0 record0
))
364 (unless (equal xrecord1 record1
)
365 (format t
"~&Second record is ~S. Expected~S." xrecord1 record1
))
366 (and (equal xrecord0 record0
)
367 (equal xrecord1 record1
))))))
369 ;;; --- end of file ---