From 317395baebe869587f8d48b5b5a2f90ad614c4d7 Mon Sep 17 00:00:00 2001 From: Sumant Oemrawsingh Date: Sat, 31 Mar 2012 02:11:58 +0200 Subject: [PATCH] first commit --- README | 18 ++++ ascii-data.asd | 11 +++ ascii-data.lisp | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ package.lisp | 17 ++++ 4 files changed, 299 insertions(+) create mode 100644 README create mode 100644 ascii-data.asd create mode 100644 ascii-data.lisp create mode 100644 package.lisp diff --git a/README b/README new file mode 100644 index 0000000..e22930f --- /dev/null +++ b/README @@ -0,0 +1,18 @@ +This package provides functions for reading numerical data from ASCII +text files into arrays, and writing numerical data into arrays (up to +rank 2) into ASCII text files. + +If you compare this to numpy's loadtxt, keep in mind that loadtxt will +return data consisting of a single row or single column as an array of +rank 1, while it will return multi-row, multi-column data as an array +of rank 2. Read-ascii-array will will always return an array of rank +2. This seems to make most sense to me from a programming and ASCII +layout point of view. + +For behaviour similar to numpy's loadtxt, use +read-ascii-vector-or-array, which will reduce the rank for you when +possible. + +Note that this package relies on parse-float, which you can find on +github: +git clone git://github.com/soemraws/parse-float.git \ No newline at end of file diff --git a/ascii-data.asd b/ascii-data.asd new file mode 100644 index 0000000..e92b0e5 --- /dev/null +++ b/ascii-data.asd @@ -0,0 +1,11 @@ +;;;; ascii-data.asd + +(asdf:defsystem #:ascii-data + :name "parse-float" + :description "Read numerical data from an ascii text file into an array and vice versa." + :license "Public Domain" + :author "Sumant Oemrawsingh" + :depends-on (#:parse-float) + :components ((:file "package") + (:file "ascii-data" + :depends-on ("package")))) diff --git a/ascii-data.lisp b/ascii-data.lisp new file mode 100644 index 0000000..f2dd807 --- /dev/null +++ b/ascii-data.lisp @@ -0,0 +1,253 @@ +;;;; ascii-data.lisp +;;;; +;;;; A quick and dirty way of loading numerical data from an ASCII +;;;; (text) file into a CL-ARRAY. +;;;; +;;;; (C) 2012, Sumant S.R. Oemrawsingh + +(in-package #:loadtxt) + +(eval-when (compile) + (declaim (optimize (speed 3) (safety 1) (debug 0)))) + +(defvar *comment-characters* '(#\# #\' #\;) + "LIST of characters that sigal a comment in the data.") + +;;;; Utility functions and macros + +(let ((type-functions + (list (list 'integer + #'(lambda (string start) + (parse-integer string :start start :junk-allowed t)) + #'(lambda (value &optional formatspec) + (format nil (or formatspec "~D") value))) + (list 'double-float + #'(lambda (string start) + (parse-float:parse-float string :start start :type 'double-float :junk-allowed t)) + #'(lambda (value &optional formatspec) + (format nil (or formatspec "~,18,2,,,,'eE") value))) + (list 'float + #'(lambda (string start) + (parse-float:parse-float string :start start :type 'float :junk-allowed t)) + #'(lambda (value &optional formatspec) + (format nil (or formatspec "~,18,2,,,,'eE") value)))))) + + (defun get-closest-type-function (type) + (car (member type type-functions :key #'car :test #'subtypep))) + + (defun type-reader (type) + (or (cadr (get-closest-type-function type)) + (cadr (get-closest-type-function 'double-float)))) + + (defun type-writer (type) + (or (caddr (get-closest-type-function type)) + (caddr (get-closest-type-function 'double-float))))) + +(defmacro loop-at-most ((n) &body body) + "Modified loop macro that repeats at most N times, if N is a +positive integer. Else, the behaviour is the same as LOOP." + (let ((total (gensym))) + `(let ((,total ,n)) + (if (numberp ,total) + (loop repeat ,total + ,@body) + (loop ,@body))))) + +;;; At the moment, the data is read by creating a list of values. This +;;; list constitutes a row. These rows are then collected into a list, +;;; thus creating a nested list, suitable for providing to +;;; :initial-contents of make-array. +;;; +;;; Another strategy would be to create an adjustable vector with a +;;; fill pointer. Then, vector-push-extend all values into this vector +;;; and finally, having determined the shape of the array, return an +;;; array displaced to that vector (is this even +;;; possible?). Theoretically, this means we could return two values: +;;; the displaced array, and the target vector. A user would then be +;;; free to modify the original vector any way he/she wants +;;; (e.g. adding extra values) and make a new array displaced to the +;;; adjusted vector. +;;; +;;; I'm not sure which approach is "better". Does "better" mean +;;; "faster", or "more flexible"? +(defun read-row* (stream reader &optional number-of-columns) + "Read a row from STREAM and convert the values using READER. if +NUMBER-OF-COLUMNS is given, read at most that many values. The values +are returned in a LIST." + (let ((row (loop for line = (read-line stream nil nil) + while (and line (member (char line 0) *comment-characters* :test #'char=)) + finally (return line)))) + (when row + (loop-at-most (number-of-columns) + with start = 0 + with value + + do (multiple-value-bind (val pos) + (funcall reader row start) + (setf start pos + value val)) + + while value + + collect value)))) + +(defun read-ascii-list* (stream &key (element-type 'float) + number-of-rows + number-of-columns) + "Read data from STREAM into a LIST. The elements will have the +given ELEMENT-TYPE. Optionally, you can limit the NUMBER-OF-ROWS and +NUMBER-OF-COLUMNS that are read, thus truncating the data. If these +are not positive integers (e.g. NIL), just read everything." + (let ((reader (type-reader element-type))) + (loop-at-most (number-of-rows) + for row = (read-row* stream reader number-of-columns) + + unless row + do (loop-finish) + + unless number-of-columns + do (setf number-of-columns (length row)) + + collect row))) + +(defun read-ascii-array* (stream &key (element-type 'float) + number-of-rows + number-of-columns) + "Read data from STREAM into a CL-ARRAY. The array will have the +given ELEMENT-TYPE. STREAM must contain a rectangular grid of values, +i.e. each row must contain the same number of values. + +NUMBER-OF-ROWS can be used to limit the number of rows that are +read. The default is NIL, which will result in all rows being read. + +NUMBER-OF-COLUMNS works as NUMBER-OF-ROWS, but for columns." + (let ((values (read-ascii-list* stream :element-type element-type + :number-of-rows number-of-rows + :number-of-columns number-of-columns))) + (make-array (list (length values) (length (car values))) + :element-type element-type + :initial-contents values))) + +(defun read-ascii-array (file &key (element-type 'float) + number-of-rows + number-of-columns) + "Read data from FILE into an ARRAY. It opens FILE and calls +READ-ASCII-ARRAY* on the resulting stream." + (with-open-file (s file :direction :input :element-type 'base-char) + (read-ascii-array* s :element-type element-type + :number-of-rows number-of-rows + :number-of-columns number-of-columns))) + +(defun read-ascii-vector-or-array* (stream &key (element-type 'float) + number-of-rows + number-of-columns) + "Read data from STREAM into an ARRAY or a VECTOR. The array will +have the given ELEMENT-TYPE. STREAM must contain a rectangular grid of +values, i.e. each row must contain the same number of values. + +NUMBER-OF-ROWS can be used to limit the number of rows that are +read. The default is NIL, which will result in all rows being read. + +NUMBER-OF-COLUMNS works as NUMBER-OF-ROWS, but for columns. + +This function uses READ-ASCII-ARRAY*. If the returned ARRAY has only a +single row or a single column, the return rank-1 array, a VECTOR, is +displaced to that ARRAY. Else the original ARRAY is returned." + (let ((read-array (read-ascii-array* stream :element-type element-type + :number-of-rows number-of-rows + :number-of-columns number-of-columns))) + (if (or (= (array-dimension read-array 0) 1) + (= (array-dimension read-array 1) 1)) + (make-array (apply #'* (array-dimensions read-array)) + :element-type element-type + :displaced-to read-array + :displaced-index-offset 0) + read-array))) + + + +#+nil ; Written before I knew about and understood displaced arrays. +(defun read-ascii-vector-or-array* (stream &key (element-type 'float) + number-of-rows + number-of-columns) + (let* ((values (read-ascii-list* stream :element-type element-type + :number-of-rows number-of-rows + :number-of-columns number-of-columns)) + (num-rows (length values)) + (num-cols (length (car values))) + len) + + (cond ((= num-rows 1) + (setf len num-cols + values (car values))) + ((= num-cols 1) + (setf len num-rows + values (alexandria:flatten values))) + (t (setf len (list num-rows num-cols)))) + (make-array len + :element-type element-type + :initial-contents values))) + +(defun read-ascii-vector-or-array (file &key (element-type 'float) + number-of-rows + number-of-columns) + "Read data from FILE into an ARRAY or a VECTOR. It opens FILE and +calls READ-ASCII-VECTOR-OR-ARRAY* on the resulting stream." + (with-open-file (s file) + (read-ascii-vector-or-array* s :element-type element-type + :number-of-rows number-of-rows + :number-of-columns number-of-columns))) + +(defun write-ascii-array% (array stream formatspec number-of-rows number-of-columns) + (let ((writer (type-writer (array-element-type array)))) + (loop for row below number-of-rows + for start-column = (* row number-of-columns) + for stop-column = (+ start-column number-of-columns) + do (progn + (loop for column from start-column below stop-column + do (princ (funcall writer (aref array column) formatspec) stream) + unless (= column (- stop-column 1)) + do (princ " " stream)) + (terpri stream))))) + +(defun write-ascii-array* (array stream &key formatspec) + "Write the values in ARRAY to the given STREAM, optionally making +use of FORMATSPEC, which is a control string for FORMAT to write out a +single numerical value." + (assert (= (array-rank array) 2)) + (destructuring-bind (rows columns) + (array-dimensions array) + (write-ascii-array% + (make-array (* rows columns) + :element-type (array-element-type array) + :displaced-to array + :displaced-index-offset 0) + stream + formatspec + rows + columns))) + +(defun write-ascii-array (array file &key formatspec + (if-exists :supersede) + (if-does-not-exist :create)) + "Write the values in ARRAY to the given FILE. It opens the FILE and +writes the data to the restulting stream using WRITE-ASCII-ARRAY*." + (with-open-file (s file :direction :output :if-exists if-exists :if-does-not-exist if-does-not-exist) + (write-ascii-array* array s :formatspec formatspec))) + +(defun write-ascii-vector-or-array* (vector-or-array stream &key formatspec) + "Write the values in VECTOR-OR-ARRAY to the given STREAM, optionally +making use of FORMATSPEC, which is a control string for FORMAT to +write out a single numerical value." + (assert (< (array-rank array) 3)) + (if (= (array-rank array) 1) + (write-ascii-array% array stream formatspec (array-dimension array 0) 1) + (write-ascii-array* vector-or-array stream :formatspec formatspec))) + +(defun write-ascii-vector-or-array (vector-or-array file &key formatspec + (if-exists :supersede) + (if-does-not-exist :create)) + "Write the values in VECTOR-OR-ARRAY to the given FILE. It opens the FILE and +writes the data to the restulting stream using WRITE-ASCII-VECTOR-OR-ARRAY*." + (with-open-file (s file :direction :output :if-exists if-exists :if-does-not-exist if-does-not-exist) + (write-ascii-vector-or-array* vector-or-array s :formatspec formatspec))) diff --git a/package.lisp b/package.lisp new file mode 100644 index 0000000..478cd39 --- /dev/null +++ b/package.lisp @@ -0,0 +1,17 @@ +;;;; package.lisp + +(defpackage #:loadtxt + (:use #:cl) + (:export #:read-ascii-array* + #:read-ascii-array + + #:read-ascii-vector-or-array* + #:read-ascii-vector-or-array + + #:write-ascii-array* + #:write-ascii-array + + #:write-ascii-vector-or-array* + #:write-ascii-vector-or-array)) + + -- 2.11.4.GIT