1 ;; calling nvidias cufft from sbcl
3 ;; the graphics card is:
4 ;; 02:00.0 VGA compatible controller: nVidia Corporation G94 [GeForce 9600 GT] (rev a1)
5 ;; and the Cuda Toolkit 3.1
6 ;; http://developer.nvidia.com/object/cuda_3_1_downloads.html
7 ;; 2010-08-12 kielhorn.martin@googlemail.com
10 (:use
:cl
:sb-alien
:sb-c-call
)
19 (in-package :cuda-fft
)
21 (declaim (optimize (speed 2) (debug 3) (safety 3)))
23 (load-shared-object "/usr/local/cuda/lib64/libcudart.so")
24 (load-shared-object "/usr/local/cuda/lib64/libcufft.so")
27 (define-alien-type cufft-handle unsigned-int
)
28 (define-alien-type cuda-device-ptr unsigned-int
)
29 (define-alien-type cuda-error int
) ;; enum, 0 is success
30 (define-alien-type cufft-result int
) ;; enum, 0 is success
31 (define-alien-type cufft-type int
) ;; enum, c2c is #x29
32 (define-alien-type cuda-memcpy-kind
39 (defconstant +host-
>device
+ 1)
40 (defconstant +device-
>host
+ 2)
41 (defconstant +cufft-c2c
+ #x29
) ;; compex single-float
42 (defconstant +cufft-z2z
+ #x69
) ;; complex double-float
43 (defconstant +cufft-forward
+ -
1)
44 (defconstant +cufft-inverse
+ 1)
45 (define-alien-type size-t unsigned-long
)
46 (define-alien-type cufft-complex single-float
) ;; there is no complex
47 ;; support in sb-alien
49 (define-alien-routine ("cudaMalloc" cuda-malloc
)
51 (device-pointer cuda-device-ptr
:out
)
54 (define-alien-routine ("cufftPlan2d" cufft-plan-2d
)
56 (plan cufft-handle
:out
)
61 (define-alien-routine ("cufftPlan3d" cufft-plan-3d
)
63 (plan cufft-handle
:out
)
69 (define-alien-routine ("cufftExecC2C" cufft-exec-c2c
)
72 (in-data (* cufft-complex
))
73 (out-data (* cufft-complex
))
76 (define-alien-routine ("cufftDestroy" cufft-destroy
)
80 (defun cu-plan (x y
&optional z
)
81 (multiple-value-bind (result plan
)
83 (cufft-plan-3d x y z
+cufft-c2c
+)
84 (cufft-plan-2d x y
+cufft-c2c
+))
86 (error "cu-plan error: ~a" result
))
90 (defun cu-malloc-csf (n)
92 (let ((complex-single-float-size (* 4 2)))
93 (multiple-value-bind (result device-ptr
)
94 (cuda-malloc (* complex-single-float-size n
))
96 (error "cuda-malloc error: ~a" result
))
99 (define-alien-routine ("cudaFree" cuda-free
)
101 (device-pointer cuda-device-ptr
:copy
))
103 (define-alien-routine ("cudaMemcpy" cuda-memcpy
)
108 (kind cuda-memcpy-kind
))
110 ;; same semantics as ft3 wrapper to fftw3, input array isn't modified
112 (defmacro def-ft?-csf
(rank)
113 (let ((ift (intern (format nil
"IFT~d-CSF" rank
)))
114 (ft (intern (format nil
"FT~d-CSF" rank
))))
116 (defun ,ft
(in &key
(forward t
))
117 (declare ((simple-array (complex single-float
) ,rank
) in
)
119 (values (simple-array (complex single-float
) ,rank
) &optional
))
120 (let ((dims (array-dimensions in
)))
121 (let* ((out (make-array dims
:element-type
'(complex single-float
)))
122 (out1 (sb-ext:array-storage-vector out
))
123 (in1 (sb-ext:array-storage-vector in
))
125 ;; allocate array on device
126 (device (cu-malloc-csf (length in1
)))
127 (complex-single-float-size (* 4 2)))
128 ;; copy data to device
129 (cuda-memcpy (sb-sys:int-sap device
)
130 (sb-sys:vector-sap in1
)
131 (* n complex-single-float-size
)
133 ;; plan and execute in-place transform on device
134 (let ((plan ,(ecase rank
135 (3 `(destructuring-bind (z y x
)
138 (2 `(destructuring-bind (y x
)
142 (sb-sys:int-sap device
)
143 (sb-sys:int-sap device
)
147 (cufft-destroy plan
))
149 (cuda-memcpy (sb-sys:vector-sap in1
)
150 (sb-sys:int-sap device
)
151 (* n complex-single-float-size
) 'device-
>host
)
152 ;; deallocate array on device
154 ;; normalize if forward
156 (let* ((1/n
(/ 1s0 n
)))
158 (setf (aref out1 i
) (* 1/n
(aref out1 i
))))))
161 `(,',ft
,in
:forward nil
)))))
167 (defmacro def-ft?
(rank)
168 (let ((ift (intern (format nil
"IFT~d" rank
)))
169 (ft (intern (format nil
"FT~d" rank
)))
170 (ift-c (intern (format nil
"IFT~d-CSF" rank
)))
171 (ft-c (intern (format nil
"FT~d-CSF" rank
))))
173 (defun ,ft
(in &key
(forward t
))
174 (,ft-c in
:forward forward
))
188 (a (vol:convert3-ub8
/csf-complex
189 (vol:draw-sphere-ub8
20d0 nz ny nx
))))
190 (vol:write-pgm
"cufft.pgm"
191 (vol:normalize2-csf
/ub8-abs
192 (vol:cross-section-xz-csf
(ft3-csf a
)))))))