3 This is the WSM5 microphysics accelerated for the NVIDIA GPU. It is experimental and
4 is not supported as part of WRF. There is additional information available
5 at http://www.mmm.ucar.edu/people/michalakes. Requests for assistance will be
6 considered only on a case by case basis, favoring active collaborators.
8 Required: a Linux x86 or x86_64 system with a CUDA-enabled NVIDIA GPU installed
9 as a co-processor as well as the CUDA libraries on a directory in your system,
12 /usr/local/cuda/lib/libcublas.so
14 included in the CUDA SDK 1.1 from NVIDIA (see nvidia.com).
18 1) Compile this file and companion file as:
23 producing wsm5.cu.o and wsm5_gpu.cu.o
25 2) configure WRF, generating a configure.wrf file for your system
26 Note that serial and dmpar work with the GPU, but smpar
29 3) Modify configure.wrf:
31 a) add -DTEST_ON_GPU_RK -DRUN_ON_GPU to ARCH_LOCAL
32 b) add ../phys/wsm5.cu.o and ../phys/wsm5_gpu.cu.o to LIB_LOCAL
33 (define LIB_LOCAL if it does not already exist)
34 c) add -L/usr/local/cuda/lib -lcuda -lcudart to LIB_LOCAL
35 (or wherever the cuda lib is on your system)
37 4) Compile WRF as usual.
39 Note: The GPU code is compiled for a maximum of 41 vertical levels.
40 If you need a larger number, contact the author listed below.
42 20080721, JM (michalak@ucar.edu)
46 # 1 "/tmp/tmpxft_00001ecc_00000000-0.c"
49 # 1 "/tmp/tmpxft_00001ecc_00000000-0.c"
51 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
52 struct _Z7textureIcLi1EL19cudaTextureReadMode0EE
;
53 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
54 struct _Z7textureIaLi1EL19cudaTextureReadMode0EE
;
55 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
56 struct _Z7textureIhLi1EL19cudaTextureReadMode0EE
;
57 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
58 struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE
;
59 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
60 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE
;
61 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
62 struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE
;
63 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
64 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE
;
65 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
66 struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE
;
67 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
68 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE
;
69 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
70 struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE
;
71 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
72 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE
;
73 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
74 struct _Z7textureIsLi1EL19cudaTextureReadMode0EE
;
75 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
76 struct _Z7textureItLi1EL19cudaTextureReadMode0EE
;
77 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
78 struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE
;
79 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
80 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE
;
81 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
82 struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE
;
83 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
84 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE
;
85 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
86 struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE
;
87 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
88 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE
;
89 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
90 struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE
;
91 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
92 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE
;
93 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
94 struct _Z7textureIiLi1EL19cudaTextureReadMode0EE
;
95 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
96 struct _Z7textureIjLi1EL19cudaTextureReadMode0EE
;
97 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
98 struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE
;
99 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
100 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE
;
101 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
102 struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE
;
103 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
104 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE
;
105 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
106 struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE
;
107 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
108 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE
;
109 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
110 struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE
;
111 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
112 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE
;
113 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
114 struct _Z7textureIcLi1EL19cudaTextureReadMode1EE
;
115 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
116 struct _Z7textureIaLi1EL19cudaTextureReadMode1EE
;
117 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
118 struct _Z7textureIhLi1EL19cudaTextureReadMode1EE
;
119 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
120 struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE
;
121 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
122 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE
;
123 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
124 struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE
;
125 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
126 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE
;
127 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
128 struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE
;
129 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
130 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE
;
131 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
132 struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE
;
133 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
134 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE
;
135 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
136 struct _Z7textureIsLi1EL19cudaTextureReadMode1EE
;
137 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
138 struct _Z7textureItLi1EL19cudaTextureReadMode1EE
;
139 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
140 struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE
;
141 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
142 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE
;
143 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
144 struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE
;
145 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
146 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE
;
147 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
148 struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE
;
149 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
150 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE
;
151 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
152 struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE
;
153 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
154 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE
;
155 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
156 struct _Z7textureIcLi2EL19cudaTextureReadMode0EE
;
157 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
158 struct _Z7textureIaLi2EL19cudaTextureReadMode0EE
;
159 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
160 struct _Z7textureIhLi2EL19cudaTextureReadMode0EE
;
161 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
162 struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE
;
163 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
164 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE
;
165 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
166 struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE
;
167 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
168 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE
;
169 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
170 struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE
;
171 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
172 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE
;
173 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
174 struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE
;
175 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
176 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE
;
177 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
178 struct _Z7textureIsLi2EL19cudaTextureReadMode0EE
;
179 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
180 struct _Z7textureItLi2EL19cudaTextureReadMode0EE
;
181 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
182 struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE
;
183 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
184 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE
;
185 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
186 struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE
;
187 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
188 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE
;
189 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
190 struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE
;
191 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
192 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE
;
193 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
194 struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE
;
195 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
196 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE
;
197 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
198 struct _Z7textureIiLi2EL19cudaTextureReadMode0EE
;
199 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
200 struct _Z7textureIjLi2EL19cudaTextureReadMode0EE
;
201 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
202 struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE
;
203 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
204 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE
;
205 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
206 struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE
;
207 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
208 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE
;
209 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
210 struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE
;
211 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
212 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE
;
213 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
214 struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE
;
215 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
216 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE
;
217 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
218 struct _Z7textureIiLi1EL19cudaTextureReadMode1EE
;
219 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
220 struct _Z7textureIjLi1EL19cudaTextureReadMode1EE
;
221 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
222 struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE
;
223 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
224 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE
;
225 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
226 struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE
;
227 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
228 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE
;
229 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
230 struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE
;
231 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
232 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE
;
233 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
234 struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE
;
235 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
236 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE
;
237 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
238 struct _Z7textureIcLi2EL19cudaTextureReadMode1EE
;
239 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
240 struct _Z7textureIaLi2EL19cudaTextureReadMode1EE
;
241 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
242 struct _Z7textureIhLi2EL19cudaTextureReadMode1EE
;
243 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
244 struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE
;
245 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
246 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE
;
247 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
248 struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE
;
249 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
250 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE
;
251 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
252 struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE
;
253 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
254 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE
;
255 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
256 struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE
;
257 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
258 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE
;
259 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
260 struct _Z7textureIsLi2EL19cudaTextureReadMode1EE
;
261 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
262 struct _Z7textureItLi2EL19cudaTextureReadMode1EE
;
263 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
264 struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE
;
265 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
266 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE
;
267 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
268 struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE
;
269 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
270 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE
;
271 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
272 struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE
;
273 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
274 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE
;
275 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
276 struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE
;
277 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
278 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE
;
279 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
280 struct _Z7textureIiLi2EL19cudaTextureReadMode1EE
;
281 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
282 struct _Z7textureIjLi2EL19cudaTextureReadMode1EE
;
283 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
284 struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE
;
285 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
286 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE
;
287 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
288 struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE
;
289 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
290 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE
;
291 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
292 struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE
;
293 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
294 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE
;
295 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
296 struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE
;
297 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
298 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE
;
299 # 268 "/usr/include/libio.h" 3
301 # 214 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 3
302 typedef unsigned long size_t;
303 # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1 3
304 # 56 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 3
309 # 1 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 1 3
310 # 48 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
311 # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1 3
312 # 49 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3
313 # 1 "/usr/local/cuda/bin/../include/builtin_types.h" 1 3
314 # 42 "/usr/local/cuda/bin/../include/builtin_types.h" 3
315 # 1 "/usr/local/cuda/bin/../include/device_types.h" 1 3
316 # 46 "/usr/local/cuda/bin/../include/device_types.h" 3
324 # 43 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
325 # 1 "/usr/local/cuda/bin/../include/driver_types.h" 1 3
326 # 60 "/usr/local/cuda/bin/../include/driver_types.h" 3
330 cudaErrorMissingConfiguration
,
331 cudaErrorMemoryAllocation
,
332 cudaErrorInitializationError
,
333 cudaErrorLaunchFailure
,
334 cudaErrorPriorLaunchFailure
,
335 cudaErrorLaunchTimeout
,
336 cudaErrorLaunchOutOfResources
,
337 cudaErrorInvalidDeviceFunction
,
338 cudaErrorInvalidConfiguration
,
339 cudaErrorInvalidDevice
,
340 cudaErrorInvalidValue
,
341 cudaErrorInvalidPitchValue
,
342 cudaErrorInvalidSymbol
,
343 cudaErrorMapBufferObjectFailed
,
344 cudaErrorUnmapBufferObjectFailed
,
345 cudaErrorInvalidHostPointer
,
346 cudaErrorInvalidDevicePointer
,
347 cudaErrorInvalidTexture
,
348 cudaErrorInvalidTextureBinding
,
349 cudaErrorInvalidChannelDescriptor
,
350 cudaErrorInvalidMemcpyDirection
,
351 cudaErrorAddressOfConstant
,
352 cudaErrorTextureFetchFailed
,
353 cudaErrorTextureNotBound
,
354 cudaErrorSynchronizationError
,
355 cudaErrorInvalidFilterSetting
,
356 cudaErrorInvalidNormSetting
,
357 cudaErrorMixedDeviceExecution
,
358 cudaErrorCudartUnloading
,
360 cudaErrorNotYetImplemented
,
361 cudaErrorMemoryValueTooLarge
,
362 cudaErrorInvalidResourceHandle
,
364 cudaErrorStartupFailure
= 0x7f,
365 cudaErrorApiFailureBase
= 10000
371 cudaMemcpyHostToHost
= 0,
372 cudaMemcpyHostToDevice
,
373 cudaMemcpyDeviceToHost
,
374 cudaMemcpyDeviceToDevice
378 struct cudaDeviceProp
381 size_t totalGlobalMem
;
382 size_t sharedMemPerBlock
;
386 int maxThreadsPerBlock
;
387 int maxThreadsDim
[3];
389 size_t totalConstMem
;
393 size_t textureAlignment
;
395 # 154 "/usr/local/cuda/bin/../include/driver_types.h" 3
396 typedef enum cudaError cudaError_t
;
399 typedef int cudaStream_t
;
402 typedef int cudaEvent_t
;
403 # 44 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
404 # 1 "/usr/local/cuda/bin/../include/texture_types.h" 1 3
405 # 46 "/usr/local/cuda/bin/../include/texture_types.h" 3
409 enum cudaChannelFormatKind
411 cudaChannelFormatKindSigned
,
412 cudaChannelFormatKindUnsigned
,
413 cudaChannelFormatKindFloat
417 struct cudaChannelFormatDesc
423 enum cudaChannelFormatKind f
;
427 enum cudaTextureAddressMode
434 enum cudaTextureFilterMode
441 enum cudaTextureReadMode
443 cudaReadModeElementType
,
444 cudaReadModeNormalizedFloat
448 struct textureReference
451 enum cudaTextureFilterMode filterMode
;
452 enum cudaTextureAddressMode addressMode
[2];
453 struct cudaChannelFormatDesc channelDesc
;
455 # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
456 # 1 "/usr/local/cuda/bin/../include/vector_types.h" 1 3
457 # 54 "/usr/local/cuda/bin/../include/vector_types.h" 3
490 unsigned char x
, y
, z
;
496 signed char x
, y
, z
, w
;
502 unsigned char x
, y
, z
, w
;
538 unsigned short x
, y
, z
;
550 unsigned short x
, y
, z
, w
;
586 unsigned int x
, y
, z
;
598 unsigned int x
, y
, z
, w
;
634 unsigned long x
, y
, z
;
646 unsigned long x
, y
, z
, w
;
678 # 282 "/usr/local/cuda/bin/../include/vector_types.h" 3
679 typedef struct char1 char1
;
681 typedef struct uchar1 uchar1
;
683 typedef struct char2 char2
;
685 typedef struct uchar2 uchar2
;
687 typedef struct char3 char3
;
689 typedef struct uchar3 uchar3
;
691 typedef struct char4 char4
;
693 typedef struct uchar4 uchar4
;
695 typedef struct short1 short1
;
697 typedef struct ushort1 ushort1
;
699 typedef struct short2 short2
;
701 typedef struct ushort2 ushort2
;
703 typedef struct short3 short3
;
705 typedef struct ushort3 ushort3
;
707 typedef struct short4 short4
;
709 typedef struct ushort4 ushort4
;
711 typedef struct int1 int1
;
713 typedef struct uint1 uint1
;
715 typedef struct int2 int2
;
717 typedef struct uint2 uint2
;
719 typedef struct int3 int3
;
721 typedef struct uint3 uint3
;
723 typedef struct int4 int4
;
725 typedef struct uint4 uint4
;
727 typedef struct long1 long1
;
729 typedef struct ulong1 ulong1
;
731 typedef struct long2 long2
;
733 typedef struct ulong2 ulong2
;
735 typedef struct long3 long3
;
737 typedef struct ulong3 ulong3
;
739 typedef struct long4 long4
;
741 typedef struct ulong4 ulong4
;
743 typedef struct float1 float1
;
745 typedef struct float2 float2
;
747 typedef struct float3 float3
;
749 typedef struct float4 float4
;
751 typedef struct double2 double2
;
752 # 363 "/usr/local/cuda/bin/../include/vector_types.h" 3
753 typedef struct dim3 dim3
;
758 unsigned int x
, y
, z
;
765 # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
766 # 50 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3
767 # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
768 extern cudaError_t
cudaMalloc(void **devPtr
, size_t size
);
769 extern cudaError_t
cudaMallocHost(void **ptr
, size_t size
);
770 extern cudaError_t
cudaMallocPitch(void **devPtr
, size_t *pitch
, size_t width
, size_t height
);
771 extern cudaError_t
cudaMallocArray(struct cudaArray
**array
, const struct cudaChannelFormatDesc
*desc
, size_t width
, size_t height
);
772 extern cudaError_t
cudaFree(void *devPtr
);
773 extern cudaError_t
cudaFreeHost(void *ptr
);
774 extern cudaError_t
cudaFreeArray(struct cudaArray
*array
);
775 # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
776 extern cudaError_t
cudaMemcpy(void *dst
, const void *src
, size_t count
, enum cudaMemcpyKind kind
);
777 extern cudaError_t
cudaMemcpyToArray(struct cudaArray
*dst
, size_t wOffset
, size_t hOffset
, const void *src
, size_t count
, enum cudaMemcpyKind kind
);
778 extern cudaError_t
cudaMemcpyFromArray(void *dst
, const struct cudaArray
*src
, size_t wOffset
, size_t hOffset
, size_t count
, enum cudaMemcpyKind kind
);
779 extern cudaError_t
cudaMemcpyArrayToArray(struct cudaArray
*dst
, size_t wOffsetDst
, size_t hOffsetDst
, const struct cudaArray
*src
, size_t wOffsetSrc
, size_t hOffsetSrc
, size_t count
, enum cudaMemcpyKind kind
);
780 extern cudaError_t
cudaMemcpy2D(void *dst
, size_t dpitch
, const void *src
, size_t spitch
, size_t width
, size_t height
, enum cudaMemcpyKind kind
);
781 extern cudaError_t
cudaMemcpy2DToArray(struct cudaArray
*dst
, size_t wOffset
, size_t hOffset
, const void *src
, size_t spitch
, size_t width
, size_t height
, enum cudaMemcpyKind kind
);
782 extern cudaError_t
cudaMemcpy2DFromArray(void *dst
, size_t dpitch
, const struct cudaArray
*src
, size_t wOffset
, size_t hOffset
, size_t width
, size_t height
, enum cudaMemcpyKind kind
);
783 extern cudaError_t
cudaMemcpy2DArrayToArray(struct cudaArray
*dst
, size_t wOffsetDst
, size_t hOffsetDst
, const struct cudaArray
*src
, size_t wOffsetSrc
, size_t hOffsetSrc
, size_t width
, size_t height
, enum cudaMemcpyKind kind
);
784 extern cudaError_t
cudaMemcpyToSymbol(const char *symbol
, const void *src
, size_t count
, size_t offset
, enum cudaMemcpyKind kind
);
785 extern cudaError_t
cudaMemcpyFromSymbol(void *dst
, const char *symbol
, size_t count
, size_t offset
, enum cudaMemcpyKind kind
);
793 extern cudaError_t
cudaMemcpyAsync(void *dst
, const void *src
, size_t count
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
794 extern cudaError_t
cudaMemcpyToArrayAsync(struct cudaArray
*dst
, size_t wOffset
, size_t hOffset
, const void *src
, size_t count
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
795 extern cudaError_t
cudaMemcpyFromArrayAsync(void *dst
, const struct cudaArray
*src
, size_t wOffset
, size_t hOffset
, size_t count
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
796 extern cudaError_t
cudaMemcpy2DAsync(void *dst
, size_t dpitch
, const void *src
, size_t spitch
, size_t width
, size_t height
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
797 extern cudaError_t
cudaMemcpy2DToArrayAsync(struct cudaArray
*dst
, size_t wOffset
, size_t hOffset
, const void *src
, size_t spitch
, size_t width
, size_t height
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
798 extern cudaError_t
cudaMemcpy2DFromArrayAsync(void *dst
, size_t dpitch
, const struct cudaArray
*src
, size_t wOffset
, size_t hOffset
, size_t width
, size_t height
, enum cudaMemcpyKind kind
, cudaStream_t stream
);
806 extern cudaError_t
cudaMemset(void *mem
, int c
, size_t count
);
807 extern cudaError_t
cudaMemset2D(void *mem
, size_t pitch
, int c
, size_t width
, size_t height
);
815 extern cudaError_t
cudaGetSymbolAddress(void **devPtr
, const char *symbol
);
816 extern cudaError_t
cudaGetSymbolSize(size_t *size
, const char *symbol
);
824 extern cudaError_t
cudaGetDeviceCount(int *count
);
825 extern cudaError_t
cudaGetDeviceProperties(struct cudaDeviceProp
*prop
, int device
);
826 extern cudaError_t
cudaChooseDevice(int *device
, const struct cudaDeviceProp
*prop
);
827 extern cudaError_t
cudaSetDevice(int device
);
828 extern cudaError_t
cudaGetDevice(int *device
);
836 extern cudaError_t
cudaBindTexture(size_t *offset
, const struct textureReference
*texref
, const void *devPtr
, const struct cudaChannelFormatDesc
*desc
, size_t size
);
837 extern cudaError_t
cudaBindTextureToArray(const struct textureReference
*texref
, const struct cudaArray
*array
, const struct cudaChannelFormatDesc
*desc
);
838 extern cudaError_t
cudaUnbindTexture(const struct textureReference
*texref
);
839 extern cudaError_t
cudaGetTextureAlignmentOffset(size_t *offset
, const struct textureReference
*texref
);
840 extern cudaError_t
cudaGetTextureReference(const struct textureReference
**texref
, const char *symbol
);
848 extern cudaError_t
cudaGetChannelDesc(struct cudaChannelFormatDesc
*desc
, const struct cudaArray
*array
);
849 extern struct cudaChannelFormatDesc
cudaCreateChannelDesc(int x
, int y
, int z
, int w
, enum cudaChannelFormatKind f
);
857 extern cudaError_t
cudaGetLastError(void);
858 extern const char* cudaGetErrorString(cudaError_t error
);
866 extern cudaError_t
cudaConfigureCall(dim3 gridDim
, dim3 blockDim
, size_t sharedMem
, cudaStream_t stream
);
867 extern cudaError_t
cudaSetupArgument(const void *arg
, size_t size
, size_t offset
);
868 extern cudaError_t
cudaLaunch(const char *symbol
);
876 extern cudaError_t
cudaStreamCreate(cudaStream_t
*stream
);
877 extern cudaError_t
cudaStreamDestroy(cudaStream_t stream
);
878 extern cudaError_t
cudaStreamSynchronize(cudaStream_t stream
);
879 extern cudaError_t
cudaStreamQuery(cudaStream_t stream
);
887 extern cudaError_t
cudaEventCreate(cudaEvent_t
*event
);
888 extern cudaError_t
cudaEventRecord(cudaEvent_t event
, cudaStream_t stream
);
889 extern cudaError_t
cudaEventQuery(cudaEvent_t event
);
890 extern cudaError_t
cudaEventSynchronize(cudaEvent_t event
);
891 extern cudaError_t
cudaEventDestroy(cudaEvent_t event
);
892 extern cudaError_t
cudaEventElapsedTime(float *ms
, cudaEvent_t start
, cudaEvent_t end
);
900 extern cudaError_t
cudaThreadExit(void);
901 extern cudaError_t
cudaThreadSynchronize(void);
902 # 61 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3
903 # 1 "/usr/local/cuda/bin/../include/crt/storage_class.h" 1 3
904 # 62 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3
905 # 216 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 2 3
906 # 148 "/usr/include/bits/types.h" 3
907 typedef long __clock_t
;
908 # 61 "/usr/include/time.h" 3
909 typedef __clock_t
clock_t;
910 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<char, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
911 struct _Z7textureIcLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
912 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<signed char, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
913 struct _Z7textureIaLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
914 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<unsigned char, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
915 struct _Z7textureIhLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
916 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<char1, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
917 struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
918 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<uchar1, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
919 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
920 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<char2, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
921 struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
922 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<uchar2, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
923 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
924 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<char3, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
925 struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
926 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<uchar3, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
927 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
928 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<char4, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
929 struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
930 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<uchar4, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
931 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
932 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<short, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
933 struct _Z7textureIsLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
934 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<unsigned short, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
935 struct _Z7textureItLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
936 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<short1, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
937 struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
938 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<ushort1, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
939 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
940 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<short2, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
941 struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
942 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<ushort2, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
943 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
944 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<short3, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
945 struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
946 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<ushort3, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
947 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
948 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<short4, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
949 struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
950 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<ushort4, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
951 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
952 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<int, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
953 struct _Z7textureIiLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
954 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<unsigned int, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
955 struct _Z7textureIjLi1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
956 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
/* Itanium-mangled name of texture<int1, 1, (cudaTextureReadMode)0>.
   Sole member is a textureReference subobject (presumably the flattened base class -- confirm). */
957 struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
958 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
959 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
960 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
961 struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
962 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
963 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
964 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
965 struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
966 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
967 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
968 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
969 struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
970 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
971 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
972 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
973 struct _Z7textureIcLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
974 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
975 struct _Z7textureIaLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
976 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
977 struct _Z7textureIhLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
978 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
979 struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
980 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
981 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
982 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
983 struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
984 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
985 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
986 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
987 struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
988 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
989 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
990 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
991 struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
992 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
993 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
994 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
995 struct _Z7textureIsLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
996 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
997 struct _Z7textureItLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
998 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
999 struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1000 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1001 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1002 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1003 struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1004 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1005 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1006 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1007 struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1008 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1009 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1010 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1011 struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1012 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1013 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1014 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1015 struct _Z7textureIcLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1016 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1017 struct _Z7textureIaLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1018 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1019 struct _Z7textureIhLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1020 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1021 struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1022 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1023 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1024 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1025 struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1026 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1027 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1028 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1029 struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1030 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1031 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1032 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1033 struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1034 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1035 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1036 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1037 struct _Z7textureIsLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1038 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1039 struct _Z7textureItLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1040 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1041 struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1042 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1043 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1044 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1045 struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1046 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1047 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1048 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1049 struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1050 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1051 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1052 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1053 struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1054 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1055 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1056 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1057 struct _Z7textureIiLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1058 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1059 struct _Z7textureIjLi2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1060 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1061 struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1062 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1063 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1064 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1065 struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1066 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1067 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1068 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1069 struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1070 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1071 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1072 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1073 struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1074 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1075 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE
{ struct textureReference __b_16textureReference
;};
1076 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1077 struct _Z7textureIiLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1078 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1079 struct _Z7textureIjLi1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1080 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1081 struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1082 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1083 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1084 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1085 struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1086 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1087 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1088 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1089 struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1090 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1091 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1092 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1093 struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1094 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1095 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1096 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1097 struct _Z7textureIcLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1098 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1099 struct _Z7textureIaLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1100 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1101 struct _Z7textureIhLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1102 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1103 struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1104 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1105 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1106 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1107 struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1108 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1109 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1110 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1111 struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1112 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1113 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1114 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1115 struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1116 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1117 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1118 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1119 struct _Z7textureIsLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1120 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1121 struct _Z7textureItLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1122 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1123 struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1124 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1125 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1126 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1127 struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1128 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1129 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1130 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1131 struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1132 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1133 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1134 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1135 struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1136 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1137 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1138 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1139 struct _Z7textureIiLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1140 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1141 struct _Z7textureIjLi2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1142 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1143 struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1144 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1145 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1146 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1147 struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1148 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1149 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1150 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1151 struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1152 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1153 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1154 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1155 struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1156 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1157 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE
{ struct textureReference __b_16textureReference
;};
1158 # 46 "/usr/include/stdio.h" 3
1159 typedef struct _IO_FILE
FILE;
1160 void *memcpy(void*, const void*, size_t); void *memset(void*, int, size_t);
1161 # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1162 extern cudaError_t
cudaMalloc(void **, size_t);
1166 extern cudaError_t
cudaFree(void *);
1167 # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1168 extern cudaError_t
cudaMemcpy(void *, const void *, size_t, enum cudaMemcpyKind
);
1169 # 145 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1170 extern cudaError_t
cudaGetDeviceCount(int *);
1171 extern cudaError_t
cudaGetDeviceProperties(struct cudaDeviceProp
*, int);
1173 extern cudaError_t
cudaSetDevice(int);
1174 # 187 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1175 extern cudaError_t
cudaConfigureCall(dim3
, dim3
, size_t, cudaStream_t
);
1176 # 222 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1177 extern cudaError_t
cudaThreadSynchronize(void);
1178 # 184 "/usr/include/time.h" 3
1179 extern __attribute__((__weak__
)) clock_t clock(void);
1180 # 58 "/usr/local/cuda/bin/../include/common_functions.h"
1181 extern __attribute__((__weak__
)) void *memset(void *, int, size_t);
1182 # 65 "/usr/local/cuda/bin/../include/math_functions.h"
1183 extern __attribute__((__weak__
)) int abs(int) __attribute__((__const__
));
1185 extern __attribute__((__weak__
)) long labs(long) __attribute__((__const__
));
1187 extern __attribute__((__weak__
)) long long llabs(long long) __attribute__((__const__
));
1189 extern __attribute__((__weak__
)) double fabs(double) __attribute__((__const__
));
1191 extern __attribute__((__weak__
)) float fabsf(float) __attribute__((__const__
));
1194 extern __attribute__((__weak__
)) int min(int, int);
1196 extern __attribute__((__weak__
)) unsigned umin(unsigned, unsigned);
1198 extern __attribute__((__weak__
)) float fminf(float, float);
1200 extern __attribute__((__weak__
)) double fmin(double, double);
1203 extern __attribute__((__weak__
)) int max(int, int);
1205 extern __attribute__((__weak__
)) unsigned umax(unsigned, unsigned);
1207 extern __attribute__((__weak__
)) float fmaxf(float, float);
1209 extern __attribute__((__weak__
)) double fmax(double, double);
1212 extern __attribute__((__weak__
)) double sin(double);
1214 extern __attribute__((__weak__
)) float sinf(float);
1217 extern __attribute__((__weak__
)) double cos(double);
1219 extern __attribute__((__weak__
)) float cosf(float);
1222 extern __attribute__((__weak__
)) void sincos(double, double *, double *);
1224 extern __attribute__((__weak__
)) void sincosf(float, float *, float *);
1227 extern __attribute__((__weak__
)) double tan(double);
1229 extern __attribute__((__weak__
)) float tanf(float);
1232 extern __attribute__((__weak__
)) double sqrt(double);
1234 extern __attribute__((__weak__
)) float sqrtf(float);
1237 extern __attribute__((__weak__
)) double rsqrt(double);
1239 extern __attribute__((__weak__
)) float rsqrtf(float);
1242 extern __attribute__((__weak__
)) double exp2(double);
1244 extern __attribute__((__weak__
)) float exp2f(float);
1247 extern __attribute__((__weak__
)) double exp10(double);
1249 extern __attribute__((__weak__
)) float exp10f(float);
1252 extern __attribute__((__weak__
)) double expm1(double);
1254 extern __attribute__((__weak__
)) float expm1f(float);
1257 extern __attribute__((__weak__
)) double log2(double);
1259 extern __attribute__((__weak__
)) float log2f(float);
1262 extern __attribute__((__weak__
)) double log10(double);
1264 extern __attribute__((__weak__
)) float log10f(float);
1267 extern __attribute__((__weak__
)) double log(double);
1269 extern __attribute__((__weak__
)) float logf(float);
1272 extern __attribute__((__weak__
)) double log1p(double);
1274 extern __attribute__((__weak__
)) float log1pf(float);
1277 extern __attribute__((__weak__
)) double floor(double) __attribute__((__const__
));
1279 extern __attribute__((__weak__
)) float floorf(float) __attribute__((__const__
));
1282 extern __attribute__((__weak__
)) double exp(double);
1284 extern __attribute__((__weak__
)) float expf(float);
1287 extern __attribute__((__weak__
)) double cosh(double);
1289 extern __attribute__((__weak__
)) float coshf(float);
1292 extern __attribute__((__weak__
)) double sinh(double);
1294 extern __attribute__((__weak__
)) float sinhf(float);
1297 extern __attribute__((__weak__
)) double tanh(double);
1299 extern __attribute__((__weak__
)) float tanhf(float);
1302 extern __attribute__((__weak__
)) double acosh(double);
1304 extern __attribute__((__weak__
)) float acoshf(float);
1307 extern __attribute__((__weak__
)) double asinh(double);
1309 extern __attribute__((__weak__
)) float asinhf(float);
1312 extern __attribute__((__weak__
)) double atanh(double);
1314 extern __attribute__((__weak__
)) float atanhf(float);
1317 extern __attribute__((__weak__
)) double ldexp(double, int);
1319 extern __attribute__((__weak__
)) float ldexpf(float, int);
1322 extern __attribute__((__weak__
)) double logb(double);
1324 extern __attribute__((__weak__
)) float logbf(float);
1327 extern __attribute__((__weak__
)) int ilogb(double);
1329 extern __attribute__((__weak__
)) int ilogbf(float);
1332 extern __attribute__((__weak__
)) double scalbn(double, int);
1334 extern __attribute__((__weak__
)) float scalbnf(float, int);
1337 extern __attribute__((__weak__
)) double scalbln(double, long);
1339 extern __attribute__((__weak__
)) float scalblnf(float, long);
1342 extern __attribute__((__weak__
)) double frexp(double, int *);
1344 extern __attribute__((__weak__
)) float frexpf(float, int *);
1347 extern __attribute__((__weak__
)) double round(double) __attribute__((__const__
));
1349 extern __attribute__((__weak__
)) float roundf(float) __attribute__((__const__
));
1352 extern __attribute__((__weak__
)) long lround(double);
1354 extern __attribute__((__weak__
)) long lroundf(float);
1357 extern __attribute__((__weak__
)) long long llround(double);
1359 extern __attribute__((__weak__
)) long long llroundf(float);
1362 extern __attribute__((__weak__
)) double rint(double);
1364 extern __attribute__((__weak__
)) float rintf(float);
1367 extern __attribute__((__weak__
)) long lrint(double);
1369 extern __attribute__((__weak__
)) long lrintf(float);
1372 extern __attribute__((__weak__
)) long long llrint(double);
1374 extern __attribute__((__weak__
)) long long llrintf(float);
1377 extern __attribute__((__weak__
)) double nearbyint(double);
1379 extern __attribute__((__weak__
)) float nearbyintf(float);
1382 extern __attribute__((__weak__
)) double ceil(double) __attribute__((__const__
));
1384 extern __attribute__((__weak__
)) float ceilf(float) __attribute__((__const__
));
1387 extern __attribute__((__weak__
)) double trunc(double) __attribute__((__const__
));
1389 extern __attribute__((__weak__
)) float truncf(float) __attribute__((__const__
));
1392 extern __attribute__((__weak__
)) double fdim(double, double);
1394 extern __attribute__((__weak__
)) float fdimf(float, float);
1397 extern __attribute__((__weak__
)) double atan2(double, double);
1399 extern __attribute__((__weak__
)) float atan2f(float, float);
1402 extern __attribute__((__weak__
)) double atan(double);
1404 extern __attribute__((__weak__
)) float atanf(float);
1407 extern __attribute__((__weak__
)) double asin(double);
1409 extern __attribute__((__weak__
)) float asinf(float);
1412 extern __attribute__((__weak__
)) double acos(double);
1414 extern __attribute__((__weak__
)) float acosf(float);
1417 extern __attribute__((__weak__
)) double hypot(double, double);
1419 extern __attribute__((__weak__
)) float hypotf(float, float);
1422 extern __attribute__((__weak__
)) double cbrt(double);
1424 extern __attribute__((__weak__
)) float cbrtf(float);
1427 extern __attribute__((__weak__
)) double pow(double, double);
1429 extern __attribute__((__weak__
)) float powf(float, float);
1432 extern __attribute__((__weak__
)) double modf(double, double *);
1434 extern __attribute__((__weak__
)) float modff(float, float *);
1437 extern __attribute__((__weak__
)) double fmod(double, double);
1439 extern __attribute__((__weak__
)) float fmodf(float, float);
1442 extern __attribute__((__weak__
)) double remainder(double, double);
1444 extern __attribute__((__weak__
)) float remainderf(float, float);
1447 extern __attribute__((__weak__
)) double remquo(double, double, int *);
1449 extern __attribute__((__weak__
)) float remquof(float, float, int *);
1452 extern __attribute__((__weak__
)) double erf(double);
1454 extern __attribute__((__weak__
)) float erff(float);
1457 extern __attribute__((__weak__
)) double erfc(double);
1459 extern __attribute__((__weak__
)) float erfcf(float);
1462 extern __attribute__((__weak__
)) double lgamma(double);
1464 extern __attribute__((__weak__
)) float lgammaf(float);
1467 extern __attribute__((__weak__
)) double tgamma(double);
1469 extern __attribute__((__weak__
)) float tgammaf(float);
1472 extern __attribute__((__weak__
)) double copysign(double, double) __attribute__((__const__
));
1474 extern __attribute__((__weak__
)) float copysignf(float, float) __attribute__((__const__
));
1477 extern __attribute__((__weak__
)) double nextafter(double, double) __attribute__((__const__
));
1479 extern __attribute__((__weak__
)) float nextafterf(float, float) __attribute__((__const__
));
1482 extern __attribute__((__weak__
)) double nan(const char *) __attribute__((__const__
));
1484 extern __attribute__((__weak__
)) float nanf(const char *) __attribute__((__const__
));
1487 extern __attribute__((__weak__
)) int __signbit(double) __attribute__((__const__
));
1489 extern __attribute__((__weak__
)) int __signbitf(float) __attribute__((__const__
));
1492 extern __attribute__((__weak__
)) int __isinf(double) __attribute__((__const__
));
1494 extern __attribute__((__weak__
)) int __isinff(float) __attribute__((__const__
));
1497 extern __attribute__((__weak__
)) int __isnan(double) __attribute__((__const__
));
1499 extern __attribute__((__weak__
)) int __isnanf(float) __attribute__((__const__
));
1502 extern __attribute__((__weak__
)) int __finite(double) __attribute__((__const__
));
1504 extern __attribute__((__weak__
)) int __finitef(float) __attribute__((__const__
));
1507 extern __attribute__((__weak__
)) double fma(double, double, double);
1509 extern __attribute__((__weak__
)) float fmaf(float, float, float);
1510 # 193 "/usr/include/bits/mathcalls.h" 3
1511 extern __attribute__((__weak__
)) int __isinfl(long double) __attribute__((__const__
));
1514 extern __attribute__((__weak__
)) int __finitel(long double) __attribute__((__const__
));
1515 # 231 "/usr/include/bits/mathcalls.h" 3
1516 extern __attribute__((__weak__
)) int __isnanl(long double) __attribute__((__const__
));
1517 # 350 "/usr/include/bits/mathcalls.h" 3
1518 extern __attribute__((__weak__
)) int __signbitl(long double) __attribute__((__const__
));
1519 # 589 "/usr/include/stdlib.h" 3
1520 extern void *malloc(size_t) __attribute__((__malloc__
));
1521 # 327 "/usr/include/stdio.h" 3
1522 extern int fprintf(FILE *, const char *, ...);
1524 extern int rsl_internal_microclock_(void);
1526 extern int gethostname(char *, size_t);
1528 extern int wsm5_gpu_init_(int *, int *, int *);
1530 extern int wsm5_host_(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *);
1532 extern int get_wsm5_gpu_levels_(int *);
1533 extern void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__
));
1534 # 144 "/usr/include/stdio.h" 3
1535 extern struct _IO_FILE
*stderr
;
1536 # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.h" 1 3
1541 extern void __device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float, float *, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int);
1542 # 146 "/usr/include/stdio.h" 2 3
/*
 * wsm5_gpu_init_ -- one-time GPU start-up and diagnostics.
 *
 * Visible behaviour:
 *   - queries the CUDA device count into dc and reports it on stderr;
 *   - selects device i with cudaSetDevice and logs the host name and the
 *     task number read from *myproc;
 *   - fetches the properties of device i and prints them on stderr;
 *   - times a 4-byte cudaMalloc / cudaMemcpy / cudaFree round trip between
 *     two rsl_internal_microclock_() readings and prints the difference.
 *
 * Parameters (all passed by pointer):
 *   myproc   - task number; only used in the "Setting device" log line here.
 *   nproc    - not referenced in the lines visible in this listing.
 *   mydevice - not referenced in the lines visible in this listing.
 *
 * NOTE(review): the numeric prefixes are the generated file's own line
 * numbers; gaps in them correspond to generated lines that are absent from
 * this listing (the declarations of dc, i, x, x_d, s and e, the guarding
 * conditions and the closing braces are not visible). Nothing absent has
 * been reconstructed.
 */
1544 int wsm5_gpu_init_( int *myproc, int *nproc, int *mydevice )
1555 auto cudaError_t cerr;
1556 auto char hostname[64];
1557 auto struct cudaDeviceProp dp;
/* Device count reported by the runtime is stored in dc. */
1559 cudaGetDeviceCount((&dc));
/* Warn and clamp dc to 4; the condition guarding this block is not visible here. */
1561 { fprintf(stderr, "warning: more than %d devices on node (%d)\n", 4, dc); dc = 4; }
1562 fprintf(stderr, "Number of devices on this node: %d\n", dc);
/* Select device i; a non-zero cudaError_t is reported on stderr. */
1569 if ((int)(cerr = (cudaSetDevice(i)))) {
1570 fprintf(stderr, " non-zero cerr %d\n", ((int)cerr));
/* 64UL is the size of the hostname buffer declared above. */
1573 gethostname(((char *)hostname), 64UL);
1574 fprintf(stderr, "Setting device %02d for task %03d on host %s\n", i, ((*myproc)), ((char *)hostname));
1576 if ((int)(cerr = (cudaGetDeviceProperties((&dp), i)))) {
/* Fix: the format has two conversions, so the device index must be passed
 * ahead of the error code (it was missing, which is undefined behaviour). */
1577 fprintf(stderr, "Device %02d: cerr = %d\n", i, ((int)cerr));
1579 fprintf(stderr, "Device %02d: name %s\n", i, ((char *)(&dp.name)));
/* Fix: totalGlobalMem, sharedMemPerBlock, memPitch and textureAlignment are
 * size_t members of cudaDeviceProp; printing them with %d is undefined on
 * LP64. They are now cast to unsigned long and printed with %lu. */
1580 fprintf(stderr, "Device %02d: mem %lu\n", i, ((unsigned long)(dp.totalGlobalMem)));
1581 fprintf(stderr, "Device %02d: smem %lu\n", i, ((unsigned long)(dp.sharedMemPerBlock)));
1582 fprintf(stderr, "Device %02d: nreg %d\n", i, ((dp.regsPerBlock)));
1583 fprintf(stderr, "Device %02d: warp %d\n", i, ((dp.warpSize)));
1584 fprintf(stderr, "Device %02d: pitch %lu\n", i, ((unsigned long)(dp.memPitch)));
1585 fprintf(stderr, "Device %02d: maxthrds %d\n", i, ((dp.maxThreadsPerBlock)));
1586 fprintf(stderr, "Device %02d: maxtdim %d %d %d\n", i, (((int *)(&dp.maxThreadsDim))[0]), (((int *)(&dp.maxThreadsDim))[1]), (((int *)(&dp.maxThreadsDim))[2]));
1589 fprintf(stderr, "Device %02d: maxgdim %d %d %d\n", i, (((int *)(&dp.maxGridSize))[0]), (((int *)(&dp.maxGridSize))[1]), (((int *)(&dp.maxGridSize))[2]));
1592 fprintf(stderr, "Device %02d: clock %d\n", i, ((dp.clockRate)));
1593 fprintf(stderr, "Device %02d: talign %lu\n", i, ((unsigned long)(dp.textureAlignment)));
/* Time a 4-byte allocate / host-to-device copy / free sequence between two
 * clock readings and report the difference.
 * NOTE(review): the CUDA return codes in this sequence are not checked. */
1597 s = (rsl_internal_microclock_());
1598 cudaMalloc(((void **)(&x_d)), 4UL);
1599 cudaMemcpy(((void *)x_d), ((const void *)(&x)), 4UL, cudaMemcpyHostToDevice);
1600 cudaFree(((void *)x_d));
1601 e = (rsl_internal_microclock_());
1602 fprintf(stderr, "wsm5_init: %d\n", (e - s));
1608 float *th
, float *pii
,
1610 float *qc
, float *qi
, float *qr
, float *qs
,
1611 float *den
, float *p
, float *delz
,
1616 float *rain
, float *rainncv
,
1618 float *snow
, float *snowncv
,
1619 int *ids
, int *ide
, int *jds
, int *jde
, int *kds
, int *kde
,
1620 int *ims
, int *ime
, int *jms
, int *jme
, int *kms
, int *kme
,
1621 int *ips
, int *ipe
, int *jps
, int *jpe
, int *kps
, int *kpe
)
1623 { auto unsigned __T20
;
1624 auto unsigned __T21
;
1661 auto float *rainncv_d
;
1664 auto float *snowncv_d
;
1665 auto float retvals
[100];
1669 auto float *retvals_d
;
1682 d3
= ((((((*ime
)) - ((*ims
))) + 1) * ((((*jme
)) - ((*jms
))) + 1)) * ((((*kme
)) - ((*kms
))) + 1));
1683 d2
= (((((*ime
)) - ((*ims
))) + 1) * ((((*jme
)) - ((*jms
))) + 1));
1685 dips
= 0; dipe
= ((((*ipe
)) - ((*ips
))) + 1);
1687 djps
= 0; djpe
= ((((*jpe
)) - ((*jps
))) + 1);
1688 dkps
= 0; dkpe
= ((((*kpe
)) - ((*kps
))) + 1);
1690 bigbuf
= ((float *)(malloc((((unsigned long)((dipe
* djpe
) * dkpe
)) * 4UL))));
1692 s
= (rsl_internal_microclock_());
1693 cudaMalloc(((void **)(&th_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)th_d
), ((const void *)th
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1694 cudaMalloc(((void **)(&pii_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)pii_d
), ((const void *)pii
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1695 cudaMalloc(((void **)(&q_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)q_d
), ((const void *)q
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1696 cudaMalloc(((void **)(&qc_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)qc_d
), ((const void *)qc
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1697 cudaMalloc(((void **)(&qi_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)qi_d
), ((const void *)qi
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1698 cudaMalloc(((void **)(&qr_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)qr_d
), ((const void *)qr
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1699 cudaMalloc(((void **)(&qs_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)qs_d
), ((const void *)qs
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1700 cudaMalloc(((void **)(&den_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)den_d
), ((const void *)den
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1701 cudaMalloc(((void **)(&p_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)p_d
), ((const void *)p
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1702 cudaMalloc(((void **)(&delz_d
)), (((unsigned long)d3
) * 4UL)); cudaMemcpy(((void *)delz_d
), ((const void *)delz
), (((unsigned long)d3
) * 4UL), cudaMemcpyHostToDevice
);
1706 cudaMalloc(((void **)(&rain_d
)), (((unsigned long)d2
) * 4UL)); cudaMemcpy(((void *)rain_d
), ((const void *)rain
), (((unsigned long)d2
) * 4UL), cudaMemcpyHostToDevice
);
1707 cudaMalloc(((void **)(&rainncv_d
)), (((unsigned long)d2
) * 4UL)); cudaMemcpy(((void *)rainncv_d
), ((const void *)rainncv
), (((unsigned long)d2
) * 4UL), cudaMemcpyHostToDevice
);
1708 cudaMalloc(((void **)(&sr_d
)), (((unsigned long)d2
) * 4UL)); cudaMemcpy(((void *)sr_d
), ((const void *)sr
), (((unsigned long)d2
) * 4UL), cudaMemcpyHostToDevice
);
1709 cudaMalloc(((void **)(&snow_d
)), (((unsigned long)d2
) * 4UL)); cudaMemcpy(((void *)snow_d
), ((const void *)snow
), (((unsigned long)d2
) * 4UL), cudaMemcpyHostToDevice
);
1710 cudaMalloc(((void **)(&snowncv_d
)), (((unsigned long)d2
) * 4UL)); cudaMemcpy(((void *)snowncv_d
), ((const void *)snowncv
), (((unsigned long)d2
) * 4UL), cudaMemcpyHostToDevice
);
1713 for (k
= 0; (k
< ((((*kme
)) - ((*kms
))) + 1)); k
++) { (((float *)retvals
)[k
]) = (0.0F
); }
1715 cudaMalloc(((void **)(&retvals_d
)), (((unsigned long)((((*kme
)) - ((*kms
))) + 1)) * 4UL)); cudaMemcpy(((void *)retvals_d
), ((const void *)((float *)retvals
)), (((unsigned long)((((*kme
)) - ((*kms
))) + 1)) * 4UL), cudaMemcpyHostToDevice
);
1719 remx
= ((((((*ipe
)) - ((*ips
))) + 1) % 16) ? 1 : 0);
1720 remy
= ((((((*jpe
)) - ((*jps
))) + 1) % 8) ? 1 : 0);
1722 { (dimBlock
.x
) = 16U; (dimBlock
.y
) = 8U; (dimBlock
.z
) = 1U; }
1724 { __T20
= ((unsigned)((((((*ipe
)) - ((*ips
))) + 1) / 16) + remx
)); __T21
= ((unsigned)((((((*jpe
)) - ((*jps
))) + 1) / 8) + remy
)); { (dimGrid
.x
) = __T20
; (dimGrid
.y
) = __T21
; (dimGrid
.z
) = 1U; } }
1726 fprintf(stderr
, "Call to wsm5_gpu: block dims %d %d\n", ((dimBlock
.x
)), ((dimBlock
.y
)));
1727 fprintf(stderr
, "Call to wsm5_gpu: grid dims %d %d\n", ((dimGrid
.x
)), ((dimGrid
.y
)));
1729 s2
= (rsl_internal_microclock_());
1730 ((int)(cudaConfigureCall(dimGrid
, dimBlock
, 0UL, 0))) ? ((void)0) : (__device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(th_d
, pii_d
, q_d
, qc_d
, qi_d
, qr_d
, qs_d
, den_d
, p_d
, delz_d
, rain_d
, rainncv_d
, sr_d
, snow_d
, snowncv_d
, ((*delt
)), retvals_d
, (dips
+ 1), ((((*ipe
)) - ((*ips
))) + 1), (djps
+ 1), ((((*jpe
)) - ((*jps
))) + 1), (dkps
+ 1), ((((*kpe
)) - ((*kps
))) + 1), (dips
+ 1), dipe
, (djps
+ 1), djpe
, (dkps
+ 1), dkpe
, (dips
+ 1), dipe
, (djps
+ 1), djpe
, (dkps
+ 1), dkpe
));
1732 cudaThreadSynchronize();
1733 e2
= (rsl_internal_microclock_());
1734 fprintf(stderr
, "Call to wsm5_gpu (not including data xfer): %d microseconds\n", (e2
- s2
));
1737 cudaMemcpy(((void *)th
), ((const void *)th_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1738 cudaMemcpy(((void *)pii
), ((const void *)pii_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1739 cudaMemcpy(((void *)q
), ((const void *)q_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1740 cudaMemcpy(((void *)qc
), ((const void *)qc_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1741 cudaMemcpy(((void *)qi
), ((const void *)qi_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1742 cudaMemcpy(((void *)qr
), ((const void *)qr_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1743 cudaMemcpy(((void *)qs
), ((const void *)qs_d
), (((unsigned long)d3
) * 4UL), cudaMemcpyDeviceToHost
);
1747 cudaMemcpy(((void *)rain
), ((const void *)rain_d
), (((unsigned long)d2
) * 4UL), cudaMemcpyDeviceToHost
);
1748 cudaMemcpy(((void *)rainncv
), ((const void *)rainncv_d
), (((unsigned long)d2
) * 4UL), cudaMemcpyDeviceToHost
);
1749 cudaMemcpy(((void *)sr
), ((const void *)sr_d
), (((unsigned long)d2
) * 4UL), cudaMemcpyDeviceToHost
);
1750 cudaMemcpy(((void *)snow
), ((const void *)snow_d
), (((unsigned long)d2
) * 4UL), cudaMemcpyDeviceToHost
);
1751 cudaMemcpy(((void *)snowncv
), ((const void *)snowncv_d
), (((unsigned long)d2
) * 4UL), cudaMemcpyDeviceToHost
);
1752 e
= (rsl_internal_microclock_());
1754 cudaMemcpy(((void *)((float *)retvals
)), ((const void *)retvals_d
), (((unsigned long)((((*kme
)) - ((*kms
))) + 1)) * 4UL), cudaMemcpyDeviceToHost
);
1755 fprintf(stderr
, "Call to wsm5_gpu (including data xfer): %d microseconds\n", (e
- s
));
1762 cudaFree(((void *)th_d
));
1763 cudaFree(((void *)pii_d
));
1764 cudaFree(((void *)q_d
));
1765 cudaFree(((void *)qc_d
));
1766 cudaFree(((void *)qi_d
));
1767 cudaFree(((void *)qr_d
));
1768 cudaFree(((void *)qs_d
));
1769 cudaFree(((void *)den_d
));
1770 cudaFree(((void *)p_d
));
1771 cudaFree(((void *)delz_d
));
1775 cudaFree(((void *)rain_d
));
1776 cudaFree(((void *)rainncv_d
));
1777 cudaFree(((void *)sr_d
));
1778 cudaFree(((void *)snow_d
));
1779 cudaFree(((void *)snowncv_d
));
1780 cudaFree(((void *)retvals_d
));
1785 int get_wsm5_gpu_levels_( int *retval
)
1789 void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) { }
1790 # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 1
1794 # 1 "/tmp/tmpxft_00001ecc_00000000-1.c" 1
1795 # 1 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" 1
1796 # 97 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1798 char* gpuProfileName
;
1800 } __cudaFatCubinEntry
;
1801 # 113 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1803 char* gpuProfileName
;
1805 } __cudaFatPtxEntry
;
1806 # 125 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1808 char* gpuProfileName
;
1810 } __cudaFatDebugEntry
;
1814 __cudaFatDontSearchFlag
= (1 << 0),
1815 __cudaFatDontCacheFlag
= (1 << 1)
1816 } __cudaFatCudaBinaryFlag
;
1817 # 145 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1819 unsigned long magic
;
1820 unsigned long version
;
1821 unsigned long gpuInfoVersion
;
1825 __cudaFatPtxEntry
*ptx
;
1826 __cudaFatCubinEntry
*cubin
;
1827 __cudaFatDebugEntry
*debug
;
1830 } __cudaFatCudaBinary
;
1831 # 189 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1832 void fatGetCubinForGpu( __cudaFatCudaBinary
*binary
, char* gpuName
, char* *cubin
, char* *dbgInfoFile
);
1833 # 2 "/tmp/tmpxft_00001ecc_00000000-1.c" 2
1841 static const unsigned char __deviceText
[] = {
1842 0x61,0x72,0x63,0x68,0x69,0x74,0x65,0x63,0x74,0x75,0x72,0x65,0x20,0x7b,0x73,0x6d,
1843 0x5f,0x31,0x30,0x7d,0x0a,0x61,0x62,0x69,0x76,0x65,0x72,0x73,0x69,0x6f,0x6e,0x20,
1844 0x7b,0x30,0x7d,0x0a,0x63,0x6f,0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x6e,0x61,0x6d,
1845 0x65,0x20,0x3d,0x20,0x5f,0x5f,0x64,0x75,0x6d,0x6d,0x79,0x5f,0x65,0x6e,0x74,0x72,
1846 0x79,0x5f,0x5f,0x0a,0x09,0x6c,0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x73,
1847 0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x72,0x65,0x67,0x20,0x3d,0x20,0x30,
1848 0x0a,0x09,0x62,0x61,0x72,0x20,0x3d,0x20,0x30,0x0a,0x09,0x62,0x69,0x6e,0x63,0x6f,
1849 0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x09,0x30,0x78,0x66,0x30,0x30,0x30,0x30,0x30,
1850 0x30,0x31,0x20,0x30,0x78,0x65,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x20,0x0a,0x09,
1851 0x7d,0x0a,0x7d,0x0a,0x00
1858 static __cudaFatPtxEntry __ptxEntries
[] = {{0,0}};
1859 static __cudaFatCubinEntry __cubinEntries
[] = {{"sm_10",(char*)__deviceText
},{0,0}};
1860 static __cudaFatDebugEntry __debugEntries
[] = {{0,0}};
1862 static __cudaFatCudaBinary __fatDeviceText
__attribute__ ((section (".nvFatBinSegment")))= {0x1ee55a01,0x00000002,0x840b5bca,"81bb892378501d16","y.cu"," ",__ptxEntries
,__cubinEntries
,__debugEntries
,0,0};
1863 # 5 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2
1864 # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1
1865 # 65 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1866 # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1
1867 # 66 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2
1868 # 88 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1869 extern void** __cudaRegisterFatBinary(
1873 extern void __cudaUnregisterFatBinary(
1874 void **fatCubinHandle
1877 extern void __cudaRegisterVar(
1878 void **fatCubinHandle
,
1880 char *deviceAddress
,
1881 const char *deviceName
,
1888 extern void __cudaRegisterTexture(
1889 void **fatCubinHandle
,
1890 const struct textureReference
*hostVar
,
1891 const void **deviceAddress
,
1892 const char *deviceName
,
1898 extern void __cudaRegisterShared(
1899 void **fatCubinHandle
,
1903 extern void __cudaRegisterFunction(
1904 void **fatCubinHandle
,
1905 const char *hostFun
,
1907 const char *deviceName
,
1919 static void **__cudaFatCubinHandle
;
1921 static void __cudaUnregisterBinaryUtil(void)
1923 __cudaUnregisterFatBinary(__cudaFatCubinHandle
);
1928 __attribute__((destructor
)) static void __cudaUnregisterBinary(void)
1930 __cudaUnregisterBinaryUtil();
1932 # 195 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1933 # 1 "/usr/local/cuda/bin/../include/common_functions.h" 1
1934 # 64 "/usr/local/cuda/bin/../include/common_functions.h"
1935 # 1 "/usr/local/cuda/bin/../include/crt/func_macro.h" 1 3
1936 # 65 "/usr/local/cuda/bin/../include/common_functions.h" 2
1938 extern __attribute__((weak
)) long __cuda_clock(void); long __cuda_clock(void)
1943 extern __attribute__((weak
)) void *__cuda_memset(void *s
, int c
, size_t n
); void *__cuda_memset(void *s
, int c
, size_t n
)
1947 while (n
--) *p
++ = (char)c
;
1951 # 88 "/usr/local/cuda/bin/../include/common_functions.h"
1952 # 1 "/usr/local/cuda/bin/../include/math_functions.h" 1 3
1953 # 794 "/usr/local/cuda/bin/../include/math_functions.h" 3
1954 extern __attribute__((weak
)) int __cuda___signbitl(long double a
); int __cuda___signbitl(long double a
);
1955 extern __attribute__((weak
)) int __cuda___isinfl(long double a
); int __cuda___isinfl(long double a
);
1956 extern __attribute__((weak
)) int __cuda___isnanl(long double a
); int __cuda___isnanl(long double a
);
1957 extern __attribute__((weak
)) int __cuda___finitel(long double a
); int __cuda___finitel(long double a
);
1958 # 834 "/usr/local/cuda/bin/../include/math_functions.h" 3
1959 extern __attribute__((weak
)) int __cuda_abs(int a
); int __cuda_abs(int a
)
1964 extern __attribute__((weak
)) float __cuda_fabsf(float a
); float __cuda_fabsf(float a
)
1969 extern __attribute__((weak
)) long long int __cuda_llabs(long long int a
); long long int __cuda_llabs(long long int a
)
1978 extern __attribute__((weak
)) float __cuda_exp2f(float a
); float __cuda_exp2f(float a
)
1983 # 1 "/usr/local/cuda/bin/../include/device_functions.h" 1 3
1984 # 322 "/usr/local/cuda/bin/../include/device_functions.h" 3
1985 # 1 "/usr/local/cuda/bin/../include/math_constants.h" 1 3
1986 # 323 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
1990 extern __attribute__((weak
)) int __cuda___isnan(double a
); int __cuda___isnan(double a
);
1991 extern __attribute__((weak
)) int __cuda___isnanf(float a
); int __cuda___isnanf(float a
);
1992 static int __double2int_rz(double);
1993 static unsigned int __double2uint_rz(double);
1994 static long long int __double2ll_rz(double);
1995 static unsigned long long int __double2ull_rz(double);
1996 # 345 "/usr/local/cuda/bin/../include/device_functions.h" 3
1997 static int __mulhi(int a
, int b
)
1999 long long int c
= (long long int)a
* (long long int)b
;
2001 return (int)(c
>> 32);
2004 static unsigned int __umulhi(unsigned int a
, unsigned int b
)
2006 unsigned long long int c
= (unsigned long long int)a
* (unsigned long long int)b
;
2008 return (unsigned int)(c
>> 32);
2011 static unsigned long long int __umul64hi(unsigned long long int a
, unsigned long long int b
)
2013 unsigned int a_lo
= (unsigned int)a
;
2014 unsigned long long int a_hi
= a
>> 32;
2015 unsigned int b_lo
= (unsigned int)b
;
2016 unsigned long long int b_hi
= b
>> 32;
2017 unsigned long long int m1
= a_lo
* b_hi
;
2018 unsigned long long int m2
= a_hi
* b_lo
;
2021 carry
= (0ULL + __umulhi(a_lo
, b_lo
) + (unsigned int)m1
+ (unsigned int)m2
) >> 32;
2023 return a_hi
* b_hi
+ (m1
>> 32) + (m2
>> 32) + carry
;
2026 static long long int __mul64hi(long long int a
, long long int b
)
2028 return __umul64hi(a
, b
) - (a
< 0LL ? b
: 0LL) - (b
< 0LL ? a
: 0LL);
2031 static float __saturatef(float a
)
2033 return a
>= 1.0f
? 1.0f
: a
<= 0.0f
? 0.0f
: a
;
2036 static unsigned int __sad(int a
, int b
, unsigned int c
)
2038 long long int diff
= (long long int)a
- (long long int)b
;
2040 return (unsigned int)(__cuda_llabs(diff
) + (long long int)c
);
2043 static unsigned int __usad(unsigned int a
, unsigned int b
, unsigned int c
)
2045 long long int diff
= (long long int)a
- (long long int)b
;
2047 return (unsigned int)(__cuda_llabs(diff
) + (long long int)c
);
2050 static int __mul24(int a
, int b
)
2053 a
= (a
& 0x800000) != 0 ? a
| ~0xffffff : a
;
2055 b
= (b
& 0x800000) != 0 ? b
| ~0xffffff : b
;
2060 static unsigned int __umul24(unsigned int a
, unsigned int b
)
2068 static float __int_as_float(int a
)
2070 union {int a
; float b
;} u
;
2077 static int __float_as_int(float a
)
2079 union {float a
; int b
;} u
;
2086 static long long int __internal_float2ll_kernel(float a
, long long int max
, long long int min
, long long int nan
, enum cudaRoundMode rndMode
)
2088 unsigned long long int res
, t
= 0ULL;
2092 if (sizeof(a
) == sizeof(double) && __cuda___isnan((double)a
)) return nan
; if (sizeof(a
) == sizeof(float) && __cuda___isnanf((float)a
)) return nan
; if (a
>= max
) return max
; if (a
<= min
) return min
;
2093 ia
= __float_as_int(a
);
2094 shift
= 189 - ((ia
>> 23) & 0xff);
2095 res
= (unsigned long long int)(((ia
<< 8) | 0x80000000) >> 1) << 32;
2100 t
= res
<< (64 - shift
);
2103 if (rndMode
== cudaRoundNearest
&& (long long int)t
< 0LL) {
2104 res
+= t
== 0x8000000000000000ULL
? res
& 1ULL : 1ULL;
2106 else if (rndMode
== cudaRoundMinInf
&& t
!= 0ULL && ia
> 0x80000000) {
2109 else if (rndMode
== cudaRoundPosInf
&& t
!= 0ULL && (int)ia
> 0) {
2112 if ((int)ia
< 0) res
= (unsigned long long int)-(long long int)res
;
2113 return (long long int)res
;
2116 static int __internal_float2int(float a
, enum cudaRoundMode rndMode
)
2118 return (int)__internal_float2ll_kernel(a
, 2147483647LL, -2147483648LL, 0LL, rndMode
);
2121 static int __float2int_rz(float a
)
2123 return __internal_float2int(a
, cudaRoundZero
);
2126 static int __float2int_ru(float a
)
2128 return __internal_float2int(a
, cudaRoundPosInf
);
2131 static int __float2int_rd(float a
)
2133 return __internal_float2int(a
, cudaRoundMinInf
);
2136 static int __float2int_rn(float a
)
2138 return __internal_float2int(a
, cudaRoundNearest
);
2141 static long long int __internal_float2ll(float a
, enum cudaRoundMode rndMode
)
2143 return __internal_float2ll_kernel(a
, 9223372036854775807LL, -9223372036854775807LL -1LL, -9223372036854775807LL -1LL, rndMode
);
2146 static long long int __float2ll_rz(float a
)
2148 return __internal_float2ll(a
, cudaRoundZero
);
2151 static long long int __float2ll_ru(float a
)
2153 return __internal_float2ll(a
, cudaRoundPosInf
);
2156 static long long int __float2ll_rd(float a
)
2158 return __internal_float2ll(a
, cudaRoundMinInf
);
2161 static long long int __float2ll_rn(float a
)
2163 return __internal_float2ll(a
, cudaRoundNearest
);
2166 static unsigned long long int __internal_float2ull_kernel(float a
, unsigned long long int max
, unsigned long long int nan
, enum cudaRoundMode rndMode
)
2168 unsigned long long int res
, t
= 0ULL;
2172 if (sizeof(a
) == sizeof(double) && __cuda___isnan((double)a
)) return nan
; if (sizeof(a
) == sizeof(float) && __cuda___isnanf((float)a
)) return nan
; if (a
>= max
) return max
; if (a
<= 0LL) return 0LL;
2173 ia
= __float_as_int(a
);
2174 shift
= 190 - ((ia
>> 23) & 0xff);
2175 res
= (unsigned long long int)((ia
<< 8) | 0x80000000) << 32;
2177 t
= res
>> (int)(shift
> 64);
2180 t
= res
<< (64 - shift
);
2183 if (rndMode
== cudaRoundNearest
&& (long long int)t
< 0LL) {
2184 res
+= t
== 0x8000000000000000ULL
? res
& 1ULL : 1ULL;
2186 else if (rndMode
== cudaRoundPosInf
&& t
!= 0ULL) {
2192 static unsigned int __internal_float2uint(float a
, enum cudaRoundMode rndMode
)
2194 return (unsigned int)__internal_float2ull_kernel(a
, 4294967295U, 0U, rndMode
);
2197 static unsigned int __float2uint_rz(float a
)
2199 return __internal_float2uint(a
, cudaRoundZero
);
2202 static unsigned int __float2uint_ru(float a
)
2204 return __internal_float2uint(a
, cudaRoundPosInf
);
2207 static unsigned int __float2uint_rd(float a
)
2209 return __internal_float2uint(a
, cudaRoundMinInf
);
2212 static unsigned int __float2uint_rn(float a
)
2214 return __internal_float2uint(a
, cudaRoundNearest
);
2217 static unsigned long long int __internal_float2ull(float a
, enum cudaRoundMode rndMode
)
2219 return __internal_float2ull_kernel(a
, 18446744073709551615ULL, 9223372036854775808ULL, rndMode
);
2222 static unsigned long long int __float2ull_rz(float a
)
2224 return __internal_float2ull(a
, cudaRoundZero
);
2227 static unsigned long long int __float2ull_ru(float a
)
2229 return __internal_float2ull(a
, cudaRoundPosInf
);
2232 static unsigned long long int __float2ull_rd(float a
)
2234 return __internal_float2ull(a
, cudaRoundMinInf
);
2237 static unsigned long long int __float2ull_rn(float a
)
2239 return __internal_float2ull(a
, cudaRoundNearest
);
2242 static int __internal_normalize64(unsigned long long int *a
)
2246 if ((*a
& 0xffffffff00000000ULL
) == 0ULL) {
2250 if ((*a
& 0xffff000000000000ULL
) == 0ULL) {
2254 if ((*a
& 0xff00000000000000ULL
) == 0ULL) {
2258 if ((*a
& 0xf000000000000000ULL
) == 0ULL) {
2262 if ((*a
& 0xC000000000000000ULL
) == 0ULL) {
2266 if ((*a
& 0x8000000000000000ULL
) == 0ULL) {
2273 static int __internal_normalize(unsigned int *a
)
2275 unsigned long long int t
= (unsigned long long int)*a
;
2276 int lz
= __internal_normalize64(&t
);
2278 *a
= (unsigned int)(t
>> 32);
2283 static float __internal_int2float_kernel(int a
, enum cudaRoundMode rndMode
)
2292 if (a
== 0) return res
.f
;
2293 if (a
< 0) res
.i
= (unsigned int)-a
;
2294 shift
= __internal_normalize((unsigned int*)&res
.i
);
2296 res
.i
= (res
.i
>> 8);
2297 res
.i
+= (127 + 30 - shift
) << 23;
2298 if (a
< 0) res
.i
|= 0x80000000;
2299 if ((rndMode
== cudaRoundNearest
) && (t
>= 0x80000000)) {
2300 res
.i
+= (t
== 0x80000000) ? (res
.i
& 1) : (t
>> 31);
2302 else if ((rndMode
== cudaRoundMinInf
) && t
&& (a
< 0)) {
2305 else if ((rndMode
== cudaRoundPosInf
) && t
&& (a
> 0)) {
2311 static float __int2float_rz(int a
)
2313 return __internal_int2float_kernel(a
, cudaRoundZero
);
2316 static float __int2float_ru(int a
)
2318 return __internal_int2float_kernel(a
, cudaRoundPosInf
);
2321 static float __int2float_rd(int a
)
2323 return __internal_int2float_kernel(a
, cudaRoundMinInf
);
2326 static float __int2float_rn(int a
)
2328 return __internal_int2float_kernel(a
, cudaRoundNearest
);
2331 static float __internal_uint2float_kernel(unsigned int a
, enum cudaRoundMode rndMode
)
2340 if (a
== 0) return res
.f
;
2341 shift
= __internal_normalize((unsigned int*)&res
.i
);
2343 res
.i
= (res
.i
>> 8);
2344 res
.i
+= (127 + 30 - shift
) << 23;
2345 if ((rndMode
== cudaRoundNearest
) && (t
>= 0x80000000)) {
2346 res
.i
+= (t
== 0x80000000) ? (res
.i
& 1) : (t
>> 31);
2348 else if ((rndMode
== cudaRoundPosInf
) && t
) {
2354 static float __uint2float_rz(unsigned int a
)
2356 return __internal_uint2float_kernel(a
, cudaRoundZero
);
2359 static float __uint2float_ru(unsigned int a
)
2361 return __internal_uint2float_kernel(a
, cudaRoundPosInf
);
2364 static float __uint2float_rd(unsigned int a
)
2366 return __internal_uint2float_kernel(a
, cudaRoundMinInf
);
2369 static float __uint2float_rn(unsigned int a
)
2371 return __internal_uint2float_kernel(a
, cudaRoundNearest
);
2374 static float __ll2float_rn(long long int a
)
2379 static float __ull2float_rn(unsigned long long int a
)
2381 unsigned long long int temp
;
2382 unsigned int res
, t
;
2384 if (a
== 0ULL) return 0.0f
;
2386 shift
= __internal_normalize64(&temp
);
2387 temp
= (temp
>> 8) | ((temp
& 0xffULL
) ? 1ULL : 0ULL);
2388 res
= (unsigned int)(temp
>> 32);
2389 t
= (unsigned int)temp
;
2390 res
+= (127 + 62 - shift
) << 23;
2391 res
+= t
== 0x80000000 ? res
& 1 : t
>> 31;
2392 return __int_as_float(res
);
2395 static float __internal_fmul_kernel(float a
, float b
, int rndNearest
)
2397 unsigned long long product
;
2402 unsigned expo_x
, expo_y
;
2408 expo_x
= expo_y
& (xx
.i
>> 23);
2409 expo_x
= expo_x
- 1;
2410 expo_y
= expo_y
& (yy
.i
>> 23);
2411 expo_y
= expo_y
- 1;
2413 if ((expo_x
<= 0xFD) &&
2416 expo_x
= expo_x
+ expo_y
;
2417 expo_y
= xx
.i
^ yy
.i
;
2418 xx
.i
= xx
.i
& 0x00ffffff;
2420 xx
.i
= xx
.i
| 0x00800000;
2421 yy
.i
= yy
.i
| 0x80000000;
2423 product
= ((unsigned long long)xx
.i
) * yy
.i
;
2424 expo_x
= expo_x
- 127 + 2;
2425 expo_y
= expo_y
& 0x80000000;
2426 xx
.i
= (unsigned int)(product
>> 32);
2427 yy
.i
= (unsigned int)(product
& 0xffffffff);
2429 if (xx
.i
< 0x00800000) {
2430 xx
.i
= (xx
.i
<< 1) | (yy
.i
>> 31);
2434 if (expo_x
<= 0xFD) {
2435 xx
.i
= xx
.i
| expo_y
;
2436 xx
.i
= xx
.i
+ (expo_x
<< 23);
2438 if (yy
.i
< 0x80000000) return xx
.f
;
2439 xx
.i
+= (((yy
.i
== 0x80000000) ? (xx
.i
& 1) : (yy
.i
>> 31))
2442 } else if ((int)expo_x
>= 254) {
2444 xx
.i
= (expo_y
| 0x7F800000) - (!rndNearest
);
2448 expo_x
= ((unsigned int)-((int)expo_x
));
2454 yy
.i
= (xx
.i
<< (32 - expo_x
)) | ((yy
.i
) ? 1 : 0);
2455 xx
.i
= expo_y
+ (xx
.i
>> expo_x
);
2456 xx
.i
+= (((yy
.i
== 0x80000000) ? (xx
.i
& 1) : (yy
.i
>> 31))
2462 product
= xx
.i
^ yy
.i
;
2463 product
= product
& 0x80000000;
2464 if (!(xx
.i
& 0x7fffffff)) {
2465 if (expo_y
!= 254) {
2466 xx
.i
= (unsigned int)product
;
2470 if (expo_y
== 0xFF000000) {
2471 xx
.i
= expo_y
| 0x00C00000;
2473 xx
.i
= yy
.i
| 0x00400000;
2477 if (!(yy
.i
& 0x7fffffff)) {
2478 if (expo_x
!= 254) {
2479 xx
.i
= (unsigned int)product
;
2483 if (expo_x
== 0xFF000000) {
2484 xx
.i
= expo_x
| 0x00C00000;
2486 xx
.i
= xx
.i
| 0x00400000;
2490 if ((expo_y
!= 254) && (expo_x
!= 254)) {
2494 expo_y
|= xx
.i
& 0x80000000;
2500 while (!(xx
.i
& 0x80000000)) {
2504 xx
.i
= (xx
.i
>> 8) | (expo_y
& 0x80000000);
2505 expo_y
&= ~0x80000000;
2510 expo_x
|= yy
.i
& 0x80000000;
2512 while (!(yy
.i
& 0x80000000)) {
2516 yy
.i
= (yy
.i
>> 8) | (expo_x
& 0x80000000);
2517 expo_x
&= ~0x80000000;
2525 if (expo_x
> 0xFF000000) {
2527 xx
.i
= xx
.i
| 0x00400000;
2531 if (expo_y
> 0xFF000000) {
2533 xx
.i
= yy
.i
| 0x00400000;
2536 xx
.i
= (unsigned int)product
| 0x7f800000;
2541 static float __internal_fadd_kernel(float a
, float b
, int rndNearest
)
2547 unsigned int expo_x
;
2548 unsigned int expo_y
;
2556 if (expo_y
> (xx
.i
<< 1)) {
2563 expo_x
= temp
& (xx
.i
>> 23);
2564 expo_x
= expo_x
- 1;
2565 expo_y
= temp
& (yy
.i
>> 23);
2566 expo_y
= expo_y
- 1;
2568 if ((expo_x
<= 0xFD) &&
2572 expo_y
= expo_x
- expo_y
;
2577 xx
.i
= xx
.i
& ~0x7f000000;
2578 xx
.i
= xx
.i
| 0x00800000;
2579 yy
.i
= yy
.i
& ~0xff000000;
2580 yy
.i
= yy
.i
| 0x00800000;
2582 if ((int)temp
< 0) {
2585 temp
= (expo_y
) ? (yy
.i
<< temp
) : 0;
2586 temp
= (unsigned int)(-((int)temp
));
2587 xx
.i
= xx
.i
- (yy
.i
>> expo_y
) - (temp
? 1 : 0);
2588 if (xx
.i
& 0x00800000) {
2589 if (expo_x
<= 0xFD) {
2590 xx
.i
= xx
.i
& ~0x00800000;
2591 xx
.i
= (xx
.i
+ (expo_x
<< 23)) + 0x00800000;
2592 if (temp
< 0x80000000) return xx
.f
;
2593 xx
.i
+= (((temp
== 0x80000000) ? (xx
.i
& 1) : (temp
>> 31))
2598 if ((temp
| (xx
.i
<< 1)) == 0) {
2604 yy
.i
= xx
.i
& 0x80000000;
2606 xx
.i
= (xx
.i
<< 1) | (temp
>> 31);
2609 } while (!(xx
.i
& 0x00800000));
2615 temp
= (expo_y
) ? (yy
.i
<< temp
) : 0;
2616 xx
.i
= xx
.i
+ (yy
.i
>> expo_y
);
2617 if (!(xx
.i
& 0x01000000)) {
2618 if (expo_x
<= 0xFD) {
2620 xx
.i
= xx
.i
+ (expo_x
<< 23);
2621 if (temp
< 0x80000000) return xx
.f
;
2622 xx
.i
+= (((temp
== 0x80000000) ? expo_y
: (temp
>> 31))
2628 temp
= (xx
.i
<< 31) | (temp
>> 1);
2630 xx
.i
= ((xx
.i
& 0x80000000) | (xx
.i
>> 1)) & ~0x40000000;
2634 if (expo_x
<= 0xFD) {
2636 xx
.i
+= (((temp
== 0x80000000) ? expo_y
: (temp
>> 31))
2638 xx
.i
= xx
.i
+ (expo_x
<< 23);
2641 if ((int)expo_x
>= 254) {
2643 xx
.i
= ((xx
.i
& 0x80000000) | 0x7f800000) - (!rndNearest
);
2647 expo_y
= expo_x
+ 32;
2648 yy
.i
= xx
.i
& 0x80000000;
2649 xx
.i
= xx
.i
& ~0xff000000;
2651 expo_x
= (unsigned int)(-((int)expo_x
));
2652 temp
= xx
.i
<< expo_y
| ((temp
) ? 1 : 0);
2653 xx
.i
= yy
.i
| (xx
.i
>> expo_x
);
2654 xx
.i
+= (((temp
== 0x80000000) ? (xx
.i
& 1) : (temp
>> 31))
2660 if (xx
.i
== 0x80000000) {
2665 if ((expo_y
!= 254) && (expo_x
!= 254)) {
2667 if (expo_x
== (unsigned int) -1) {
2668 temp
= xx
.i
& 0x80000000;
2670 while (!(xx
.i
& 0x80000000)) {
2675 xx
.i
= (xx
.i
>> 8) | temp
;
2677 if (expo_y
== (unsigned int) -1) {
2678 temp
= yy
.i
& 0x80000000;
2680 while (!(yy
.i
& 0x80000000)) {
2685 yy
.i
= (yy
.i
>> 8) | temp
;
2692 if (expo_x
> 0xff000000) {
2694 xx
.i
= xx
.i
| 0x00400000;
2698 if (expo_y
> 0xff000000) {
2700 xx
.i
= yy
.i
| 0x00400000;
2703 if ((expo_x
== 0xff000000) && (expo_y
== 0xff000000)) {
2708 expo_x
= xx
.i
^ yy
.i
;
2709 xx
.i
= xx
.i
| ((expo_x
) ? 0xffc00000 : 0);
2713 if (expo_y
== 0xff000000) {
2720 static float __fadd_rz(float a
, float b
)
2722 return __internal_fadd_kernel(a
, b
, 0);
2725 static float __fmul_rz(float a
, float b
)
2727 return __internal_fmul_kernel(a
, b
, 0);
2730 static float __fdividef(float a
, float b
)
2733 if (__cuda_fabsf(b
) > 8.507059173e37f
) {
2734 if (__cuda_fabsf(a
) <= 3.402823466e38f
) {
2735 return ((a
/ b
) / 3.402823466e38f
) / 3.402823466e38f
;
2737 return __int_as_float(0x7fffffff);
2744 static void __brkpt(int c
)
2749 extern int __cudaSynchronizeThreads(void**, void*);
2753 static inline __attribute__((always_inline
)) void __syncthreads(void)
2756 L
: if (__cudaSynchronizeThreads((void**)&&L
, (void*)&_
)) goto L
;
2759 static void __trap(void)
2763 # 1139 "/usr/local/cuda/bin/../include/device_functions.h" 3
2764 static float __sinf(float a
)
2769 static float __cosf(float a
)
2774 static float __log2f(float a
)
2785 static float __internal_accurate_fdividef(float a
, float b
)
2787 if (__cuda_fabsf(b
) > 8.507059173e37f
) {
2791 return __fdividef(a
, b
);
2794 static float __tanf(float a
)
2796 return __sinf(a
) / __cosf(a
);
2799 static void __sincosf(float a
, float *sptr
, float *cptr
)
2805 static float __expf(float a
)
2807 return __cuda_exp2f(a
* 1.442695041f
);
2810 static float __exp10f(float a
)
2812 return __cuda_exp2f(a
* 3.321928094f
);
2815 static float __log10f(float a
)
2817 return 0.301029996f
* __log2f(a
);
2820 static float __logf(float a
)
2822 return 0.693147181f
* __log2f(a
);
2825 static float __powf(float a
, float b
)
2827 return __cuda_exp2f(b
* __log2f(a
));
2830 static float fdividef(float a
, float b
)
2835 return __internal_accurate_fdividef(a
, b
);
2839 static int __clz(int a
)
2841 return (a
)?(158-(__float_as_int(__uint2float_rz((unsigned int)a
))>>23)):32;
2844 static int __ffs(int a
)
2846 return 32 - __clz (a
& -a
);
2849 static int __clzll(long long int a
)
2851 int ahi
= ((int)(a
>> 32));
2852 int alo
= ((int)(a
& 0xffffffffULL
));
2860 res
= res
+ __clz(ahi
);
2864 static int __ffsll(long long int a
)
2866 return 64 - __clzll (a
& -a
);
2868 # 1252 "/usr/local/cuda/bin/../include/device_functions.h" 3
2869 static double fdivide(double a
, double b
)
2871 return (double)fdividef((float)a
, (float)b
);
2876 static int __double2int_rz(double a
)
2878 return __float2int_rz((float)a
);
2881 static unsigned int __double2uint_rz(double a
)
2883 return __float2uint_rz((float)a
);
2886 static long long int __double2ll_rz(double a
)
2888 return __float2ll_rz((float)a
);
2891 static unsigned long long int __double2ull_rz(double a
)
2893 return __float2ull_rz((float)a
);
2895 # 1291 "/usr/local/cuda/bin/../include/device_functions.h" 3
2896 # 1 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 1 3
2897 # 214 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 3
2898 static int __iAtomicAdd(int *address
, int val
)
2902 *address
= old
+ val
;
2907 static unsigned int __uAtomicAdd(unsigned int *address
, unsigned int val
)
2909 unsigned int old
= *address
;
2911 *address
= old
+ val
;
2916 static int __iAtomicExch(int *address
, int val
)
2925 static unsigned int __uAtomicExch(unsigned int *address
, unsigned int val
)
2927 unsigned int old
= *address
;
2934 static float __fAtomicExch(float *address
, float val
)
2936 float old
= *address
;
2943 static int __iAtomicMin(int *address
, int val
)
2947 *address
= old
< val
? old
: val
;
2952 static unsigned int __uAtomicMin(unsigned int *address
, unsigned int val
)
2954 unsigned int old
= *address
;
2956 *address
= old
< val
? old
: val
;
2961 static int __iAtomicMax(int *address
, int val
)
2965 *address
= old
> val
? old
: val
;
2970 static unsigned int __uAtomicMax(unsigned int *address
, unsigned int val
)
2972 unsigned int old
= *address
;
2974 *address
= old
> val
? old
: val
;
2979 static unsigned int __uAtomicInc(unsigned int *address
, unsigned int val
)
2981 unsigned int old
= *address
;
2983 *address
= (old
>= val
) ? 0 : old
+ 1;
2988 static unsigned int __uAtomicDec(unsigned int *address
, unsigned int val
)
2990 unsigned int old
= *address
;
2992 *address
= ((old
== 0) | (old
> val
)) ? val
: (old
- 1);
2997 static int __iAtomicAnd(int *address
, int val
)
3001 *address
= old
& val
;
3006 static unsigned int __uAtomicAnd(unsigned int *address
, unsigned int val
)
3008 unsigned int old
= *address
;
3010 *address
= old
& val
;
3015 static int __iAtomicOr(int *address
, int val
)
3019 *address
= old
| val
;
3024 static unsigned int __uAtomicOr(unsigned int *address
, unsigned int val
)
3026 unsigned int old
= *address
;
3028 *address
= old
| val
;
3033 static int __iAtomicXor(int *address
, int val
)
3037 *address
= old
^ val
;
3042 static unsigned int __uAtomicXor(unsigned int *address
, unsigned int val
)
3044 unsigned int old
= *address
;
3046 *address
= old
^ val
;
3051 static int __iAtomicCAS(int *address
, int compare
, int val
)
3055 *address
= old
== compare
? val
: old
;
3060 static unsigned int __uAtomicCAS(unsigned int *address
, unsigned int compare
, unsigned int val
)
3062 unsigned int old
= *address
;
3064 *address
= old
== compare
? val
: old
;
3068 # 1292 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
3069 # 1 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 1 3
3070 # 2007 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 3
3071 extern void __cudaTextureFetch(const void *tex
, void *index
, int integer
, void *val
);
3073 static int4
__itexfetchi(const void *tex
, int4 index
)
3077 __cudaTextureFetch(tex
, &index
, 1, &val
);
3082 static uint4
__utexfetchi(const void *tex
, int4 index
)
3086 __cudaTextureFetch(tex
, &index
, 1, &val
);
3091 static float4
__ftexfetchi(const void *tex
, int4 index
)
3095 __cudaTextureFetch(tex
, &index
, 1, &val
);
3100 static int4
__itexfetch(const void *tex
, float4 index
, int dim
)
3104 __cudaTextureFetch(tex
, &index
, 0, &val
);
3109 static uint4
__utexfetch(const void *tex
, float4 index
, int dim
)
3113 __cudaTextureFetch(tex
, &index
, 0, &val
);
3118 static float4
__ftexfetch(const void *tex
, float4 index
, int dim
)
3122 __cudaTextureFetch(tex
, &index
, 0, &val
);
3126 # 1293 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
3127 # 859 "/usr/local/cuda/bin/../include/math_functions.h" 2 3
3130 extern __attribute__((weak
)) int __cuda___signbitf(float a
); int __cuda___signbitf(float a
)
3132 return (int)((unsigned int)__float_as_int(a
) >> 31);
3138 extern __attribute__((weak
)) float __cuda_copysignf(float a
, float b
); float __cuda_copysignf(float a
, float b
)
3140 return __int_as_float((__float_as_int(b
) & 0x80000000) |
3141 (__float_as_int(a
) & ~0x80000000));
3143 # 883 "/usr/local/cuda/bin/../include/math_functions.h" 3
3144 extern __attribute__((weak
)) int min(int a
, int b
); int min(int a
, int b
)
3146 return a
< b
? a
: b
;
3149 extern __attribute__((weak
)) unsigned int umin(unsigned int a
, unsigned int b
); unsigned int umin(unsigned int a
, unsigned int b
)
3151 return a
< b
? a
: b
;
3154 extern __attribute__((weak
)) int max(int a
, int b
); int max(int a
, int b
)
3156 return a
> b
? a
: b
;
3159 extern __attribute__((weak
)) unsigned int umax(unsigned int a
, unsigned int b
); unsigned int umax(unsigned int a
, unsigned int b
)
3161 return a
> b
? a
: b
;
3163 # 967 "/usr/local/cuda/bin/../include/math_functions.h" 3
3164 extern __attribute__((weak
)) float __internal_nearbyintf(float a
); float __internal_nearbyintf(float a
)
3166 float fa
= fabsf(a
);
3168 if (fa
>= 8388608.0f
) {
3171 volatile float u
= 8388608.0f
+ fa
;
3174 return copysignf(u
, a
);
3178 extern __attribute__((weak
)) float __internal_fminf(float a
, float b
); float __internal_fminf(float a
, float b
)
3187 if ((cvta
.i
<< 1) > 0xff000000) return b
;
3188 if ((cvtb
.i
<< 1) > 0xff000000) return a
;
3189 if ((cvta
.i
| cvtb
.i
) == 0x80000000) {
3190 return __int_as_float(0x80000000);
3192 return a
< b
? a
: b
;
3195 extern __attribute__((weak
)) float __internal_fmaxf(float a
, float b
); float __internal_fmaxf(float a
, float b
)
3204 if ((cvta
.i
<< 1) > 0xff000000) return b
;
3205 if ((cvtb
.i
<< 1) > 0xff000000) return a
;
3206 if ((cvta
.f
== 0.0f
) && (cvtb
.f
== 0.0f
)) {
3210 return a
> b
? a
: b
;
3212 # 1055 "/usr/local/cuda/bin/../include/math_functions.h" 3
3213 extern __attribute__((weak
)) long int __cuda_labs(long int a
); long int __cuda_labs(long int a
)
3218 extern __attribute__((weak
)) float __cuda_ceilf(float a
); float __cuda_ceilf(float a
)
3223 extern __attribute__((weak
)) float __cuda_floorf(float a
); float __cuda_floorf(float a
)
3228 extern __attribute__((weak
)) float __cuda_sqrtf(float a
); float __cuda_sqrtf(float a
)
3233 extern __attribute__((weak
)) float __cuda_rsqrtf(float a
); float __cuda_rsqrtf(float a
)
3235 return 1.0f
/ sqrtf(a
);
3238 extern __attribute__((weak
)) float __cuda_truncf(float a
); float __cuda_truncf(float a
)
3243 extern __attribute__((weak
)) int __cuda_max(int a
, int b
); int __cuda_max(int a
, int b
)
3248 extern __attribute__((weak
)) int __cuda_min(int a
, int b
); int __cuda_min(int a
, int b
)
3253 extern __attribute__((weak
)) unsigned int __cuda_umax(unsigned int a
, unsigned int b
); unsigned int __cuda_umax(unsigned int a
, unsigned int b
)
3258 extern __attribute__((weak
)) unsigned int __cuda_umin(unsigned int a
, unsigned int b
); unsigned int __cuda_umin(unsigned int a
, unsigned int b
)
3263 extern __attribute__((weak
)) long long int __cuda_llrintf(float a
); long long int __cuda_llrintf(float a
)
3265 return __float2ll_rn(a
);
3268 extern __attribute__((weak
)) long int __cuda_lrintf(float a
); long int __cuda_lrintf(float a
)
3271 return (long int)__cuda_llrintf(a
);
3277 extern __attribute__((weak
)) float __cuda_nearbyintf(float a
); float __cuda_nearbyintf(float a
)
3282 return __internal_nearbyintf(a
);
3286 extern __attribute__((weak
)) float __cuda_fmaxf(float a
, float b
); float __cuda_fmaxf(float a
, float b
)
3291 return __internal_fmaxf(a
, b
);
3295 extern __attribute__((weak
)) float __cuda_fminf(float a
, float b
); float __cuda_fminf(float a
, float b
)
3300 return __internal_fminf(a
, b
);
3303 # 1162 "/usr/local/cuda/bin/../include/math_functions.h" 3
3304 extern __attribute__((weak
)) int __cuda___finitef(float a
); int __cuda___finitef(float a
)
3306 return __cuda_fabsf(a
) < __int_as_float(0x7f800000);
3309 extern __attribute__((weak
)) int __cuda___isinff(float a
); int __cuda___isinff(float a
)
3311 return __cuda_fabsf(a
) == __int_as_float(0x7f800000);
3314 extern __attribute__((weak
)) int __cuda___isnanf(float a
); int __cuda___isnanf(float a
)
3316 return !(__cuda_fabsf(a
) <= __int_as_float(0x7f800000));
3319 extern __attribute__((weak
)) float __cuda_nextafterf(float a
, float b
); float __cuda_nextafterf(float a
, float b
)
3323 ia
= __float_as_int(a
);
3324 ib
= __float_as_int(b
);
3329 if (__cuda___isnanf(a
) || __cuda___isnanf(b
)) return a
+ b
;
3330 if (__int_as_float (ia
| ib
) == 0.0f
) return b
;
3336 if (__int_as_float(ia
) == 0.0f
) {
3337 return __cuda_copysignf(__int_as_float(0x00000001), b
);
3340 if ((a
< b
) && (a
< 0.0f
)) ia
--;
3341 if ((a
< b
) && (a
> 0.0f
)) ia
++;
3342 if ((a
> b
) && (a
< 0.0f
)) ia
++;
3343 if ((a
> b
) && (a
> 0.0f
)) ia
--;
3344 a
= __int_as_float(ia
);
3353 extern __attribute__((weak
)) float __cuda_nanf(const char *tagp
); float __cuda_nanf(const char *tagp
)
3356 return __int_as_float(0x7fffffff);
3360 extern __attribute__((weak
)) float __internal_atanhf_kernel(float a_1
, float a_2
); float __internal_atanhf_kernel(float a_1
, float a_2
)
3366 t
= 1.566305595598990E-001f
/64.0f
;
3367 t
= t
* a2
+ 1.995081856004762E-001f
/16.0f
;
3368 t
= t
* a2
+ 3.333382699617026E-001f
/4.0f
;
3378 extern __attribute__((weak
)) float __internal_atanf_kernel(float a
); float __internal_atanf_kernel(float a
)
3383 t0
= - 5.674867153f
;
3384 t0
= t4
* - 0.823362947f
+ t0
;
3385 t0
= t0
* t4
- 6.565555096f
;
3388 t1
= t4
+ 11.33538818f
;
3389 t1
= t1
* t4
+ 28.84246826f
;
3390 t1
= t1
* t4
+ 19.69667053f
;
3397 extern __attribute__((weak
)) float __internal_tan_kernel(float a
); float __internal_tan_kernel(float a
)
3402 t
= 4.114678393115178E-003f
* a2
- 8.231194034909670E-001f
;
3403 s
= a2
- 2.469348886157666E+000f
;
3411 extern __attribute__((weak
)) float __internal_accurate_logf(float a
); float __internal_accurate_logf(float a
)
3417 ia
= __float_as_int(a
);
3419 if ((ia
< 0x00800000) || (ia
> 0x7f7fffff)) {
3423 m
= __int_as_float((ia
& 0x807fffff) | 0x3f800000);
3424 e
= ((unsigned)ia
>> 23) - 127;
3425 if (m
> 1.414213562f
) {
3433 z
= __internal_atanhf_kernel(t
, z
);
3434 z
= (float)e
* 0.693147181f
+ z
;
3438 extern __attribute__((weak
)) float __internal_accurate_log2f(float a
); float __internal_accurate_log2f(float a
)
3440 return 1.442695041f
* __internal_accurate_logf(a
);
3444 static unsigned int __cudart_i2opi_f
[] = {
3454 extern __attribute__((weak
)) float __internal_trig_reduction_kernel(float a
, int *quadrant
); float __internal_trig_reduction_kernel(float a
, int *quadrant
)
3458 if (__cuda_fabsf(a
) > 48039.0f
) {
3460 unsigned int ia
= __float_as_int(a
);
3461 unsigned int s
= ia
& 0x80000000;
3462 unsigned int result
[7];
3463 unsigned int phi
, plo
;
3464 unsigned int hi
, lo
;
3467 e
= ((ia
>> 23) & 0xff) - 128;
3468 ia
= (ia
<< 8) | 0x80000000;
3475 for (q
= 0; q
< 6; q
++) {
3476 plo
= __cudart_i2opi_f
[q
] * ia
;
3477 phi
= __umulhi (__cudart_i2opi_f
[q
], ia
);
3479 hi
= phi
+ (lo
< plo
);
3491 hi
= (hi
<< e
) | (lo
>> q
);
3492 lo
= (lo
<< e
) | (result
[idx
] >> q
);
3496 hi
= (hi
<< 2) | (lo
>> 30);
3498 e
= (hi
+ (lo
> 0)) > 0x80000000;
3512 while ((int)hi
> 0) {
3513 hi
= (hi
<< 1) | (lo
>> 31);
3517 lo
= hi
* 0xc90fdaa2;
3518 hi
= __umulhi(hi
, 0xc90fdaa2);
3520 hi
= (hi
<< 1) | (lo
>> 31);
3525 ia
= s
| (((e
+ 126) << 23) + (hi
>> 8) + ((hi
<< 24) >= 0x80000000));
3526 return __int_as_float(ia
);
3528 q
= __float2int_rn(a
* 0.636619772f
);
3530 a
= a
- j
* 1.5703125000000000e+000f
;
3531 a
= a
- j
* 4.8351287841796875e-004f
;
3532 a
= a
- j
* 3.1385570764541626e-007f
;
3533 a
= a
- j
* 6.0771005065061922e-011f
;
3537 # 1405 "/usr/local/cuda/bin/../include/math_functions.h" 3
3538 extern __attribute__((weak
)) float __internal_expf_kernel(float a
, float scale
); float __internal_expf_kernel(float a
, float scale
)
3542 j
= __cuda_truncf(a
* 1.442695041f
);
3543 z
= a
- j
* 0.6931457519f
;
3544 z
= z
- j
* 1.4286067653e-6f
;
3545 z
= z
* 1.442695041f
;
3546 z
= __cuda_exp2f(z
) * __cuda_exp2f(j
+ scale
);
3550 extern __attribute__((weak
)) float __internal_accurate_expf(float a
); float __internal_accurate_expf(float a
)
3553 z
= __internal_expf_kernel(a
, 0.0f
);
3554 if (a
< -105.0f
) z
= 0.0f
;
3555 if (a
> 105.0f
) z
= __int_as_float(0x7f800000);
3559 extern __attribute__((weak
)) float __internal_accurate_exp10f(float a
); float __internal_accurate_exp10f(float a
)
3562 j
= __cuda_truncf(a
* 3.321928094f
);
3563 z
= a
- j
* 3.0102920532226563e-001f
;
3564 z
= z
- j
* 7.9034171557301747e-007f
;
3565 z
= z
* 3.321928094f
;
3566 z
= __cuda_exp2f(z
) * __cuda_exp2f(j
);
3567 if (a
< -46.0f
) z
= 0.0f
;
3568 if (a
> 46.0f
) z
= __int_as_float(0x7f800000);
3572 extern __attribute__((weak
)) float __internal_lgammaf_pos(float a
); float __internal_lgammaf_pos(float a
)
3577 if (__cuda___isinff(a
)) {
3587 sum
= 0.77783067e-3f
;
3588 sum
= sum
* t
- 0.2777655457e-2f
;
3589 sum
= sum
* t
+ 0.83333273853e-1f
;
3590 sum
= sum
* s
+ 0.918938533204672f
;
3591 s
= 0.5f
* __internal_accurate_logf(a
);
3600 s
= - 7.488903254816711E+002f
;
3601 s
= s
* a
- 1.234974215949363E+004f
;
3602 s
= s
* a
- 4.106137688064877E+004f
;
3603 s
= s
* a
- 4.831066242492429E+004f
;
3604 s
= s
* a
- 1.430333998207429E+005f
;
3605 t
= a
- 2.592509840117874E+002f
;
3606 t
= t
* a
- 1.077717972228532E+004f
;
3607 t
= t
* a
- 9.268505031444956E+004f
;
3608 t
= t
* a
- 2.063535768623558E+005f
;
3613 } else if (a
>= 1.5f
) {
3615 t
= + 4.959849168282574E-005f
;
3616 t
= t
* a
- 2.208948403848352E-004f
;
3617 t
= t
* a
+ 5.413142447864599E-004f
;
3618 t
= t
* a
- 1.204516976842832E-003f
;
3619 t
= t
* a
+ 2.884251838546602E-003f
;
3620 t
= t
* a
- 7.382757963931180E-003f
;
3621 t
= t
* a
+ 2.058131963026755E-002f
;
3622 t
= t
* a
- 6.735248600734503E-002f
;
3623 t
= t
* a
+ 3.224670187176319E-001f
;
3624 t
= t
* a
+ 4.227843368636472E-001f
;
3627 } else if (a
>= 0.7f
) {
3629 t
= + 4.588266515364258E-002f
;
3630 t
= t
* a
+ 1.037396712740616E-001f
;
3631 t
= t
* a
+ 1.228036339653591E-001f
;
3632 t
= t
* a
+ 1.275242157462838E-001f
;
3633 t
= t
* a
+ 1.432166835245778E-001f
;
3634 t
= t
* a
+ 1.693435824224152E-001f
;
3635 t
= t
* a
+ 2.074079329483975E-001f
;
3636 t
= t
* a
+ 2.705875136435339E-001f
;
3637 t
= t
* a
+ 4.006854436743395E-001f
;
3638 t
= t
* a
+ 8.224669796332661E-001f
;
3639 t
= t
* a
+ 5.772156651487230E-001f
;
3643 t
= + 3.587515669447039E-003f
;
3644 t
= t
* a
- 5.471285428060787E-003f
;
3645 t
= t
* a
- 4.462712795343244E-002f
;
3646 t
= t
* a
+ 1.673177015593242E-001f
;
3647 t
= t
* a
- 4.213597883575600E-002f
;
3648 t
= t
* a
- 6.558672843439567E-001f
;
3649 t
= t
* a
+ 5.772153712885004E-001f
;
3652 return -__internal_accurate_logf(t
);
3657 extern __attribute__((weak
)) float __internal_sin_kernel(float x
); float __internal_sin_kernel(float x
)
3662 z
= - 1.95152959e-4f
;
3663 z
= z
* x2
+ 8.33216087e-3f
;
3664 z
= z
* x2
- 1.66666546e-1f
;
3672 extern __attribute__((weak
)) float __internal_cos_kernel(float x
); float __internal_cos_kernel(float x
)
3678 z
= z
* x2
- 1.38873163e-3f
;
3679 z
= z
* x2
+ 4.16666457e-2f
;
3680 z
= z
* x2
- 5.00000000e-1f
;
3681 z
= z
* x2
+ 1.00000000e+0f
;
3685 extern __attribute__((weak
)) float __internal_accurate_sinf(float a
); float __internal_accurate_sinf(float a
)
3690 if (__cuda___isinff(a
)) {
3691 return __int_as_float(0x7fffffff);
3696 z
= __internal_trig_reduction_kernel(a
, &i
);
3699 z
= __internal_cos_kernel(z
);
3701 z
= __internal_sin_kernel(z
);
3715 extern __attribute__((weak
)) float __cuda_rintf(float a
); float __cuda_rintf(float a
)
3717 return __cuda_nearbyintf(a
);
3720 extern __attribute__((weak
)) float __cuda_sinf(float a
); float __cuda_sinf(float a
)
3725 return __internal_accurate_sinf(a
);
3729 extern __attribute__((weak
)) float __cuda_cosf(float a
); float __cuda_cosf(float a
)
3737 if (__cuda___isinff(a
)) {
3738 return __int_as_float(0x7fffffff);
3740 z
= __internal_trig_reduction_kernel(a
, &i
);
3744 z
= __internal_cos_kernel(z
);
3746 z
= __internal_sin_kernel(z
);
3755 extern __attribute__((weak
)) float __cuda_tanf(float a
); float __cuda_tanf(float a
)
3763 if (__cuda___isinff(a
)) {
3764 return __int_as_float(0x7fffffff);
3766 z
= __internal_trig_reduction_kernel(a
, &i
);
3768 z
= __internal_tan_kernel(z
);
3776 extern __attribute__((weak
)) float __cuda_log2f(float a
); float __cuda_log2f(float a
)
3781 return __internal_accurate_log2f(a
);
3785 extern __attribute__((weak
)) float __cuda_expf(float a
); float __cuda_expf(float a
)
3790 return __internal_accurate_expf(a
);
3794 extern __attribute__((weak
)) float __cuda_exp10f(float a
); float __cuda_exp10f(float a
)
3799 return __internal_accurate_exp10f(a
);
3803 extern __attribute__((weak
)) float __cuda_coshf(float a
); float __cuda_coshf(float a
)
3807 a
= __cuda_fabsf(a
);
3808 z
= __internal_expf_kernel(a
, -2.0f
);
3809 z
= 2.0f
* z
+ 0.125f
/ z
;
3811 z
= __int_as_float(0x7f800000);
3816 extern __attribute__((weak
)) float __cuda_sinhf(float a
); float __cuda_sinhf(float a
)
3821 a
= __cuda_fabsf(a
);
3825 z
= 2.816951222e-6f
;
3826 z
= z
* a2
+ 1.983615978e-4f
;
3827 z
= z
* a2
+ 8.333350058e-3f
;
3828 z
= z
* a2
+ 1.666666650e-1f
;
3832 z
= __internal_expf_kernel(a
, -2.0f
);
3833 z
= 2.0f
* z
- 0.125f
/ z
;
3835 z
= __int_as_float(0x7f800000);
3838 return __cuda_copysignf(z
, s
);
3841 extern __attribute__((weak
)) float __cuda_tanhf(float a
); float __cuda_tanhf(float a
)
3845 t
= __cuda_fabsf(a
);
3850 t
= 1.643758066599993e-2f
;
3851 t
= t
* z2
- 5.267181327760551e-2f
;
3852 t
= t
* z2
+ 1.332072505223051e-1f
;
3853 t
= t
* z2
- 3.333294663641083e-1f
;
3857 else if (t
< 88.0f
) {
3858 t
= 1.0f
- 2.0f
/ (__internal_expf_kernel(2.0f
* t
, 0.0f
) + 1.0f
);
3860 else if (t
>= 88.0f
) {
3863 return __cuda_copysignf(t
, a
);
3866 extern __attribute__((weak
)) float __cuda_atan2f(float a
, float b
); float __cuda_atan2f(float a
, float b
)
3872 t3
= __cuda_fabsf(b
);
3873 t1
= __cuda_fabsf(a
);
3875 if (t3
== 0.0f
&& t1
== 0.0f
) {
3876 t3
= __cuda___signbitf(b
) ? 3.141592654f
: 0;
3877 } else if (__cuda___isinff(t3
) && __cuda___isinff(t1
)) {
3878 t3
= __cuda___signbitf(b
) ? 2.356194490f
: 0.785398163f
;
3888 t3
= __internal_accurate_fdividef(t1
, t0
);
3889 t3
= __internal_atanf_kernel(t3
);
3891 if (__cuda_fabsf(a
) > __cuda_fabsf(b
)) t3
= 1.570796327f
- t3
;
3892 if (b
< 0.0f
) t3
= 3.141592654f
- t3
;
3894 t3
= __cuda_copysignf(t3
, a
);
3899 extern __attribute__((weak
)) float __cuda_atanf(float a
); float __cuda_atanf(float a
)
3904 t0
= __cuda_fabsf(a
);
3910 t1
= __internal_atanf_kernel(t1
);
3913 t1
= 1.570796327f
- t1
;
3915 return __cuda_copysignf(t1
, a
);
3919 extern __attribute__((weak
)) float __internal_asinf_kernel(float a
); float __internal_asinf_kernel(float a
)
3924 t3
= - 0.501162291f
;
3925 t3
= t3
* t2
+ 0.915201485f
;
3928 t4
= t2
- 5.478654385f
;
3929 t4
= t4
* t2
+ 5.491230488f
;
3935 extern __attribute__((weak
)) float __cuda_asinf(float a
); float __cuda_asinf(float a
)
3939 t0
= __cuda_fabsf(a
);
3942 t2
= __cuda_sqrtf(t2
);
3943 t1
= t0
> 0.575f
? t2
: t0
;
3944 t1
= __internal_asinf_kernel(t1
);
3945 t2
= -2.0f
* t1
+ 1.570796327f
;
3949 return __cuda_copysignf(t1
, a
);
3952 extern __attribute__((weak
)) float __cuda_acosf(float a
); float __cuda_acosf(float a
)
3956 t0
= __cuda_fabsf(a
);
3959 t2
= __cuda_sqrtf(t2
);
3960 t1
= t0
> 0.575f
? t2
: t0
;
3961 t1
= __internal_asinf_kernel(t1
);
3962 t1
= t0
> 0.575f
? 2.0f
* t1
: 1.570796327f
- t1
;
3963 if (__cuda___signbitf(a
)) {
3964 t1
= 3.141592654f
- t1
;
3969 extern __attribute__((weak
)) float __cuda_logf(float a
); float __cuda_logf(float a
)
3974 return __internal_accurate_logf(a
);
3978 extern __attribute__((weak
)) float __cuda_log10f(float a
); float __cuda_log10f(float a
)
3983 return 0.434294482f
* __internal_accurate_logf(a
);
3987 extern __attribute__((weak
)) float __cuda_log1pf(float a
); float __cuda_log1pf(float a
)
3994 if (a
>= -0.394f
&& a
<= 0.65f
) {
3999 t
= __internal_atanhf_kernel (a
, t
);
4001 t
= __internal_accurate_logf (1.0f
+ a
);
4006 extern __attribute__((weak
)) float __cuda_acoshf(float a
); float __cuda_acoshf(float a
)
4011 if (__cuda_fabsf(t
) > 8388608.0f
) {
4013 return 0.693147181f
+ __internal_accurate_logf(a
);
4016 t
= t
+ __cuda_sqrtf(s
* t
);
4017 return __cuda_log1pf(t
);
4021 extern __attribute__((weak
)) float __cuda_asinhf(float a
); float __cuda_asinhf(float a
)
4025 fa
= __cuda_fabsf(a
);
4026 if (fa
> 8.507059173e37f
) {
4027 t
= 0.693147181f
+ __logf(fa
);
4030 t
= fa
+ fa
/ (oofa
+ __cuda_sqrtf(1.0f
+ oofa
* oofa
));
4031 t
= __cuda_log1pf(t
);
4033 return __cuda_copysignf(t
, a
);
4036 extern __attribute__((weak
)) float __cuda_atanhf(float a
); float __cuda_atanhf(float a
)
4040 fa
= __cuda_fabsf(a
);
4041 t
= (2.0f
* fa
) / (1.0f
- fa
);
4042 t
= 0.5f
* __cuda_log1pf(t
);
4043 return __cuda_copysignf(t
, a
);
4046 extern __attribute__((weak
)) float __cuda_expm1f(float a
); float __cuda_expm1f(float a
)
4050 t
= __cuda_rintf (a
* 1.442695041f
);
4051 z
= a
- t
* 0.6931457519f
;
4052 z
= z
- t
* 1.4286067653e-6f
;
4054 if (__cuda_fabsf(a
) < 0.41f
) {
4060 if (t
== 128.0f
) j
= j
- 1.0f
;
4062 u
= 1.38795078474044430E-003f
;
4063 u
= u
* z
+ 8.38241261853264930E-003f
;
4064 u
= u
* z
+ 4.16678317762833940E-002f
;
4065 u
= u
* z
+ 1.66663978874356580E-001f
;
4066 u
= u
* z
+ 4.99999940395997040E-001f
;
4069 if (a
== 0.0f
) u
= a
;
4071 z
= __cuda_exp2f (j
);
4073 if (a
!= 0.0f
) u
= u
* z
+ a
;
4074 if (t
== 128.0f
) u
= u
+ u
;
4076 if (j
> 128.0f
) u
= __int_as_float(0x7f800000);
4077 if (j
< -25.0f
) u
= -1.0f
;
4081 extern __attribute__((weak
)) float __cuda_hypotf(float a
, float b
); float __cuda_hypotf(float a
, float b
)
4085 a
= __cuda_fabsf(a
);
4086 b
= __cuda_fabsf(b
);
4095 t
= __internal_accurate_fdividef(w
, v
);
4097 t
= v
* __cuda_sqrtf(t
);
4101 if ((v
== __int_as_float(0x7f800000)) || (w
== __int_as_float(0x7f800000))) {
4102 t
= __int_as_float(0x7f800000);
4107 extern __attribute__((weak
)) float __cuda_cbrtf(float a
); float __cuda_cbrtf(float a
)
4110 if (a
== 0.0f
|| __cuda___isinff(a
)) {
4113 s
= __cuda_fabsf(a
);
4114 t
= __cuda_exp2f(0.333333333f
* __log2f(s
));
4115 t
= t
- (t
- (s
/ (t
* t
))) * 0.333333333f
;
4116 if (__cuda___signbitf(a
)) {
4122 extern __attribute__((weak
)) float __cuda_erff(float a
); float __cuda_erff(float a
)
4126 t
= __cuda_fabsf(a
);
4129 r
= -5.58510127926029810E-004f
;
4130 r
= r
* t
+ 4.90688891415893070E-003f
;
4131 r
= r
* t
- 2.67027980930150640E-002f
;
4132 r
= r
* t
+ 1.12799056505903940E-001f
;
4133 r
= r
* t
- 3.76122956138427440E-001f
;
4134 r
= r
* t
+ 1.12837911712623450E+000f
;
4136 } else if (t
<= __int_as_float(0x7f800000)) {
4140 q
= 0.3275911f
* t
+ 1.0f
;
4143 r
= r
* q
- 1.453152027f
;
4144 r
= r
* q
+ 1.421413741f
;
4145 r
= r
* q
- 0.284496736f
;
4146 r
= r
* q
+ 0.254829592f
;
4148 q
= __internal_expf_kernel(-a
* a
, 0.0f
);
4153 a
= __int_as_float (__float_as_int(r
) | (__float_as_int(a
) & 0x80000000));
4158 extern __attribute__((weak
)) float __cuda_erfcf(float a
); float __cuda_erfcf(float a
)
4161 return 1.0f
- __cuda_erff(a
);
4162 } else if (a
> 10.0f
) {
4173 p
= + 4.014893410762552E-006f
;
4174 p
= p
* a
+ 5.640401259462436E-001f
;
4175 p
= p
* a
+ 2.626649872281140E+000f
;
4176 p
= p
* a
+ 5.486372652389673E+000f
;
4177 p
= p
* a
+ 5.250714831459401E+000f
;
4178 q
= a
+ 4.651376250488319E+000f
;
4179 q
= q
* a
+ 1.026302828878470E+001f
;
4180 q
= q
* a
+ 1.140762166021288E+001f
;
4181 q
= q
* a
+ 5.251211619089947E+000f
;
4184 q
= 2.0f
* h
- q
* h
* h
;
4187 h
= __int_as_float(__float_as_int(a
) & 0xfffff000);
4190 q
= __internal_expf_kernel(q
, 0.0f
);
4194 h
= __internal_expf_kernel(-l
, 0.0f
);
4202 extern __attribute__((weak
)) float __cuda_lgammaf(float a
); float __cuda_lgammaf(float a
)
4207 t
= __internal_lgammaf_pos(__cuda_fabsf(a
));
4208 if (a
>= 0.0f
) return t
;
4209 a
= __cuda_fabsf(a
);
4210 i
= __cuda_floorf(a
);
4211 if (a
== i
) return __int_as_float(0x7f800000);
4212 if (a
< 1e-19f
) return -__internal_accurate_logf(a
);
4213 i
= __cuda_rintf (2.0f
* a
);
4216 i
= i
* 3.141592654f
;
4218 i
= __internal_cos_kernel(i
);
4220 i
= __internal_sin_kernel(i
);
4222 i
= __cuda_fabsf(i
);
4223 t
= 1.144729886f
- __internal_accurate_logf(i
* a
) - t
;
4227 extern __attribute__((weak
)) float __cuda_ldexpf(float a
, int b
); float __cuda_ldexpf(float a
, int b
)
4229 float fa
= __cuda_fabsf(a
);
4231 if (fa
== 0.0f
|| __cuda___isinff(fa
) || b
== 0) {
4234 else if (__cuda_abs(b
) < 126) {
4235 return a
* __cuda_exp2f((float)b
);
4237 else if (__cuda_abs(b
) < 252) {
4239 return a
* __cuda_exp2f((float)bhalf
) * __cuda_exp2f((float)(b
- bhalf
));
4242 int bquarter
= b
/ 4;
4243 float t
= __cuda_exp2f((float)bquarter
);
4244 return a
* t
* t
* t
* __cuda_exp2f((float)(b
- 3 * bquarter
));
4248 extern __attribute__((weak
)) float __cuda_scalbnf(float a
, int b
); float __cuda_scalbnf(float a
, int b
)
4251 return __cuda_ldexpf(a
, b
);
4254 extern __attribute__((weak
)) float __cuda_scalblnf(float a
, long int b
); float __cuda_scalblnf(float a
, long int b
)
4257 if (b
> 2147483647L) {
4259 } else if (b
< (-2147483647 - 1)) {
4260 t
= (-2147483647 - 1);
4264 return __cuda_scalbnf(a
, t
);
4267 extern __attribute__((weak
)) float __cuda_frexpf(float a
, int *b
); float __cuda_frexpf(float a
, int *b
)
4269 float fa
= __cuda_fabsf(a
);
4271 unsigned int denorm
;
4273 if (fa
< 1.175494351e-38f
) {
4279 expo
= ((__float_as_int(a
) >> 23) & 0xff);
4280 if ((fa
== 0.0f
) || (expo
== 0xff)) {
4284 expo
= expo
- denorm
- 126;
4285 a
= __int_as_float(((__float_as_int(a
) & 0x807fffff) | 0x3f000000));
4291 extern __attribute__((weak
)) float __cuda_modff(float a
, float *b
); float __cuda_modff(float a
, float *b
)
4294 if (__cuda___finitef(a
)) {
4295 t
= __cuda_truncf(a
);
4298 return __cuda_copysignf(t
, a
);
4299 } else if (__cuda___isinff(a
)) {
4302 return __cuda_copysignf(t
, a
);
4309 extern __attribute__((weak
)) float __cuda_fmodf(float a
, float b
); float __cuda_fmodf(float a
, float b
)
4313 if (__cuda___isnanf(a
) || __cuda___isnanf(b
)) {
4317 a
= __cuda_fabsf(a
);
4318 b
= __cuda_fabsf(b
);
4319 if (__cuda___isinff(a
) || b
== 0.0f
) {
4320 return __int_as_float(0x7fffffff);
4321 } else if (a
>= b
) {
4324 int expoa
= (a
< 1.175494351e-38f
) ?
4325 ((int)__log2f(a
)) : (((__float_as_int(a
) >> 23) & 0xff) - 127);
4326 int expob
= (b
< 1.175494351e-38f
) ?
4327 ((int)__log2f(b
)) : (((__float_as_int(b
) >> 23) & 0xff) - 127);
4328 int scale
= expoa
- expob
;
4329 float scaled_b
= __cuda_ldexpf(b
, scale
);
4330 if (scaled_b
<= 0.5f
* a
) {
4340 while (scaled_b
>= b
) {
4341 if (a
>= scaled_b
) {
4346 return __cuda_copysignf(a
, orig_a
);
4352 extern __attribute__((weak
)) float __cuda_remainderf(float a
, float b
); float __cuda_remainderf(float a
, float b
)
4356 unsigned int quot0
= 0;
4358 if (__cuda___isnanf(a
) || __cuda___isnanf(b
)) {
4362 a
= __cuda_fabsf(a
);
4363 b
= __cuda_fabsf(b
);
4364 if (__cuda___isinff(a
) || (b
== 0.0f
)) {
4365 return __int_as_float(0x7fffffff);
4366 } else if (a
>= b
) {
4368 int expoa
= (a
< 1.175494351e-38f
) ?
4369 ((int)__log2f(a
)) : (((__float_as_int(a
) >> 23) & 0xff) - 127);
4370 int expob
= (b
< 1.175494351e-38f
) ?
4371 ((int)__log2f(b
)) : (((__float_as_int(b
) >> 23) & 0xff) - 127);
4372 int scale
= expoa
- expob
;
4373 float scaled_b
= __cuda_ldexpf(b
, scale
);
4374 if (scaled_b
<= 0.5f
* a
) {
4377 # 2255 "/usr/local/cuda/bin/../include/math_functions.h" 3
4378 while (scaled_b
>= b
) {
4380 if (a
>= scaled_b
) {
4381 twoa
= (2.0f
* a
- scaled_b
) - scaled_b
;
4391 if ((twoa
> b
) || ((twoa
== b
) && quot0
)) {
4393 a
= __cuda_copysignf (a
, -1.0f
);
4395 # 2287 "/usr/local/cuda/bin/../include/math_functions.h" 3
4396 a
= __int_as_float((__float_as_int(orig_a
) & 0x80000000)^
4401 extern __attribute__((weak
)) float __cuda_remquof(float a
, float b
, int* quo
); float __cuda_remquof(float a
, float b
, int* quo
)
4405 unsigned int quot
= 0;
4408 if (__cuda___isnanf(a
) || __cuda___isnanf(b
)) {
4414 sign
= 0 - (__cuda___signbitf(a
) != __cuda___signbitf(b
));
4415 a
= __cuda_fabsf(a
);
4416 b
= __cuda_fabsf(b
);
4417 if (__cuda___isinff(a
) || (b
== 0.0f
)) {
4419 return __int_as_float(0x7fffffff);
4420 } else if (a
>= b
) {
4423 int expoa
= (a
< 1.175494351e-38f
) ?
4424 ((int)__log2f(a
)) : (((__float_as_int(a
) >> 23) & 0xff) - 127);
4425 int expob
= (b
< 1.175494351e-38f
) ?
4426 ((int)__log2f(b
)) : (((__float_as_int(b
) >> 23) & 0xff) - 127);
4427 int scale
= expoa
- expob
;
4428 float scaled_b
= __cuda_ldexpf(b
, scale
);
4429 if (scaled_b
<= 0.5f
* a
) {
4432 # 2340 "/usr/local/cuda/bin/../include/math_functions.h" 3
4433 while (scaled_b
>= b
) {
4435 if (a
>= scaled_b
) {
4436 twoa
= (2.0f
* a
- scaled_b
) - scaled_b
;
4446 if ((twoa
> b
) || ((twoa
== b
) && (quot
& 1))) {
4449 a
= __cuda_copysignf (a
, -1.0f
);
4451 # 2375 "/usr/local/cuda/bin/../include/math_functions.h" 3
4452 a
= __int_as_float((__float_as_int(orig_a
) & 0x80000000)^
4454 quot
= quot
& (~((~0)<<3));
4461 extern __attribute__((weak
)) float __cuda_fmaf(float a
, float b
, float c
); float __cuda_fmaf(float a
, float b
, float c
)
4463 unsigned int xx
, yy
, zz
, ww
;
4464 unsigned int temp
, s
, u
;
4465 unsigned int expo_x
, expo_y
, expo_z
;
4467 xx
= __float_as_int(a
);
4468 yy
= __float_as_int(b
);
4469 zz
= __float_as_int(c
);
4470 # 2401 "/usr/local/cuda/bin/../include/math_functions.h" 3
4472 expo_x
= temp
& (xx
>> 23);
4473 expo_x
= expo_x
- 1;
4474 expo_y
= temp
& (yy
>> 23);
4475 expo_y
= expo_y
- 1;
4476 expo_z
= temp
& (zz
>> 23);
4477 expo_z
= expo_z
- 1;
4479 if (!((expo_x
<= 0xFD) &&
4481 (expo_z
<= 0xFD))) {
4486 if ((yy
<< 1) > 0xff000000) {
4487 return __int_as_float(0x7fffffff);
4489 if ((zz
<< 1) > 0xff000000) {
4490 return __int_as_float(0x7fffffff);
4492 if ((xx
<< 1) > 0xff000000) {
4493 return __int_as_float(0x7fffffff);
4495 # 2436 "/usr/local/cuda/bin/../include/math_functions.h" 3
4496 if ((((xx
<< 1) == 0) && ((yy
<< 1) == 0xff000000)) ||
4497 (((yy
<< 1) == 0) && ((xx
<< 1) == 0xff000000))) {
4498 return __int_as_float(0x7fffffff);
4500 if ((zz
<< 1) == 0xff000000) {
4501 if (((yy
<< 1) == 0xff000000) || ((xx
<< 1) == 0xff000000)) {
4502 if ((int)(xx
^ yy
^ zz
) < 0) {
4503 return __int_as_float(0x7fffffff);
4511 if ((xx
<< 1) == 0xff000000) {
4512 xx
= xx
^ (yy
& 0x80000000);
4513 return __int_as_float(xx
);
4515 if ((yy
<< 1) == 0xff000000) {
4516 yy
= yy
^ (xx
& 0x80000000);
4517 return __int_as_float(yy
);
4519 if ((zz
<< 1) == 0xff000000) {
4520 return __int_as_float(zz
);
4527 if (zz
== 0x80000000) {
4528 if (((xx
<< 1) == 0) || ((yy
<< 1) == 0)) {
4529 if ((int)(xx
^ yy
) < 0) {
4530 return __int_as_float(zz
);
4537 if (((zz
<< 1) == 0) &&
4538 (((xx
<< 1) == 0) || ((yy
<< 1) == 0))) {
4540 return __int_as_float(zz
);
4545 if (((xx
<< 1) == 0) || ((yy
<< 1) == 0)) {
4546 return __int_as_float(zz
);
4549 if (expo_x
== (unsigned int)-1) {
4550 temp
= xx
& 0x80000000;
4552 while (!(xx
& 0x80000000)) {
4557 xx
= (xx
>> 8) | temp
;
4560 if (expo_y
== (unsigned int)-1) {
4561 temp
= yy
& 0x80000000;
4563 while (!(yy
& 0x80000000)) {
4568 yy
= (yy
>> 8) | temp
;
4571 if ((expo_z
== (unsigned int)-1) && ((zz
<< 1) != 0)) {
4572 temp
= zz
& 0x80000000;
4574 while (!(zz
& 0x80000000)) {
4579 zz
= (zz
>> 8) | temp
;
4583 expo_x
= expo_x
+ expo_y
;
4585 xx
= xx
& 0x00ffffff;
4587 xx
= xx
| 0x00800000;
4588 yy
= yy
| 0x80000000;
4590 s
= __umulhi(xx
, yy
);
4593 expo_x
= expo_x
- 127 + 2;
4594 expo_y
= expo_y
& 0x80000000;
4597 if (xx
< 0x00800000) {
4598 xx
= (xx
<< 1) | (yy
>> 31);
4603 if ((zz
<< 1) != 0) {
4604 s
= zz
& 0x80000000;
4609 if ((int)expo_z
> (int)expo_x
) {
4625 expo_z
= expo_x
- expo_z
;
4630 while (expo_z
>= 32) {
4631 temp
= ww
| (temp
!= 0);
4637 temp
= ((temp
>> expo_z
) | (ww
<< (32 - expo_z
)) |
4638 ((temp
<< (32 - expo_z
)) != 0));
4639 ww
= (ww
>> expo_z
) | (zz
<< (32 - expo_z
));
4640 zz
= (zz
>> expo_z
);
4649 temp
= (unsigned)(-(int)temp
);
4656 if (!(xx
| yy
| temp
)) {
4658 return __int_as_float(xx
);
4672 expo_y
^= 0x80000000;
4675 while (!(xx
& 0x00800000)) {
4676 xx
= (xx
<< 1) | (yy
>> 31);
4685 if (xx
& 0x01000000) {
4686 temp
= temp
| (yy
<< 31);
4687 yy
= (yy
>> 1) | (xx
<< 31);
4688 xx
= ((xx
& 0x80000000) | (xx
>> 1)) & ~0x40000000;
4693 temp
= yy
| (temp
!= 0);
4694 if (expo_x
<= 0xFD) {
4698 xx
+= (temp
== 0x80000000) ? s
: (temp
>> 31);
4699 xx
= xx
+ (expo_x
<< 23);
4700 return __int_as_float(xx
);
4701 } else if ((int)expo_x
>= 126) {
4703 xx
= expo_y
| 0x7f800000;
4704 return __int_as_float(xx
);
4707 expo_x
= (unsigned int)(-(int)expo_x
);
4710 return __int_as_float(expo_y
);
4712 yy
= (xx
<< (32 - expo_x
)) | ((yy
) ? 1 : 0);
4713 xx
= expo_y
+ (xx
>> expo_x
);
4714 xx
= xx
+ ((yy
==0x80000000) ? (xx
& 1) : (yy
>> 31));
4720 return __int_as_float(xx
);
4723 static float __cudart_A1
[32] =
4759 static float __cudart_A2
[32] =
4762 -4.8115598617e-008f
,
4764 -5.9337519787e-008f
,
4765 -1.3077539940e-008f
,
4766 -5.4355400181e-008f
,
4768 -4.0514414934e-008f
,
4770 -3.2673948880e-008f
,
4773 -4.0189995332e-008f
,
4774 -3.4963733242e-008f
,
4775 -1.0123349270e-008f
,
4776 -5.8755773580e-008f
,
4779 -4.5008988536e-008f
,
4780 -2.4959373235e-008f
,
4782 -5.6610254262e-008f
,
4784 -5.1249720912e-008f
,
4785 -2.4755326677e-008f
,
4786 -4.8496175964e-008f
,
4787 -9.2357703707e-009f
,
4788 -1.1415044909e-008f
,
4789 -1.1239277953e-008f
,
4790 -4.6630056261e-008f
,
4795 static float __cudart_Ainv
[32] =
4831 extern __attribute__((weak
)) float __internal_accurate_powf(float a
, float b
); float __internal_accurate_powf(float a
, float b
)
4836 float log_hi
, log_lo
;
4838 float prod_hi
, prod_lo
;
4840 if ((a
> 0.707106781f
) && (a
< 1.414213562f
)) {
4841 float f
, g
, u
, v
, q
;
4853 q
= 1.49356810919559350E-001f
/64.0f
;
4854 q
= q
* v
+ 1.99887797540072460E-001f
/16.0f
;
4855 q
= q
* v
+ 3.33333880955515580E-001f
/4.0f
;
4858 log_hi
= __int_as_float(__float_as_int(u
) & 0xfffff000);
4859 v
= __int_as_float(__float_as_int(f
) & 0xfffff000);
4860 u
= 2.0f
* (f
- log_hi
);
4868 b_hi
= __int_as_float(__float_as_int(b
) & 0xfffff000);
4870 prod_lo
= b_lo
* log_lo
;
4871 prod_lo
+= b_lo
* log_hi
;
4872 prod_lo
+= b_hi
* log_lo
;
4873 prod_hi
= b_hi
* log_hi
;
4876 return __cuda_expf(prod_hi
) * __cuda_expf(prod_lo
);
4880 if (a
>= 1.175494351e-38f
) {
4881 i
= __float_as_int(a
);
4882 expo
= ((i
>> 23) & 0xff) - 127;
4885 i
= __float_as_int(a
);
4886 expo
= ((i
>> 23) & 0xff) - 127 - 24;
4888 i
= (i
& 0x007fffff) | (0x3f800000);
4889 t
= __int_as_float(i
);
4892 if (t
>= __cudart_A1
[i
+16]) i
+= 16;
4893 if (t
>= __cudart_A1
[i
+8]) i
+= 8;
4894 if (t
>= __cudart_A1
[i
+4]) i
+= 4;
4895 if (t
>= __cudart_A1
[i
+2]) i
+= 2;
4896 if (t
>= __cudart_A1
[i
+1]) i
+= 1;
4898 t
= t
- __cudart_A1
[i
];
4899 t
= t
- __cudart_A2
[i
];
4901 t
= t
* __cudart_Ainv
[i
];
4904 log_hi
= (float)expo
+ (float)i
* 0.03125f
;
4906 log_lo
= - 3.42338934684934650E-001f
;
4907 log_lo
= log_lo
* t
+ 4.80524913518140690E-001f
;
4908 log_lo
= log_lo
* t
- 7.21345070621603800E-001f
;
4909 log_lo
= log_lo
* t
+ 1.44269503837073180E+000f
;
4910 log_lo
= log_lo
* t
;
4913 b_hi
= __int_as_float(__float_as_int(b
) & 0xfffff000);
4915 prod_lo
= b_lo
* log_lo
;
4916 prod_lo
= prod_lo
+ b_lo
* log_hi
;
4917 prod_lo
= prod_lo
+ b_hi
* log_lo
;
4918 prod_hi
= b_hi
* log_hi
;
4921 if (prod_hi
>= 256.0f
) {
4922 return __int_as_float(0x7f800000);
4924 if (prod_hi
<= -256.0f
) {
4929 b
= __cuda_exp2f (0.5f
* prod_hi
);
4930 t
= __cuda_exp2f (prod_lo
);
4936 extern __attribute__((weak
)) float __cuda_powif(float a
, int b
); float __cuda_powif(float a
, int b
)
4938 unsigned int e
= __cuda_abs(b
);
4947 return b
< 0 ? 1.0f
/r
: r
;
4953 extern __attribute__((weak
)) double __cuda_powi(double a
, int b
); double __cuda_powi(double a
, int b
)
4955 unsigned int e
= __cuda_abs(b
);
4964 return b
< 0 ? 1.0/r
: r
;
4970 extern __attribute__((weak
)) float __cuda_powf(float a
, float b
); float __cuda_powf(float a
, float b
)
4977 if (a
== 1.0f
|| b
== 0.0f
) {
4980 if (__cuda___isnanf(a
) || __cuda___isnanf(b
)) {
4983 if (a
== __int_as_float(0x7f800000)) {
4984 return __cuda___signbitf(b
) ? 0.0f
: __int_as_float(0x7f800000);
4986 if (__cuda___isinff(b
)) {
4990 t
= (__cuda_fabsf(a
) > 1.0f
) ? __int_as_float(0x7f800000) : 0.0f
;
4996 bIsOddInteger
= (b
- (2.0f
* floorf(0.5f
* b
))) == 1.0f
;
4998 t
= bIsOddInteger
? a
: 0.0f
;
5004 if (a
== -__int_as_float(0x7f800000)) {
5005 t
= (b
< 0.0f
) ? -1.0f
/a
: -a
;
5006 if (bIsOddInteger
) {
5007 t
= __int_as_float(__float_as_int(t
) ^ 0x80000000);
5011 if ((a
< 0.0f
) && (b
!= __cuda_truncf(b
))) {
5012 return __int_as_float(0x7fffffff);
5014 t
= __cuda_fabsf(a
);
5015 t
= __internal_accurate_powf(t
, b
);
5016 if ((a
< 0.0f
) && bIsOddInteger
) {
5017 t
= __int_as_float(__float_as_int(t
) ^ 0x80000000);
5024 extern __attribute__((weak
)) float __internal_tgammaf_kernel(float a
); float __internal_tgammaf_kernel(float a
)
5027 t
= - 1.05767296987211380E-003f
;
5028 t
= t
* a
+ 7.09279059435508670E-003f
;
5029 t
= t
* a
- 9.65347121958557050E-003f
;
5030 t
= t
* a
- 4.21736613253687960E-002f
;
5031 t
= t
* a
+ 1.66542401247154280E-001f
;
5032 t
= t
* a
- 4.20043267827838460E-002f
;
5033 t
= t
* a
- 6.55878234051332940E-001f
;
5034 t
= t
* a
+ 5.77215696929794240E-001f
;
5035 t
= t
* a
+ 1.00000000000000000E+000f
;
5043 extern __attribute__((weak
)) float __cuda_tgammaf(float a
); float __cuda_tgammaf(float a
)
5047 if (x
> 36.0f
) x
= 36.0f
;
5060 xx
= __internal_tgammaf_kernel(xx
);
5072 if (x
== __cuda_floorf(x
)) {
5073 x
= __int_as_float(0x7fffffff);
5078 if (x
< -41.1f
) x
= -41.1f
;
5084 while (xx
< -0.5f
) {
5088 xx
= __internal_tgammaf_kernel(xx
);
5100 if ((a
< -42.0f
) && !(((int)a
)&1)) {
5101 s
= __int_as_float(0x80000000);
5108 extern __attribute__((weak
)) float __cuda_roundf(float a
); float __cuda_roundf(float a
)
5110 float fa
= __cuda_fabsf(a
);
5111 if (fa
> 8388608.0f
) {
5114 float u
= __cuda_floorf(fa
+ 0.5f
);
5115 if (fa
< 0.5f
) u
= 0.0f
;
5116 return __cuda_copysignf(u
, a
);
5120 extern __attribute__((weak
)) long long int __internal_llroundf_kernel(float a
); long long int __internal_llroundf_kernel(float a
)
5122 unsigned long long int res
, t
= 0LL;
5124 unsigned int ia
= __float_as_int(a
);
5125 if ((ia
<< 1) > 0xff000000) return 0LL;
5126 if ((int)ia
>= 0x5f000000) return 0x7fffffffffffffffLL
;
5127 if (ia
>= 0xdf000000) return 0x8000000000000000LL
;
5128 shift
= 189 - ((ia
>> 23) & 0xff);
5129 res
= ((long long int)(((ia
<< 8) | 0x80000000) >> 1)) << 32;
5134 t
= res
<< (64 - shift
);
5137 if (t
>= 0x8000000000000000LL
) {
5140 if ((int)ia
< 0) res
= (unsigned long long int)(-(long long int)res
);
5141 return (long long int)res
;
5144 extern __attribute__((weak
)) long long int __cuda_llroundf(float a
); long long int __cuda_llroundf(float a
)
5146 return __internal_llroundf_kernel(a
);
5149 extern __attribute__((weak
)) long int __cuda_lroundf(float a
); long int __cuda_lroundf(float a
)
5152 return (long int)__cuda_llroundf(a
);
5153 # 3101 "/usr/local/cuda/bin/../include/math_functions.h" 3
5156 extern __attribute__((weak
)) float __cuda_fdimf(float a
, float b
); float __cuda_fdimf(float a
, float b
)
5166 extern __attribute__((weak
)) int __cuda_ilogbf(float a
); int __cuda_ilogbf(float a
)
5170 a
= __cuda_fabsf(a
);
5171 if (a
<= 1.175494351e-38f
) {
5174 expo
= -((int)((unsigned int)-1 >> 1))-1;
5177 i
= __float_as_int(a
);
5179 while ((int)i
>= 0) {
5185 i
= __float_as_int(a
);
5186 expo
= ((int)((i
>> 23) & 0xff)) - 127;
5187 if ((i
== 0x7f800000)) {
5188 expo
= ((int)((unsigned int)-1 >> 1));
5190 if ((i
> 0x7f800000)) {
5191 expo
= -((int)((unsigned int)-1 >> 1))-1;
5197 extern __attribute__((weak
)) float __cuda_logbf(float a
); float __cuda_logbf(float a
)
5203 if (__cuda___isnanf(a
)) return a
+ a
;
5205 a
= __cuda_fabsf(a
);
5206 if (a
<= 1.175494351e-38f
) {
5209 res
= -__int_as_float(0x7f800000);
5212 i
= __float_as_int(a
);
5214 while ((int)i
>= 0) {
5221 i
= __float_as_int(a
);
5222 expo
= ((int)((i
>> 23) & 0xff)) - 127;
5224 if ((i
>= 0x7f800000)) {
5232 extern __attribute__((weak
)) void __cuda_sincosf(float a
, float *sptr
, float *cptr
); void __cuda_sincosf(float a
, float *sptr
, float *cptr
)
5239 if (__cuda___isinff(a
)) {
5240 *sptr
= __int_as_float(0x7fffffff);
5241 *cptr
= __int_as_float(0x7fffffff);
5249 t
= __internal_trig_reduction_kernel(a
, &quadrant
);
5250 u
= __internal_cos_kernel(t
);
5251 t
= __internal_sin_kernel(t
);
5270 # 3226 "/usr/local/cuda/bin/../include/math_functions.h" 3
5271 extern __attribute__((weak
)) double rsqrt(double a
); double rsqrt(double a
)
5273 return 1.0 / sqrt(a
);
5276 extern __attribute__((weak
)) float rsqrtf(float a
); float rsqrtf(float a
)
5278 return (float)rsqrt((double)a
);
5280 # 4167 "/usr/local/cuda/bin/../include/math_functions.h" 3
5281 # 1 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 1 3
5282 # 45 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 3
5283 extern __attribute__((weak
)) double __cuda_fabs(double a
); double __cuda_fabs(double a
)
5285 return (float)__cuda_fabsf((float)a
);
5288 extern __attribute__((weak
)) double __cuda_fmax(double a
, double b
); double __cuda_fmax(double a
, double b
)
5290 return (float)__cuda_fmaxf((float)a
, (float)b
);
5293 extern __attribute__((weak
)) double __cuda_fmin(double a
, double b
); double __cuda_fmin(double a
, double b
)
5295 return (float)__cuda_fminf((float)a
, (float)b
);
5298 extern __attribute__((weak
)) int __cuda___finite(double a
); int __cuda___finite(double a
)
5300 return __cuda___finitef((float)a
);
5303 extern __attribute__((weak
)) int __cuda___isinf(double a
); int __cuda___isinf(double a
)
5305 return __cuda___isinff((float)a
);
5308 extern __attribute__((weak
)) int __cuda___isnan(double a
); int __cuda___isnan(double a
)
5310 return __cuda___isnanf((float)a
);
5313 extern __attribute__((weak
)) int __cuda___signbit(double a
); int __cuda___signbit(double a
)
5315 return __cuda___signbitf((float)a
);
5318 extern __attribute__((weak
)) double __cuda_sqrt(double a
); double __cuda_sqrt(double a
)
5320 return (double)__cuda_sqrtf((float)a
);
5323 extern __attribute__((weak
)) double __cuda_rsqrt(double a
); double __cuda_rsqrt(double a
)
5325 return (double)__cuda_rsqrtf((float)a
);
5328 extern __attribute__((weak
)) double __cuda_ceil(double a
); double __cuda_ceil(double a
)
5330 return (double)__cuda_ceilf((float)a
);
5333 extern __attribute__((weak
)) double __cuda_trunc(double a
); double __cuda_trunc(double a
)
5335 return (double)__cuda_truncf((float)a
);
5338 extern __attribute__((weak
)) double __cuda_floor(double a
); double __cuda_floor(double a
)
5340 return (double)__cuda_floorf((float)a
);
5343 extern __attribute__((weak
)) double __cuda_copysign(double a
, double b
); double __cuda_copysign(double a
, double b
)
5345 return (double)__cuda_copysignf((float)a
, (float)b
);
5348 extern __attribute__((weak
)) double __cuda_sin(double a
); double __cuda_sin(double a
)
5350 return (double)__cuda_sinf((float)a
);
5353 extern __attribute__((weak
)) double __cuda_cos(double a
); double __cuda_cos(double a
)
5355 return (double)__cuda_cosf((float)a
);
5358 extern __attribute__((weak
)) void __cuda_sincos(double a
, double *sptr
, double *cptr
); void __cuda_sincos(double a
, double *sptr
, double *cptr
)
5362 __cuda_sincosf((float)a
, &fs
, &fc
);
5368 extern __attribute__((weak
)) double __cuda_tan(double a
); double __cuda_tan(double a
)
5370 return (double)__cuda_tanf((float)a
);
5373 extern __attribute__((weak
)) double __cuda_exp(double a
); double __cuda_exp(double a
)
5375 return (double)__cuda_expf((float)a
);
5378 extern __attribute__((weak
)) double __cuda_exp2(double a
); double __cuda_exp2(double a
)
5380 return (double)__cuda_exp2f((float)a
);
5383 extern __attribute__((weak
)) double __cuda_exp10(double a
); double __cuda_exp10(double a
)
5385 return (double)__cuda_exp10f((float)a
);
5388 extern __attribute__((weak
)) double __cuda_expm1(double a
); double __cuda_expm1(double a
)
5390 return (double)__cuda_expm1f((float)a
);
5393 extern __attribute__((weak
)) double __cuda_cosh(double a
); double __cuda_cosh(double a
)
5395 return (double)__cuda_coshf((float)a
);
5398 extern __attribute__((weak
)) double __cuda_sinh(double a
); double __cuda_sinh(double a
)
5400 return (double)__cuda_sinhf((float)a
);
5403 extern __attribute__((weak
)) double __cuda_tanh(double a
); double __cuda_tanh(double a
)
5405 return (double)__cuda_tanhf((float)a
);
5408 extern __attribute__((weak
)) double __cuda_asin(double a
); double __cuda_asin(double a
)
5410 return (double)__cuda_asinf((float)a
);
5413 extern __attribute__((weak
)) double __cuda_acos(double a
); double __cuda_acos(double a
)
5415 return (double)__cuda_acosf((float)a
);
5418 extern __attribute__((weak
)) double __cuda_atan(double a
); double __cuda_atan(double a
)
5420 return (double)__cuda_atanf((float)a
);
5423 extern __attribute__((weak
)) double __cuda_atan2(double a
, double b
); double __cuda_atan2(double a
, double b
)
5425 return (double)__cuda_atan2f((float)a
, (float)b
);
5428 extern __attribute__((weak
)) double __cuda_log(double a
); double __cuda_log(double a
)
5430 return (double)__cuda_logf((float)a
);
5433 extern __attribute__((weak
)) double __cuda_log2(double a
); double __cuda_log2(double a
)
5435 return (double)__cuda_log2f((float)a
);
5438 extern __attribute__((weak
)) double __cuda_log10(double a
); double __cuda_log10(double a
)
5440 return (double)__cuda_log10f((float)a
);
5443 extern __attribute__((weak
)) double __cuda_log1p(double a
); double __cuda_log1p(double a
)
5445 return (double)__cuda_log1pf((float)a
);
5448 extern __attribute__((weak
)) double __cuda_acosh(double a
); double __cuda_acosh(double a
)
5450 return (double)__cuda_acoshf((float)a
);
5453 extern __attribute__((weak
)) double __cuda_asinh(double a
); double __cuda_asinh(double a
)
5455 return (double)__cuda_asinhf((float)a
);
5458 extern __attribute__((weak
)) double __cuda_atanh(double a
); double __cuda_atanh(double a
)
5460 return (double)__cuda_atanhf((float)a
);
5463 extern __attribute__((weak
)) double __cuda_hypot(double a
, double b
); double __cuda_hypot(double a
, double b
)
5465 return (double)__cuda_hypotf((float)a
, (float)b
);
5468 extern __attribute__((weak
)) double __cuda_cbrt(double a
); double __cuda_cbrt(double a
)
5470 return (double)__cuda_cbrtf((float)a
);
5473 extern __attribute__((weak
)) double __cuda_erf(double a
); double __cuda_erf(double a
)
5475 return (double)__cuda_erff((float)a
);
5478 extern __attribute__((weak
)) double __cuda_erfc(double a
); double __cuda_erfc(double a
)
5480 return (double)__cuda_erfcf((float)a
);
5483 extern __attribute__((weak
)) double __cuda_lgamma(double a
); double __cuda_lgamma(double a
)
5485 return (double)__cuda_lgammaf((float)a
);
5488 extern __attribute__((weak
)) double __cuda_tgamma(double a
); double __cuda_tgamma(double a
)
5490 return (double)__cuda_tgammaf((float)a
);
5493 extern __attribute__((weak
)) double __cuda_ldexp(double a
, int b
); double __cuda_ldexp(double a
, int b
)
5495 return (double)__cuda_ldexpf((float)a
, b
);
5498 extern __attribute__((weak
)) double __cuda_scalbn(double a
, int b
); double __cuda_scalbn(double a
, int b
)
5500 return (double)__cuda_scalbnf((float)a
, b
);
5503 extern __attribute__((weak
)) double __cuda_scalbln(double a
, long b
); double __cuda_scalbln(double a
, long b
)
5505 return (double)__cuda_scalblnf((float)a
, b
);
5508 extern __attribute__((weak
)) double __cuda_frexp(double a
, int *b
); double __cuda_frexp(double a
, int *b
)
5510 return (double)__cuda_frexpf((float)a
, b
);
5513 extern __attribute__((weak
)) double __cuda_modf(double a
, double *b
); double __cuda_modf(double a
, double *b
)
5516 float fa
= __cuda_modff((float)a
, &fb
);
5523 extern __attribute__((weak
)) double __cuda_fmod(double a
, double b
); double __cuda_fmod(double a
, double b
)
5525 return (double)__cuda_fmodf((float)a
, (float)b
);
5528 extern __attribute__((weak
)) double __cuda_remainder(double a
, double b
); double __cuda_remainder(double a
, double b
)
5530 return (double)__cuda_remainderf((float)a
, (float)b
);
5533 extern __attribute__((weak
)) double __cuda_remquo(double a
, double b
, int *c
); double __cuda_remquo(double a
, double b
, int *c
)
5535 return (double)__cuda_remquof((float)a
, (float)b
, c
);
5538 extern __attribute__((weak
)) double __cuda_nextafter(double a
, double b
); double __cuda_nextafter(double a
, double b
)
5540 return (double)__cuda_nextafterf((float)a
, (float)b
);
5543 extern __attribute__((weak
)) double __cuda_nan(const char *tagp
); double __cuda_nan(const char *tagp
)
5545 return (double)__cuda_nanf(tagp
);
5548 extern __attribute__((weak
)) double __cuda_pow(double a
, double b
); double __cuda_pow(double a
, double b
)
5550 return (double)__cuda_powf((float)a
, (float)b
);
5553 extern __attribute__((weak
)) double __cuda_round(double a
); double __cuda_round(double a
)
5555 return (double)__cuda_roundf((float)a
);
5558 extern __attribute__((weak
)) long __cuda_lround(double a
); long __cuda_lround(double a
)
5560 return __cuda_lroundf((float)a
);
5563 extern __attribute__((weak
)) long long __cuda_llround(double a
); long long __cuda_llround(double a
)
5565 return __cuda_llroundf((float)a
);
5568 extern __attribute__((weak
)) double __cuda_rint(double a
); double __cuda_rint(double a
)
5570 return (double)__cuda_rintf((float)a
);
5573 extern __attribute__((weak
)) long __cuda_lrint(double a
); long __cuda_lrint(double a
)
5575 return __cuda_lrintf((float)a
);
5578 extern __attribute__((weak
)) long long __cuda_llrint(double a
); long long __cuda_llrint(double a
)
5580 return __cuda_llrintf((float)a
);
5583 extern __attribute__((weak
)) double __cuda_nearbyint(double a
); double __cuda_nearbyint(double a
)
5585 return (double)__cuda_nearbyintf((float)a
);
5588 extern __attribute__((weak
)) double __cuda_fdim(double a
, double b
); double __cuda_fdim(double a
, double b
)
5590 return (double)__cuda_fdimf((float)a
, (float)b
);
5593 extern __attribute__((weak
)) int __cuda_ilogb(double a
); int __cuda_ilogb(double a
)
5595 return __cuda_ilogbf((float)a
);
5598 extern __attribute__((weak
)) double __cuda_logb(double a
); double __cuda_logb(double a
)
5600 return (double)__cuda_logbf((float)a
);
5603 extern __attribute__((weak
)) double __cuda_fma(double a
, double b
, double c
); double __cuda_fma(double a
, double b
, double c
)
5605 return (double)__cuda_fmaf((float)a
, (float)b
, (float)c
);
5607 # 4168 "/usr/local/cuda/bin/../include/math_functions.h" 2 3
5608 # 89 "/usr/local/cuda/bin/../include/common_functions.h" 2
5609 # 196 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2
5610 # 6 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2
5611 extern void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__
));
5612 void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void){__cudaFatCubinHandle
= __cudaRegisterFatBinary((void*)(&__fatDeviceText
));}