merge standard release WRF/WPS V3.0.1.1 into wrffire
[wrffire.git] / wrfv2_fire / phys / wsm5.cu.c
blob264d20d1d390de274472998ca4e815e33382eb71
1 /*
3 This is the WSM5 microphysics accelerated for the NVIDIA GPU. It is experimental and
4 is not supported as part of WRF. There is additional information available
5 at http://www.mmm.ucar.edu/people/michalakes. Requests for assistance will be
6 considered only on a case by case basis, favoring active collaborators.
8 Required: a Linux x86 or x86_64 system with a CUDA-enabled NVIDIA GPU installed
9 as a co-processor, as well as the CUDA libraries in a directory on your system,
10 for example:
12 /usr/local/cuda/lib/libcublas.so
14 included in the CUDA SDK 1.1 from NVIDIA (see nvidia.com).
16 To use with WRF:
18 1) Compile this file and companion file as:
20 gcc -c wsm5.cu.c
21 gcc -c wsm5_gpu.cu.c
23 producing wsm5.cu.o and wsm5_gpu.cu.o
25 2) configure WRF, generating a configure.wrf file for your system
26 Note that serial and dmpar work with the GPU, but smpar
27 and dm+sm may not.
29 3) Modify configure.wrf:
31 a) add -DTEST_ON_GPU_RK -DRUN_ON_GPU to ARCH_LOCAL
32 b) add ../phys/wsm5.cu.o and ../phys/wsm5_gpu.cu.o to LIB_LOCAL
33 (define LIB_LOCAL if it does not already exist)
34 c) add -L/usr/local/cuda/lib -lcuda -lcudart to LIB_LOCAL
35 (or wherever the cuda lib is on your system)
37 4) Compile wrf as usual.
39 Note: The GPU code is compiled for a maximum of 41 vertical levels.
40 If you need a larger number, contact the author below.
42 20080721, JM (michalak@ucar.edu)
46 # 1 "/tmp/tmpxft_00001ecc_00000000-0.c"
47 # 1 "<built-in>"
48 # 1 "<command line>"
49 # 1 "/tmp/tmpxft_00001ecc_00000000-0.c"
50 # 1 "y.cu"
51 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
52 struct _Z7textureIcLi1EL19cudaTextureReadMode0EE;
53 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
54 struct _Z7textureIaLi1EL19cudaTextureReadMode0EE;
55 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
56 struct _Z7textureIhLi1EL19cudaTextureReadMode0EE;
57 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
58 struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE;
59 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
60 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE;
61 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
62 struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE;
63 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
64 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE;
65 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
66 struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE;
67 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
68 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE;
69 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
70 struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE;
71 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
72 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE;
73 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
74 struct _Z7textureIsLi1EL19cudaTextureReadMode0EE;
75 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
76 struct _Z7textureItLi1EL19cudaTextureReadMode0EE;
77 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
78 struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE;
79 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
80 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE;
81 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
82 struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE;
83 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
84 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE;
85 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
86 struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE;
87 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
88 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE;
89 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
90 struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE;
91 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
92 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE;
93 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
94 struct _Z7textureIiLi1EL19cudaTextureReadMode0EE;
95 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
96 struct _Z7textureIjLi1EL19cudaTextureReadMode0EE;
97 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
98 struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE;
99 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
100 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE;
101 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
102 struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE;
103 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
104 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE;
105 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
106 struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE;
107 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
108 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE;
109 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
110 struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE;
111 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
112 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE;
113 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
114 struct _Z7textureIcLi1EL19cudaTextureReadMode1EE;
115 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
116 struct _Z7textureIaLi1EL19cudaTextureReadMode1EE;
117 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
118 struct _Z7textureIhLi1EL19cudaTextureReadMode1EE;
119 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
120 struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE;
121 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
122 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE;
123 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
124 struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE;
125 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
126 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE;
127 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
128 struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE;
129 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
130 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE;
131 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
132 struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE;
133 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
134 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE;
135 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
136 struct _Z7textureIsLi1EL19cudaTextureReadMode1EE;
137 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
138 struct _Z7textureItLi1EL19cudaTextureReadMode1EE;
139 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
140 struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE;
141 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
142 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE;
143 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
144 struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE;
145 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
146 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE;
147 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
148 struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE;
149 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
150 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE;
151 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
152 struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE;
153 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
154 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE;
155 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
156 struct _Z7textureIcLi2EL19cudaTextureReadMode0EE;
157 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
158 struct _Z7textureIaLi2EL19cudaTextureReadMode0EE;
159 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
160 struct _Z7textureIhLi2EL19cudaTextureReadMode0EE;
161 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
162 struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE;
163 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
164 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE;
165 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
166 struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE;
167 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
168 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE;
169 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
170 struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE;
171 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
172 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE;
173 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
174 struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE;
175 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
176 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE;
177 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
178 struct _Z7textureIsLi2EL19cudaTextureReadMode0EE;
179 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
180 struct _Z7textureItLi2EL19cudaTextureReadMode0EE;
181 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
182 struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE;
183 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
184 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE;
185 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
186 struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE;
187 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
188 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE;
189 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
190 struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE;
191 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
192 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE;
193 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
194 struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE;
195 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
196 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE;
197 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
198 struct _Z7textureIiLi2EL19cudaTextureReadMode0EE;
199 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
200 struct _Z7textureIjLi2EL19cudaTextureReadMode0EE;
201 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
202 struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE;
203 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
204 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE;
205 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
206 struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE;
207 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
208 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE;
209 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
210 struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE;
211 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
212 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE;
213 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
214 struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE;
215 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
216 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE;
217 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
218 struct _Z7textureIiLi1EL19cudaTextureReadMode1EE;
219 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
220 struct _Z7textureIjLi1EL19cudaTextureReadMode1EE;
221 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
222 struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE;
223 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
224 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE;
225 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
226 struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE;
227 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
228 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE;
229 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
230 struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE;
231 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
232 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE;
233 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
234 struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE;
235 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
236 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE;
237 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
238 struct _Z7textureIcLi2EL19cudaTextureReadMode1EE;
239 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
240 struct _Z7textureIaLi2EL19cudaTextureReadMode1EE;
241 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
242 struct _Z7textureIhLi2EL19cudaTextureReadMode1EE;
243 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
244 struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE;
245 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
246 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE;
247 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
248 struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE;
249 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
250 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE;
251 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
252 struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE;
253 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
254 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE;
255 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
256 struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE;
257 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
258 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE;
259 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
260 struct _Z7textureIsLi2EL19cudaTextureReadMode1EE;
261 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
262 struct _Z7textureItLi2EL19cudaTextureReadMode1EE;
263 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
264 struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE;
265 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
266 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE;
267 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
268 struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE;
269 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
270 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE;
271 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
272 struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE;
273 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
274 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE;
275 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
276 struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE;
277 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
278 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE;
279 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
280 struct _Z7textureIiLi2EL19cudaTextureReadMode1EE;
281 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
282 struct _Z7textureIjLi2EL19cudaTextureReadMode1EE;
283 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
284 struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE;
285 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
286 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE;
287 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
288 struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE;
289 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
290 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE;
291 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
292 struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE;
293 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
294 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE;
295 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
296 struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE;
297 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
298 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE;
299 # 268 "/usr/include/libio.h" 3
300 struct _IO_FILE;
301 # 214 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 3
302 typedef unsigned long size_t;
303 # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1 3
304 # 56 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 3
305 typedef char bool;
309 # 1 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 1 3
310 # 48 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
311 # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1 3
312 # 49 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3
313 # 1 "/usr/local/cuda/bin/../include/builtin_types.h" 1 3
314 # 42 "/usr/local/cuda/bin/../include/builtin_types.h" 3
315 # 1 "/usr/local/cuda/bin/../include/device_types.h" 1 3
316 # 46 "/usr/local/cuda/bin/../include/device_types.h" 3
/*
 * Rounding-mode selector pulled in from the CUDA SDK's device_types.h (see
 * the preprocessor line marker just above). No enumerator has an explicit
 * initializer, so they take the values 0..3 in declaration order.
 * The names suggest nearest / toward zero / toward +inf / toward -inf;
 * confirm against the CUDA documentation before relying on that.
 * NOTE(review): the brace-only lines of this definition (original lines 318
 * and 323) are absent from this copy of the file.
 */
317 enum cudaRoundMode
319 cudaRoundNearest,
320 cudaRoundZero,
321 cudaRoundPosInf,
322 cudaRoundMinInf
324 # 43 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
325 # 1 "/usr/local/cuda/bin/../include/driver_types.h" 1 3
326 # 60 "/usr/local/cuda/bin/../include/driver_types.h" 3
/*
 * Status codes of the CUDA runtime API. This enum is typedef'd to
 * cudaError_t in this file, and that typedef is the return type of almost
 * every extern runtime prototype declared here.
 * cudaSuccess is pinned to 0; the enumerators that follow it have no
 * initializer and therefore count up consecutively (1..34) until the two
 * explicitly numbered entries at the end.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
327 enum cudaError
329 cudaSuccess = 0,
330 cudaErrorMissingConfiguration,
331 cudaErrorMemoryAllocation,
332 cudaErrorInitializationError,
333 cudaErrorLaunchFailure,
334 cudaErrorPriorLaunchFailure,
335 cudaErrorLaunchTimeout,
336 cudaErrorLaunchOutOfResources,
337 cudaErrorInvalidDeviceFunction,
338 cudaErrorInvalidConfiguration,
339 cudaErrorInvalidDevice,
340 cudaErrorInvalidValue,
341 cudaErrorInvalidPitchValue,
342 cudaErrorInvalidSymbol,
343 cudaErrorMapBufferObjectFailed,
344 cudaErrorUnmapBufferObjectFailed,
345 cudaErrorInvalidHostPointer,
346 cudaErrorInvalidDevicePointer,
347 cudaErrorInvalidTexture,
348 cudaErrorInvalidTextureBinding,
349 cudaErrorInvalidChannelDescriptor,
350 cudaErrorInvalidMemcpyDirection,
351 cudaErrorAddressOfConstant,
352 cudaErrorTextureFetchFailed,
353 cudaErrorTextureNotBound,
354 cudaErrorSynchronizationError,
355 cudaErrorInvalidFilterSetting,
356 cudaErrorInvalidNormSetting,
357 cudaErrorMixedDeviceExecution,
358 cudaErrorCudartUnloading,
359 cudaErrorUnknown,
360 cudaErrorNotYetImplemented,
361 cudaErrorMemoryValueTooLarge,
362 cudaErrorInvalidResourceHandle,
363 cudaErrorNotReady,
/* Explicit value: breaks the consecutive numbering used above. */
364 cudaErrorStartupFailure = 0x7f,
/* Explicit value 10000; presumably the start of a separate code range - confirm. */
365 cudaErrorApiFailureBase = 10000
/*
 * Copy-direction selector. It is the type of the `kind` parameter of the
 * cudaMemcpy* prototypes declared in this file. Values run 0..3 in
 * declaration order (only the first has an explicit initializer).
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
369 enum cudaMemcpyKind
371 cudaMemcpyHostToHost = 0,
372 cudaMemcpyHostToDevice,
373 cudaMemcpyDeviceToHost,
374 cudaMemcpyDeviceToDevice
/*
 * Device description record. In this file cudaGetDeviceProperties() takes a
 * non-const pointer to one of these plus a device index, and
 * cudaChooseDevice() takes a const pointer to one.
 * The meaning and units of the individual fields are not stated anywhere in
 * this file; the names suggest memory sizes, launch limits, compute
 * capability (major/minor) and clock rate - confirm against the CUDA runtime
 * documentation for the SDK version in use.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
378 struct cudaDeviceProp
380 char name[256];
381 size_t totalGlobalMem;
382 size_t sharedMemPerBlock;
383 int regsPerBlock;
384 int warpSize;
385 size_t memPitch;
386 int maxThreadsPerBlock;
387 int maxThreadsDim[3];
388 int maxGridSize[3];
389 size_t totalConstMem;
390 int major;
391 int minor;
392 int clockRate;
393 size_t textureAlignment;
395 # 154 "/usr/local/cuda/bin/../include/driver_types.h" 3
396 typedef enum cudaError cudaError_t;
399 typedef int cudaStream_t;
402 typedef int cudaEvent_t;
403 # 44 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
404 # 1 "/usr/local/cuda/bin/../include/texture_types.h" 1 3
405 # 46 "/usr/local/cuda/bin/../include/texture_types.h" 3
406 struct cudaArray;
/*
 * Element-kind tag stored in the `f` member of struct cudaChannelFormatDesc
 * and passed as the last argument of cudaCreateChannelDesc(). Values are
 * 0..2 in declaration order (no explicit initializers).
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
409 enum cudaChannelFormatKind
411 cudaChannelFormatKindSigned,
412 cudaChannelFormatKindUnsigned,
413 cudaChannelFormatKindFloat
/*
 * Channel format descriptor: four int members x, y, z, w plus a kind tag f.
 * cudaCreateChannelDesc(x, y, z, w, f), declared in this file, takes the
 * same five values and returns one of these by value.
 * Presumably x/y/z/w are per-component bit widths - confirm against the CUDA
 * runtime documentation; this file does not say.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
417 struct cudaChannelFormatDesc
419 int x;
420 int y;
421 int z;
422 int w;
423 enum cudaChannelFormatKind f;
/*
 * Addressing-mode tag; struct textureReference holds an array of two of
 * these (addressMode[2]). Values are 0 (Wrap) and 1 (Clamp) by declaration
 * order.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
427 enum cudaTextureAddressMode
429 cudaAddressModeWrap,
430 cudaAddressModeClamp
/*
 * Filter-mode tag stored in the filterMode member of struct
 * textureReference. Values are 0 (Point) and 1 (Linear) by declaration
 * order.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
434 enum cudaTextureFilterMode
436 cudaFilterModePoint,
437 cudaFilterModeLinear
/*
 * Read-mode tag. Values are 0 (ElementType) and 1 (NormalizedFloat) by
 * declaration order; these appear to be the 0/1 encoded at the end of the
 * generated _Z7texture...cudaTextureReadMode{0,1}EE struct names in this
 * file.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
441 enum cudaTextureReadMode
443 cudaReadModeElementType,
444 cudaReadModeNormalizedFloat
/*
 * Texture reference state: a `normalized` flag, a filter mode, two
 * addressing modes and a channel format descriptor. The texture bind/unbind
 * prototypes in this file take pointers to this struct, and each generated
 * _Z7texture... struct wraps exactly one of these as its only member.
 * NOTE(review): `normalized` is presumably a boolean for normalized
 * coordinates - confirm; this file does not say.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
448 struct textureReference
450 int normalized;
451 enum cudaTextureFilterMode filterMode;
452 enum cudaTextureAddressMode addressMode[2];
453 struct cudaChannelFormatDesc channelDesc;
455 # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
456 # 1 "/usr/local/cuda/bin/../include/vector_types.h" 1 3
457 # 54 "/usr/local/cuda/bin/../include/vector_types.h" 3
/*
 * 1- to 4-component vectors of signed char and unsigned char. Components are
 * named x, y, z, w in that order; an N-component type has the first N.
 * Each struct tag is typedef'd to a type of the same name further down in
 * this file.
 * NOTE(review): the brace-only lines of these definitions are absent from
 * this copy of the file.
 */
458 struct char1
460 signed char x;
464 struct uchar1
466 unsigned char x;
470 struct char2
472 signed char x, y;
476 struct uchar2
478 unsigned char x, y;
482 struct char3
484 signed char x, y, z;
488 struct uchar3
490 unsigned char x, y, z;
494 struct char4
496 signed char x, y, z, w;
500 struct uchar4
502 unsigned char x, y, z, w;
/*
 * 1- to 4-component vectors of short and unsigned short, with components
 * x, y, z, w. Each struct tag is typedef'd to a type of the same name
 * further down in this file.
 * NOTE(review): the brace-only lines of these definitions are absent from
 * this copy of the file.
 */
506 struct short1
508 short x;
512 struct ushort1
514 unsigned short x;
518 struct short2
520 short x, y;
524 struct ushort2
526 unsigned short x, y;
530 struct short3
532 short x, y, z;
536 struct ushort3
538 unsigned short x, y, z;
542 struct short4
544 short x, y, z, w;
548 struct ushort4
550 unsigned short x, y, z, w;
/*
 * 1- to 4-component vectors of int and unsigned int, with components
 * x, y, z, w. Each struct tag is typedef'd to a type of the same name
 * further down in this file.
 * NOTE(review): the brace-only lines of these definitions are absent from
 * this copy of the file.
 */
554 struct int1
556 int x;
560 struct uint1
562 unsigned int x;
566 struct int2
568 int x, y;
572 struct uint2
574 unsigned int x, y;
578 struct int3
580 int x, y, z;
584 struct uint3
586 unsigned int x, y, z;
590 struct int4
592 int x, y, z, w;
596 struct uint4
598 unsigned int x, y, z, w;
/*
 * 1- to 4-component vectors of long and unsigned long, with components
 * x, y, z, w. Each struct tag is typedef'd to a type of the same name
 * further down in this file.
 * NOTE(review): the width of `long` depends on the host ABI this file was
 * generated for (the line markers point at an x86_64 gcc include path).
 * NOTE(review): the brace-only lines of these definitions are absent from
 * this copy of the file.
 */
602 struct long1
604 long x;
608 struct ulong1
610 unsigned long x;
614 struct long2
616 long x, y;
620 struct ulong2
622 unsigned long x, y;
626 struct long3
628 long x, y, z;
632 struct ulong3
634 unsigned long x, y, z;
638 struct long4
640 long x, y, z, w;
644 struct ulong4
646 unsigned long x, y, z, w;
/*
 * 1- to 4-component vectors of float, with components x, y, z, w. Each
 * struct tag is typedef'd to a type of the same name further down in this
 * file.
 * NOTE(review): the brace-only lines of these definitions are absent from
 * this copy of the file.
 */
650 struct float1
652 float x;
656 struct float2
658 float x, y;
662 struct float3
664 float x, y, z;
668 struct float4
670 float x, y, z, w;
/*
 * Two-component vector of double (x, y); the only double-precision vector
 * struct defined in this part of the file. Typedef'd to `double2` further
 * down.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
674 struct double2
676 double x, y;
678 # 282 "/usr/local/cuda/bin/../include/vector_types.h" 3
679 typedef struct char1 char1;
681 typedef struct uchar1 uchar1;
683 typedef struct char2 char2;
685 typedef struct uchar2 uchar2;
687 typedef struct char3 char3;
689 typedef struct uchar3 uchar3;
691 typedef struct char4 char4;
693 typedef struct uchar4 uchar4;
695 typedef struct short1 short1;
697 typedef struct ushort1 ushort1;
699 typedef struct short2 short2;
701 typedef struct ushort2 ushort2;
703 typedef struct short3 short3;
705 typedef struct ushort3 ushort3;
707 typedef struct short4 short4;
709 typedef struct ushort4 ushort4;
711 typedef struct int1 int1;
713 typedef struct uint1 uint1;
715 typedef struct int2 int2;
717 typedef struct uint2 uint2;
719 typedef struct int3 int3;
721 typedef struct uint3 uint3;
723 typedef struct int4 int4;
725 typedef struct uint4 uint4;
727 typedef struct long1 long1;
729 typedef struct ulong1 ulong1;
731 typedef struct long2 long2;
733 typedef struct ulong2 ulong2;
735 typedef struct long3 long3;
737 typedef struct ulong3 ulong3;
739 typedef struct long4 long4;
741 typedef struct ulong4 ulong4;
743 typedef struct float1 float1;
745 typedef struct float2 float2;
747 typedef struct float3 float3;
749 typedef struct float4 float4;
751 typedef struct double2 double2;
752 # 363 "/usr/local/cuda/bin/../include/vector_types.h" 3
753 typedef struct dim3 dim3;
/*
 * Three unsigned int components (x, y, z). cudaConfigureCall(), declared in
 * this file, takes two of these by value as its gridDim and blockDim
 * arguments. The `dim3` typedef for this tag is declared on the line just
 * before this definition.
 * NOTE(review): the brace-only lines of this definition are absent from this
 * copy of the file.
 */
756 struct dim3
758 unsigned int x, y, z;
765 # 45 "/usr/local/cuda/bin/../include/builtin_types.h" 2 3
766 # 50 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 2 3
767 # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
768 extern cudaError_t cudaMalloc(void **devPtr, size_t size);
769 extern cudaError_t cudaMallocHost(void **ptr, size_t size);
770 extern cudaError_t cudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height);
771 extern cudaError_t cudaMallocArray(struct cudaArray **array, const struct cudaChannelFormatDesc *desc, size_t width, size_t height );
772 extern cudaError_t cudaFree(void *devPtr);
773 extern cudaError_t cudaFreeHost(void *ptr);
774 extern cudaError_t cudaFreeArray(struct cudaArray *array);
775 # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h" 3
776 extern cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
777 extern cudaError_t cudaMemcpyToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
778 extern cudaError_t cudaMemcpyFromArray(void *dst, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind);
779 extern cudaError_t cudaMemcpyArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, enum cudaMemcpyKind kind );
780 extern cudaError_t cudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
781 extern cudaError_t cudaMemcpy2DToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
782 extern cudaError_t cudaMemcpy2DFromArray(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind);
783 extern cudaError_t cudaMemcpy2DArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind );
784 extern cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t count, size_t offset , enum cudaMemcpyKind kind );
785 extern cudaError_t cudaMemcpyFromSymbol(void *dst, const char *symbol, size_t count, size_t offset , enum cudaMemcpyKind kind );
793 extern cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
794 extern cudaError_t cudaMemcpyToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
795 extern cudaError_t cudaMemcpyFromArrayAsync(void *dst, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
796 extern cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
797 extern cudaError_t cudaMemcpy2DToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
798 extern cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
806 extern cudaError_t cudaMemset(void *mem, int c, size_t count);
807 extern cudaError_t cudaMemset2D(void *mem, size_t pitch, int c, size_t width, size_t height);
815 extern cudaError_t cudaGetSymbolAddress(void **devPtr, const char *symbol);
816 extern cudaError_t cudaGetSymbolSize(size_t *size, const char *symbol);
824 extern cudaError_t cudaGetDeviceCount(int *count);
825 extern cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device);
826 extern cudaError_t cudaChooseDevice(int *device, const struct cudaDeviceProp *prop);
827 extern cudaError_t cudaSetDevice(int device);
828 extern cudaError_t cudaGetDevice(int *device);
836 extern cudaError_t cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t size );
837 extern cudaError_t cudaBindTextureToArray(const struct textureReference *texref, const struct cudaArray *array, const struct cudaChannelFormatDesc *desc);
838 extern cudaError_t cudaUnbindTexture(const struct textureReference *texref);
839 extern cudaError_t cudaGetTextureAlignmentOffset(size_t *offset, const struct textureReference *texref);
840 extern cudaError_t cudaGetTextureReference(const struct textureReference **texref, const char *symbol);
848 extern cudaError_t cudaGetChannelDesc(struct cudaChannelFormatDesc *desc, const struct cudaArray *array);
849 extern struct cudaChannelFormatDesc cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f);
857 extern cudaError_t cudaGetLastError(void);
858 extern const char* cudaGetErrorString(cudaError_t error);
866 extern cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem , cudaStream_t stream );
867 extern cudaError_t cudaSetupArgument(const void *arg, size_t size, size_t offset);
868 extern cudaError_t cudaLaunch(const char *symbol);
876 extern cudaError_t cudaStreamCreate(cudaStream_t *stream);
877 extern cudaError_t cudaStreamDestroy(cudaStream_t stream);
878 extern cudaError_t cudaStreamSynchronize(cudaStream_t stream);
879 extern cudaError_t cudaStreamQuery(cudaStream_t stream);
887 extern cudaError_t cudaEventCreate(cudaEvent_t *event);
888 extern cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
889 extern cudaError_t cudaEventQuery(cudaEvent_t event);
890 extern cudaError_t cudaEventSynchronize(cudaEvent_t event);
891 extern cudaError_t cudaEventDestroy(cudaEvent_t event);
892 extern cudaError_t cudaEventElapsedTime(float *ms, cudaEvent_t start, cudaEvent_t end);
900 extern cudaError_t cudaThreadExit(void);
901 extern cudaError_t cudaThreadSynchronize(void);
902 # 61 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3
903 # 1 "/usr/local/cuda/bin/../include/crt/storage_class.h" 1 3
904 # 62 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2 3
905 # 216 "/usr/lib/gcc/x86_64-redhat-linux/4.1.2/include/stddef.h" 2 3
906 # 148 "/usr/include/bits/types.h" 3
907 typedef long __clock_t;
908 # 61 "/usr/include/time.h" 3
909 typedef __clock_t clock_t;
/*
 * Generated definitions for the texture structs that were forward-declared
 * near the top of this file. Each one has a single member, a
 * struct textureReference named __b_16textureReference.
 * The struct names appear to be Itanium-style C++ manglings of
 * texture<T, dim, readMode> instantiations (e.g. "Ic" = char,
 * "I5char1" = char1, "Li1E" = dimension 1, trailing 0 = read mode 0);
 * every definition in this run is a dimension-1, read-mode-0 variant.
 * Machine-generated: do not edit the names by hand.
 */
910 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
911 struct _Z7textureIcLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
912 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
913 struct _Z7textureIaLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
914 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
915 struct _Z7textureIhLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
916 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
917 struct _Z7textureI5char1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
918 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
919 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
920 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
921 struct _Z7textureI5char2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
922 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
923 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
924 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
925 struct _Z7textureI5char3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
926 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
927 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
928 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
929 struct _Z7textureI5char4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
930 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
931 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
932 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
933 struct _Z7textureIsLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
934 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
935 struct _Z7textureItLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
936 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
937 struct _Z7textureI6short1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
938 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
939 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
940 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
941 struct _Z7textureI6short2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
942 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
943 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
944 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
945 struct _Z7textureI6short3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
946 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
947 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
948 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
949 struct _Z7textureI6short4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
950 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
951 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
952 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
953 struct _Z7textureIiLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
954 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
955 struct _Z7textureIjLi1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
956 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
957 struct _Z7textureI4int1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
958 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
959 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
960 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
961 struct _Z7textureI4int2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
962 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
963 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
964 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
965 struct _Z7textureI4int3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
966 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
967 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
968 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
969 struct _Z7textureI4int4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
970 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
971 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
972 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
973 struct _Z7textureIcLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
974 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
975 struct _Z7textureIaLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
976 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
977 struct _Z7textureIhLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
978 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
979 struct _Z7textureI5char1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
980 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
981 struct _Z7textureI6uchar1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
982 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
983 struct _Z7textureI5char2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
984 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
985 struct _Z7textureI6uchar2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
986 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
987 struct _Z7textureI5char3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
988 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
989 struct _Z7textureI6uchar3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
990 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
991 struct _Z7textureI5char4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
992 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
993 struct _Z7textureI6uchar4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
994 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
995 struct _Z7textureIsLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
996 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
997 struct _Z7textureItLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
998 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
999 struct _Z7textureI6short1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1000 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1001 struct _Z7textureI7ushort1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1002 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1003 struct _Z7textureI6short2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1004 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1005 struct _Z7textureI7ushort2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1006 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1007 struct _Z7textureI6short3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1008 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1009 struct _Z7textureI7ushort3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1010 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1011 struct _Z7textureI6short4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1012 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1013 struct _Z7textureI7ushort4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1014 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1015 struct _Z7textureIcLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1016 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1017 struct _Z7textureIaLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1018 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1019 struct _Z7textureIhLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1020 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1021 struct _Z7textureI5char1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1022 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1023 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1024 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1025 struct _Z7textureI5char2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1026 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1027 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1028 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1029 struct _Z7textureI5char3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1030 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1031 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1032 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1033 struct _Z7textureI5char4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1034 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1035 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1036 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1037 struct _Z7textureIsLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1038 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1039 struct _Z7textureItLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1040 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1041 struct _Z7textureI6short1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1042 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1043 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1044 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1045 struct _Z7textureI6short2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1046 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1047 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1048 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1049 struct _Z7textureI6short3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1050 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1051 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1052 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1053 struct _Z7textureI6short4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1054 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1055 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1056 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1057 struct _Z7textureIiLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1058 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1059 struct _Z7textureIjLi2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1060 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1061 struct _Z7textureI4int1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1062 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1063 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1064 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1065 struct _Z7textureI4int2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1066 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1067 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1068 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1069 struct _Z7textureI4int3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1070 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1071 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1072 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1073 struct _Z7textureI4int4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1074 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1075 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode0EE { struct textureReference __b_16textureReference;};
1076 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1077 struct _Z7textureIiLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1078 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1079 struct _Z7textureIjLi1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1080 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1081 struct _Z7textureI4int1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1082 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1083 struct _Z7textureI5uint1Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1084 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1085 struct _Z7textureI4int2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1086 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1087 struct _Z7textureI5uint2Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1088 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1089 struct _Z7textureI4int3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1090 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1091 struct _Z7textureI5uint3Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1092 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1093 struct _Z7textureI4int4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1094 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1095 struct _Z7textureI5uint4Li1EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1096 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1097 struct _Z7textureIcLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1098 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1099 struct _Z7textureIaLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1100 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1101 struct _Z7textureIhLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1102 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1103 struct _Z7textureI5char1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1104 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1105 struct _Z7textureI6uchar1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1106 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1107 struct _Z7textureI5char2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1108 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1109 struct _Z7textureI6uchar2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1110 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1111 struct _Z7textureI5char3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1112 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1113 struct _Z7textureI6uchar3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1114 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1115 struct _Z7textureI5char4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1116 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1117 struct _Z7textureI6uchar4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1118 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1119 struct _Z7textureIsLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1120 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1121 struct _Z7textureItLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1122 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1123 struct _Z7textureI6short1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1124 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1125 struct _Z7textureI7ushort1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1126 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1127 struct _Z7textureI6short2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1128 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1129 struct _Z7textureI7ushort2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1130 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1131 struct _Z7textureI6short3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1132 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1133 struct _Z7textureI7ushort3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1134 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1135 struct _Z7textureI6short4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1136 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1137 struct _Z7textureI7ushort4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1138 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1139 struct _Z7textureIiLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1140 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1141 struct _Z7textureIjLi2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1142 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1143 struct _Z7textureI4int1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1144 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1145 struct _Z7textureI5uint1Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1146 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1147 struct _Z7textureI4int2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1148 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1149 struct _Z7textureI5uint2Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1150 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1151 struct _Z7textureI4int3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1152 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1153 struct _Z7textureI5uint3Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1154 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1155 struct _Z7textureI4int4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1156 # 115 "/usr/local/cuda/bin/../include/texture_types.h"
1157 struct _Z7textureI5uint4Li2EL19cudaTextureReadMode1EE { struct textureReference __b_16textureReference;};
1158 # 46 "/usr/include/stdio.h" 3
1159 typedef struct _IO_FILE FILE;
1160 void *memcpy(void*, const void*, size_t); void *memset(void*, int, size_t);
1161 # 82 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1162 extern cudaError_t cudaMalloc(void **, size_t);
1166 extern cudaError_t cudaFree(void *);
1167 # 97 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1168 extern cudaError_t cudaMemcpy(void *, const void *, size_t, enum cudaMemcpyKind);
1169 # 145 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1170 extern cudaError_t cudaGetDeviceCount(int *);
1171 extern cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *, int);
1173 extern cudaError_t cudaSetDevice(int);
1174 # 187 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1175 extern cudaError_t cudaConfigureCall(dim3, dim3, size_t, cudaStream_t);
1176 # 222 "/usr/local/cuda/bin/../include/cuda_runtime_api.h"
1177 extern cudaError_t cudaThreadSynchronize(void);
1178 # 184 "/usr/include/time.h" 3
1179 extern __attribute__((__weak__)) clock_t clock(void);
1180 # 58 "/usr/local/cuda/bin/../include/common_functions.h"
1181 extern __attribute__((__weak__)) void *memset(void *, int, size_t);
1182 # 65 "/usr/local/cuda/bin/../include/math_functions.h"
1183 extern __attribute__((__weak__)) int abs(int) __attribute__((__const__));
1185 extern __attribute__((__weak__)) long labs(long) __attribute__((__const__));
1187 extern __attribute__((__weak__)) long long llabs(long long) __attribute__((__const__));
1189 extern __attribute__((__weak__)) double fabs(double) __attribute__((__const__));
1191 extern __attribute__((__weak__)) float fabsf(float) __attribute__((__const__));
1194 extern __attribute__((__weak__)) int min(int, int);
1196 extern __attribute__((__weak__)) unsigned umin(unsigned, unsigned);
1198 extern __attribute__((__weak__)) float fminf(float, float);
1200 extern __attribute__((__weak__)) double fmin(double, double);
1203 extern __attribute__((__weak__)) int max(int, int);
1205 extern __attribute__((__weak__)) unsigned umax(unsigned, unsigned);
1207 extern __attribute__((__weak__)) float fmaxf(float, float);
1209 extern __attribute__((__weak__)) double fmax(double, double);
1212 extern __attribute__((__weak__)) double sin(double);
1214 extern __attribute__((__weak__)) float sinf(float);
1217 extern __attribute__((__weak__)) double cos(double);
1219 extern __attribute__((__weak__)) float cosf(float);
1222 extern __attribute__((__weak__)) void sincos(double, double *, double *);
1224 extern __attribute__((__weak__)) void sincosf(float, float *, float *);
1227 extern __attribute__((__weak__)) double tan(double);
1229 extern __attribute__((__weak__)) float tanf(float);
1232 extern __attribute__((__weak__)) double sqrt(double);
1234 extern __attribute__((__weak__)) float sqrtf(float);
1237 extern __attribute__((__weak__)) double rsqrt(double);
1239 extern __attribute__((__weak__)) float rsqrtf(float);
1242 extern __attribute__((__weak__)) double exp2(double);
1244 extern __attribute__((__weak__)) float exp2f(float);
1247 extern __attribute__((__weak__)) double exp10(double);
1249 extern __attribute__((__weak__)) float exp10f(float);
1252 extern __attribute__((__weak__)) double expm1(double);
1254 extern __attribute__((__weak__)) float expm1f(float);
1257 extern __attribute__((__weak__)) double log2(double);
1259 extern __attribute__((__weak__)) float log2f(float);
1262 extern __attribute__((__weak__)) double log10(double);
1264 extern __attribute__((__weak__)) float log10f(float);
1267 extern __attribute__((__weak__)) double log(double);
1269 extern __attribute__((__weak__)) float logf(float);
1272 extern __attribute__((__weak__)) double log1p(double);
1274 extern __attribute__((__weak__)) float log1pf(float);
1277 extern __attribute__((__weak__)) double floor(double) __attribute__((__const__));
1279 extern __attribute__((__weak__)) float floorf(float) __attribute__((__const__));
1282 extern __attribute__((__weak__)) double exp(double);
1284 extern __attribute__((__weak__)) float expf(float);
1287 extern __attribute__((__weak__)) double cosh(double);
1289 extern __attribute__((__weak__)) float coshf(float);
1292 extern __attribute__((__weak__)) double sinh(double);
1294 extern __attribute__((__weak__)) float sinhf(float);
1297 extern __attribute__((__weak__)) double tanh(double);
1299 extern __attribute__((__weak__)) float tanhf(float);
1302 extern __attribute__((__weak__)) double acosh(double);
1304 extern __attribute__((__weak__)) float acoshf(float);
1307 extern __attribute__((__weak__)) double asinh(double);
1309 extern __attribute__((__weak__)) float asinhf(float);
1312 extern __attribute__((__weak__)) double atanh(double);
1314 extern __attribute__((__weak__)) float atanhf(float);
1317 extern __attribute__((__weak__)) double ldexp(double, int);
1319 extern __attribute__((__weak__)) float ldexpf(float, int);
1322 extern __attribute__((__weak__)) double logb(double);
1324 extern __attribute__((__weak__)) float logbf(float);
1327 extern __attribute__((__weak__)) int ilogb(double);
1329 extern __attribute__((__weak__)) int ilogbf(float);
1332 extern __attribute__((__weak__)) double scalbn(double, int);
1334 extern __attribute__((__weak__)) float scalbnf(float, int);
1337 extern __attribute__((__weak__)) double scalbln(double, long);
1339 extern __attribute__((__weak__)) float scalblnf(float, long);
1342 extern __attribute__((__weak__)) double frexp(double, int *);
1344 extern __attribute__((__weak__)) float frexpf(float, int *);
1347 extern __attribute__((__weak__)) double round(double) __attribute__((__const__));
1349 extern __attribute__((__weak__)) float roundf(float) __attribute__((__const__));
1352 extern __attribute__((__weak__)) long lround(double);
1354 extern __attribute__((__weak__)) long lroundf(float);
1357 extern __attribute__((__weak__)) long long llround(double);
1359 extern __attribute__((__weak__)) long long llroundf(float);
1362 extern __attribute__((__weak__)) double rint(double);
1364 extern __attribute__((__weak__)) float rintf(float);
1367 extern __attribute__((__weak__)) long lrint(double);
1369 extern __attribute__((__weak__)) long lrintf(float);
1372 extern __attribute__((__weak__)) long long llrint(double);
1374 extern __attribute__((__weak__)) long long llrintf(float);
1377 extern __attribute__((__weak__)) double nearbyint(double);
1379 extern __attribute__((__weak__)) float nearbyintf(float);
1382 extern __attribute__((__weak__)) double ceil(double) __attribute__((__const__));
1384 extern __attribute__((__weak__)) float ceilf(float) __attribute__((__const__));
1387 extern __attribute__((__weak__)) double trunc(double) __attribute__((__const__));
1389 extern __attribute__((__weak__)) float truncf(float) __attribute__((__const__));
1392 extern __attribute__((__weak__)) double fdim(double, double);
1394 extern __attribute__((__weak__)) float fdimf(float, float);
1397 extern __attribute__((__weak__)) double atan2(double, double);
1399 extern __attribute__((__weak__)) float atan2f(float, float);
1402 extern __attribute__((__weak__)) double atan(double);
1404 extern __attribute__((__weak__)) float atanf(float);
1407 extern __attribute__((__weak__)) double asin(double);
1409 extern __attribute__((__weak__)) float asinf(float);
1412 extern __attribute__((__weak__)) double acos(double);
1414 extern __attribute__((__weak__)) float acosf(float);
1417 extern __attribute__((__weak__)) double hypot(double, double);
1419 extern __attribute__((__weak__)) float hypotf(float, float);
1422 extern __attribute__((__weak__)) double cbrt(double);
1424 extern __attribute__((__weak__)) float cbrtf(float);
1427 extern __attribute__((__weak__)) double pow(double, double);
1429 extern __attribute__((__weak__)) float powf(float, float);
1432 extern __attribute__((__weak__)) double modf(double, double *);
1434 extern __attribute__((__weak__)) float modff(float, float *);
1437 extern __attribute__((__weak__)) double fmod(double, double);
1439 extern __attribute__((__weak__)) float fmodf(float, float);
1442 extern __attribute__((__weak__)) double remainder(double, double);
1444 extern __attribute__((__weak__)) float remainderf(float, float);
1447 extern __attribute__((__weak__)) double remquo(double, double, int *);
1449 extern __attribute__((__weak__)) float remquof(float, float, int *);
1452 extern __attribute__((__weak__)) double erf(double);
1454 extern __attribute__((__weak__)) float erff(float);
1457 extern __attribute__((__weak__)) double erfc(double);
1459 extern __attribute__((__weak__)) float erfcf(float);
1462 extern __attribute__((__weak__)) double lgamma(double);
1464 extern __attribute__((__weak__)) float lgammaf(float);
1467 extern __attribute__((__weak__)) double tgamma(double);
1469 extern __attribute__((__weak__)) float tgammaf(float);
1472 extern __attribute__((__weak__)) double copysign(double, double) __attribute__((__const__));
1474 extern __attribute__((__weak__)) float copysignf(float, float) __attribute__((__const__));
1477 extern __attribute__((__weak__)) double nextafter(double, double) __attribute__((__const__));
1479 extern __attribute__((__weak__)) float nextafterf(float, float) __attribute__((__const__));
1482 extern __attribute__((__weak__)) double nan(const char *) __attribute__((__const__));
1484 extern __attribute__((__weak__)) float nanf(const char *) __attribute__((__const__));
1487 extern __attribute__((__weak__)) int __signbit(double) __attribute__((__const__));
1489 extern __attribute__((__weak__)) int __signbitf(float) __attribute__((__const__));
1492 extern __attribute__((__weak__)) int __isinf(double) __attribute__((__const__));
1494 extern __attribute__((__weak__)) int __isinff(float) __attribute__((__const__));
1497 extern __attribute__((__weak__)) int __isnan(double) __attribute__((__const__));
1499 extern __attribute__((__weak__)) int __isnanf(float) __attribute__((__const__));
1502 extern __attribute__((__weak__)) int __finite(double) __attribute__((__const__));
1504 extern __attribute__((__weak__)) int __finitef(float) __attribute__((__const__));
1507 extern __attribute__((__weak__)) double fma(double, double, double);
1509 extern __attribute__((__weak__)) float fmaf(float, float, float);
1510 # 193 "/usr/include/bits/mathcalls.h" 3
1511 extern __attribute__((__weak__)) int __isinfl(long double) __attribute__((__const__));
1514 extern __attribute__((__weak__)) int __finitel(long double) __attribute__((__const__));
1515 # 231 "/usr/include/bits/mathcalls.h" 3
1516 extern __attribute__((__weak__)) int __isnanl(long double) __attribute__((__const__));
1517 # 350 "/usr/include/bits/mathcalls.h" 3
1518 extern __attribute__((__weak__)) int __signbitl(long double) __attribute__((__const__));
1519 # 589 "/usr/include/stdlib.h" 3
1520 extern void *malloc(size_t) __attribute__((__malloc__));
1521 # 327 "/usr/include/stdio.h" 3
1522 extern int fprintf(FILE *, const char *, ...);
/* Timer returning an int.  wsm5_gpu_init_ below reads it twice and prints the
 * difference; the name suggests microseconds (NOTE(review): confirm units). */
1523 # 113 "y.cu"
1524 extern int rsl_internal_microclock_(void);
/* Declared by hand in y.cu rather than via a system header; presumably the
 * POSIX gethostname(name, len) -- verify. */
1525 # 135 "y.cu"
1526 extern int gethostname(char *, size_t);
/* Device selection / reporting entry point; defined later in this file. */
1527 # 142 "y.cu"
1528 extern int wsm5_gpu_init_(int *, int *, int *);
/* Host driver for the WSM5 kernel: sixteen float pointers followed by
 * eighteen int pointers.  Its definition begins later in this file. */
1529 # 199 "y.cu"
1530 extern int wsm5_host_(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *, int *);
/* Takes one int pointer; the definition is not visible from here.  Presumably
 * reports the compiled-in vertical level limit -- TODO confirm. */
1531 # 470 "y.cu"
1532 extern int get_wsm5_gpu_levels_(int *);
/* nvcc-generated routine, marked as a constructor so it runs before main(). */
1533 extern void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__));
/* Standard error stream; all diagnostics in wsm5_gpu_init_ are written to it. */
1534 # 144 "/usr/include/stdio.h" 3
1535 extern struct _IO_FILE *stderr;
/* nvcc-generated host-side stub for the wsm5_gpu kernel (name taken from the
 * mangled symbol).  Unlike wsm5_host_, the integer arguments are passed by
 * value, and one float is passed by value among the float pointers. */
1536 # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.h" 1 3
1541 extern void __device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float *, float, float *, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int);
1542 # 146 "/usr/include/stdio.h" 2 3
1543 # 142 "y.cu"
1544 int wsm5_gpu_init_( int *myproc, int *nproc, int *mydevice)
1546 auto float x;
1547 # 144 "y.cu"
1548 auto float *x_d;
1549 auto int s;
1550 # 145 "y.cu"
1551 auto int e;
1552 auto int i;
1553 # 146 "y.cu"
1554 auto int dc;
1555 auto cudaError_t cerr;
1556 auto char hostname[64];
1557 auto struct cudaDeviceProp dp;
1559 cudaGetDeviceCount((&dc));
1560 if (dc > 4)
1561 { fprintf(stderr, "warning: more than %d devices on node (%d)\n", 4, dc); dc = 4; }
1562 fprintf(stderr, "Number of devices on this node: %d\n", dc);
1566 i = ((*mydevice));
1567 if (dc > 0)
1569 if ((int)(cerr = (cudaSetDevice(i)))) {
1570 fprintf(stderr, " non-zero cerr %d\n", ((int)cerr));
1573 gethostname(((char *)hostname), 64UL);
1574 fprintf(stderr, "Setting device %02d for task %03d on host %s\n", i, ((*myproc)), ((char *)hostname));
1576 if ((int)(cerr = (cudaGetDeviceProperties((&dp), i)))) {
1577 fprintf(stderr, "Device %02d: cerr = %d\n", ((int)cerr));
1578 } else {
1579 fprintf(stderr, "Device %02d: name %s\n", i, ((char *)(&dp.name)));
1580 fprintf(stderr, "Device %02d: mem %d\n", i, ((dp.totalGlobalMem)));
1581 fprintf(stderr, "Device %02d: smem %d\n", i, ((dp.sharedMemPerBlock)));
1582 fprintf(stderr, "Device %02d: nreg %d\n", i, ((dp.regsPerBlock)));
1583 fprintf(stderr, "Device %02d: warp %d\n", i, ((dp.warpSize)));
1584 fprintf(stderr, "Device %02d: pitch %d\n", i, ((dp.memPitch)));
1585 fprintf(stderr, "Device %02d: maxthrds %d\n", i, ((dp.maxThreadsPerBlock)));
1586 fprintf(stderr, "Device %02d: maxtdim %d %d %d\n", i, (((int *)(&dp.maxThreadsDim))[0]), (((int *)(&dp.maxThreadsDim))[1]), (((int *)(&dp.maxThreadsDim))[2]));
1589 fprintf(stderr, "Device %02d: maxgdim %d %d %d\n", i, (((int *)(&dp.maxGridSize))[0]), (((int *)(&dp.maxGridSize))[1]), (((int *)(&dp.maxGridSize))[2]));
1592 fprintf(stderr, "Device %02d: clock %d\n", i, ((dp.clockRate)));
1593 fprintf(stderr, "Device %02d: talign %d\n", i, ((dp.textureAlignment)));
1597 s = (rsl_internal_microclock_());
1598 cudaMalloc(((void **)(&x_d)), 4UL);
1599 cudaMemcpy(((void *)x_d), ((const void *)(&x)), 4UL, cudaMemcpyHostToDevice);
1600 cudaFree(((void *)x_d));
1601 e = (rsl_internal_microclock_());
1602 fprintf(stderr, "wsm5_init: %d\n", (e - s));
1603 return 0;
1607 int wsm5_host_(
1608 float *th, float *pii,
1609 float *q,
1610 float *qc, float *qi, float *qr, float *qs,
1611 float *den, float *p, float *delz,
1615 float *delt,
1616 float *rain, float *rainncv,
1617 float *sr,
1618 float *snow, float *snowncv,
1619 int *ids, int *ide, int *jds, int *jde, int *kds, int *kde,
1620 int *ims, int *ime, int *jms, int *jme, int *kms, int *kme,
1621 int *ips, int *ipe, int *jps, int *jpe, int *kps, int *kpe)
1623 { auto unsigned __T20;
1624 auto unsigned __T21;
1625 auto float *bigbuf;
1626 auto int s;
1627 # 218 "y.cu"
1628 auto int e;
1629 # 218 "y.cu"
1630 auto int s2;
1631 # 218 "y.cu"
1632 auto int e2;
1633 auto int d3;
1634 auto int d2;
1635 # 229 "y.cu"
1636 auto int dips;
1637 # 229 "y.cu"
1638 auto int dipe;
1640 auto int djps;
1641 # 231 "y.cu"
1642 auto int djpe;
1643 auto int dkps;
1644 # 232 "y.cu"
1645 auto int dkpe;
1646 # 242 "y.cu"
1647 auto float *th_d;
1648 auto float *pii_d;
1649 auto float *q_d;
1650 auto float *qc_d;
1651 auto float *qi_d;
1652 auto float *qr_d;
1653 auto float *qs_d;
1654 auto float *den_d;
1655 auto float *p_d;
1656 auto float *delz_d;
1660 auto float *rain_d;
1661 auto float *rainncv_d;
1662 auto float *sr_d;
1663 auto float *snow_d;
1664 auto float *snowncv_d;
1665 auto float retvals[100];
1669 auto float *retvals_d;
1671 auto int remx;
1672 # 266 "y.cu"
1673 auto int remy;
1678 auto dim3 dimBlock;
1680 auto dim3 dimGrid;
1681 # 219 "y.cu"
1682 d3 = ((((((*ime)) - ((*ims))) + 1) * ((((*jme)) - ((*jms))) + 1)) * ((((*kme)) - ((*kms))) + 1));
1683 d2 = (((((*ime)) - ((*ims))) + 1) * ((((*jme)) - ((*jms))) + 1));
1684 # 229 "y.cu"
1685 dips = 0; dipe = ((((*ipe)) - ((*ips))) + 1);
1687 djps = 0; djpe = ((((*jpe)) - ((*jps))) + 1);
1688 dkps = 0; dkpe = ((((*kpe)) - ((*kps))) + 1);
1690 bigbuf = ((float *)(malloc((((unsigned long)((dipe * djpe) * dkpe)) * 4UL))));
1691 # 241 "y.cu"
1692 s = (rsl_internal_microclock_());
1693 cudaMalloc(((void **)(&th_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)th_d), ((const void *)th), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1694 cudaMalloc(((void **)(&pii_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)pii_d), ((const void *)pii), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1695 cudaMalloc(((void **)(&q_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)q_d), ((const void *)q), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1696 cudaMalloc(((void **)(&qc_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qc_d), ((const void *)qc), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1697 cudaMalloc(((void **)(&qi_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qi_d), ((const void *)qi), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1698 cudaMalloc(((void **)(&qr_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qr_d), ((const void *)qr), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1699 cudaMalloc(((void **)(&qs_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)qs_d), ((const void *)qs), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1700 cudaMalloc(((void **)(&den_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)den_d), ((const void *)den), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1701 cudaMalloc(((void **)(&p_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)p_d), ((const void *)p), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1702 cudaMalloc(((void **)(&delz_d)), (((unsigned long)d3) * 4UL)); cudaMemcpy(((void *)delz_d), ((const void *)delz), (((unsigned long)d3) * 4UL), cudaMemcpyHostToDevice);
1706 cudaMalloc(((void **)(&rain_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)rain_d), ((const void *)rain), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice);
1707 cudaMalloc(((void **)(&rainncv_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)rainncv_d), ((const void *)rainncv), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice);
1708 cudaMalloc(((void **)(&sr_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)sr_d), ((const void *)sr), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice);
1709 cudaMalloc(((void **)(&snow_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)snow_d), ((const void *)snow), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice);
1710 cudaMalloc(((void **)(&snowncv_d)), (((unsigned long)d2) * 4UL)); cudaMemcpy(((void *)snowncv_d), ((const void *)snowncv), (((unsigned long)d2) * 4UL), cudaMemcpyHostToDevice);
1712 { auto int k;
1713 for (k = 0; (k < ((((*kme)) - ((*kms))) + 1)); k++) { (((float *)retvals)[k]) = (0.0F); }
1715 cudaMalloc(((void **)(&retvals_d)), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL)); cudaMemcpy(((void *)retvals_d), ((const void *)((float *)retvals)), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL), cudaMemcpyHostToDevice);
1719 remx = ((((((*ipe)) - ((*ips))) + 1) % 16) ? 1 : 0);
1720 remy = ((((((*jpe)) - ((*jps))) + 1) % 8) ? 1 : 0);
1722 { (dimBlock.x) = 16U; (dimBlock.y) = 8U; (dimBlock.z) = 1U; }
1724 { __T20 = ((unsigned)((((((*ipe)) - ((*ips))) + 1) / 16) + remx)); __T21 = ((unsigned)((((((*jpe)) - ((*jps))) + 1) / 8) + remy)); { (dimGrid.x) = __T20; (dimGrid.y) = __T21; (dimGrid.z) = 1U; } }
1726 fprintf(stderr, "Call to wsm5_gpu: block dims %d %d\n", ((dimBlock.x)), ((dimBlock.y)));
1727 fprintf(stderr, "Call to wsm5_gpu: grid dims %d %d\n", ((dimGrid.x)), ((dimGrid.y)));
1728 # 284 "y.cu"
1729 s2 = (rsl_internal_microclock_());
1730 ((int)(cudaConfigureCall(dimGrid, dimBlock, 0UL, 0))) ? ((void)0) : (__device_stub__Z8wsm5_gpuPfS_S_S_S_S_S_S_S_S_S_S_S_S_S_fS_iiiiiiiiiiiiiiiiii(th_d, pii_d, q_d, qc_d, qi_d, qr_d, qs_d, den_d, p_d, delz_d, rain_d, rainncv_d, sr_d, snow_d, snowncv_d, ((*delt)), retvals_d, (dips + 1), ((((*ipe)) - ((*ips))) + 1), (djps + 1), ((((*jpe)) - ((*jps))) + 1), (dkps + 1), ((((*kpe)) - ((*kps))) + 1), (dips + 1), dipe, (djps + 1), djpe, (dkps + 1), dkpe, (dips + 1), dipe, (djps + 1), djpe, (dkps + 1), dkpe));
1731 # 299 "y.cu"
1732 cudaThreadSynchronize();
1733 e2 = (rsl_internal_microclock_());
1734 fprintf(stderr, "Call to wsm5_gpu (not including data xfer): %d microseconds\n", (e2 - s2));
1737 cudaMemcpy(((void *)th), ((const void *)th_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1738 cudaMemcpy(((void *)pii), ((const void *)pii_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1739 cudaMemcpy(((void *)q), ((const void *)q_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1740 cudaMemcpy(((void *)qc), ((const void *)qc_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1741 cudaMemcpy(((void *)qi), ((const void *)qi_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1742 cudaMemcpy(((void *)qr), ((const void *)qr_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1743 cudaMemcpy(((void *)qs), ((const void *)qs_d), (((unsigned long)d3) * 4UL), cudaMemcpyDeviceToHost);
1747 cudaMemcpy(((void *)rain), ((const void *)rain_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost);
1748 cudaMemcpy(((void *)rainncv), ((const void *)rainncv_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost);
1749 cudaMemcpy(((void *)sr), ((const void *)sr_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost);
1750 cudaMemcpy(((void *)snow), ((const void *)snow_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost);
1751 cudaMemcpy(((void *)snowncv), ((const void *)snowncv_d), (((unsigned long)d2) * 4UL), cudaMemcpyDeviceToHost);
1752 e = (rsl_internal_microclock_());
1754 cudaMemcpy(((void *)((float *)retvals)), ((const void *)retvals_d), (((unsigned long)((((*kme)) - ((*kms))) + 1)) * 4UL), cudaMemcpyDeviceToHost);
1755 fprintf(stderr, "Call to wsm5_gpu (including data xfer): %d microseconds\n", (e - s));
1762 cudaFree(((void *)th_d));
1763 cudaFree(((void *)pii_d));
1764 cudaFree(((void *)q_d));
1765 cudaFree(((void *)qc_d));
1766 cudaFree(((void *)qi_d));
1767 cudaFree(((void *)qr_d));
1768 cudaFree(((void *)qs_d));
1769 cudaFree(((void *)den_d));
1770 cudaFree(((void *)p_d));
1771 cudaFree(((void *)delz_d));
1775 cudaFree(((void *)rain_d));
1776 cudaFree(((void *)rainncv_d));
1777 cudaFree(((void *)sr_d));
1778 cudaFree(((void *)snow_d));
1779 cudaFree(((void *)snowncv_d));
1780 cudaFree(((void *)retvals_d));
1782 return 0;
1784 # 470 "y.cu"
/*
 * get_wsm5_gpu_levels_ -- store 41 in *retval.
 *
 * Returns 0 on success, 1 if retval is a null pointer.  The previous
 * revision was declared int but fell off the end without returning a value.
 */
int get_wsm5_gpu_levels_( int *retval)
{
    if (retval == 0) {
        return 1;
    }
    (*retval) = 41;
    return 0;
}
/* Empty function. Its earlier declaration in this file carries
 * __attribute__((__constructor__)); the body does nothing. */
1789 void __sti___29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) { }
1790 # 1 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 1
1794 # 1 "/tmp/tmpxft_00001ecc_00000000-1.c" 1
1795 # 1 "/usr/local/cuda/bin/../include/__cudaFatFormat.h" 1
1796 # 97 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
/* One cubin table entry: a GPU profile name paired with a cubin image.
 * This file's table holds {"sm_10", __deviceText} followed by a {0,0} entry. */
1797 typedef struct {
/* profile name string, e.g. "sm_10" */
1798 char* gpuProfileName;
/* pointer to the cubin text */
1799 char* cubin;
1800 } __cudaFatCubinEntry;
1801 # 113 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
/* One PTX table entry: a GPU profile name paired with a PTX text pointer.
 * This file's table contains only a {0,0} entry. */
1802 typedef struct {
1803 char* gpuProfileName;
1804 char* ptx;
1805 } __cudaFatPtxEntry;
1806 # 125 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
/* One debug table entry: a GPU profile name paired with a debug string pointer.
 * This file's table contains only a {0,0} entry. */
1807 typedef struct {
1808 char* gpuProfileName;
1809 char* debug;
1810 } __cudaFatDebugEntry;
/* Single-bit flag values (bit 0 and bit 1).
 * NOTE(review): presumably OR-ed into __cudaFatCudaBinary.flags -- confirm. */
1813 typedef enum {
1814 __cudaFatDontSearchFlag = (1 << 0),
1815 __cudaFatDontCacheFlag = (1 << 1)
1816 } __cudaFatCudaBinaryFlag;
1817 # 145 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
/* Descriptor for the embedded device code of this translation unit.
 * The single instance in this file, __fatDeviceText, is initialised
 * positionally in the field order below. */
1818 typedef struct {
/* 0x1ee55a01 in this file's instance */
1819 unsigned long magic;
1820 unsigned long version;
1821 unsigned long gpuInfoVersion;
/* hex string key */
1822 char* key;
/* "y.cu" in this file's instance */
1823 char* ident;
1824 char* usageMode;
/* entry tables; in this file each ends with a {0,0} element */
1825 __cudaFatPtxEntry *ptx;
1826 __cudaFatCubinEntry *cubin;
1827 __cudaFatDebugEntry *debug;
1828 void* debugInfo;
1829 unsigned int flags;
1830 } __cudaFatCudaBinary;
1831 # 189 "/usr/local/cuda/bin/../include/__cudaFatFormat.h"
1832 void fatGetCubinForGpu( __cudaFatCudaBinary *binary, char* gpuName, char* *cubin, char* *dbgInfoFile );
1833 # 2 "/tmp/tmpxft_00001ecc_00000000-1.c" 2
/* Embedded cubin text, stored as NUL-terminated ASCII bytes.  Decoded:
 *
 *   architecture {sm_10}
 *   abiversion {0}
 *   code  {
 *       name = __dummy_entry__
 *       lmem = 0
 *       smem = 0
 *       reg = 0
 *       bar = 0
 *       bincode  {
 *           0xf0000001 0xe0000001
 *       }
 *   }
 *
 * It describes only a "__dummy_entry__" code section; no wsm5_gpu kernel
 * image is contained in these bytes. */
1841 static const unsigned char __deviceText[] = {
1842 0x61,0x72,0x63,0x68,0x69,0x74,0x65,0x63,0x74,0x75,0x72,0x65,0x20,0x7b,0x73,0x6d,
1843 0x5f,0x31,0x30,0x7d,0x0a,0x61,0x62,0x69,0x76,0x65,0x72,0x73,0x69,0x6f,0x6e,0x20,
1844 0x7b,0x30,0x7d,0x0a,0x63,0x6f,0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x6e,0x61,0x6d,
1845 0x65,0x20,0x3d,0x20,0x5f,0x5f,0x64,0x75,0x6d,0x6d,0x79,0x5f,0x65,0x6e,0x74,0x72,
1846 0x79,0x5f,0x5f,0x0a,0x09,0x6c,0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x73,
1847 0x6d,0x65,0x6d,0x20,0x3d,0x20,0x30,0x0a,0x09,0x72,0x65,0x67,0x20,0x3d,0x20,0x30,
1848 0x0a,0x09,0x62,0x61,0x72,0x20,0x3d,0x20,0x30,0x0a,0x09,0x62,0x69,0x6e,0x63,0x6f,
1849 0x64,0x65,0x20,0x20,0x7b,0x0a,0x09,0x09,0x30,0x78,0x66,0x30,0x30,0x30,0x30,0x30,
1850 0x30,0x31,0x20,0x30,0x78,0x65,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x20,0x0a,0x09,
1851 0x7d,0x0a,0x7d,0x0a,0x00
/* Entry tables for the fat binary descriptor.  Only the cubin table has a
 * real entry ("sm_10" -> __deviceText); each table ends with a {0,0} element. */
1858 static __cudaFatPtxEntry __ptxEntries [] = {{0,0}};
1859 static __cudaFatCubinEntry __cubinEntries[] = {{"sm_10",(char*)__deviceText},{0,0}};
1860 static __cudaFatDebugEntry __debugEntries[] = {{0,0}};
/* The descriptor itself, placed in the ".nvFatBinSegment" section. */
1862 static __cudaFatCudaBinary __fatDeviceText __attribute__ ((section (".nvFatBinSegment")))= {0x1ee55a01,0x00000002,0x840b5bca,"81bb892378501d16","y.cu"," ",__ptxEntries,__cubinEntries,__debugEntries,0,0};
1863 # 5 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2
1864 # 1 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 1
1865 # 65 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1866 # 1 "/usr/local/cuda/bin/../include/host_defines.h" 1
1867 # 66 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2
1868 # 88 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1869 extern void** __cudaRegisterFatBinary(
1870 void *fatCubin
1873 extern void __cudaUnregisterFatBinary(
1874 void **fatCubinHandle
1877 extern void __cudaRegisterVar(
1878 void **fatCubinHandle,
1879 char *hostVar,
1880 char *deviceAddress,
1881 const char *deviceName,
1882 int ext,
1883 int size,
1884 int constant,
1885 int global
1888 extern void __cudaRegisterTexture(
1889 void **fatCubinHandle,
1890 const struct textureReference *hostVar,
1891 const void **deviceAddress,
1892 const char *deviceName,
1893 int dim,
1894 int norm,
1895 int ext
1898 extern void __cudaRegisterShared(
1899 void **fatCubinHandle,
1900 void **devicePtr
1903 extern void __cudaRegisterFunction(
1904 void **fatCubinHandle,
1905 const char *hostFun,
1906 char *deviceFun,
1907 const char *deviceName,
1908 int thread_limit,
1909 uint3 *tid,
1910 uint3 *bid,
1911 dim3 *bDim,
1912 dim3 *gDim
1919 static void **__cudaFatCubinHandle;
1921 static void __cudaUnregisterBinaryUtil(void)
1923 __cudaUnregisterFatBinary(__cudaFatCubinHandle);
/* Destructor-attributed hook; forwards to __cudaUnregisterBinaryUtil(). */
__attribute__((destructor)) static void __cudaUnregisterBinary(void)
{
    __cudaUnregisterBinaryUtil();
}
1932 # 195 "/usr/local/cuda/bin/../include/crt/host_runtime.h"
1933 # 1 "/usr/local/cuda/bin/../include/common_functions.h" 1
1934 # 64 "/usr/local/cuda/bin/../include/common_functions.h"
1935 # 1 "/usr/local/cuda/bin/../include/crt/func_macro.h" 1 3
1936 # 65 "/usr/local/cuda/bin/../include/common_functions.h" 2
/* Weak host wrapper returning the C library clock() value as a long. */
extern __attribute__((weak)) long __cuda_clock(void);
long __cuda_clock(void)
{
    long ticks = clock();

    return ticks;
}
/* Weak host wrapper: store (char)c into each of the first n bytes at s
 * and return s. */
extern __attribute__((weak)) void *__cuda_memset(void *s, int c, size_t n);
void *__cuda_memset(void *s, int c, size_t n)
{
    char *dst = (char *)s;
    const char fill = (char)c;
    size_t idx;

    for (idx = 0; idx < n; idx++) {
        dst[idx] = fill;
    }
    return s;
}
1951 # 88 "/usr/local/cuda/bin/../include/common_functions.h"
1952 # 1 "/usr/local/cuda/bin/../include/math_functions.h" 1 3
1953 # 794 "/usr/local/cuda/bin/../include/math_functions.h" 3
1954 extern __attribute__((weak)) int __cuda___signbitl(long double a); int __cuda___signbitl(long double a);
1955 extern __attribute__((weak)) int __cuda___isinfl(long double a); int __cuda___isinfl(long double a);
1956 extern __attribute__((weak)) int __cuda___isnanl(long double a); int __cuda___isnanl(long double a);
1957 extern __attribute__((weak)) int __cuda___finitel(long double a); int __cuda___finitel(long double a);
1958 # 834 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Weak host wrapper around the C library abs(). */
extern __attribute__((weak)) int __cuda_abs(int a);
int __cuda_abs(int a)
{
    const int magnitude = abs(a);

    return magnitude;
}
/* Weak host wrapper around the C library fabsf(). */
extern __attribute__((weak)) float __cuda_fabsf(float a);
float __cuda_fabsf(float a)
{
    const float magnitude = fabsf(a);

    return magnitude;
}
/* Weak host wrapper around the C library llabs(). */
extern __attribute__((weak)) long long int __cuda_llabs(long long int a);
long long int __cuda_llabs(long long int a)
{
    const long long int magnitude = llabs(a);

    return magnitude;
}
1978 extern __attribute__((weak)) float __cuda_exp2f(float a); float __cuda_exp2f(float a)
1980 return exp2f(a);
1983 # 1 "/usr/local/cuda/bin/../include/device_functions.h" 1 3
1984 # 322 "/usr/local/cuda/bin/../include/device_functions.h" 3
1985 # 1 "/usr/local/cuda/bin/../include/math_constants.h" 1 3
1986 # 323 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
1990 extern __attribute__((weak)) int __cuda___isnan(double a); int __cuda___isnan(double a);
1991 extern __attribute__((weak)) int __cuda___isnanf(float a); int __cuda___isnanf(float a);
1992 static int __double2int_rz(double);
1993 static unsigned int __double2uint_rz(double);
1994 static long long int __double2ll_rz(double);
1995 static unsigned long long int __double2ull_rz(double);
1996 # 345 "/usr/local/cuda/bin/../include/device_functions.h" 3
/* Upper 32 bits of the 64-bit signed product a * b. */
static int __mulhi(int a, int b)
{
    const long long int wide = (long long int)a * (long long int)b;

    return (int)(wide >> 32);
}
/* Upper 32 bits of the 64-bit unsigned product a * b. */
static unsigned int __umulhi(unsigned int a, unsigned int b)
{
    const unsigned long long int wide =
        (unsigned long long int)a * (unsigned long long int)b;

    return (unsigned int)(wide >> 32);
}
/* Upper 64 bits of the 128-bit unsigned product a * b, assembled from
 * 32-bit halves of each operand. */
static unsigned long long int __umul64hi(unsigned long long int a, unsigned long long int b)
{
    const unsigned int lo_a = (unsigned int)a;
    const unsigned int lo_b = (unsigned int)b;
    const unsigned long long int hi_a = a >> 32;
    const unsigned long long int hi_b = b >> 32;
    const unsigned long long int cross_ab = lo_a * hi_b;
    const unsigned long long int cross_ba = hi_a * lo_b;
    /* carry out of bits 32..63 of the full product */
    const unsigned int carry =
        (0ULL + __umulhi(lo_a, lo_b) + (unsigned int)cross_ab + (unsigned int)cross_ba) >> 32;

    return hi_a * hi_b + (cross_ab >> 32) + (cross_ba >> 32) + carry;
}
/* Upper 64 bits of the signed product a * b: the unsigned high word,
 * corrected by subtracting the other operand for each negative input. */
static long long int __mul64hi(long long int a, long long int b)
{
    unsigned long long int hi = __umul64hi(a, b);

    if (a < 0LL) {
        hi -= b;
    }
    if (b < 0LL) {
        hi -= a;
    }
    return hi;
}
/* Clamp a to [0.0f, 1.0f].  A value failing both comparisons (e.g. NaN)
 * is returned unchanged. */
static float __saturatef(float a)
{
    if (a >= 1.0f) {
        return 1.0f;
    }
    if (a <= 0.0f) {
        return 0.0f;
    }
    return a;
}
/* |a - b| + c, computed in 64-bit and truncated to unsigned int. */
static unsigned int __sad(int a, int b, unsigned int c)
{
    const long long int delta = (long long int)a - (long long int)b;
    const long long int total = __cuda_llabs(delta) + (long long int)c;

    return (unsigned int)total;
}
/* |a - b| + c for unsigned a and b, computed in 64-bit and truncated to
 * unsigned int. */
static unsigned int __usad(unsigned int a, unsigned int b, unsigned int c)
{
    const long long int delta = (long long int)a - (long long int)b;
    const long long int total = __cuda_llabs(delta) + (long long int)c;

    return (unsigned int)total;
}
/* Multiply the low 24 bits of a and b, each sign-extended from bit 23. */
static int __mul24(int a, int b)
{
    a &= 0xffffff;
    if ((a & 0x800000) != 0) {
        a |= ~0xffffff;
    }

    b &= 0xffffff;
    if ((b & 0x800000) != 0) {
        b |= ~0xffffff;
    }

    return a * b;
}
/* Multiply the low 24 bits of a and b; the result wraps to unsigned int. */
static unsigned int __umul24(unsigned int a, unsigned int b)
{
    const unsigned int lhs = a & 0xffffff;
    const unsigned int rhs = b & 0xffffff;

    return lhs * rhs;
}
/* Reinterpret the bits of an int as a float via a union. */
static float __int_as_float(int a)
{
    union {
        int as_int;
        float as_float;
    } pun;

    pun.as_int = a;
    return pun.as_float;
}
/* Reinterpret the bits of a float as an int via a union. */
static int __float_as_int(float a)
{
    union {
        float as_float;
        int as_int;
    } pun;

    pun.as_float = a;
    return pun.as_int;
}
/* Convert a float to a signed 64-bit integer by manipulating its bit pattern.
 *   max, min : results returned when a >= max or a <= min
 *   nan      : result returned when a is NaN
 *   rndMode  : cudaRoundNearest, cudaRoundMinInf and cudaRoundPosInf adjust
 *              the truncated magnitude below; any other mode leaves it
 *              truncated.
 * NOTE(review): brace-only lines are absent from this listing. */
2086 static long long int __internal_float2ll_kernel(float a, long long int max, long long int min, long long int nan, enum cudaRoundMode rndMode)
2088 unsigned long long int res, t = 0ULL;
2089 int shift;
2090 unsigned int ia;
/* NaN and out-of-range inputs are answered before any bit manipulation. */
2092 if (sizeof(a) == sizeof(double) && __cuda___isnan((double)a)) return nan; if (sizeof(a) == sizeof(float) && __cuda___isnanf((float)a)) return nan; if (a >= max) return max; if (a <= min) return min;
2093 ia = __float_as_int(a);
/* shift is derived from the 8-bit exponent field */
2094 shift = 189 - ((ia >> 23) & 0xff);
/* significand with its leading 1 made explicit, placed at bit 62 */
2095 res = (unsigned long long int)(((ia << 8) | 0x80000000) >> 1) << 32;
/* t collects the bits shifted out of res, aligned to the top of the word */
2096 if (shift >= 64) {
2097 t = res;
2098 res = 0;
2099 } else if (shift) {
2100 t = res << (64 - shift);
2101 res = res >> shift;
/* nearest: round up when t's top bit is set; an exact half goes to even */
2103 if (rndMode == cudaRoundNearest && (long long int)t < 0LL) {
2104 res += t == 0x8000000000000000ULL ? res & 1ULL : 1ULL;
/* toward -inf: bump the magnitude of a negative input with a remainder */
2106 else if (rndMode == cudaRoundMinInf && t != 0ULL && ia > 0x80000000) {
2107 res++;
/* toward +inf: bump the magnitude of a positive input with a remainder */
2109 else if (rndMode == cudaRoundPosInf && t != 0ULL && (int)ia > 0) {
2110 res++;
/* apply the sign of the input */
2112 if ((int)ia < 0) res = (unsigned long long int)-(long long int)res;
2113 return (long long int)res;
2116 static int __internal_float2int(float a, enum cudaRoundMode rndMode)
2118 return (int)__internal_float2ll_kernel(a, 2147483647LL, -2147483648LL, 0LL, rndMode);
2121 static int __float2int_rz(float a)
2123 return __internal_float2int(a, cudaRoundZero);
2126 static int __float2int_ru(float a)
2128 return __internal_float2int(a, cudaRoundPosInf);
2131 static int __float2int_rd(float a)
2133 return __internal_float2int(a, cudaRoundMinInf);
2136 static int __float2int_rn(float a)
2138 return __internal_float2int(a, cudaRoundNearest);
2141 static long long int __internal_float2ll(float a, enum cudaRoundMode rndMode)
2143 return __internal_float2ll_kernel(a, 9223372036854775807LL, -9223372036854775807LL -1LL, -9223372036854775807LL -1LL, rndMode);
2146 static long long int __float2ll_rz(float a)
2148 return __internal_float2ll(a, cudaRoundZero);
2151 static long long int __float2ll_ru(float a)
2153 return __internal_float2ll(a, cudaRoundPosInf);
2156 static long long int __float2ll_rd(float a)
2158 return __internal_float2ll(a, cudaRoundMinInf);
2161 static long long int __float2ll_rn(float a)
2163 return __internal_float2ll(a, cudaRoundNearest);
/* Convert a float to an unsigned 64-bit integer by manipulating its bit
 * pattern.
 *   max     : result returned when a >= max
 *   nan     : result returned when a is NaN
 *   rndMode : cudaRoundNearest and cudaRoundPosInf adjust the truncated value
 *             below; any other mode leaves it truncated.
 * Inputs <= 0 give 0.
 * NOTE(review): brace-only lines are absent from this listing. */
2166 static unsigned long long int __internal_float2ull_kernel(float a, unsigned long long int max, unsigned long long int nan, enum cudaRoundMode rndMode)
2168 unsigned long long int res, t = 0ULL;
2169 int shift;
2170 unsigned int ia;
/* NaN, too-large and non-positive inputs are answered first. */
2172 if (sizeof(a) == sizeof(double) && __cuda___isnan((double)a)) return nan; if (sizeof(a) == sizeof(float) && __cuda___isnanf((float)a)) return nan; if (a >= max) return max; if (a <= 0LL) return 0LL;
2173 ia = __float_as_int(a);
/* shift is derived from the 8-bit exponent field */
2174 shift = 190 - ((ia >> 23) & 0xff);
/* significand with its leading 1 made explicit, placed at bit 63 */
2175 res = (unsigned long long int)((ia << 8) | 0x80000000) << 32;
/* t collects the bits shifted out of res, aligned to the top of the word */
2176 if (shift >= 64) {
2177 t = res >> (int)(shift > 64);
2178 res = 0;
2179 } else if (shift) {
2180 t = res << (64 - shift);
2181 res = res >> shift;
/* nearest: round up when t's top bit is set; an exact half goes to even */
2183 if (rndMode == cudaRoundNearest && (long long int)t < 0LL) {
2184 res += t == 0x8000000000000000ULL ? res & 1ULL : 1ULL;
/* toward +inf: any non-zero remainder rounds up */
2186 else if (rndMode == cudaRoundPosInf && t != 0ULL) {
2187 res++;
2189 return res;
2192 static unsigned int __internal_float2uint(float a, enum cudaRoundMode rndMode)
2194 return (unsigned int)__internal_float2ull_kernel(a, 4294967295U, 0U, rndMode);
2197 static unsigned int __float2uint_rz(float a)
2199 return __internal_float2uint(a, cudaRoundZero);
2202 static unsigned int __float2uint_ru(float a)
2204 return __internal_float2uint(a, cudaRoundPosInf);
2207 static unsigned int __float2uint_rd(float a)
2209 return __internal_float2uint(a, cudaRoundMinInf);
2212 static unsigned int __float2uint_rn(float a)
2214 return __internal_float2uint(a, cudaRoundNearest);
2217 static unsigned long long int __internal_float2ull(float a, enum cudaRoundMode rndMode)
2219 return __internal_float2ull_kernel(a, 18446744073709551615ULL, 9223372036854775808ULL, rndMode);
2222 static unsigned long long int __float2ull_rz(float a)
2224 return __internal_float2ull(a, cudaRoundZero);
2227 static unsigned long long int __float2ull_ru(float a)
2229 return __internal_float2ull(a, cudaRoundPosInf);
2232 static unsigned long long int __float2ull_rd(float a)
2234 return __internal_float2ull(a, cudaRoundMinInf);
2237 static unsigned long long int __float2ull_rn(float a)
2239 return __internal_float2ull(a, cudaRoundNearest);
/* Shift *a left in steps of 32, 16, 8, 4, 2 and 1 bits, each step taken only
 * while that many top bits are all zero.  Stores the shifted value back in
 * *a and returns the total shift applied. */
static int __internal_normalize64(unsigned long long int *a)
{
    unsigned long long int value = *a;
    int shifted = 0;
    int step;

    for (step = 32; step >= 1; step >>= 1) {
        /* are the top `step` bits all clear? */
        if ((value >> (64 - step)) == 0ULL) {
            value <<= step;
            shifted += step;
        }
    }
    *a = value;
    return shifted;
}
/* 32-bit variant: normalise *a through the 64-bit routine, keep the upper
 * word as the new *a, and return the 64-bit shift count minus 32. */
static int __internal_normalize(unsigned int *a)
{
    unsigned long long int wide = (unsigned long long int)*a;
    const int shift64 = __internal_normalize64(&wide);

    *a = (unsigned int)(wide >> 32);
    return shift64 - 32;
}
/* Build the float bit pattern for a signed int by hand, rounding per rndMode.
 * cudaRoundNearest, cudaRoundMinInf and cudaRoundPosInf adjust the truncated
 * result below; any other mode leaves it truncated.
 * NOTE(review): brace-only lines are absent from this listing. */
2283 static float __internal_int2float_kernel(int a, enum cudaRoundMode rndMode)
2285 volatile union {
2286 float f;
2287 unsigned int i;
2288 } res;
2289 int shift;
2290 unsigned int t;
2291 res.i = a;
/* zero maps to the all-zero bit pattern */
2292 if (a == 0) return res.f;
/* work on the magnitude; the sign bit is OR-ed back in further down */
2293 if (a < 0) res.i = (unsigned int)-a;
2294 shift = __internal_normalize((unsigned int*)&res.i);
/* t = the low 8 bits that do not fit, moved to the top of the word */
2295 t = res.i << 24;
2296 res.i = (res.i >> 8);
2297 res.i += (127 + 30 - shift) << 23;
2298 if (a < 0) res.i |= 0x80000000;
/* nearest: round up when t's top bit is set; an exact half goes to even */
2299 if ((rndMode == cudaRoundNearest) && (t >= 0x80000000)) {
2300 res.i += (t == 0x80000000) ? (res.i & 1) : (t >> 31);
/* toward -inf: negative input with a remainder */
2302 else if ((rndMode == cudaRoundMinInf) && t && (a < 0)) {
2303 res.i++;
/* toward +inf: positive input with a remainder */
2305 else if ((rndMode == cudaRoundPosInf) && t && (a > 0)) {
2306 res.i++;
2308 return res.f;
2311 static float __int2float_rz(int a)
2313 return __internal_int2float_kernel(a, cudaRoundZero);
2316 static float __int2float_ru(int a)
2318 return __internal_int2float_kernel(a, cudaRoundPosInf);
2321 static float __int2float_rd(int a)
2323 return __internal_int2float_kernel(a, cudaRoundMinInf);
2326 static float __int2float_rn(int a)
2328 return __internal_int2float_kernel(a, cudaRoundNearest);
/* Build the float bit pattern for an unsigned int by hand, rounding per
 * rndMode.  cudaRoundNearest and cudaRoundPosInf adjust the truncated result
 * below; any other mode leaves it truncated.
 * NOTE(review): brace-only lines are absent from this listing. */
2331 static float __internal_uint2float_kernel(unsigned int a, enum cudaRoundMode rndMode)
2333 volatile union {
2334 float f;
2335 unsigned int i;
2336 } res;
2337 int shift;
2338 unsigned int t;
2339 res.i = a;
/* zero maps to the all-zero bit pattern */
2340 if (a == 0) return res.f;
2341 shift = __internal_normalize((unsigned int*)&res.i);
/* t = the low 8 bits that do not fit, moved to the top of the word */
2342 t = res.i << 24;
2343 res.i = (res.i >> 8);
2344 res.i += (127 + 30 - shift) << 23;
/* nearest: round up when t's top bit is set; an exact half goes to even */
2345 if ((rndMode == cudaRoundNearest) && (t >= 0x80000000)) {
2346 res.i += (t == 0x80000000) ? (res.i & 1) : (t >> 31);
/* toward +inf: any remainder rounds up */
2348 else if ((rndMode == cudaRoundPosInf) && t) {
2349 res.i++;
2351 return res.f;
2354 static float __uint2float_rz(unsigned int a)
2356 return __internal_uint2float_kernel(a, cudaRoundZero);
2359 static float __uint2float_ru(unsigned int a)
2361 return __internal_uint2float_kernel(a, cudaRoundPosInf);
2364 static float __uint2float_rd(unsigned int a)
2366 return __internal_uint2float_kernel(a, cudaRoundMinInf);
2369 static float __uint2float_rn(unsigned int a)
2371 return __internal_uint2float_kernel(a, cudaRoundNearest);
/* Long long to float using the compiler's own conversion. */
static float __ll2float_rn(long long int a)
{
    const float converted = (float)a;

    return converted;
}
/* Unsigned long long to float, assembled bit by bit; an exact half rounds
 * to even.  Zero is returned directly as 0.0f. */
static float __ull2float_rn(unsigned long long int a)
{
    unsigned long long int mant;
    unsigned int bits;
    unsigned int rem;
    int lz;

    if (a == 0ULL) {
        return 0.0f;
    }

    mant = a;
    lz = __internal_normalize64(&mant);
    /* drop the low 8 bits, folding any set bit among them into bit 0 */
    mant = (mant >> 8) | ((mant & 0xffULL) ? 1ULL : 0ULL);
    bits = (unsigned int)(mant >> 32);
    rem = (unsigned int)mant;
    bits += (127 + 62 - lz) << 23;
    if (rem == 0x80000000) {
        bits += bits & 1;
    } else {
        bits += rem >> 31;
    }
    return __int_as_float(bits);
}
/* Software single-precision multiply on raw bit patterns.
 *   rndNearest : non-zero enables the round-up increments (an exact half goes
 *                to even); zero leaves results truncated.  __fmul_rz passes 0.
 * The first branch handles operands whose exponent field is neither 0 nor
 * 0xFF; the final else-branch handles zeros, exponent-field-0 operands and
 * exponent-field-0xFF operands, re-entering at `multiply:` once an
 * exponent-field-0 operand has been normalised.
 * NOTE(review): brace-only lines are absent from this listing, so nesting
 * must be read from the full file. */
2395 static float __internal_fmul_kernel(float a, float b, int rndNearest)
2397 unsigned long long product;
2398 volatile union {
2399 float f;
2400 unsigned int i;
2401 } xx, yy;
2402 unsigned expo_x, expo_y;
2404 xx.f = a;
2405 yy.f = b;
/* exponent fields minus 1: a field of 0 wraps above 0xFD, a field of 0xFF becomes 254 */
2407 expo_y = 0xFF;
2408 expo_x = expo_y & (xx.i >> 23);
2409 expo_x = expo_x - 1;
2410 expo_y = expo_y & (yy.i >> 23);
2411 expo_y = expo_y - 1;
2413 if ((expo_x <= 0xFD) &&
2414 (expo_y <= 0xFD)) {
2415 multiply:
/* expo_x becomes the result exponent; expo_y is reused to carry the result sign */
2416 expo_x = expo_x + expo_y;
2417 expo_y = xx.i ^ yy.i;
2418 xx.i = xx.i & 0x00ffffff;
2419 yy.i = yy.i << 8;
2420 xx.i = xx.i | 0x00800000;
2421 yy.i = yy.i | 0x80000000;
2423 product = ((unsigned long long)xx.i) * yy.i;
2424 expo_x = expo_x - 127 + 2;
2425 expo_y = expo_y & 0x80000000;
/* xx.i = high word of the product, yy.i = low word (the remainder bits) */
2426 xx.i = (unsigned int)(product >> 32);
2427 yy.i = (unsigned int)(product & 0xffffffff);
/* renormalise by one bit if the product's leading bit landed one place low */
2429 if (xx.i < 0x00800000) {
2430 xx.i = (xx.i << 1) | (yy.i >> 31);
2431 yy.i = (yy.i << 1);
2432 expo_x--;
/* result exponent in range: assemble sign, exponent and mantissa, then round */
2434 if (expo_x <= 0xFD) {
2435 xx.i = xx.i | expo_y;
2436 xx.i = xx.i + (expo_x << 23);
2438 if (yy.i < 0x80000000) return xx.f;
2439 xx.i += (((yy.i == 0x80000000) ? (xx.i & 1) : (yy.i >> 31))
2440 && rndNearest);
2441 return xx.f;
/* exponent too large: signed 0x7F800000 pattern, minus 1 when rndNearest is 0 */
2442 } else if ((int)expo_x >= 254) {
2444 xx.i = (expo_y | 0x7F800000) - (!rndNearest);
2445 return xx.f;
/* exponent too small: shift the mantissa right, or return a signed zero */
2446 } else {
2448 expo_x = ((unsigned int)-((int)expo_x));
2449 if (expo_x > 25) {
2451 xx.i = expo_y;
2452 return xx.f;
2453 } else {
2454 yy.i = (xx.i << (32 - expo_x)) | ((yy.i) ? 1 : 0);
2455 xx.i = expo_y + (xx.i >> expo_x);
2456 xx.i += (((yy.i == 0x80000000) ? (xx.i & 1) : (yy.i >> 31))
2457 && rndNearest);
2458 return xx.f;
/* at least one operand has an exponent field of 0 or 0xFF */
2461 } else {
/* product = sign bit of the result */
2462 product = xx.i ^ yy.i;
2463 product = product & 0x80000000;
/* a is +/-0 */
2464 if (!(xx.i & 0x7fffffff)) {
2465 if (expo_y != 254) {
2466 xx.i = (unsigned int)product;
2467 return xx.f;
2469 expo_y = yy.i << 1;
2470 if (expo_y == 0xFF000000) {
2471 xx.i = expo_y | 0x00C00000;
2472 } else {
2473 xx.i = yy.i | 0x00400000;
2475 return xx.f;
/* b is +/-0 */
2477 if (!(yy.i & 0x7fffffff)) {
2478 if (expo_x != 254) {
2479 xx.i = (unsigned int)product;
2480 return xx.f;
2482 expo_x = xx.i << 1;
2483 if (expo_x == 0xFF000000) {
2484 xx.i = expo_x | 0x00C00000;
2485 } else {
2486 xx.i = xx.i | 0x00400000;
2488 return xx.f;
/* neither exponent field is 0xFF: normalise whichever operand has exponent field 0, then retry */
2490 if ((expo_y != 254) && (expo_x != 254)) {
2491 expo_y++;
2492 expo_x++;
2493 if (expo_x == 0) {
2494 expo_y |= xx.i & 0x80000000;
2499 xx.i = xx.i << 8;
2500 while (!(xx.i & 0x80000000)) {
2501 xx.i <<= 1;
2502 expo_x--;
2504 xx.i = (xx.i >> 8) | (expo_y & 0x80000000);
2505 expo_y &= ~0x80000000;
2506 expo_y--;
2507 goto multiply;
2509 if (expo_y == 0) {
2510 expo_x |= yy.i & 0x80000000;
2511 yy.i = yy.i << 8;
2512 while (!(yy.i & 0x80000000)) {
2513 yy.i <<= 1;
2514 expo_y--;
2516 yy.i = (yy.i >> 8) | (expo_x & 0x80000000);
2517 expo_x &= ~0x80000000;
2518 expo_x--;
2519 goto multiply;
/* remaining cases: an operand with exponent field 0xFF */
2522 expo_x = xx.i << 1;
2523 expo_y = yy.i << 1;
/* a has a non-zero mantissa: return it with bit 22 set */
2525 if (expo_x > 0xFF000000) {
2527 xx.i = xx.i | 0x00400000;
2528 return xx.f;
/* b has a non-zero mantissa: return it with bit 22 set */
2531 if (expo_y > 0xFF000000) {
2533 xx.i = yy.i | 0x00400000;
2534 return xx.f;
/* otherwise return the 0x7f800000 pattern with the product sign */
2536 xx.i = (unsigned int)product | 0x7f800000;
2537 return xx.f;
/* Software single-precision add on raw bit patterns.
 *   rndNearest : non-zero enables the round-up increments (an exact half goes
 *                to even); zero leaves results truncated.  __fadd_rz passes 0.
 * Operands are first swapped so that xx holds the one whose bit pattern,
 * ignoring the sign bit, is larger.  The first branch handles operands whose
 * exponent field is neither 0 nor 0xFF; the final else-branch handles the
 * rest and re-enters at `add:` once exponent-field-0 operands have been
 * normalised.
 * NOTE(review): brace-only lines are absent from this listing, so nesting
 * must be read from the full file. */
2541 static float __internal_fadd_kernel(float a, float b, int rndNearest)
2543 volatile union {
2544 float f;
2545 unsigned int i;
2546 } xx, yy;
2547 unsigned int expo_x;
2548 unsigned int expo_y;
2549 unsigned int temp;
2551 xx.f = a;
2552 yy.f = b;
/* swap so that xx is the larger operand once the sign bit is shifted out */
2555 expo_y = yy.i << 1;
2556 if (expo_y > (xx.i << 1)) {
2557 expo_y = xx.i;
2558 xx.i = yy.i;
2559 yy.i = expo_y;
/* exponent fields minus 1: a field of 0 wraps above 0xFD, a field of 0xFF becomes 254 */
2562 temp = 0xff;
2563 expo_x = temp & (xx.i >> 23);
2564 expo_x = expo_x - 1;
2565 expo_y = temp & (yy.i >> 23);
2566 expo_y = expo_y - 1;
2568 if ((expo_x <= 0xFD) &&
2569 (expo_y <= 0xFD)) {
2571 add:
/* expo_y becomes the alignment shift, capped at 31 once it exceeds 25 */
2572 expo_y = expo_x - expo_y;
2573 if (expo_y > 25) {
2574 expo_y = 31;
/* the sign bit of temp tells whether the operand signs differ */
2576 temp = xx.i ^ yy.i;
2577 xx.i = xx.i & ~0x7f000000;
2578 xx.i = xx.i | 0x00800000;
2579 yy.i = yy.i & ~0xff000000;
2580 yy.i = yy.i | 0x00800000;
/* signs differ: subtract the aligned smaller magnitude */
2582 if ((int)temp < 0) {
2584 temp = 32 - expo_y;
2585 temp = (expo_y) ? (yy.i << temp) : 0;
2586 temp = (unsigned int)(-((int)temp));
2587 xx.i = xx.i - (yy.i >> expo_y) - (temp ? 1 : 0);
2588 if (xx.i & 0x00800000) {
2589 if (expo_x <= 0xFD) {
2590 xx.i = xx.i & ~0x00800000;
2591 xx.i = (xx.i + (expo_x << 23)) + 0x00800000;
2592 if (temp < 0x80000000) return xx.f;
2593 xx.i += (((temp == 0x80000000) ? (xx.i & 1) : (temp >> 31))
2594 && rndNearest);
2595 return xx.f;
2597 } else {
/* complete cancellation gives the all-zero pattern */
2598 if ((temp | (xx.i << 1)) == 0) {
2600 xx.i = 0;
2601 return xx.f;
/* renormalise after cancellation, keeping the sign in yy.i */
2604 yy.i = xx.i & 0x80000000;
2605 do {
2606 xx.i = (xx.i << 1) | (temp >> 31);
2607 temp <<= 1;
2608 expo_x--;
2609 } while (!(xx.i & 0x00800000));
2610 xx.i = xx.i | yy.i;
/* signs equal: add the aligned smaller magnitude */
2612 } else {
2614 temp = 32 - expo_y;
2615 temp = (expo_y) ? (yy.i << temp) : 0;
2616 xx.i = xx.i + (yy.i >> expo_y);
2617 if (!(xx.i & 0x01000000)) {
2618 if (expo_x <= 0xFD) {
2619 expo_y = xx.i & 1;
2620 xx.i = xx.i + (expo_x << 23);
2621 if (temp < 0x80000000) return xx.f;
2622 xx.i += (((temp == 0x80000000) ? expo_y : (temp >> 31))
2623 && rndNearest);
2624 return xx.f;
2626 } else {
/* carry out of the mantissa: shift right one bit and bump the exponent */
2628 temp = (xx.i << 31) | (temp >> 1);
2630 xx.i = ((xx.i & 0x80000000) | (xx.i >> 1)) & ~0x40000000;
2631 expo_x++;
2634 if (expo_x <= 0xFD) {
2635 expo_y = xx.i & 1;
2636 xx.i += (((temp == 0x80000000) ? expo_y : (temp >> 31))
2637 && rndNearest);
2638 xx.i = xx.i + (expo_x << 23);
2639 return xx.f;
/* exponent too large: signed 0x7f800000 pattern, minus 1 when rndNearest is 0 */
2641 if ((int)expo_x >= 254) {
2643 xx.i = ((xx.i & 0x80000000) | 0x7f800000) - (!rndNearest);
2644 return xx.f;
/* exponent too small: shift the mantissa right into place */
2647 expo_y = expo_x + 32;
2648 yy.i = xx.i & 0x80000000;
2649 xx.i = xx.i & ~0xff000000;
2651 expo_x = (unsigned int)(-((int)expo_x));
2652 temp = xx.i << expo_y | ((temp) ? 1 : 0);
2653 xx.i = yy.i | (xx.i >> expo_x);
2654 xx.i += (((temp == 0x80000000) ? (xx.i & 1) : (temp >> 31))
2655 && rndNearest);
2656 return xx.f;
/* at least one operand has an exponent field of 0 or 0xFF */
2657 } else {
/* the smaller operand is +/-0 */
2659 if (!(yy.i << 1)) {
2660 if (xx.i == 0x80000000) {
2661 xx.i = yy.i;
2663 return xx.f;
/* neither exponent field is 0xFF: normalise exponent-field-0 operands, then retry */
2665 if ((expo_y != 254) && (expo_x != 254)) {
2667 if (expo_x == (unsigned int) -1) {
2668 temp = xx.i & 0x80000000;
2669 xx.i = xx.i << 8;
2670 while (!(xx.i & 0x80000000)) {
2671 xx.i <<= 1;
2672 expo_x--;
2674 expo_x++;
2675 xx.i = (xx.i >> 8) | temp;
2677 if (expo_y == (unsigned int) -1) {
2678 temp = yy.i & 0x80000000;
2679 yy.i = yy.i << 8;
2680 while (!(yy.i & 0x80000000)) {
2681 yy.i <<= 1;
2682 expo_y--;
2684 expo_y++;
2685 yy.i = (yy.i >> 8) | temp;
2687 goto add;
/* remaining cases: an operand with exponent field 0xFF */
2689 expo_x = xx.i << 1;
2690 expo_y = yy.i << 1;
/* xx has a non-zero mantissa: return it with bit 22 set */
2692 if (expo_x > 0xff000000) {
2694 xx.i = xx.i | 0x00400000;
2695 return xx.f;
/* yy has a non-zero mantissa: return it with bit 22 set */
2698 if (expo_y > 0xff000000) {
2700 xx.i = yy.i | 0x00400000;
2701 return xx.f;
/* both are the 0xff000000 pattern: OR in 0xffc00000 when the bit patterns differ */
2703 if ((expo_x == 0xff000000) && (expo_y == 0xff000000)) {
2708 expo_x = xx.i ^ yy.i;
2709 xx.i = xx.i | ((expo_x) ? 0xffc00000 : 0);
2710 return xx.f;
2713 if (expo_y == 0xff000000) {
2714 xx.i = yy.i;
2716 return xx.f;
/* Adds a and b through __internal_fadd_kernel, passing 0 as the third
   (rounding-selector) argument. */
static float __fadd_rz(float a, float b)
{
  const float sum = __internal_fadd_kernel(a, b, 0);
  return sum;
}
/* Multiplies a and b through __internal_fmul_kernel, passing 0 as the third
   (rounding-selector) argument. */
static float __fmul_rz(float a, float b)
{
  const float product = __internal_fmul_kernel(a, b, 0);
  return product;
}
/* Fast divide emulation.
   - |b| <= 8.507059173e37 (or b is NaN): plain a / b.
   - |b| above that and |a| <= 3.402823466e38: the quotient is divided twice
     more by 3.402823466e38.
   - otherwise: the float with bit pattern 0x7fffffff is returned. */
static float __fdividef(float a, float b)
{
  if (!(__cuda_fabsf(b) > 8.507059173e37f)) {
    return a / b;
  }
  if (__cuda_fabsf(a) <= 3.402823466e38f) {
    return ((a / b) / 3.402823466e38f) / 3.402823466e38f;
  }
  return __int_as_float(0x7fffffff);
}
/* Breakpoint hook taking an int argument; no body statements are visible in
   this extract (NOTE(review): presumably an empty stub -- confirm). */
2744 static void __brkpt(int c)
2749 extern int __cudaSynchronizeThreads(void**, void*);
/* Barrier emulation: calls __cudaSynchronizeThreads with the address of the
   local label L and the address of a local volatile int, and jumps back to L
   to repeat the call for as long as it returns nonzero. */
2753 static inline __attribute__((always_inline)) void __syncthreads(void)
2755 volatile int _ = 0;
/* &&L is the GCC "address of label" extension. */
2756 L: if (__cudaSynchronizeThreads((void**)&&L, (void*)&_)) goto L;
/* Aborts execution by invoking the compiler's trap builtin. */
static void __trap(void)
{
  __builtin_trap();
}
2763 # 1139 "/usr/local/cuda/bin/../include/device_functions.h" 3
2764 static float __sinf(float a)
2766 return sinf(a);
2769 static float __cosf(float a)
2771 return cosf(a);
2774 static float __log2f(float a)
2776 return log2f(a);
/* Divides a by b via __fdividef. When |b| exceeds 8.507059173e37 both
   operands are first scaled by 0.25. */
static float __internal_accurate_fdividef(float a, float b)
{
  const int divisor_is_huge = __cuda_fabsf(b) > 8.507059173e37f;

  if (divisor_is_huge) {
    a = a * .25f;
    b = b * .25f;
  }
  return __fdividef(a, b);
}
/* Fast tangent: quotient of __sinf(a) and __cosf(a). */
static float __tanf(float a)
{
  const float s = __sinf(a);
  const float c = __cosf(a);
  return s / c;
}
/* Stores __sinf(a) through sptr, then __cosf(a) through cptr (in that order). */
static void __sincosf(float a, float *sptr, float *cptr)
{
  const float s = __sinf(a);
  *sptr = s;
  *cptr = __cosf(a);
}
/* Fast e**a, computed as exp2(a * 1.442695041). */
static float __expf(float a)
{
  const float scaled = a * 1.442695041f;
  return __cuda_exp2f(scaled);
}
/* Fast 10**a, computed as exp2(a * 3.321928094). */
static float __exp10f(float a)
{
  const float scaled = a * 3.321928094f;
  return __cuda_exp2f(scaled);
}
/* Fast base-10 logarithm: 0.301029996 * __log2f(a). */
static float __log10f(float a)
{
  const float lg2 = __log2f(a);
  return 0.301029996f * lg2;
}
/* Fast natural logarithm: 0.693147181 * __log2f(a). */
static float __logf(float a)
{
  const float lg2 = __log2f(a);
  return 0.693147181f * lg2;
}
/* Fast a**b, computed as exp2(b * __log2f(a)). */
static float __powf(float a, float b)
{
  const float exponent = b * __log2f(a);
  return __cuda_exp2f(exponent);
}
/* Public single-precision divide; forwards to __internal_accurate_fdividef. */
static float fdividef(float a, float b)
{
  const float quotient = __internal_accurate_fdividef(a, b);
  return quotient;
}
/* Count of leading zero bits in a; 32 when a is 0.
   The nonzero case reads the exponent field of the value converted to float
   with __uint2float_rz and subtracts it from 158. */
static int __clz(int a)
{
  if (!a) {
    return 32;
  }
  return 158 - (__float_as_int(__uint2float_rz((unsigned int)a)) >> 23);
}
/* Position (1-based) of the least significant set bit of a; 0 when a is 0.
   The lowest set bit is isolated with x & -x. The negation is done on the
   unsigned representation so that a == INT_MIN does not trigger signed
   overflow (undefined behaviour in C); results are unchanged for all inputs. */
static int __ffs(int a)
{
  const unsigned int bits = (unsigned int)a;
  const unsigned int lowest = bits & (0u - bits);

  return 32 - __clz((int)lowest);
}
/* Count of leading zero bits in a 64-bit value, built from two 32-bit halves:
   if the upper half is nonzero the answer is __clz(upper), otherwise it is
   32 + __clz(lower). */
static int __clzll(long long int a)
{
  const int upper = ((int)(a >> 32));
  const int lower = ((int)(a & 0xffffffffULL));

  if (upper) {
    return 0 + __clz(upper);
  }
  return 32 + __clz(lower);
}
/* Position (1-based) of the least significant set bit of a 64-bit value;
   0 when a is 0. The lowest set bit is isolated with x & -x, with the
   negation done on the unsigned representation so that a == LLONG_MIN does
   not trigger signed overflow; results are unchanged for all inputs. */
static int __ffsll(long long int a)
{
  const unsigned long long int bits = (unsigned long long int)a;
  const unsigned long long int lowest = bits & (0ULL - bits);

  return 64 - __clzll((long long int)lowest);
}
2868 # 1252 "/usr/local/cuda/bin/../include/device_functions.h" 3
/* Double-typed divide that narrows both operands to float, divides with
   fdividef, and widens the result back to double. */
static double fdivide(double a, double b)
{
  const float num = (float)a;
  const float den = (float)b;
  return (double)fdividef(num, den);
}
/* Converts a double to int by first narrowing to float and then calling
   __float2int_rz. */
static int __double2int_rz(double a)
{
  const float narrowed = (float)a;
  return __float2int_rz(narrowed);
}
/* Converts a double to unsigned int by first narrowing to float and then
   calling __float2uint_rz. */
static unsigned int __double2uint_rz(double a)
{
  const float narrowed = (float)a;
  return __float2uint_rz(narrowed);
}
/* Converts a double to long long by first narrowing to float and then
   calling __float2ll_rz. */
static long long int __double2ll_rz(double a)
{
  const float narrowed = (float)a;
  return __float2ll_rz(narrowed);
}
/* Converts a double to unsigned long long by first narrowing to float and
   then calling __float2ull_rz. */
static unsigned long long int __double2ull_rz(double a)
{
  const float narrowed = (float)a;
  return __float2ull_rz(narrowed);
}
2895 # 1291 "/usr/local/cuda/bin/../include/device_functions.h" 3
2896 # 1 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 1 3
2897 # 214 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" 3
/* Adds val to *address with a plain read-modify-write and returns the value
   that was stored there before the update. */
static int __iAtomicAdd(int *address, int val)
{
  const int previous = *address;

  *address = previous + val;
  return previous;
}
/* Unsigned variant: adds val to *address with a plain read-modify-write and
   returns the previous contents. */
static unsigned int __uAtomicAdd(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = previous + val;
  return previous;
}
/* Stores val into *address and returns the value it replaced. */
static int __iAtomicExch(int *address, int val)
{
  const int previous = *address;

  *address = val;
  return previous;
}
/* Unsigned variant: stores val into *address and returns the value it
   replaced. */
static unsigned int __uAtomicExch(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = val;
  return previous;
}
/* Float variant: stores val into *address and returns the value it
   replaced. */
static float __fAtomicExch(float *address, float val)
{
  const float previous = *address;

  *address = val;
  return previous;
}
/* Replaces *address with the smaller of its current value and val; returns
   the value held before the update. */
static int __iAtomicMin(int *address, int val)
{
  const int previous = *address;
  int next = val;

  if (previous < val) {
    next = previous;
  }
  *address = next;
  return previous;
}
/* Unsigned variant: replaces *address with the smaller of its current value
   and val; returns the value held before the update. */
static unsigned int __uAtomicMin(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next = val;

  if (previous < val) {
    next = previous;
  }
  *address = next;
  return previous;
}
/* Replaces *address with the larger of its current value and val; returns
   the value held before the update. */
static int __iAtomicMax(int *address, int val)
{
  const int previous = *address;
  int next = val;

  if (previous > val) {
    next = previous;
  }
  *address = next;
  return previous;
}
/* Unsigned variant: replaces *address with the larger of its current value
   and val; returns the value held before the update. */
static unsigned int __uAtomicMax(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next = val;

  if (previous > val) {
    next = previous;
  }
  *address = next;
  return previous;
}
/* Wrapping increment: *address becomes 0 when its current value is >= val,
   otherwise it is incremented by one. Returns the previous value. */
static unsigned int __uAtomicInc(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  if (previous >= val) {
    *address = 0;
  } else {
    *address = previous + 1;
  }
  return previous;
}
/* Wrapping decrement: *address becomes val when its current value is 0 or
   greater than val, otherwise it is decremented by one. Returns the previous
   value. */
static unsigned int __uAtomicDec(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;
  const unsigned int must_wrap = (previous == 0) | (previous > val);

  if (must_wrap) {
    *address = val;
  } else {
    *address = previous - 1;
  }
  return previous;
}
/* Bitwise-ANDs val into *address and returns the previous contents. */
static int __iAtomicAnd(int *address, int val)
{
  const int previous = *address;

  *address = previous & val;
  return previous;
}
/* Unsigned variant: bitwise-ANDs val into *address and returns the previous
   contents. */
static unsigned int __uAtomicAnd(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = previous & val;
  return previous;
}
/* Bitwise-ORs val into *address and returns the previous contents. */
static int __iAtomicOr(int *address, int val)
{
  const int previous = *address;

  *address = previous | val;
  return previous;
}
/* Unsigned variant: bitwise-ORs val into *address and returns the previous
   contents. */
static unsigned int __uAtomicOr(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = previous | val;
  return previous;
}
/* Bitwise-XORs val into *address and returns the previous contents. */
static int __iAtomicXor(int *address, int val)
{
  const int previous = *address;

  *address = previous ^ val;
  return previous;
}
/* Unsigned variant: bitwise-XORs val into *address and returns the previous
   contents. */
static unsigned int __uAtomicXor(unsigned int *address, unsigned int val)
{
  const unsigned int previous = *address;

  *address = previous ^ val;
  return previous;
}
/* Compare-and-swap: when *address equals compare it is overwritten with val,
   otherwise its current value is written back unchanged. Returns the value
   read before the update. */
static int __iAtomicCAS(int *address, int compare, int val)
{
  const int previous = *address;
  int next = previous;

  if (previous == compare) {
    next = val;
  }
  *address = next;
  return previous;
}
/* Unsigned compare-and-swap: when *address equals compare it is overwritten
   with val, otherwise its current value is written back unchanged. Returns
   the value read before the update. */
static unsigned int __uAtomicCAS(unsigned int *address, unsigned int compare, unsigned int val)
{
  const unsigned int previous = *address;
  unsigned int next = previous;

  if (previous == compare) {
    next = val;
  }
  *address = next;
  return previous;
}
3068 # 1292 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
3069 # 1 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 1 3
3070 # 2007 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" 3
3071 extern void __cudaTextureFetch(const void *tex, void *index, int integer, void *val);
3073 static int4 __itexfetchi(const void *tex, int4 index)
3075 int4 val;
3077 __cudaTextureFetch(tex, &index, 1, &val);
3079 return val;
3082 static uint4 __utexfetchi(const void *tex, int4 index)
3084 uint4 val;
3086 __cudaTextureFetch(tex, &index, 1, &val);
3088 return val;
3091 static float4 __ftexfetchi(const void *tex, int4 index)
3093 float4 val;
3095 __cudaTextureFetch(tex, &index, 1, &val);
3097 return val;
3100 static int4 __itexfetch(const void *tex, float4 index, int dim)
3102 int4 val;
3104 __cudaTextureFetch(tex, &index, 0, &val);
3106 return val;
3109 static uint4 __utexfetch(const void *tex, float4 index, int dim)
3111 uint4 val;
3113 __cudaTextureFetch(tex, &index, 0, &val);
3115 return val;
3118 static float4 __ftexfetch(const void *tex, float4 index, int dim)
3120 float4 val;
3122 __cudaTextureFetch(tex, &index, 0, &val);
3124 return val;
3126 # 1293 "/usr/local/cuda/bin/../include/device_functions.h" 2 3
3127 # 859 "/usr/local/cuda/bin/../include/math_functions.h" 2 3
/* Returns bit 31 (the sign bit) of the float's bit pattern as 0 or 1. */
extern __attribute__((weak)) int __cuda___signbitf(float a); int __cuda___signbitf(float a)
{
  const unsigned int bits = (unsigned int)__float_as_int(a);
  return (int)(bits >> 31);
}
/* Builds a float from the sign bit of b and all remaining bits of a. */
extern __attribute__((weak)) float __cuda_copysignf(float a, float b); float __cuda_copysignf(float a, float b)
{
  return __int_as_float((__float_as_int(a) & ~0x80000000) |
                        (__float_as_int(b) & 0x80000000));
}
3143 # 883 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Smaller of two ints; b is returned when the two are equal. */
extern __attribute__((weak)) int min(int a, int b); int min(int a, int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
/* Smaller of two unsigned ints. */
extern __attribute__((weak)) unsigned int umin(unsigned int a, unsigned int b); unsigned int umin(unsigned int a, unsigned int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
/* Larger of two ints; b is returned when the two are equal. */
extern __attribute__((weak)) int max(int a, int b); int max(int a, int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
/* Larger of two unsigned ints. */
extern __attribute__((weak)) unsigned int umax(unsigned int a, unsigned int b); unsigned int umax(unsigned int a, unsigned int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
3163 # 967 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Rounds a to an integral value in float format.
   Magnitudes of 8388608.0 (2**23) or more are returned unchanged. Smaller
   magnitudes are rounded by adding and then subtracting 8388608.0 through a
   volatile temporary, and the sign of a is re-applied to the result. */
extern __attribute__((weak)) float __internal_nearbyintf(float a); float __internal_nearbyintf(float a)
{
  const float magnitude = fabsf(a);
  volatile float rounded;

  if (magnitude >= 8388608.0f) {
    return a;
  }
  rounded = 8388608.0f + magnitude;
  rounded = rounded - 8388608.0f;
  return copysignf(rounded, a);
}
/* Minimum of two floats with explicit special cases:
   - if a is NaN (bit pattern shifted left by one exceeds 0xff000000) return b;
   - else if b is NaN return a;
   - if the OR of both bit patterns is exactly 0x80000000 return the float
     with bit pattern 0x80000000;
   - otherwise the ordinary a < b comparison decides. */
extern __attribute__((weak)) float __internal_fminf(float a, float b); float __internal_fminf(float a, float b)
{
  volatile union {
    float f;
    unsigned int i;
  } lhs, rhs;

  lhs.f = a;
  rhs.f = b;
  if ((lhs.i << 1) > 0xff000000) {
    return b;
  }
  if ((rhs.i << 1) > 0xff000000) {
    return a;
  }
  if ((lhs.i | rhs.i) == 0x80000000) {
    return __int_as_float(0x80000000);
  }
  if (a < b) {
    return a;
  }
  return b;
}
/* Maximum of two floats with explicit special cases:
   - if a is NaN (bit pattern shifted left by one exceeds 0xff000000) return b;
   - else if b is NaN return a;
   - if both compare equal to zero, return the float whose bit pattern is the
     AND of the two patterns;
   - otherwise the ordinary a > b comparison decides. */
extern __attribute__((weak)) float __internal_fmaxf(float a, float b); float __internal_fmaxf(float a, float b)
{
  volatile union {
    float f;
    unsigned int i;
  } lhs, rhs;

  lhs.f = a;
  rhs.f = b;
  if ((lhs.i << 1) > 0xff000000) {
    return b;
  }
  if ((rhs.i << 1) > 0xff000000) {
    return a;
  }
  if ((lhs.f == 0.0f) && (rhs.f == 0.0f)) {
    lhs.i &= rhs.i;
    return lhs.f;
  }
  if (a > b) {
    return a;
  }
  return b;
}
3212 # 1055 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Absolute value of a long; forwards to labs(). */
extern __attribute__((weak)) long int __cuda_labs(long int a); long int __cuda_labs(long int a)
{
  const long int magnitude = labs(a);
  return magnitude;
}
3218 extern __attribute__((weak)) float __cuda_ceilf(float a); float __cuda_ceilf(float a)
3220 return ceilf(a);
3223 extern __attribute__((weak)) float __cuda_floorf(float a); float __cuda_floorf(float a)
3225 return floorf(a);
3228 extern __attribute__((weak)) float __cuda_sqrtf(float a); float __cuda_sqrtf(float a)
3230 return sqrtf(a);
3233 extern __attribute__((weak)) float __cuda_rsqrtf(float a); float __cuda_rsqrtf(float a)
3235 return 1.0f / sqrtf(a);
3238 extern __attribute__((weak)) float __cuda_truncf(float a); float __cuda_truncf(float a)
3240 return truncf(a);
/* Larger of two ints; forwards to max(). */
extern __attribute__((weak)) int __cuda_max(int a, int b); int __cuda_max(int a, int b)
{
  const int larger = max(a, b);
  return larger;
}
/* Smaller of two ints; forwards to min(). */
extern __attribute__((weak)) int __cuda_min(int a, int b); int __cuda_min(int a, int b)
{
  const int smaller = min(a, b);
  return smaller;
}
/* Larger of two unsigned ints; forwards to umax(). */
extern __attribute__((weak)) unsigned int __cuda_umax(unsigned int a, unsigned int b); unsigned int __cuda_umax(unsigned int a, unsigned int b)
{
  const unsigned int larger = umax(a, b);
  return larger;
}
/* Smaller of two unsigned ints; forwards to umin(). */
extern __attribute__((weak)) unsigned int __cuda_umin(unsigned int a, unsigned int b); unsigned int __cuda_umin(unsigned int a, unsigned int b)
{
  const unsigned int smaller = umin(a, b);
  return smaller;
}
/* Converts a float to long long via __float2ll_rn. */
extern __attribute__((weak)) long long int __cuda_llrintf(float a); long long int __cuda_llrintf(float a)
{
  const long long int rounded = __float2ll_rn(a);
  return rounded;
}
/* Converts a float to long by calling __cuda_llrintf and narrowing the
   result. */
extern __attribute__((weak)) long int __cuda_lrintf(float a); long int __cuda_lrintf(float a)
{
  const long long int wide = __cuda_llrintf(a);
  return (long int)wide;
}
/* Rounds to an integral float; forwards to __internal_nearbyintf. */
extern __attribute__((weak)) float __cuda_nearbyintf(float a); float __cuda_nearbyintf(float a)
{
  const float rounded = __internal_nearbyintf(a);
  return rounded;
}
/* Float maximum; forwards to __internal_fmaxf. */
extern __attribute__((weak)) float __cuda_fmaxf(float a, float b); float __cuda_fmaxf(float a, float b)
{
  const float larger = __internal_fmaxf(a, b);
  return larger;
}
/* Float minimum; forwards to __internal_fminf. */
extern __attribute__((weak)) float __cuda_fminf(float a, float b); float __cuda_fminf(float a, float b)
{
  const float smaller = __internal_fminf(a, b);
  return smaller;
}
3303 # 1162 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Nonzero when |a| compares strictly below the float with bit pattern
   0x7f800000 (false for NaN, since the comparison fails). */
extern __attribute__((weak)) int __cuda___finitef(float a); int __cuda___finitef(float a)
{
  const float magnitude = __cuda_fabsf(a);
  return magnitude < __int_as_float(0x7f800000);
}
/* Nonzero when |a| equals the float with bit pattern 0x7f800000. */
extern __attribute__((weak)) int __cuda___isinff(float a); int __cuda___isinff(float a)
{
  const float magnitude = __cuda_fabsf(a);
  return magnitude == __int_as_float(0x7f800000);
}
/* Nonzero when |a| fails the "<= 0x7f800000-as-float" comparison, which is
   the case only for NaN. */
extern __attribute__((weak)) int __cuda___isnanf(float a); int __cuda___isnanf(float a)
{
  const float magnitude = __cuda_fabsf(a);
  return !(magnitude <= __int_as_float(0x7f800000));
}
/* Next representable float after a in the direction of b.
   - Either operand NaN: returns a + b.
   - Both operands zero (of either sign): returns b.
   - a zero, b not: returns the float with bit pattern 1 carrying b's sign.
   - Otherwise the bit pattern of a is stepped by one: away from zero when
     moving toward b increases |a|, toward zero otherwise. a == b leaves a
     unchanged. */
extern __attribute__((weak)) float __cuda_nextafterf(float a, float b); float __cuda_nextafterf(float a, float b)
{
  unsigned int abits;
  unsigned int bbits;

  abits = __float_as_int(a);
  bbits = __float_as_int(b);

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  if (__int_as_float (abits | bbits) == 0.0f) {
    return b;
  }
  if (__int_as_float(abits) == 0.0f) {
    return __cuda_copysignf(__int_as_float(0x00000001), b);
  }

  if (a < b) {
    if (a < 0.0f) abits--;
    if (a > 0.0f) abits++;
  } else if (a > b) {
    if (a < 0.0f) abits++;
    if (a > 0.0f) abits--;
  }
  a = __int_as_float(abits);
  return a;
}
/* Returns the float with bit pattern 0x7fffffff; tagp is not inspected. */
extern __attribute__((weak)) float __cuda_nanf(const char *tagp); float __cuda_nanf(const char *tagp)
{
  const float quiet_nan = __int_as_float(0x7fffffff);
  return quiet_nan;
}
/* Polynomial kernel evaluated at x = a_1 + a_2: a three-coefficient
   polynomial in x*x is multiplied by x*x and by x, then a_2 and a_1 are added
   in that order. Statement order is kept so rounding is unchanged. */
extern __attribute__((weak)) float __internal_atanhf_kernel(float a_1, float a_2); float __internal_atanhf_kernel(float a_1, float a_2)
{
  float x, xsq, acc;

  x = a_1 + a_2;
  xsq = x * x;
  acc = 1.566305595598990E-001f/64.0f;
  acc = acc * xsq + 1.995081856004762E-001f/16.0f;
  acc = acc * xsq + 3.333382699617026E-001f/4.0f;
  acc = acc * xsq;
  acc = acc * x + a_2;
  acc = acc + a_1;
  return acc;
}
/* Rational kernel used by the arctangent routines: a numerator polynomial in
   a*a (times a*a*a) is multiplied by the reciprocal of a cubic denominator in
   a*a, and the result is added to a. Statement order is kept so rounding is
   unchanged. */
extern __attribute__((weak)) float __internal_atanf_kernel(float a); float __internal_atanf_kernel(float a)
{
  float sq, num, den;

  sq = a * a;
  num = - 5.674867153f;
  num = sq * - 0.823362947f + num;
  num = num * sq - 6.565555096f;
  num = num * sq;
  num = num * a;
  den = sq + 11.33538818f;
  den = den * sq + 28.84246826f;
  den = den * sq + 19.69667053f;
  den = 1.0f / den;
  a = num * den + a;
  return a;
}
/* Rational kernel used by __cuda_tanf: a linear numerator in a*a is divided
   by (a*a - 2.469348886157666), scaled by a*a*a, and added to a. Statement
   order is kept so rounding is unchanged. */
extern __attribute__((weak)) float __internal_tan_kernel(float a); float __internal_tan_kernel(float a)
{
  float sq, recip, acc;

  sq = a * a;
  acc = 4.114678393115178E-003f * sq - 8.231194034909670E-001f;
  recip = sq - 2.469348886157666E+000f;
  recip = 1.0f / recip;
  acc = acc * recip;
  acc = acc * sq;
  acc = acc * a + a;
  return acc;
}
/* Natural logarithm.
   Inputs whose bit pattern (as a signed int) is below 0x00800000 or above
   0x7f7fffff are handed to __logf. Otherwise the value is split into a
   mantissa m (exponent field forced to 0x3f800000) and an unbiased exponent
   e; m is halved and e bumped when m > 1.414213562. The result is
   e * 0.693147181 plus __internal_atanhf_kernel applied to m - 1 and a
   correction term. */
extern __attribute__((weak)) float __internal_accurate_logf(float a); float __internal_accurate_logf(float a)
{
  float diff;
  float corr;
  float mant;
  int bits, expo;

  bits = __float_as_int(a);
  if ((bits < 0x00800000) || (bits > 0x7f7fffff)) {
    return __logf(a);
  }

  mant = __int_as_float((bits & 0x807fffff) | 0x3f800000);
  expo = ((unsigned)bits >> 23) - 127;
  if (mant > 1.414213562f) {
    mant = mant * 0.5f;
    expo = expo + 1;
  }
  diff = mant - 1.0f;
  corr = mant + 1.0f;
  corr = diff / corr;
  corr = -diff * corr;
  corr = __internal_atanhf_kernel(diff, corr);
  corr = (float)expo * 0.693147181f + corr;
  return corr;
}
/* Base-2 logarithm: 1.442695041 * __internal_accurate_logf(a). */
extern __attribute__((weak)) float __internal_accurate_log2f(float a); float __internal_accurate_log2f(float a)
{
  const float ln = __internal_accurate_logf(a);
  return 1.442695041f * ln;
}
/* Six 32-bit words that __internal_trig_reduction_kernel multiplies against
   the argument's mantissa, indexed 0..5 in its product loop.
   NOTE(review): presumably the fraction bits of 2/pi, least significant word
   first -- confirm against the CUDA math headers. */
3444 static unsigned int __cudart_i2opi_f [] = {
3445 0x3c439041,
3446 0xdb629599,
3447 0xf534ddc0,
3448 0xfc2757d1,
3449 0x4e441529,
3450 0xa2f9836e,
/* Argument reduction for the trigonometric routines.
   Returns a reduced argument and stores a quadrant index through *quadrant.
   Two paths:
   - |a| > 48039: integer multi-word path using the __cudart_i2opi_f table;
   - otherwise: q = __float2int_rn(a * 0.636619772) and q times four constants
     is subtracted from a in sequence. */
3454 extern __attribute__((weak)) float __internal_trig_reduction_kernel(float a, int *quadrant); float __internal_trig_reduction_kernel(float a, int *quadrant)
3456 float j;
3457 int q;
3458 if (__cuda_fabsf(a) > 48039.0f) {
/* Large-argument path: split a into sign (s), exponent (e, biased field
   minus 128) and a 32-bit mantissa with the implicit bit set in bit 31. */
3460 unsigned int ia = __float_as_int(a);
3461 unsigned int s = ia & 0x80000000;
3462 unsigned int result[7];
3463 unsigned int phi, plo;
3464 unsigned int hi, lo;
3465 unsigned int e;
3466 int idx;
3467 e = ((ia >> 23) & 0xff) - 128;
3468 ia = (ia << 8) | 0x80000000;
/* idx selects which product words are examined below, from the exponent. */
3470 idx = 4 - (e >> 5);
3471 hi = 0;
/* Multiply the mantissa by each of the six table words, propagating the
   carry in hi; the seven result words are stored in result[0..6]. */
3475 for (q = 0; q < 6; q++) {
3476 plo = __cudart_i2opi_f[q] * ia;
3477 phi = __umulhi (__cudart_i2opi_f[q], ia);
3478 lo = hi + plo;
3479 hi = phi + (lo < plo);
3480 result[q] = lo;
3482 result[q] = hi;
3483 e = e & 31;
/* Take the two words at idx+2 / idx+1 and, if needed, shift in e more bits
   from the word below. */
3487 hi = result[idx+2];
3488 lo = result[idx+1];
3489 if (e) {
3490 q = 32 - e;
3491 hi = (hi << e) | (lo >> q);
3492 lo = (lo << e) | (result[idx] >> q);
/* The top two bits of hi become the quadrant; the remaining bits are the
   fraction, shifted up by two. */
3494 q = hi >> 30;
3496 hi = (hi << 2) | (lo >> 30);
3497 lo = (lo << 2);
/* e is 1 when the fraction exceeds one half; the quadrant is then bumped. */
3498 e = (hi + (lo > 0)) > 0x80000000;
3499 q += e;
3500 if (s) q = -q;
/* In that case the 64-bit fraction hi:lo is negated and the sign flipped. */
3501 if (e) {
3502 unsigned int t;
3503 hi = ~hi;
3504 lo = -(int)lo;
3505 t = (lo == 0);
3506 hi += t;
3507 s = s ^ 0x80000000;
3509 *quadrant = q;
/* Normalize: shift hi:lo left until bit 31 of hi is set, counting the shifts
   as a negative exponent in e. */
3511 e = 0;
3512 while ((int)hi > 0) {
3513 hi = (hi << 1) | (lo >> 31);
3514 lo = (lo << 1);
3515 e--;
/* Multiply by 0xc90fdaa2 (NOTE(review): presumably pi/2 as a fixed-point
   constant -- confirm) and renormalize by at most one bit. */
3517 lo = hi * 0xc90fdaa2;
3518 hi = __umulhi(hi, 0xc90fdaa2);
3519 if ((int)hi > 0) {
3520 hi = (hi << 1) | (lo >> 31);
3521 lo = (lo << 1);
3522 e--;
/* Fold any nonzero low word into hi, then assemble sign, exponent and the
   top 24 bits of hi (with a rounding increment) into a float bit pattern. */
3524 hi = hi + (lo > 0);
3525 ia = s | (((e + 126) << 23) + (hi >> 8) + ((hi << 24) >= 0x80000000));
3526 return __int_as_float(ia);
/* Small-argument path: q is a * 0.636619772 converted with __float2int_rn;
   j = (float)q times four constants of decreasing magnitude is subtracted
   from a step by step. */
3528 q = __float2int_rn(a * 0.636619772f);
3529 j = (float)q;
3530 a = a - j * 1.5703125000000000e+000f;
3531 a = a - j * 4.8351287841796875e-004f;
3532 a = a - j * 3.1385570764541626e-007f;
3533 a = a - j * 6.0771005065061922e-011f;
3534 *quadrant = q;
3535 return a;
3537 # 1405 "/usr/local/cuda/bin/../include/math_functions.h" 3
/* Core of the exponential routines.
   n = trunc(a * 1.442695041); the remainder a - n * (0.6931457519 +
   1.4286067653e-6) is formed in two subtractions, rescaled by 1.442695041,
   and the result is exp2(remainder) * exp2(n + scale). */
extern __attribute__((weak)) float __internal_expf_kernel(float a, float scale); float __internal_expf_kernel(float a, float scale)
{
  float whole, frac;

  whole = __cuda_truncf(a * 1.442695041f);
  frac = a - whole * 0.6931457519f;
  frac = frac - whole * 1.4286067653e-6f;
  frac = frac * 1.442695041f;
  frac = __cuda_exp2f(frac) * __cuda_exp2f(whole + scale);
  return frac;
}
/* e**a via __internal_expf_kernel with scale 0; the result is forced to 0
   for a < -105 and to the float with bit pattern 0x7f800000 for a > 105. */
extern __attribute__((weak)) float __internal_accurate_expf(float a); float __internal_accurate_expf(float a)
{
  float result = __internal_expf_kernel(a, 0.0f);

  if (a < -105.0f) {
    result = 0.0f;
  } else if (a > 105.0f) {
    result = __int_as_float(0x7f800000);
  }
  return result;
}
/* 10**a. n = trunc(a * 3.321928094); the remainder is formed with a two-part
   constant, rescaled, and the result is exp2(remainder) * exp2(n). The result
   is forced to 0 for a < -46 and to the float with bit pattern 0x7f800000 for
   a > 46. */
extern __attribute__((weak)) float __internal_accurate_exp10f(float a); float __internal_accurate_exp10f(float a)
{
  float whole, frac;

  whole = __cuda_truncf(a * 3.321928094f);
  frac = a - whole * 3.0102920532226563e-001f;
  frac = frac - whole * 7.9034171557301747e-007f;
  frac = frac * 3.321928094f;
  frac = __cuda_exp2f(frac) * __cuda_exp2f(whole);
  if (a < -46.0f) {
    frac = 0.0f;
  } else if (a > 46.0f) {
    frac = __int_as_float(0x7f800000);
  }
  return frac;
}
/* Log-gamma helper called by __cuda_lgammaf with |a|.
   Infinite input is returned unchanged. Otherwise one of five approximations
   is selected by range:
     a >= 7.8        series in 1/a combined with (a - 0.5) * log(a) - a
     3.0 <= a < 7.8  rational approximation in (a - 3)
     1.5 <= a < 3.0  polynomial in (a - 2)
     0.7 <= a < 1.5  polynomial in (1 - a)
     otherwise       -log of a polynomial in a (NaN also lands here)
   Arithmetic statement order inside each range is kept so rounding is
   unchanged. */
extern __attribute__((weak)) float __internal_lgammaf_pos(float a); float __internal_lgammaf_pos(float a)
{
  float series;
  float p, r;

  if (__cuda___isinff(a)) {
    return a;
  }

  if (a >= 7.8f) {
    p = 1.0f / a;
    r = p * p;
    series = 0.77783067e-3f;
    series = series * r - 0.2777655457e-2f;
    series = series * r + 0.83333273853e-1f;
    series = series * p + 0.918938533204672f;
    p = 0.5f * __internal_accurate_logf(a);
    r = a - 0.5f;
    p = p * r;
    r = p - a;
    p = p + series;
    r = r + p;
    return r;
  }

  if (a >= 3.0f) {
    a = a - 3.0f;
    p = - 7.488903254816711E+002f;
    p = p * a - 1.234974215949363E+004f;
    p = p * a - 4.106137688064877E+004f;
    p = p * a - 4.831066242492429E+004f;
    p = p * a - 1.430333998207429E+005f;
    r = a - 2.592509840117874E+002f;
    r = r * a - 1.077717972228532E+004f;
    r = r * a - 9.268505031444956E+004f;
    r = r * a - 2.063535768623558E+005f;
    r = p / r;
    r = r + a;
    return r;
  }

  if (a >= 1.5f) {
    a = a - 2.0f;
    r = + 4.959849168282574E-005f;
    r = r * a - 2.208948403848352E-004f;
    r = r * a + 5.413142447864599E-004f;
    r = r * a - 1.204516976842832E-003f;
    r = r * a + 2.884251838546602E-003f;
    r = r * a - 7.382757963931180E-003f;
    r = r * a + 2.058131963026755E-002f;
    r = r * a - 6.735248600734503E-002f;
    r = r * a + 3.224670187176319E-001f;
    r = r * a + 4.227843368636472E-001f;
    r = r * a;
    return r;
  }

  if (a >= 0.7f) {
    a = 1.0f - a;
    r = + 4.588266515364258E-002f;
    r = r * a + 1.037396712740616E-001f;
    r = r * a + 1.228036339653591E-001f;
    r = r * a + 1.275242157462838E-001f;
    r = r * a + 1.432166835245778E-001f;
    r = r * a + 1.693435824224152E-001f;
    r = r * a + 2.074079329483975E-001f;
    r = r * a + 2.705875136435339E-001f;
    r = r * a + 4.006854436743395E-001f;
    r = r * a + 8.224669796332661E-001f;
    r = r * a + 5.772156651487230E-001f;
    r = r * a;
    return r;
  }

  r = + 3.587515669447039E-003f;
  r = r * a - 5.471285428060787E-003f;
  r = r * a - 4.462712795343244E-002f;
  r = r * a + 1.673177015593242E-001f;
  r = r * a - 4.213597883575600E-002f;
  r = r * a - 6.558672843439567E-001f;
  r = r * a + 5.772153712885004E-001f;
  r = r * a;
  r = r * a + a;
  return -__internal_accurate_logf(r);
}
/* Odd polynomial approximation applied to a reduced argument: a
   three-coefficient polynomial in x*x is multiplied by x*x and by x and then
   added to x. Statement order is kept so rounding is unchanged. */
extern __attribute__((weak)) float __internal_sin_kernel(float x); float __internal_sin_kernel(float x)
{
  float sq, acc;

  sq = x * x;
  acc = - 1.95152959e-4f;
  acc = acc * sq + 8.33216087e-3f;
  acc = acc * sq - 1.66666546e-1f;
  acc = acc * sq;
  acc = acc * x + x;
  return acc;
}
/* Even polynomial approximation applied to a reduced argument: a
   five-coefficient polynomial in x*x with constant term 1. Statement order
   is kept so rounding is unchanged. */
extern __attribute__((weak)) float __internal_cos_kernel(float x); float __internal_cos_kernel(float x)
{
  float sq, acc;

  sq = x * x;
  acc = 2.44331571e-5f;
  acc = acc * sq - 1.38873163e-3f;
  acc = acc * sq + 4.16666457e-2f;
  acc = acc * sq - 5.00000000e-1f;
  acc = acc * sq + 1.00000000e+0f;
  return acc;
}
/* Sine.
   Infinite input yields the float with bit pattern 0x7fffffff; zero is
   returned unchanged. Otherwise the argument is reduced with
   __internal_trig_reduction_kernel; bit 0 of the quadrant selects the cosine
   or sine kernel and bit 1 negates the result. */
extern __attribute__((weak)) float __internal_accurate_sinf(float a); float __internal_accurate_sinf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  if (a == 0.0f) {
    return a;
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);
  reduced = (quadrant & 1) ? __internal_cos_kernel(reduced)
                           : __internal_sin_kernel(reduced);
  if (quadrant & 2) {
    reduced = -reduced;
  }
  return reduced;
}
/* Rounds to an integral float; forwards to __cuda_nearbyintf. */
extern __attribute__((weak)) float __cuda_rintf(float a); float __cuda_rintf(float a)
{
  const float rounded = __cuda_nearbyintf(a);
  return rounded;
}
/* Sine; forwards to __internal_accurate_sinf. */
extern __attribute__((weak)) float __cuda_sinf(float a); float __cuda_sinf(float a)
{
  const float s = __internal_accurate_sinf(a);
  return s;
}
/* Cosine.
   Infinite input yields the float with bit pattern 0x7fffffff. Otherwise the
   argument is reduced with __internal_trig_reduction_kernel and the quadrant
   is advanced by one; bit 0 of it selects the cosine or sine kernel and bit 1
   negates the result. */
extern __attribute__((weak)) float __cuda_cosf(float a); float __cuda_cosf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);
  quadrant++;
  reduced = (quadrant & 1) ? __internal_cos_kernel(reduced)
                           : __internal_sin_kernel(reduced);
  if (quadrant & 2) {
    reduced = -reduced;
  }
  return reduced;
}
/* Tangent.
   Infinite input yields the float with bit pattern 0x7fffffff. Otherwise the
   argument is reduced, the tangent kernel is applied, and for odd quadrants
   the result is replaced by -1 / result. */
extern __attribute__((weak)) float __cuda_tanf(float a); float __cuda_tanf(float a)
{
  float reduced;
  int quadrant;

  if (__cuda___isinff(a)) {
    return __int_as_float(0x7fffffff);
  }
  reduced = __internal_trig_reduction_kernel(a, &quadrant);
  reduced = __internal_tan_kernel(reduced);
  if (quadrant & 1) {
    reduced = -1.0f / reduced;
  }
  return reduced;
}
/* Base-2 logarithm; forwards to __internal_accurate_log2f. */
extern __attribute__((weak)) float __cuda_log2f(float a); float __cuda_log2f(float a)
{
  const float lg2 = __internal_accurate_log2f(a);
  return lg2;
}
/* e**a; forwards to __internal_accurate_expf. */
extern __attribute__((weak)) float __cuda_expf(float a); float __cuda_expf(float a)
{
  const float e = __internal_accurate_expf(a);
  return e;
}
/* 10**a; forwards to __internal_accurate_exp10f. */
extern __attribute__((weak)) float __cuda_exp10f(float a); float __cuda_exp10f(float a)
{
  const float p = __internal_accurate_exp10f(a);
  return p;
}
/* Hyperbolic cosine of |a|: with z = __internal_expf_kernel(|a|, -2) the
   result is 2*z + 0.125/z; for |a| >= 90 the float with bit pattern
   0x7f800000 is returned instead. */
extern __attribute__((weak)) float __cuda_coshf(float a); float __cuda_coshf(float a)
{
  float scaled_exp;

  a = __cuda_fabsf(a);
  scaled_exp = __internal_expf_kernel(a, -2.0f);
  scaled_exp = 2.0f * scaled_exp + 0.125f / scaled_exp;
  if (a >= 90.0f) {
    scaled_exp = __int_as_float(0x7f800000);
  }
  return scaled_exp;
}
/* Hyperbolic sine. Works on |a| and re-applies the sign of the input.
   |a| < 1 uses an odd polynomial; otherwise, with
   z = __internal_expf_kernel(|a|, -2), the magnitude is 2*z - 0.125/z,
   replaced by the float with bit pattern 0x7f800000 for |a| >= 90. */
extern __attribute__((weak)) float __cuda_sinhf(float a); float __cuda_sinhf(float a)
{
  const float signed_input = a;
  float mag;

  a = __cuda_fabsf(a);
  if (a < 1.0f) {
    float sq = a * a;

    mag = 2.816951222e-6f;
    mag = mag * sq + 1.983615978e-4f;
    mag = mag * sq + 8.333350058e-3f;
    mag = mag * sq + 1.666666650e-1f;
    mag = mag * sq;
    mag = mag * a + a;
  } else {
    mag = __internal_expf_kernel(a, -2.0f);
    mag = 2.0f * mag - 0.125f / mag;
    if (a >= 90.0f) {
      mag = __int_as_float(0x7f800000);
    }
  }
  return __cuda_copysignf(mag, signed_input);
}
/* Hyperbolic tangent. Works on |a| and re-applies the sign of the input.
   |a| < 0.55 uses an odd polynomial; |a| < 88 uses
   1 - 2 / (exp(2|a|) + 1); |a| >= 88 yields 1. A NaN magnitude matches none
   of the ranges and is passed through. */
extern __attribute__((weak)) float __cuda_tanhf(float a); float __cuda_tanhf(float a)
{
  float mag;

  mag = __cuda_fabsf(a);
  if (mag < 0.55f) {
    float x, xsq;

    x = mag;
    xsq = x * x;
    mag = 1.643758066599993e-2f;
    mag = mag * xsq - 5.267181327760551e-2f;
    mag = mag * xsq + 1.332072505223051e-1f;
    mag = mag * xsq - 3.333294663641083e-1f;
    mag = mag * xsq;
    mag = mag * x + x;
  } else if (mag < 88.0f) {
    mag = 1.0f - 2.0f / (__internal_expf_kernel(2.0f * mag, 0.0f) + 1.0f);
  } else if (mag >= 88.0f) {
    mag = 1.0f;
  }
  return __cuda_copysignf(mag, a);
}
/* Two-argument arctangent of a / b; the result carries the sign of a.
   - a and b both zero: 3.141592654 if b's sign bit is set, else 0.
   - a and b both infinite: 2.356194490 if b's sign bit is set, else
     0.785398163.
   - otherwise: the smaller magnitude is divided by the larger, passed through
     __internal_atanf_kernel, then reflected about pi/2 when |a| > |b| and
     about pi when b < 0.
   The original's self-assignment `t1 = t1;` in the no-swap branch was a
   no-op and has been dropped; results are unchanged. */
extern __attribute__((weak)) float __cuda_atan2f(float a, float b); float __cuda_atan2f(float a, float b)
{
  float t0, t1, t3;

  t3 = __cuda_fabsf(b);
  t1 = __cuda_fabsf(a);

  if (t3 == 0.0f && t1 == 0.0f) {
    t3 = __cuda___signbitf(b) ? 3.141592654f : 0;
  } else if (__cuda___isinff(t3) && __cuda___isinff(t1)) {
    t3 = __cuda___signbitf(b) ? 2.356194490f : 0.785398163f;
  } else {
    /* Order the magnitudes so that the quotient t1 / t0 is at most 1. */
    if (t3 < t1) {
      t0 = t1;
      t1 = t3;
    } else {
      t0 = t3;
    }
    t3 = __internal_accurate_fdividef(t1, t0);
    t3 = __internal_atanf_kernel(t3);

    if (__cuda_fabsf(a) > __cuda_fabsf(b)) t3 = 1.570796327f - t3;
    if (b < 0.0f) t3 = 3.141592654f - t3;
  }
  t3 = __cuda_copysignf(t3, a);
  return t3;
}
/* Arctangent. For |a| > 1 the kernel is applied to 1/|a| and the result is
   subtracted from 1.570796327; otherwise the kernel is applied to |a|
   directly. The sign of a is re-applied at the end. */
extern __attribute__((weak)) float __cuda_atanf(float a); float __cuda_atanf(float a)
{
  const float mag = __cuda_fabsf(a);
  float angle = mag;

  if (mag > 1.0f) {
    angle = 1.0f / angle;
  }
  angle = __internal_atanf_kernel(angle);
  if (mag > 1.0f) {
    angle = 1.570796327f - angle;
  }
  return __cuda_copysignf(angle, a);
}
/* Rational kernel shared by __cuda_asinf and __cuda_acosf: a numerator
   polynomial in a*a (times a*a*a) is multiplied by the reciprocal of a
   quadratic in a*a and added to a. Statement order is kept so rounding is
   unchanged. */
extern __attribute__((weak)) float __internal_asinf_kernel(float a); float __internal_asinf_kernel(float a)
{
  float sq, num, den;

  sq = a * a;
  num = - 0.501162291f;
  num = num * sq + 0.915201485f;
  num = num * sq;
  num = num * a;
  den = sq - 5.478654385f;
  den = den * sq + 5.491230488f;
  den = 1.0f / den;
  a = num * den + a;
  return a;
}
/* Arcsine. For |a| > 0.575 the kernel is applied to sqrt(0.5 * (1 - |a|))
   and the result r is mapped to 1.570796327 - 2r; otherwise the kernel is
   applied to |a| directly. The sign of a is re-applied at the end. */
extern __attribute__((weak)) float __cuda_asinf(float a); float __cuda_asinf(float a)
{
  float mag, angle, alt;

  mag = __cuda_fabsf(a);
  alt = 1.0f - mag;
  alt = 0.5f * alt;
  alt = __cuda_sqrtf(alt);
  if (mag > 0.575f) {
    angle = alt;
  } else {
    angle = mag;
  }
  angle = __internal_asinf_kernel(angle);
  alt = -2.0f * angle + 1.570796327f;
  if (mag > 0.575f) {
    angle = alt;
  }
  return __cuda_copysignf(angle, a);
}
/* Arccosine. For |a| > 0.575 the kernel is applied to sqrt(0.5 * (1 - |a|))
   and doubled; otherwise the kernel result on |a| is subtracted from
   1.570796327. When a's sign bit is set the result is reflected about
   3.141592654. */
extern __attribute__((weak)) float __cuda_acosf(float a); float __cuda_acosf(float a)
{
  float mag, angle, alt;

  mag = __cuda_fabsf(a);
  alt = 1.0f - mag;
  alt = 0.5f * alt;
  alt = __cuda_sqrtf(alt);
  if (mag > 0.575f) {
    angle = alt;
  } else {
    angle = mag;
  }
  angle = __internal_asinf_kernel(angle);
  if (mag > 0.575f) {
    angle = 2.0f * angle;
  } else {
    angle = 1.570796327f - angle;
  }
  if (__cuda___signbitf(a)) {
    angle = 3.141592654f - angle;
  }
  return angle;
}
/* Natural logarithm; forwards to __internal_accurate_logf. */
extern __attribute__((weak)) float __cuda_logf(float a); float __cuda_logf(float a)
{
  const float ln = __internal_accurate_logf(a);
  return ln;
}
/* Base-10 logarithm: 0.434294482 * __internal_accurate_logf(a). */
extern __attribute__((weak)) float __cuda_log10f(float a); float __cuda_log10f(float a)
{
  const float ln = __internal_accurate_logf(a);
  return 0.434294482f * ln;
}
/* log(1 + a). Inside [-0.394, 0.65] the atanh kernel is applied to a and the
   correction term -a * (a / (a + 2)); outside that interval (and for NaN)
   __internal_accurate_logf(1 + a) is used. */
extern __attribute__((weak)) float __cuda_log1pf(float a); float __cuda_log1pf(float a)
{
  float corr;

  if (!(a >= -0.394f && a <= 0.65f)) {
    return __internal_accurate_logf (1.0f + a);
  }
  corr = a + 2.0f;
  corr = a / corr;
  corr = -a * corr;
  corr = __internal_atanhf_kernel (a, corr);
  return corr;
}
/* Inverse hyperbolic cosine. With t = a - 1: when |t| > 8388608 the result is
   0.693147181 + log(a); otherwise it is log1p(t + sqrt((a + 1) * t)). */
extern __attribute__((weak)) float __cuda_acoshf(float a); float __cuda_acoshf(float a)
{
  float plus_one, minus_one;

  minus_one = a - 1.0f;
  if (__cuda_fabsf(minus_one) > 8388608.0f) {
    return 0.693147181f + __internal_accurate_logf(a);
  }
  plus_one = a + 1.0f;
  minus_one = minus_one + __cuda_sqrtf(plus_one * minus_one);
  return __cuda_log1pf(minus_one);
}
/* Inverse hyperbolic sine. Works on |a| and re-applies the sign of a.
   |a| > 8.507059173e37 uses 0.693147181 + __logf(|a|); otherwise, with
   r = 1/|a|, the magnitude is log1p(|a| + |a| / (r + sqrt(1 + r*r))). */
extern __attribute__((weak)) float __cuda_asinhf(float a); float __cuda_asinhf(float a)
{
  float mag, recip, result;

  mag = __cuda_fabsf(a);
  if (mag > 8.507059173e37f) {
    result = 0.693147181f + __logf(mag);
  } else {
    recip = 1.0f / mag;
    result = mag + mag / (recip + __cuda_sqrtf(1.0f + recip * recip));
    result = __cuda_log1pf(result);
  }
  return __cuda_copysignf(result, a);
}
/* Inverse hyperbolic tangent: 0.5 * log1p(2|a| / (1 - |a|)), with the sign
   of a re-applied. */
extern __attribute__((weak)) float __cuda_atanhf(float a); float __cuda_atanhf(float a)
{
  float mag, result;

  mag = __cuda_fabsf(a);
  result = (2.0f * mag) / (1.0f - mag);
  result = 0.5f * __cuda_log1pf(result);
  return __cuda_copysignf(result, a);
}
/* exp(a) - 1.
   n = rint(a * 1.442695041) and the remainder r = a - n * ln2 (two-part
   constant) are formed first; for |a| < 0.41 they are overridden with n = 0,
   r = a. A polynomial gives expm1(r); it is then combined with 2**n (n is
   lowered to 127 when it equals 128 and the result doubled afterwards).
   The result is forced to the float with bit pattern 0x7f800000 when the
   scale exponent exceeds 128 and to -1 when it is below -25. */
extern __attribute__((weak)) float __cuda_expm1f(float a); float __cuda_expm1f(float a)
{
  float n, r, scale_exp, poly;

  n = __cuda_rintf (a * 1.442695041f);
  r = a - n * 0.6931457519f;
  r = r - n * 1.4286067653e-6f;

  if (__cuda_fabsf(a) < 0.41f) {
    r = a;
    n = 0.0f;
  }

  scale_exp = n;
  if (n == 128.0f) {
    scale_exp = scale_exp - 1.0f;
  }

  poly = 1.38795078474044430E-003f;
  poly = poly * r + 8.38241261853264930E-003f;
  poly = poly * r + 4.16678317762833940E-002f;
  poly = poly * r + 1.66663978874356580E-001f;
  poly = poly * r + 4.99999940395997040E-001f;
  poly = poly * r;
  poly = poly * r + r;
  if (a == 0.0f) {
    poly = a;
  }

  /* From here on r holds 2**scale_exp and a holds 2**scale_exp - 1. */
  r = __cuda_exp2f (scale_exp);
  a = r - 1.0f;
  if (a != 0.0f) {
    poly = poly * r + a;
  }
  if (n == 128.0f) {
    poly = poly + poly;
  }

  if (scale_exp > 128.0f) {
    poly = __int_as_float(0x7f800000);
  }
  if (scale_exp < -25.0f) {
    poly = -1.0f;
  }
  return poly;
}
/* sqrt(a*a + b*b), computed as big * sqrt(1 + (small/big)**2) on the
   magnitudes. When the larger magnitude is zero the result is big + small;
   when either magnitude equals the float with bit pattern 0x7f800000 the
   result is that value. */
extern __attribute__((weak)) float __cuda_hypotf(float a, float b); float __cuda_hypotf(float a, float b)
{
  float big, small, result;

  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);

  if (a > b) {
    big = a;
    small = b;
  } else {
    big = b;
    small = a;
  }
  result = __internal_accurate_fdividef(small, big);
  result = 1.0f + result * result;
  result = big * __cuda_sqrtf(result);
  if (big == 0.0f) {
    result = big + small;
  }
  if ((big == __int_as_float(0x7f800000)) || (small == __int_as_float(0x7f800000))) {
    result = __int_as_float(0x7f800000);
  }
  return result;
}
/* Cube root. Zero and infinite inputs are returned unchanged. Otherwise an
   initial estimate exp2(log2(|a|) / 3) is refined with one correction step
   and negated when a's sign bit is set. */
extern __attribute__((weak)) float __cuda_cbrtf(float a); float __cuda_cbrtf(float a)
{
  float mag, root;

  if (a == 0.0f || __cuda___isinff(a)) {
    return a;
  }
  mag = __cuda_fabsf(a);
  root = __cuda_exp2f(0.333333333f * __log2f(mag));
  root = root - (root - (mag / (root * root))) * 0.333333333f;
  if (__cuda___signbitf(a)) {
    root = -root;
  }
  return root;
}
/* Error function.
   |a| < 1: a times a polynomial in a*a.
   1 <= |a| <= infinity: 1 - exp(-a*a) * P(1 / (0.3275911|a| + 1)), forced to
   1 for |a| >= 5.5, with the sign bit of a OR-ed into the result.
   NaN matches neither range and is returned unchanged. */
extern __attribute__((weak)) float __cuda_erff(float a); float __cuda_erff(float a)
{
  float mag, acc, aux;

  mag = __cuda_fabsf(a);
  if (mag < 1.0f) {
    mag = mag * mag;
    acc = -5.58510127926029810E-004f;
    acc = acc * mag + 4.90688891415893070E-003f;
    acc = acc * mag - 2.67027980930150640E-002f;
    acc = acc * mag + 1.12799056505903940E-001f;
    acc = acc * mag - 3.76122956138427440E-001f;
    acc = acc * mag + 1.12837911712623450E+000f;
    a = a * acc;
    return a;
  }

  if (mag <= __int_as_float(0x7f800000)) {
    aux = 0.3275911f * mag + 1.0f;
    aux = 1.0f / aux;
    acc = 1.061405429f;
    acc = acc * aux - 1.453152027f;
    acc = acc * aux + 1.421413741f;
    acc = acc * aux - 0.284496736f;
    acc = acc * aux + 0.254829592f;
    acc = acc * aux;
    aux = __internal_expf_kernel(-a * a, 0.0f);
    acc = 1.0f - aux * acc;
    if (mag >= 5.5f) {
      acc = 1.0f;
    }
    a = __int_as_float (__float_as_int(acc) | (__float_as_int(a) & 0x80000000));
  }
  return a;
}
/* Complementary error function.
   a <= 0.55: 1 - erf(a).  a > 10: 0.
   Otherwise (including NaN): a rational approximation P(a)/Q(a), with 1/Q
   refined by one correction step, multiplied by exp(-a*a). The exponential is
   evaluated in two pieces by splitting a into a part h with the low 12
   mantissa bits cleared and the remainder l. */
extern __attribute__((weak)) float __cuda_erfcf(float a); float __cuda_erfcf(float a)
{
  float num;
  float den;
  float head;
  float tail;

  if (a <= 0.55f) {
    return 1.0f - __cuda_erff(a);
  }
  if (a > 10.0f) {
    return 0.0f;
  }

  num = + 4.014893410762552E-006f;
  num = num * a + 5.640401259462436E-001f;
  num = num * a + 2.626649872281140E+000f;
  num = num * a + 5.486372652389673E+000f;
  num = num * a + 5.250714831459401E+000f;
  den = a + 4.651376250488319E+000f;
  den = den * a + 1.026302828878470E+001f;
  den = den * a + 1.140762166021288E+001f;
  den = den * a + 5.251211619089947E+000f;

  head = 1.0f / den;
  den = 2.0f * head - den * head * head;
  num = num * den;

  head = __int_as_float(__float_as_int(a) & 0xfffff000);
  tail = a - head;
  den = -head * head;
  den = __internal_expf_kernel(den, 0.0f);
  if (tail != 0.0f) {
    a = a + head;
    tail = tail * a;
    head = __internal_expf_kernel(-tail, 0.0f);
    den = den * head;
  }
  num = num * den;
  return num;
}
/*
 * Single-precision log-gamma.
 *
 * For a >= 0 the result is __internal_lgammaf_pos(a).  For negative a:
 *   - negative integers return +infinity;
 *   - |a| < 1e-19 returns -log(|a|);
 *   - otherwise the result is
 *       1.144729886 - log(|s| * |a|) - lgammaf_pos(|a|)
 *     where s is a sine or cosine kernel applied to the reduced angle
 *     pi * (|a| - rint(2|a|) / 2), chosen by the parity of rint(2|a|).
 */
extern __attribute__((weak)) float __cuda_lgammaf(float a);
float __cuda_lgammaf(float a)
{
  float pos_val, mag, phase;
  int twice;

  pos_val = __internal_lgammaf_pos(__cuda_fabsf(a));
  if (a >= 0.0f) return pos_val;

  mag = __cuda_fabsf(a);
  phase = __cuda_floorf(mag);
  if (mag == phase) return __int_as_float(0x7f800000);
  if (mag < 1e-19f) return -__internal_accurate_logf(mag);

  phase = __cuda_rintf (2.0f * mag);
  twice = (int)phase;
  phase = mag - 0.5f * phase;
  phase = phase * 3.141592654f;
  phase = (twice & 1) ? __internal_cos_kernel(phase)
                      : __internal_sin_kernel(phase);
  phase = __cuda_fabsf(phase);
  pos_val = 1.144729886f - __internal_accurate_logf(phase * mag) - pos_val;
  return pos_val;
}

/*
 * Returns a * 2^b.
 *
 * Zero, infinity and b == 0 return a unchanged.  The power of two is applied
 * in one, two or four factors depending on |b| so that each individual
 * exp2f() argument stays small.
 */
extern __attribute__((weak)) float __cuda_ldexpf(float a, int b);
float __cuda_ldexpf(float a, int b)
{
  const float mag = __cuda_fabsf(a);
  int span;

  if (mag == 0.0f || __cuda___isinff(mag) || b == 0) {
    return a;
  }

  span = __cuda_abs(b);
  if (span < 126) {
    return a * __cuda_exp2f((float)b);
  }
  if (span < 252) {
    const int half = b / 2;
    return a * __cuda_exp2f((float)half) * __cuda_exp2f((float)(b - half));
  }
  {
    const int quarter = b / 4;
    const float scale = __cuda_exp2f((float)quarter);
    return a * scale * scale * scale * __cuda_exp2f((float)(b - 3 * quarter));
  }
}

/* scalbnf(a, b): forwards to __cuda_ldexpf, i.e. a * 2^b. */
extern __attribute__((weak)) float __cuda_scalbnf(float a, int b);
float __cuda_scalbnf(float a, int b)
{
  const float scaled = __cuda_ldexpf(a, b);
  return scaled;
}

/*
 * scalblnf(a, b): clamps the long exponent into the int range and forwards
 * to __cuda_scalbnf.
 */
extern __attribute__((weak)) float __cuda_scalblnf(float a, long int b);
float __cuda_scalblnf(float a, long int b)
{
  const int clamped = (b > 2147483647L)        ? 2147483647
                    : (b < (-2147483647 - 1))  ? (-2147483647 - 1)
                                               : (int)b;
  return __cuda_scalbnf(a, clamped);
}

/*
 * Splits a into a fraction and a power-of-two exponent stored through b.
 *
 * Inputs below the smallest normal magnitude are first multiplied by 2^24
 * and the exponent is corrected by 24.  For zero and for an all-ones
 * exponent field (infinity / NaN) the exponent is 0 and a + a is returned.
 * Otherwise the exponent field of a is overwritten with 0x3f000000 (which
 * places |result| in [0.5, 1)) and the unbiased exponent is stored in *b.
 */
extern __attribute__((weak)) float __cuda_frexpf(float a, int *b);
float __cuda_frexpf(float a, int *b)
{
  const float mag = __cuda_fabsf(a);
  unsigned int field;
  unsigned int prescale = 0;

  if (mag < 1.175494351e-38f) {
    a *= 16777216.0f;
    prescale = 24;
  }
  field = ((__float_as_int(a) >> 23) & 0xff);
  if (!((mag == 0.0f) || (field == 0xff))) {
    field = field - prescale - 126;
    a = __int_as_float(((__float_as_int(a) & 0x807fffff) | 0x3f000000));
  } else {
    field = 0;
    a = a + a;
  }
  *b = field;
  return a;
}

/*
 * Splits a into integral part (*b) and fractional part (return value).
 *
 * finite a : *b = trunc(a), returns (a - trunc(a)) carrying the sign of a.
 * infinity : *b = a, returns a zero carrying the sign of a.
 * otherwise: *b = a and a itself is returned.
 */
extern __attribute__((weak)) float __cuda_modff(float a, float *b);
float __cuda_modff(float a, float *b)
{
  float part;

  if (__cuda___finitef(a)) {
    part = __cuda_truncf(a);
    *b = part;
    part = a - part;
    return __cuda_copysignf(part, a);
  }
  *b = a;
  if (__cuda___isinff(a)) {
    part = 0.0f;
    return __cuda_copysignf(part, a);
  }
  return a;
}

/*
 * Single-precision fmod by shift-and-subtract.
 *
 * NaN operands return a + b; infinite a or zero b return NaN; |a| < |b|
 * returns a unchanged.  Otherwise |b| is scaled by a power of two up to the
 * magnitude of |a| and repeatedly subtracted while being halved back down
 * to |b|.  The result takes the sign of the original a.
 */
extern __attribute__((weak)) float __cuda_fmodf(float a, float b);
float __cuda_fmodf(float a, float b)
{
  float signed_a, step;
  int exp_a, exp_b;

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  signed_a = a;
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);
  if (__cuda___isinff(a) || b == 0.0f) {
    return __int_as_float(0x7fffffff);
  }
  if (!(a >= b)) {
    return signed_a;
  }

  /* Unbiased exponents; subnormal magnitudes go through log2 instead. */
  exp_a = (a < 1.175494351e-38f) ?
    ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
  exp_b = (b < 1.175494351e-38f) ?
    ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);
  step = __cuda_ldexpf(b, exp_a - exp_b);
  if (step <= 0.5f * a) {
    step *= 2.0f;
  }
  for (; step >= b; step *= 0.5f) {
    if (a >= step) {
      a -= step;
    }
  }
  return __cuda_copysignf(a, signed_a);
}

/*
 * Single-precision IEEE-style remainder (quotient rounded to nearest, ties
 * to even).
 *
 * NaN operands return a + b; infinite a or zero b return NaN.  When
 * |a| >= |b| the same shift-and-subtract reduction as fmodf is performed,
 * remembering in last_bit whether the final step subtracted.  Afterwards the
 * remainder is moved to the negative side when 2*rem > |b|, or when
 * 2*rem == |b| and the last quotient bit was set.  The sign bit of the
 * original a is finally XOR-ed onto the result.
 */
extern __attribute__((weak)) float __cuda_remainderf(float a, float b);
float __cuda_remainderf(float a, float b)
{
  float signed_a;
  float dbl = 0.0f;
  unsigned int last_bit = 0;

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  signed_a = a;
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);
  if (__cuda___isinff(a) || (b == 0.0f)) {
    return __int_as_float(0x7fffffff);
  }
  if (a >= b) {
    int exp_a = (a < 1.175494351e-38f) ?
      ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
    int exp_b = (b < 1.175494351e-38f) ?
      ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);
    float step = __cuda_ldexpf(b, exp_a - exp_b);
    if (step <= 0.5f * a) {
      step *= 2.0f;
    }
    for (; step >= b; step *= 0.5f) {
      last_bit = 0;
      if (a >= step) {
        /* dbl is recomputed after the loop; assignment kept as in the source. */
        dbl = (2.0f * a - step) - step;
        a -= step;
        last_bit = 1;
      }
    }
  }

  /* Round the implied quotient to nearest, ties to even. */
  dbl = 2.0f * a;
  if ((dbl > b) || ((dbl == b) && last_bit)) {
    a -= b;
    a = __cuda_copysignf (a, -1.0f);
  }
  a = __int_as_float((__float_as_int(signed_a) & 0x80000000)^
                     __float_as_int(a));
  return a;
}

/*
 * Single-precision remquo: returns the round-to-nearest-even remainder of
 * a / b and stores the low three bits of the quotient magnitude, carrying
 * the sign of a / b, through quo.
 *
 * NaN operands store 0 and return a + b; infinite a or zero b store 0 and
 * return NaN.  When |a| >= |b| the quotient bits are accumulated during a
 * shift-and-subtract reduction.  The remainder is then moved to the negative
 * side (and the quotient bumped) when 2*rem > |b|, or on a tie when the
 * quotient is odd.
 *
 * Fix relative to the original: the quotient mask was written as
 * ~((~0)<<3), which left-shifts a negative int (undefined behaviour in C).
 * It is now the equivalent unsigned constant 7u.
 */
extern __attribute__((weak)) float __cuda_remquof(float a, float b, int* quo);
float __cuda_remquof(float a, float b, int* quo)
{
  float orig_a;
  float twoa = 0.0f;
  unsigned int quot = 0;
  unsigned int sign;

  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    *quo = quot;
    return a + b;
  }
  orig_a = a;

  /* All-ones when the operand signs differ, zero otherwise. */
  sign = 0 - (__cuda___signbitf(a) != __cuda___signbitf(b));
  a = __cuda_fabsf(a);
  b = __cuda_fabsf(b);
  if (__cuda___isinff(a) || (b == 0.0f)) {
    *quo = quot;
    return __int_as_float(0x7fffffff);
  } else if (a >= b) {
    /* Unbiased exponents; subnormal magnitudes go through log2 instead. */
    int expoa = (a < 1.175494351e-38f) ?
      ((int)__log2f(a)) : (((__float_as_int(a) >> 23) & 0xff) - 127);
    int expob = (b < 1.175494351e-38f) ?
      ((int)__log2f(b)) : (((__float_as_int(b) >> 23) & 0xff) - 127);
    int scale = expoa - expob;
    float scaled_b = __cuda_ldexpf(b, scale);
    if (scaled_b <= 0.5f * a) {
      scaled_b *= 2.0f;
    }
    while (scaled_b >= b) {
      quot <<= 1;
      if (a >= scaled_b) {
        twoa = (2.0f * a - scaled_b) - scaled_b;
        a -= scaled_b;
        quot += 1;
      }
      scaled_b *= 0.5f;
    }
  }

  /* Round the quotient to nearest, ties to even. */
  twoa = 2.0f * a;
  if ((twoa > b) || ((twoa == b) && (quot & 1))) {
    quot++;
    a -= b;
    a = __cuda_copysignf (a, -1.0f);
  }
  a = __int_as_float((__float_as_int(orig_a) & 0x80000000)^
                     __float_as_int(a));
  quot = quot & 7u;      /* keep the low three quotient bits */
  quot = quot ^ sign;    /* conditional two's-complement negate ... */
  quot = quot - sign;    /* ... when the operand signs differ */
  *quo = quot;
  return a;
}

/*
 * Software fused multiply-add a * b + c on IEEE-754 single-precision bit
 * patterns, using only 32-bit integer arithmetic.
 *
 * xx/yy/zz hold the raw bits of a/b/c; expo_x/expo_y/expo_z hold each biased
 * exponent field minus one, so that a value > 0xFD flags a zero/subnormal
 * (field 0, wraps to 0xFFFFFFFF) or an infinity/NaN (field 0xFF).
 *
 * Bit-pattern tests used throughout (the left shift discards the sign):
 *   (v << 1) >  0xff000000   NaN
 *   (v << 1) == 0xff000000   infinity
 *   (v << 1) == 0            zero
 */
extern __attribute__((weak)) float __cuda_fmaf(float a, float b, float c);
float __cuda_fmaf(float a, float b, float c)
{
  unsigned int xx, yy, zz, ww;
  unsigned int temp, s, u;
  unsigned int expo_x, expo_y, expo_z;

  xx = __float_as_int(a);
  yy = __float_as_int(b);
  zz = __float_as_int(c);

  temp = 0xff;
  expo_x = (temp & (xx >> 23)) - 1;
  expo_y = (temp & (yy >> 23)) - 1;
  expo_z = (temp & (zz >> 23)) - 1;

  if (!((expo_x <= 0xFD) &&
        (expo_y <= 0xFD) &&
        (expo_z <= 0xFD))) {
    /* Any NaN operand yields NaN. */
    if (((yy << 1) > 0xff000000) ||
        ((zz << 1) > 0xff000000) ||
        ((xx << 1) > 0xff000000)) {
      return __int_as_float(0x7fffffff);
    }
    /* zero * infinity yields NaN. */
    if ((((xx << 1) == 0) && ((yy << 1) == 0xff000000)) ||
        (((yy << 1) == 0) && ((xx << 1) == 0xff000000))) {
      return __int_as_float(0x7fffffff);
    }
    /* Infinite product plus infinite addend of opposite sign yields NaN. */
    if ((zz << 1) == 0xff000000) {
      if (((yy << 1) == 0xff000000) || ((xx << 1) == 0xff000000)) {
        if ((int)(xx ^ yy ^ zz) < 0) {
          return __int_as_float(0x7fffffff);
        }
      }
    }
    /* An infinite factor gives infinity with the product's sign. */
    if ((xx << 1) == 0xff000000) {
      xx = xx ^ (yy & 0x80000000);
      return __int_as_float(xx);
    }
    if ((yy << 1) == 0xff000000) {
      yy = yy ^ (xx & 0x80000000);
      return __int_as_float(yy);
    }
    /* An infinite addend is returned as is. */
    if ((zz << 1) == 0xff000000) {
      return __int_as_float(zz);
    }
    /* Negative-zero product plus -0 returns -0. */
    if (zz == 0x80000000) {
      if (((xx << 1) == 0) || ((yy << 1) == 0)) {
        if ((int)(xx ^ yy) < 0) {
          return __int_as_float(zz);
        }
      }
    }
    /* Any other zero product plus a zero addend returns +0. */
    if (((zz << 1) == 0) &&
        (((xx << 1) == 0) || ((yy << 1) == 0))) {
      zz &= 0x7fffffff;
      return __int_as_float(zz);
    }
    /* Zero product plus non-zero addend returns the addend. */
    if (((xx << 1) == 0) || ((yy << 1) == 0)) {
      return __int_as_float(zz);
    }
    /* Normalize subnormal operands, adjusting their exponents to match. */
    if (expo_x == (unsigned int)-1) {
      temp = xx & 0x80000000;
      xx = xx << 8;
      while (!(xx & 0x80000000)) {
        xx <<= 1;
        expo_x--;
      }
      expo_x++;
      xx = (xx >> 8) | temp;
    }

    if (expo_y == (unsigned int)-1) {
      temp = yy & 0x80000000;
      yy = yy << 8;
      while (!(yy & 0x80000000)) {
        yy <<= 1;
        expo_y--;
      }
      expo_y++;
      yy = (yy >> 8) | temp;
    }

    if ((expo_z == (unsigned int)-1) && ((zz << 1) != 0)) {
      temp = zz & 0x80000000;
      zz = zz << 8;
      while (!(zz & 0x80000000)) {
        zz <<= 1;
        expo_z--;
      }
      expo_z++;
      zz = (zz >> 8) | temp;
    }
  }

  /* Product: exponent sum, sign (kept in expo_y), and the 48-bit mantissa
     product split into high word xx and low word yy. */
  expo_x = expo_x + expo_y;
  expo_y = xx ^ yy;
  xx = xx & 0x00ffffff;
  yy = yy << 8;
  xx = xx | 0x00800000;
  yy = yy | 0x80000000;

  s = __umulhi(xx, yy);
  yy = xx * yy;
  xx = s;
  expo_x = expo_x - 127 + 2;
  expo_y = expo_y & 0x80000000;

  /* Normalize the product mantissa. */
  if (xx < 0x00800000) {
    xx = (xx << 1) | (yy >> 31);
    yy = (yy << 1);
    expo_x--;
  }
  temp = 0;
  if ((zz << 1) != 0) {
    /* Non-zero addend: unpack it as (sign s, mantissa zz:ww). */
    s = zz & 0x80000000;
    zz &= 0x00ffffff;
    zz |= 0x00800000;
    ww = 0;
    /* Keep the operand with the larger exponent in (xx:yy, expo_x, expo_y). */
    if ((int)expo_z > (int)expo_x) {
      temp = expo_z;
      expo_z = expo_x;
      expo_x = temp;
      temp = zz;
      zz = xx;
      xx = temp;
      temp = ww;
      ww = yy;
      yy = temp;
      temp = expo_y;
      expo_y = s;
      s = temp;
    }
    /* Align the smaller operand; temp collects the shifted-out bits. */
    expo_z = expo_x - expo_z;
    u = expo_y ^ s;
    if (expo_z <= 49) {
      temp = 0;
      while (expo_z >= 32) {
        temp = ww | (temp != 0);
        ww = zz;
        zz = 0;
        expo_z -= 32;
      }
      if (expo_z) {
        temp = ((temp >> expo_z) | (ww << (32 - expo_z)) |
                ((temp << (32 - expo_z)) != 0));
        ww = (ww >> expo_z) | (zz << (32 - expo_z));
        zz = (zz >> expo_z);
      }
    } else {
      temp = 1;
      ww = 0;
      zz = 0;
    }
    if ((int)u < 0) {
      /* Signs differ: three-word subtraction with borrow propagation. */
      temp = (unsigned)(-(int)temp);
      s = (temp != 0);
      u = yy - s;
      s = u > yy;
      yy = u - ww;
      s += yy > u;
      xx = (xx - zz) - s;
      if (!(xx | yy | temp)) {
        /* Exact cancellation: xx is zero here. */
        return __int_as_float(xx);
      }
      if ((int)xx < 0) {
        /* Negative difference: negate the three-word value, flip the sign. */
        temp = ~temp;
        yy = ~yy;
        xx = ~xx;
        if (++temp == 0) {
          if (++yy == 0) {
            ++xx;
          }
        }
        expo_y ^= 0x80000000;
      }
      /* Renormalize after cancellation. */
      while (!(xx & 0x00800000)) {
        xx = (xx << 1) | (yy >> 31);
        yy = (yy << 1);
        expo_x--;
      }
    } else {
      /* Signs agree: add, then shift right once on mantissa overflow. */
      yy = yy + ww;
      s = yy < ww;
      xx = xx + zz + s;
      if (xx & 0x01000000) {
        temp = temp | (yy << 31);
        yy = (yy >> 1) | (xx << 31);
        xx = ((xx & 0x80000000) | (xx >> 1)) & ~0x40000000;
        expo_x++;
      }
    }
  }
  temp = yy | (temp != 0);
  if (expo_x <= 0xFD) {
    /* Normal result: round to nearest even and pack. */
    xx |= expo_y;
    s = xx & 1;
    xx += (temp == 0x80000000) ? s : (temp >> 31);
    xx = xx + (expo_x << 23);
    return __int_as_float(xx);
  } else if ((int)expo_x >= 126) {
    /* Overflow: signed infinity. */
    xx = expo_y | 0x7f800000;
    return __int_as_float(xx);
  }
  /* Subnormal result. */
  expo_x = (unsigned int)(-(int)expo_x);
  if (expo_x > 25) {
    /* Too small to represent: signed zero. */
    return __int_as_float(expo_y);
  }
  yy = (xx << (32 - expo_x)) | ((yy) ? 1 : 0);
  xx = expo_y + (xx >> expo_x);
  xx = xx + ((yy==0x80000000) ? (xx & 1) : (yy >> 31));
  xx |= expo_y;
  return __int_as_float(xx);
}

/*
 * 32-entry table searched by __internal_accurate_powf.  Entry i appears to
 * be 2^(i/32) rounded to float (entry 16 is sqrt(2)); the power routine
 * pairs index i with a log2 contribution of i * 0.03125.
 */
static float __cudart_A1[32] =
{
  1.0000000000e+000f, 1.0218971968e+000f, 1.0442737341e+000f, 1.0671404600e+000f,
  1.0905077457e+000f, 1.1143867970e+000f, 1.1387885809e+000f, 1.1637248993e+000f,
  1.1892070770e+000f, 1.2152473927e+000f, 1.2418577671e+000f, 1.2690509558e+000f,
  1.2968395948e+000f, 1.3252366781e+000f, 1.3542555571e+000f, 1.3839099407e+000f,
  1.4142135382e+000f, 1.4451807737e+000f, 1.4768261909e+000f, 1.5091644526e+000f,
  1.5422108173e+000f, 1.5759809017e+000f, 1.6104903221e+000f, 1.6457555294e+000f,
  1.6817928553e+000f, 1.7186193466e+000f, 1.7562521696e+000f, 1.7947090864e+000f,
  1.8340080976e+000f, 1.8741676807e+000f, 1.9152065516e+000f, 1.9571441412e+000f
};

/*
 * 32-entry table of small corrections.  __internal_accurate_powf subtracts
 * __cudart_A2[i] right after subtracting __cudart_A1[i]; presumably these
 * are the low-order parts of the A1 entries -- confirm against the
 * generator of the tables.
 */
static float __cudart_A2[32] =
{
   0.0000000000e+000f, -4.8115598617e-008f,  4.8334701575e-008f, -5.9337519787e-008f,
  -1.3077539940e-008f, -5.4355400181e-008f,  5.3862223126e-008f, -4.0514414934e-008f,
   3.7976352729e-008f, -3.2673948880e-008f,  4.4968381019e-008f,  1.4193333175e-009f,
  -4.0189995332e-008f, -3.4963733242e-008f, -1.0123349270e-008f, -5.8755773580e-008f,
   2.4203234972e-008f,  3.3241999375e-008f, -4.5008988536e-008f, -2.4959373235e-008f,
   8.0709048333e-009f, -5.6610254262e-008f,  9.8362171741e-009f, -5.1249720912e-008f,
  -2.4755326677e-008f, -4.8496175964e-008f, -9.2357703707e-009f, -1.1415044909e-008f,
  -1.1239277953e-008f, -4.6630056261e-008f,  9.8453281083e-009f, -1.7021804410e-008f
};

/*
 * 32-entry table used as a multiplier in __internal_accurate_powf after the
 * A1/A2 subtraction.  The entries appear to be the reciprocals of the
 * corresponding __cudart_A1 entries.
 */
static float __cudart_Ainv[32] =
{
  1.0000000000e+000f, 9.7857207060e-001f, 9.5760327578e-001f, 9.3708384037e-001f,
  9.1700404882e-001f, 8.9735454321e-001f, 8.7812608480e-001f, 8.5930967331e-001f,
  8.4089642763e-001f, 8.2287776470e-001f, 8.0524516106e-001f, 7.8799045086e-001f,
  7.7110540867e-001f, 7.5458222628e-001f, 7.3841309547e-001f, 7.2259038687e-001f,
  7.0710676908e-001f, 6.9195497036e-001f, 6.7712777853e-001f, 6.6261833906e-001f,
  6.4841979742e-001f, 6.3452547789e-001f, 6.2092888355e-001f, 6.0762369633e-001f,
  5.9460353851e-001f, 5.8186244965e-001f, 5.6939429045e-001f, 5.5719339848e-001f,
  5.4525387287e-001f, 5.3357023001e-001f, 5.2213686705e-001f, 5.1094859838e-001f
};

4831 extern __attribute__((weak)) float __internal_accurate_powf(float a, float b); float __internal_accurate_powf(float a, float b)
4833 int i;
4834 float t;
4835 int expo;
4836 float log_hi, log_lo;
4837 float b_hi, b_lo;
4838 float prod_hi, prod_lo;
4840 if ((a > 0.707106781f) && (a < 1.414213562f)) {
4841 float f, g, u, v, q;
4848 f = a - 1.0f;
4849 g = a + 1.0f;
4850 g = 1.0f / g;
4851 u = 2.0f * f * g;
4852 v = u * u;
4853 q = 1.49356810919559350E-001f/64.0f;
4854 q = q * v + 1.99887797540072460E-001f/16.0f;
4855 q = q * v + 3.33333880955515580E-001f/4.0f;
4856 q = q * v;
4857 q = q * u;
4858 log_hi = __int_as_float(__float_as_int(u) & 0xfffff000);
4859 v = __int_as_float(__float_as_int(f) & 0xfffff000);
4860 u = 2.0f * (f - log_hi);
4861 f = f - v;
4862 u = u - log_hi * v;
4863 u = u - log_hi * f;
4864 u = g * u;
4865 log_lo = q + u;
4868 b_hi = __int_as_float(__float_as_int(b) & 0xfffff000);
4869 b_lo = b - b_hi;
4870 prod_lo = b_lo * log_lo;
4871 prod_lo += b_lo * log_hi;
4872 prod_lo += b_hi * log_lo;
4873 prod_hi = b_hi * log_hi;
4876 return __cuda_expf(prod_hi) * __cuda_expf(prod_lo);
4880 if (a >= 1.175494351e-38f) {
4881 i = __float_as_int(a);
4882 expo = ((i >> 23) & 0xff) - 127;
4883 } else {
4884 a *= 16777216.0f;
4885 i = __float_as_int(a);
4886 expo = ((i >> 23) & 0xff) - 127 - 24;
4888 i = (i & 0x007fffff) | (0x3f800000);
4889 t = __int_as_float(i);
4891 i = 0;
4892 if (t >= __cudart_A1[i+16]) i += 16;
4893 if (t >= __cudart_A1[i+8]) i += 8;
4894 if (t >= __cudart_A1[i+4]) i += 4;
4895 if (t >= __cudart_A1[i+2]) i += 2;
4896 if (t >= __cudart_A1[i+1]) i += 1;
4898 t = t - __cudart_A1[i];
4899 t = t - __cudart_A2[i];
4901 t = t * __cudart_Ainv[i];
4904 log_hi = (float)expo + (float)i * 0.03125f;
4906 log_lo = - 3.42338934684934650E-001f;
4907 log_lo = log_lo * t + 4.80524913518140690E-001f;
4908 log_lo = log_lo * t - 7.21345070621603800E-001f;
4909 log_lo = log_lo * t + 1.44269503837073180E+000f;
4910 log_lo = log_lo * t;
4913 b_hi = __int_as_float(__float_as_int(b) & 0xfffff000);
4914 b_lo = b - b_hi;
4915 prod_lo = b_lo * log_lo;
4916 prod_lo = prod_lo + b_lo * log_hi;
4917 prod_lo = prod_lo + b_hi * log_lo;
4918 prod_hi = b_hi * log_hi;
4921 if (prod_hi >= 256.0f) {
4922 return __int_as_float(0x7f800000);
4924 if (prod_hi <= -256.0f) {
4925 return 0.0f;
4929 b = __cuda_exp2f (0.5f * prod_hi);
4930 t = __cuda_exp2f (prod_lo);
4931 t = t * b;
4932 t = t * b;
4933 return t;
/*
 * Raises a to the integer power b by binary exponentiation (square and
 * multiply).  A negative b returns the reciprocal of a^|b|.
 *
 * Fix relative to the original: |b| was obtained with __cuda_abs(b), which
 * overflows for b == INT_MIN.  The magnitude is now formed in unsigned
 * arithmetic, which is well defined for every int value.
 */
extern __attribute__((weak)) float __cuda_powif(float a, int b);
float __cuda_powif(float a, int b)
{
  unsigned int e = (b < 0) ? (0u - (unsigned int)b) : (unsigned int)b;
  float r = 1.0f;

  while (1) {
    if ((e & 1) != 0) {
      r = r * a;
    }
    e = e >> 1;
    if (e == 0) {
      return b < 0 ? 1.0f/r : r;
    }
    a = a * a;
  }
}

/*
 * Raises a to the integer power b by binary exponentiation (square and
 * multiply), in double precision.  A negative b returns the reciprocal of
 * a^|b|.
 *
 * Fix relative to the original: |b| was obtained with __cuda_abs(b), which
 * overflows for b == INT_MIN.  The magnitude is now formed in unsigned
 * arithmetic, which is well defined for every int value.
 */
extern __attribute__((weak)) double __cuda_powi(double a, int b);
double __cuda_powi(double a, int b)
{
  unsigned int e = (b < 0) ? (0u - (unsigned int)b) : (unsigned int)b;
  double r = 1.0;

  while (1) {
    if ((e & 1) != 0) {
      r = r * a;
    }
    e = e >> 1;
    if (e == 0) {
      return b < 0 ? 1.0/r : r;
    }
    a = a * a;
  }
}

/*
 * Single-precision pow with explicit special-case handling, checked in this
 * order:
 *   a == 1 or b == 0            -> 1
 *   NaN operand                 -> a + b
 *   a == +inf                   -> 0 if b is negative, else +inf
 *   b infinite                  -> 1 if a == -1; else inf or 0 from |a| > 1,
 *                                  inverted when b < 0
 *   a == 0                      -> a (odd integer b) or 0, inverted when b < 0
 *   a == -inf                   -> 0 or inf, sign flipped for odd integer b
 *   a < 0 and b not an integer  -> NaN
 *   otherwise                   -> __internal_accurate_powf(|a|, b), negated
 *                                  when a < 0 and b is an odd integer
 */
extern __attribute__((weak)) float __cuda_powf(float a, float b);
float __cuda_powf(float a, float b)
{
  int odd_exp;
  float res;

  if (a == 1.0f || b == 0.0f) {
    return 1.0f;
  }
  if (__cuda___isnanf(a) || __cuda___isnanf(b)) {
    return a + b;
  }
  if (a == __int_as_float(0x7f800000)) {
    return __cuda___signbitf(b) ? 0.0f : __int_as_float(0x7f800000);
  }
  if (__cuda___isinff(b)) {
    if (a == -1.0f) {
      return 1.0f;
    }
    res = (__cuda_fabsf(a) > 1.0f) ? __int_as_float(0x7f800000) : 0.0f;
    if (b < 0.0f) {
      res = 1.0f / res;
    }
    return res;
  }

  /* b mod 2 == 1 exactly when b is an odd integer. */
  odd_exp = (b - (2.0f * floorf(0.5f * b))) == 1.0f;

  if (a == 0.0f) {
    res = odd_exp ? a : 0.0f;
    if (b < 0.0f) {
      res = 1.0f / res;
    }
    return res;
  }
  if (a == -__int_as_float(0x7f800000)) {
    res = (b < 0.0f) ? -1.0f/a : -a;
    if (odd_exp) {
      res = __int_as_float(__float_as_int(res) ^ 0x80000000);
    }
    return res;
  }
  if ((a < 0.0f) && (b != __cuda_truncf(b))) {
    return __int_as_float(0x7fffffff);
  }

  res = __cuda_fabsf(a);
  res = __internal_accurate_powf(res, b);
  if ((a < 0.0f) && odd_exp) {
    res = __int_as_float(__float_as_int(res) ^ 0x80000000);
  }
  return res;
}

/*
 * Degree-8 polynomial in a, evaluated by Horner's rule.  __cuda_tgammaf
 * divides by (or inverts the product with) this value, so it serves as an
 * approximation related to 1 / gamma on the reduced argument.
 */
extern __attribute__((weak)) float __internal_tgammaf_kernel(float a);
float __internal_tgammaf_kernel(float a)
{
  /* Coefficients after the leading one, highest remaining power first. */
  static const float tail[8] = {
     7.09279059435508670E-003f,
    -9.65347121958557050E-003f,
    -4.21736613253687960E-002f,
     1.66542401247154280E-001f,
    -4.20043267827838460E-002f,
    -6.55878234051332940E-001f,
     5.77215696929794240E-001f,
     1.00000000000000000E+000f
  };
  float acc = - 1.05767296987211380E-003f;
  int k;

  for (k = 0; k < 8; ++k) {
    acc = acc * a + tail[k];
  }
  return acc;
}

/*
 * Single-precision gamma function.
 *
 * a >= 0 : the argument is clamped to 36, reduced by repeated subtraction of
 *          1 while accumulating the product of the reduced values, and the
 *          product is divided by the kernel polynomial.  Above 34.03 one
 *          factor is held back and multiplied in at the very end.
 * a < 0  : negative integers return NaN.  Otherwise the argument is clamped
 *          to -41.1, stepped up by 1 while accumulating a product, and the
 *          reciprocal of (product * kernel) is returned.  Below -34.03 six
 *          factors are held back and divided out at the end, and for
 *          a < -42 with even (int)a the result is forced to -0.
 */
extern __attribute__((weak)) float __cuda_tgammaf(float a);
float __cuda_tgammaf(float a)
{
  float acc, red, x=a;

  if (x >= 0.0f) {
    if (x > 36.0f) x = 36.0f;
    acc = 1.0f;
    red = x;
    if (x > 34.03f) {
      red -= 1.0f;
    }
    while (red > 1.5f) {
      red = red - 1.0f;
      acc = acc * red;
    }
    if (x >= 0.5f) {
      red = red - 1.0f;
    }
    red = __internal_tgammaf_kernel(red);
    if (x < 0.5f) {
      red = red * x;
    }
    acc = acc / red;
    if (x > 34.03f) {
      /* Apply the factor that was held back above. */
      red = x - 1.0f;
      acc = acc * red;
    }
    return acc;
  }

  if (x == __cuda_floorf(x)) {
    x = __int_as_float(0x7fffffff);
    return x;
  }
  if (x < -41.1f) x = -41.1f;
  red = x;
  if (x < -34.03f) {
    red += 6.0f;
  }
  acc = red;
  while (red < -0.5f) {
    red = red + 1.0f;
    acc = acc * red;
  }
  red = __internal_tgammaf_kernel(red);
  acc = acc * red;
  acc = 1.0f / acc;
  if (x < -34.03f) {
    /* Divide out the six factors x (x+1) ... (x+5) that were held back. */
    red = x;
    red *= (x + 1.0f);
    red *= (x + 2.0f);
    red *= (x + 3.0f);
    red *= (x + 4.0f);
    red *= (x + 5.0f);
    red = 1.0f / red;
    acc = acc * red;
    if ((a < -42.0f) && !(((int)a)&1)) {
      acc = __int_as_float(0x80000000);
    }
  }
  return acc;
}

/*
 * Rounds to the nearest integer value, halfway cases away from zero.
 * Magnitudes above 2^23 are returned unchanged; magnitudes below 0.5 round
 * to a zero carrying the sign of a.
 */
extern __attribute__((weak)) float __cuda_roundf(float a);
float __cuda_roundf(float a)
{
  const float mag = __cuda_fabsf(a);
  float whole;

  if (mag > 8388608.0f) {
    return a;
  }
  whole = (mag < 0.5f) ? 0.0f : __cuda_floorf(mag + 0.5f);
  return __cuda_copysignf(whole, a);
}

/*
 * Converts a float to long long with round-half-away-from-zero, working on
 * the raw bit pattern.
 *
 * NaN returns 0.  Positive inputs with bits >= 0x5f000000 return
 * 0x7fffffffffffffff and negative inputs with bits >= 0xdf000000 return
 * 0x8000000000000000.  Otherwise the mantissa (with implicit one) is placed
 * at bit 62 and shifted right by (189 - exponent field); the shifted-out
 * bits decide whether the magnitude is incremented, and the sign is applied
 * last.
 */
extern __attribute__((weak)) long long int __internal_llroundf_kernel(float a);
long long int __internal_llroundf_kernel(float a)
{
  unsigned long long int mag, dropped = 0LL;
  int shift;
  const unsigned int bits = __float_as_int(a);

  if ((bits << 1) > 0xff000000) return 0LL;
  if ((int)bits >= 0x5f000000) return 0x7fffffffffffffffLL;
  if (bits >= 0xdf000000) return 0x8000000000000000LL;

  shift = 189 - ((bits >> 23) & 0xff);
  mag = ((long long int)(((bits << 8) | 0x80000000) >> 1)) << 32;
  if (shift >= 64) {
    dropped = mag;
    mag = 0;
  } else if (shift) {
    dropped = mag << (64 - shift);
    mag = mag >> shift;
  }
  /* Top dropped bit set means the discarded fraction is at least one half. */
  if (dropped >= 0x8000000000000000LL) {
    mag++;
  }
  if ((int)bits < 0) mag = (unsigned long long int)(-(long long int)mag);
  return (long long int)mag;
}

/* llroundf(a): forwards to __internal_llroundf_kernel. */
extern __attribute__((weak)) long long int __cuda_llroundf(float a);
long long int __cuda_llroundf(float a)
{
  const long long int rounded = __internal_llroundf_kernel(a);
  return rounded;
}

/* lroundf(a): __cuda_llroundf(a) converted to long. */
extern __attribute__((weak)) long int __cuda_lroundf(float a);
long int __cuda_lroundf(float a)
{
  const long long int wide = __cuda_llroundf(a);
  return (long int)wide;
}

/*
 * Positive difference: a - b when the comparison a <= b is false, otherwise
 * 0.  (With a NaN operand the comparison is false and a - b is returned.)
 */
extern __attribute__((weak)) float __cuda_fdimf(float a, float b);
float __cuda_fdimf(float a, float b)
{
  const float diff = a - b;
  return (a <= b) ? 0.0f : diff;
}

/*
 * Unbiased binary exponent of |a| as an int.
 *
 *   zero      -> most negative int
 *   subnormal -> -126 minus the number of leading zero bits in the mantissa
 *   normal    -> exponent field - 127
 *   infinity  -> largest int
 *   NaN       -> most negative int
 */
extern __attribute__((weak)) int __cuda_ilogbf(float a);
int __cuda_ilogbf(float a)
{
  unsigned int bits;
  int expo;

  a = __cuda_fabsf(a);
  if (a <= 1.175494351e-38f) {
    if (a == 0.0f) {
      expo = -((int)((unsigned int)-1 >> 1))-1;
    } else {
      /* Shift the mantissa up until its leading one reaches bit 31. */
      expo = -126;
      bits = __float_as_int(a);
      for (bits = bits << 8; (int)bits >= 0; bits = bits + bits) {
        expo--;
      }
    }
  } else {
    bits = __float_as_int(a);
    expo = ((int)((bits >> 23) & 0xff)) - 127;
    if ((bits == 0x7f800000)) {
      expo = ((int)((unsigned int)-1 >> 1));
    }
    if ((bits > 0x7f800000)) {
      expo = -((int)((unsigned int)-1 >> 1))-1;
    }
  }
  return expo;
}

/*
 * Unbiased binary exponent of |a| as a float.
 *
 *   NaN       -> a + a
 *   zero      -> -infinity
 *   subnormal -> -126 minus the number of leading zero bits in the mantissa
 *   normal    -> exponent field - 127
 *   infinity  -> |a| + |a|
 */
extern __attribute__((weak)) float __cuda_logbf(float a);
float __cuda_logbf(float a)
{
  unsigned int bits;
  int expo;
  float res;

  if (__cuda___isnanf(a)) return a + a;

  a = __cuda_fabsf(a);
  if (a <= 1.175494351e-38f) {
    if (a == 0.0f) {
      res = -__int_as_float(0x7f800000);
    } else {
      /* Shift the mantissa up until its leading one reaches bit 31. */
      expo = -126;
      bits = __float_as_int(a);
      for (bits = bits << 8; (int)bits >= 0; bits = bits + bits) {
        expo--;
      }
      res = (float)expo;
    }
  } else {
    bits = __float_as_int(a);
    expo = ((int)((bits >> 23) & 0xff)) - 127;
    res = (float)expo;
    if ((bits >= 0x7f800000)) {
      res = a + a;
    }
  }
  return res;
}

/*
 * Computes sine and cosine of a together, storing them through sptr and
 * cptr.
 *
 * Infinite a stores NaN in both outputs; a == 0 stores a and 1.  Otherwise
 * the argument is reduced by __internal_trig_reduction_kernel, which also
 * yields a quadrant number; the sine and cosine kernels are evaluated on the
 * reduced argument and then swapped / negated according to the quadrant.
 */
extern __attribute__((weak)) void __cuda_sincosf(float a, float *sptr, float *cptr);
void __cuda_sincosf(float a, float *sptr, float *cptr)
{
  float red, ck, sk, sin_out, cos_out;
  int quadrant;

  if (__cuda___isinff(a)) {
    *sptr = __int_as_float(0x7fffffff);
    *cptr = __int_as_float(0x7fffffff);
    return;
  }
  if (a == 0.0f) {
    *sptr = a;
    *cptr = 1.0f;
    return;
  }

  red = __internal_trig_reduction_kernel(a, &quadrant);
  ck = __internal_cos_kernel(red);
  sk = __internal_sin_kernel(red);

  /* Odd quadrants exchange the roles of the two kernels. */
  sin_out = (quadrant & 1) ? ck : sk;
  cos_out = (quadrant & 1) ? sk : ck;

  if (quadrant & 2) {
    sin_out = -sin_out;
  }
  quadrant++;
  if (quadrant & 2) {
    cos_out = -cos_out;
  }
  *sptr = sin_out;
  *cptr = cos_out;
}

5270 # 3226 "/usr/local/cuda/bin/../include/math_functions.h" 3
5271 extern __attribute__((weak)) double rsqrt(double a); double rsqrt(double a)
5273 return 1.0 / sqrt(a);
/* Reciprocal square root in single precision, evaluated through rsqrt(). */
extern __attribute__((weak)) float rsqrtf(float a);
float rsqrtf(float a)
{
  const double wide = rsqrt((double)a);
  return (float)wide;
}

5280 # 4167 "/usr/local/cuda/bin/../include/math_functions.h" 3
5281 # 1 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 1 3
5282 # 45 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" 3
/*
 * Double-precision entry points: absolute value, max/min and value
 * classification.  Each one narrows its argument(s) to float and forwards to
 * the single-precision routine, so results carry float precision only.
 */
extern __attribute__((weak)) double __cuda_fabs(double a);
double __cuda_fabs(double a)
{
  const float x = (float)a;
  return (float)__cuda_fabsf(x);
}

extern __attribute__((weak)) double __cuda_fmax(double a, double b);
double __cuda_fmax(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (float)__cuda_fmaxf(x, y);
}

extern __attribute__((weak)) double __cuda_fmin(double a, double b);
double __cuda_fmin(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (float)__cuda_fminf(x, y);
}

extern __attribute__((weak)) int __cuda___finite(double a);
int __cuda___finite(double a)
{
  const float x = (float)a;
  return __cuda___finitef(x);
}

extern __attribute__((weak)) int __cuda___isinf(double a);
int __cuda___isinf(double a)
{
  const float x = (float)a;
  return __cuda___isinff(x);
}

extern __attribute__((weak)) int __cuda___isnan(double a);
int __cuda___isnan(double a)
{
  const float x = (float)a;
  return __cuda___isnanf(x);
}

extern __attribute__((weak)) int __cuda___signbit(double a);
int __cuda___signbit(double a)
{
  const float x = (float)a;
  return __cuda___signbitf(x);
}

/*
 * Double-precision entry points: square roots, rounding to integral values
 * and copysign.  Each one narrows to float, forwards to the single-precision
 * routine and widens the result.
 */
extern __attribute__((weak)) double __cuda_sqrt(double a);
double __cuda_sqrt(double a)
{
  const float x = (float)a;
  return (double)__cuda_sqrtf(x);
}

extern __attribute__((weak)) double __cuda_rsqrt(double a);
double __cuda_rsqrt(double a)
{
  const float x = (float)a;
  return (double)__cuda_rsqrtf(x);
}

extern __attribute__((weak)) double __cuda_ceil(double a);
double __cuda_ceil(double a)
{
  const float x = (float)a;
  return (double)__cuda_ceilf(x);
}

extern __attribute__((weak)) double __cuda_trunc(double a);
double __cuda_trunc(double a)
{
  const float x = (float)a;
  return (double)__cuda_truncf(x);
}

extern __attribute__((weak)) double __cuda_floor(double a);
double __cuda_floor(double a)
{
  const float x = (float)a;
  return (double)__cuda_floorf(x);
}

extern __attribute__((weak)) double __cuda_copysign(double a, double b);
double __cuda_copysign(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (double)__cuda_copysignf(x, y);
}

/*
 * Double-precision entry points: sin, cos, sincos and tan.  Each one narrows
 * to float, forwards to the single-precision routine and widens the result.
 */
extern __attribute__((weak)) double __cuda_sin(double a);
double __cuda_sin(double a)
{
  const float x = (float)a;
  return (double)__cuda_sinf(x);
}

extern __attribute__((weak)) double __cuda_cos(double a);
double __cuda_cos(double a)
{
  const float x = (float)a;
  return (double)__cuda_cosf(x);
}

extern __attribute__((weak)) void __cuda_sincos(double a, double *sptr, double *cptr);
void __cuda_sincos(double a, double *sptr, double *cptr)
{
  float sin_f, cos_f;

  __cuda_sincosf((float)a, &sin_f, &cos_f);

  /* Widen the float results into the caller's double outputs. */
  *sptr = (double)sin_f;
  *cptr = (double)cos_f;
}

extern __attribute__((weak)) double __cuda_tan(double a);
double __cuda_tan(double a)
{
  const float x = (float)a;
  return (double)__cuda_tanf(x);
}

/*
 * Double-precision entry points: exponentials and hyperbolic functions.
 * Each one narrows to float, forwards to the single-precision routine and
 * widens the result.
 */
extern __attribute__((weak)) double __cuda_exp(double a);
double __cuda_exp(double a)
{
  const float x = (float)a;
  return (double)__cuda_expf(x);
}

extern __attribute__((weak)) double __cuda_exp2(double a);
double __cuda_exp2(double a)
{
  const float x = (float)a;
  return (double)__cuda_exp2f(x);
}

extern __attribute__((weak)) double __cuda_exp10(double a);
double __cuda_exp10(double a)
{
  const float x = (float)a;
  return (double)__cuda_exp10f(x);
}

extern __attribute__((weak)) double __cuda_expm1(double a);
double __cuda_expm1(double a)
{
  const float x = (float)a;
  return (double)__cuda_expm1f(x);
}

extern __attribute__((weak)) double __cuda_cosh(double a);
double __cuda_cosh(double a)
{
  const float x = (float)a;
  return (double)__cuda_coshf(x);
}

extern __attribute__((weak)) double __cuda_sinh(double a);
double __cuda_sinh(double a)
{
  const float x = (float)a;
  return (double)__cuda_sinhf(x);
}

extern __attribute__((weak)) double __cuda_tanh(double a);
double __cuda_tanh(double a)
{
  const float x = (float)a;
  return (double)__cuda_tanhf(x);
}

/*
 * Double-precision entry points: inverse trigonometric functions.  Each one
 * narrows to float, forwards to the single-precision routine and widens the
 * result.
 */
extern __attribute__((weak)) double __cuda_asin(double a);
double __cuda_asin(double a)
{
  const float x = (float)a;
  return (double)__cuda_asinf(x);
}

extern __attribute__((weak)) double __cuda_acos(double a);
double __cuda_acos(double a)
{
  const float x = (float)a;
  return (double)__cuda_acosf(x);
}

extern __attribute__((weak)) double __cuda_atan(double a);
double __cuda_atan(double a)
{
  const float x = (float)a;
  return (double)__cuda_atanf(x);
}

extern __attribute__((weak)) double __cuda_atan2(double a, double b);
double __cuda_atan2(double a, double b)
{
  const float y = (float)a;
  const float x = (float)b;
  return (double)__cuda_atan2f(y, x);
}

/*
 * Double-precision entry points: logarithms and inverse hyperbolic
 * functions.  Each one narrows to float, forwards to the single-precision
 * routine and widens the result.
 */
extern __attribute__((weak)) double __cuda_log(double a);
double __cuda_log(double a)
{
  const float x = (float)a;
  return (double)__cuda_logf(x);
}

extern __attribute__((weak)) double __cuda_log2(double a);
double __cuda_log2(double a)
{
  const float x = (float)a;
  return (double)__cuda_log2f(x);
}

extern __attribute__((weak)) double __cuda_log10(double a);
double __cuda_log10(double a)
{
  const float x = (float)a;
  return (double)__cuda_log10f(x);
}

extern __attribute__((weak)) double __cuda_log1p(double a);
double __cuda_log1p(double a)
{
  const float x = (float)a;
  return (double)__cuda_log1pf(x);
}

extern __attribute__((weak)) double __cuda_acosh(double a);
double __cuda_acosh(double a)
{
  const float x = (float)a;
  return (double)__cuda_acoshf(x);
}

extern __attribute__((weak)) double __cuda_asinh(double a);
double __cuda_asinh(double a)
{
  const float x = (float)a;
  return (double)__cuda_asinhf(x);
}

extern __attribute__((weak)) double __cuda_atanh(double a);
double __cuda_atanh(double a)
{
  const float x = (float)a;
  return (double)__cuda_atanhf(x);
}

/*
 * Double-precision entry points: hypot, cbrt, erf, erfc, lgamma and tgamma.
 * Each one narrows to float, forwards to the single-precision routine and
 * widens the result.
 */
extern __attribute__((weak)) double __cuda_hypot(double a, double b);
double __cuda_hypot(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (double)__cuda_hypotf(x, y);
}

extern __attribute__((weak)) double __cuda_cbrt(double a);
double __cuda_cbrt(double a)
{
  const float x = (float)a;
  return (double)__cuda_cbrtf(x);
}

extern __attribute__((weak)) double __cuda_erf(double a);
double __cuda_erf(double a)
{
  const float x = (float)a;
  return (double)__cuda_erff(x);
}

extern __attribute__((weak)) double __cuda_erfc(double a);
double __cuda_erfc(double a)
{
  const float x = (float)a;
  return (double)__cuda_erfcf(x);
}

extern __attribute__((weak)) double __cuda_lgamma(double a);
double __cuda_lgamma(double a)
{
  const float x = (float)a;
  return (double)__cuda_lgammaf(x);
}

extern __attribute__((weak)) double __cuda_tgamma(double a);
double __cuda_tgamma(double a)
{
  const float x = (float)a;
  return (double)__cuda_tgammaf(x);
}

/*
 * Double-precision entry points: exponent scaling, decomposition and
 * remainders.  Each one narrows its floating-point argument(s) to float,
 * forwards to the single-precision routine and widens the result; integer
 * and pointer-to-int arguments are passed through unchanged.
 */
extern __attribute__((weak)) double __cuda_ldexp(double a, int b);
double __cuda_ldexp(double a, int b)
{
  const float x = (float)a;
  return (double)__cuda_ldexpf(x, b);
}

extern __attribute__((weak)) double __cuda_scalbn(double a, int b);
double __cuda_scalbn(double a, int b)
{
  const float x = (float)a;
  return (double)__cuda_scalbnf(x, b);
}

extern __attribute__((weak)) double __cuda_scalbln(double a, long b);
double __cuda_scalbln(double a, long b)
{
  const float x = (float)a;
  return (double)__cuda_scalblnf(x, b);
}

extern __attribute__((weak)) double __cuda_frexp(double a, int *b);
double __cuda_frexp(double a, int *b)
{
  const float x = (float)a;
  return (double)__cuda_frexpf(x, b);
}

extern __attribute__((weak)) double __cuda_modf(double a, double *b);
double __cuda_modf(double a, double *b)
{
  float whole_f;
  const float frac_f = __cuda_modff((float)a, &whole_f);

  /* Widen the float integral part into the caller's double output. */
  *b = (double)whole_f;

  return (double)frac_f;
}

extern __attribute__((weak)) double __cuda_fmod(double a, double b);
double __cuda_fmod(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (double)__cuda_fmodf(x, y);
}

extern __attribute__((weak)) double __cuda_remainder(double a, double b);
double __cuda_remainder(double a, double b)
{
  const float x = (float)a;
  const float y = (float)b;
  return (double)__cuda_remainderf(x, y);
}

extern __attribute__((weak)) double __cuda_remquo(double a, double b, int *c);
double __cuda_remquo(double a, double b, int *c)
{
  const float x = (float)a;
  const float y = (float)b;
  return (double)__cuda_remquof(x, y, c);
}

5538 extern __attribute__((weak)) double __cuda_nextafter(double a, double b); double __cuda_nextafter(double a, double b)
5540 return (double)__cuda_nextafterf((float)a, (float)b);
5543 extern __attribute__((weak)) double __cuda_nan(const char *tagp); double __cuda_nan(const char *tagp)
5545 return (double)__cuda_nanf(tagp);
/* Weak double-precision pow shim: both arguments are narrowed to float,
   passed to __cuda_powf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_pow(double a, double b);
double __cuda_pow(double a, double b)
{
  const float lhs = (float)a;
  const float rhs = (float)b;
  return (double)__cuda_powf(lhs, rhs);
}
/* Weak double-precision round shim: the argument is narrowed to float,
   passed to __cuda_roundf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_round(double a);
double __cuda_round(double a)
{
  const float narrowed = (float)a;
  const double widened = (double)__cuda_roundf(narrowed);
  return widened;
}
/* Weak lround shim for double input: the argument is narrowed to float and
   the value produced by __cuda_lroundf is returned as long. */
extern __attribute__((weak)) long __cuda_lround(double a);
long __cuda_lround(double a)
{
  const float narrowed = (float)a;
  const long result = __cuda_lroundf(narrowed);
  return result;
}
/* Weak llround shim for double input: the argument is narrowed to float and
   the value produced by __cuda_llroundf is returned as long long. */
extern __attribute__((weak)) long long __cuda_llround(double a);
long long __cuda_llround(double a)
{
  const float narrowed = (float)a;
  const long long result = __cuda_llroundf(narrowed);
  return result;
}
/* Weak double-precision rint shim: the argument is narrowed to float,
   passed to __cuda_rintf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_rint(double a);
double __cuda_rint(double a)
{
  const float narrowed = (float)a;
  const double widened = (double)__cuda_rintf(narrowed);
  return widened;
}
/* Weak lrint shim for double input: the argument is narrowed to float and
   the value produced by __cuda_lrintf is returned as long. */
extern __attribute__((weak)) long __cuda_lrint(double a);
long __cuda_lrint(double a)
{
  const float narrowed = (float)a;
  const long result = __cuda_lrintf(narrowed);
  return result;
}
/* Weak llrint shim for double input: the argument is narrowed to float and
   the value produced by __cuda_llrintf is returned as long long. */
extern __attribute__((weak)) long long __cuda_llrint(double a);
long long __cuda_llrint(double a)
{
  const float narrowed = (float)a;
  const long long result = __cuda_llrintf(narrowed);
  return result;
}
/* Weak double-precision nearbyint shim: the argument is narrowed to float,
   passed to __cuda_nearbyintf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_nearbyint(double a);
double __cuda_nearbyint(double a)
{
  const float narrowed = (float)a;
  const double widened = (double)__cuda_nearbyintf(narrowed);
  return widened;
}
/* Weak double-precision fdim shim: both arguments are narrowed to float,
   passed to __cuda_fdimf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_fdim(double a, double b);
double __cuda_fdim(double a, double b)
{
  const float lhs = (float)a;
  const float rhs = (float)b;
  return (double)__cuda_fdimf(lhs, rhs);
}
/* Weak ilogb shim for double input: the argument is narrowed to float and
   the value produced by __cuda_ilogbf is returned as int. */
extern __attribute__((weak)) int __cuda_ilogb(double a);
int __cuda_ilogb(double a)
{
  const float narrowed = (float)a;
  const int result = __cuda_ilogbf(narrowed);
  return result;
}
/* Weak double-precision logb shim: the argument is narrowed to float,
   passed to __cuda_logbf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_logb(double a);
double __cuda_logb(double a)
{
  const float narrowed = (float)a;
  const double widened = (double)__cuda_logbf(narrowed);
  return widened;
}
/* Weak double-precision fma shim: all three arguments are narrowed to
   float, passed to __cuda_fmaf, and the result is widened back to double. */
extern __attribute__((weak)) double __cuda_fma(double a, double b, double c);
double __cuda_fma(double a, double b, double c)
{
  const float fa = (float)a;
  const float fb = (float)b;
  const float fc = (float)c;
  return (double)__cuda_fmaf(fa, fb, fc);
}
5607 # 4168 "/usr/local/cuda/bin/../include/math_functions.h" 2 3
5608 # 89 "/usr/local/cuda/bin/../include/common_functions.h" 2
5609 # 196 "/usr/local/cuda/bin/../include/crt/host_runtime.h" 2
5610 # 6 "/tmp/tmpxft_00001ecc_00000000-0.stub.c" 2
5611 extern void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void) __attribute__((__constructor__));
5612 void __sti____cudaRegisterAll_29_tmpxft_00001ecc_00000000_2_ii_91788a12(void){__cudaFatCubinHandle = __cudaRegisterFatBinary((void*)(&__fatDeviceText));}
5613 # 475 "y.cu" 2