vcomp: Implement _vcomp_reduction_r8 and add tests.
[wine.git] / dlls / vcomp / tests / vcomp.c
blob96a3d9c20a894d746f851818bbfebe3285664555
1 /*
2 * Unit test suite for vcomp
4 * Copyright 2012 Dan Kegel
5 * Copyright 2015-2016 Sebastian Lackner
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 #include <stdio.h>
23 #include "wine/test.h"
/* Path of the temporary manifest file written by create_vcomp_manifest(),
 * plus the activation context created from it and the cookie returned when
 * that context is activated. release_vcomp() uses all three for cleanup. */
25 static char vcomp_manifest_file[MAX_PATH];
26 static HANDLE vcomp_actctx_hctx;
27 static ULONG_PTR vcomp_actctx_cookie;
/* Module handle returned by LoadLibraryA("vcomp.dll") in init_vcomp(). */
28 static HMODULE vcomp_handle;
/* Activation context entry points; create_vcomp_manifest() looks them up in
 * kernel32.dll with GetProcAddress() instead of linking to them directly. */
30 static HANDLE (WINAPI *pCreateActCtxA)(ACTCTXA*);
31 static BOOL (WINAPI *pActivateActCtx)(HANDLE, ULONG_PTR*);
32 static BOOL (WINAPI *pDeactivateActCtx)(DWORD, ULONG_PTR);
33 static VOID (WINAPI *pReleaseActCtx)(HANDLE);
/* Lock types used by the omp_*_lock prototypes below: each lock is declared
 * as a pointer to a CRITICAL_SECTION. NOTE(review): presumably this mirrors
 * the native representation - confirm against the vcomp implementation. */
35 typedef CRITICAL_SECTION *omp_lock_t;
36 typedef CRITICAL_SECTION *omp_nest_lock_t;
/* Pointers to the vcomp.dll exports exercised by this test. Each p<name>
 * variable is filled in by init_vcomp() through GetProcAddress(); a pointer
 * stays NULL when the export is missing.
 *
 * _vcomp_atomic_<op>_<type>: the suffix names the operand type as declared in
 * the prototypes (i1/i2/i4/i8 = char/short/int/LONG64, ui* = the unsigned
 * counterparts, r4/r8 = float/double). */
38 static void (CDECL *p_vcomp_atomic_add_i1)(char *dest, char val);
39 static void (CDECL *p_vcomp_atomic_add_i2)(short *dest, short val);
40 static void (CDECL *p_vcomp_atomic_add_i4)(int *dest, int val);
41 static void (CDECL *p_vcomp_atomic_add_i8)(LONG64 *dest, LONG64 val);
42 static void (CDECL *p_vcomp_atomic_add_r4)(float *dest, float val);
43 static void (CDECL *p_vcomp_atomic_add_r8)(double *dest, double val);
44 static void (CDECL *p_vcomp_atomic_and_i1)(char *dest, char val);
45 static void (CDECL *p_vcomp_atomic_and_i2)(short *dest, short val);
46 static void (CDECL *p_vcomp_atomic_and_i4)(int *dest, int val);
47 static void (CDECL *p_vcomp_atomic_and_i8)(LONG64 *dest, LONG64 val);
48 static void (CDECL *p_vcomp_atomic_div_i1)(char *dest, char val);
49 static void (CDECL *p_vcomp_atomic_div_i2)(short *dest, short val);
50 static void (CDECL *p_vcomp_atomic_div_i4)(int *dest, int val);
51 static void (CDECL *p_vcomp_atomic_div_i8)(LONG64 *dest, LONG64 val);
52 static void (CDECL *p_vcomp_atomic_div_r4)(float *dest, float val);
53 static void (CDECL *p_vcomp_atomic_div_r8)(double *dest, double val);
54 static void (CDECL *p_vcomp_atomic_div_ui1)(unsigned char *dest, unsigned char val);
55 static void (CDECL *p_vcomp_atomic_div_ui2)(unsigned short *dest, unsigned short val);
56 static void (CDECL *p_vcomp_atomic_div_ui4)(unsigned int *dest, unsigned int val);
57 static void (CDECL *p_vcomp_atomic_div_ui8)(ULONG64 *dest, ULONG64 val);
58 static void (CDECL *p_vcomp_atomic_mul_i1)(char *dest, char val);
59 static void (CDECL *p_vcomp_atomic_mul_i2)(short *dest, short val);
60 static void (CDECL *p_vcomp_atomic_mul_i4)(int *dest, int val);
61 static void (CDECL *p_vcomp_atomic_mul_i8)(LONG64 *dest, LONG64 val);
62 static void (CDECL *p_vcomp_atomic_mul_r4)(float *dest, float val);
63 static void (CDECL *p_vcomp_atomic_mul_r8)(double *dest, double val);
64 static void (CDECL *p_vcomp_atomic_or_i1)(char *dest, char val);
65 static void (CDECL *p_vcomp_atomic_or_i2)(short *dest, short val);
66 static void (CDECL *p_vcomp_atomic_or_i4)(int *dest, int val);
67 static void (CDECL *p_vcomp_atomic_or_i8)(LONG64 *dest, LONG64 val);
/* Shift helpers: the shift count is unsigned int everywhere except the _i4
 * variants, which are declared with an int count. */
68 static void (CDECL *p_vcomp_atomic_shl_i1)(char *dest, unsigned int val);
69 static void (CDECL *p_vcomp_atomic_shl_i2)(short *dest, unsigned int val);
70 static void (CDECL *p_vcomp_atomic_shl_i4)(int *dest, int val);
71 static void (CDECL *p_vcomp_atomic_shl_i8)(LONG64 *dest, unsigned int val);
72 static void (CDECL *p_vcomp_atomic_shr_i1)(char *dest, unsigned int val);
73 static void (CDECL *p_vcomp_atomic_shr_i2)(short *dest, unsigned int val);
74 static void (CDECL *p_vcomp_atomic_shr_i4)(int *dest, int val);
75 static void (CDECL *p_vcomp_atomic_shr_i8)(LONG64 *dest, unsigned int val);
76 static void (CDECL *p_vcomp_atomic_shr_ui1)(unsigned char *dest, unsigned int val);
77 static void (CDECL *p_vcomp_atomic_shr_ui2)(unsigned short *dest, unsigned int val);
78 static void (CDECL *p_vcomp_atomic_shr_ui4)(unsigned int *dest, unsigned int val);
79 static void (CDECL *p_vcomp_atomic_shr_ui8)(ULONG64 *dest, unsigned int val);
80 static void (CDECL *p_vcomp_atomic_sub_i1)(char *dest, char val);
81 static void (CDECL *p_vcomp_atomic_sub_i2)(short *dest, short val);
82 static void (CDECL *p_vcomp_atomic_sub_i4)(int *dest, int val);
83 static void (CDECL *p_vcomp_atomic_sub_i8)(LONG64 *dest, LONG64 val);
84 static void (CDECL *p_vcomp_atomic_sub_r4)(float *dest, float val);
85 static void (CDECL *p_vcomp_atomic_sub_r8)(double *dest, double val);
86 static void (CDECL *p_vcomp_atomic_xor_i1)(char *dest, char val);
87 static void (CDECL *p_vcomp_atomic_xor_i2)(short *dest, short val);
88 static void (CDECL *p_vcomp_atomic_xor_i4)(int *dest, int val);
89 static void (CDECL *p_vcomp_atomic_xor_i8)(LONG64 *dest, LONG64 val);
/* Synchronisation, loop scheduling, fork and work-sharing entry points. */
90 static void (CDECL *p_vcomp_barrier)(void);
91 static void (CDECL *p_vcomp_enter_critsect)(CRITICAL_SECTION **critsect);
92 static void (CDECL *p_vcomp_flush)(void);
93 static void (CDECL *p_vcomp_for_dynamic_init)(unsigned int flags, unsigned int first, unsigned int last,
94 int step, unsigned int chunksize);
95 static int (CDECL *p_vcomp_for_dynamic_next)(unsigned int *begin, unsigned int *end);
96 static void (CDECL *p_vcomp_for_static_end)(void);
97 static void (CDECL *p_vcomp_for_static_init)(int first, int last, int step, int chunksize, unsigned int *loops,
98 int *begin, int *end, int *next, int *lastchunk);
99 static void (CDECL *p_vcomp_for_static_simple_init)(unsigned int first, unsigned int last, int step,
100 BOOL increment, unsigned int *begin, unsigned int *end);
/* _vcomp_fork is variadic: nargs values follow the wrapper pointer and are
 * handed to the wrapper, as the callers in this file show. */
101 static void (WINAPIV *p_vcomp_fork)(BOOL ifval, int nargs, void *wrapper, ...);
102 static int (CDECL *p_vcomp_get_thread_num)(void);
103 static void (CDECL *p_vcomp_leave_critsect)(CRITICAL_SECTION *critsect);
104 static int (CDECL *p_vcomp_master_begin)(void);
105 static void (CDECL *p_vcomp_master_end)(void);
/* Reduction helpers; NOTE(review): the flags argument presumably takes the
 * VCOMP_REDUCTION_FLAGS_* values defined below - confirm at the call sites. */
106 static void (CDECL *p_vcomp_reduction_i1)(unsigned int flags, char *dest, char val);
107 static void (CDECL *p_vcomp_reduction_i2)(unsigned int flags, short *dest, short val);
108 static void (CDECL *p_vcomp_reduction_i4)(unsigned int flags, int *dest, int val);
109 static void (CDECL *p_vcomp_reduction_i8)(unsigned int flags, LONG64 *dest, LONG64 val);
110 static void (CDECL *p_vcomp_reduction_r4)(unsigned int flags, float *dest, float val);
111 static void (CDECL *p_vcomp_reduction_r8)(unsigned int flags, double *dest, double val);
112 static void (CDECL *p_vcomp_reduction_u1)(unsigned int flags, unsigned char *dest, unsigned char val);
113 static void (CDECL *p_vcomp_reduction_u2)(unsigned int flags, unsigned short *dest, unsigned short val);
114 static void (CDECL *p_vcomp_reduction_u4)(unsigned int flags, unsigned int *dest, unsigned int val);
115 static void (CDECL *p_vcomp_reduction_u8)(unsigned int flags, ULONG64 *dest, ULONG64 val);
116 static void (CDECL *p_vcomp_sections_init)(int n);
117 static int (CDECL *p_vcomp_sections_next)(void);
118 static void (CDECL *p_vcomp_set_num_threads)(int num_threads);
119 static int (CDECL *p_vcomp_single_begin)(int flags);
120 static void (CDECL *p_vcomp_single_end)(void);
/* Public omp_* API exported by the same DLL. */
121 static void (CDECL *pomp_destroy_lock)(omp_lock_t *lock);
122 static void (CDECL *pomp_destroy_nest_lock)(omp_nest_lock_t *lock);
123 static int (CDECL *pomp_get_max_threads)(void);
124 static int (CDECL *pomp_get_nested)(void);
125 static int (CDECL *pomp_get_num_threads)(void);
126 static int (CDECL *pomp_get_thread_num)(void);
127 static int (CDECL *pomp_in_parallel)(void);
128 static void (CDECL *pomp_init_lock)(omp_lock_t *lock);
129 static void (CDECL *pomp_init_nest_lock)(omp_nest_lock_t *lock);
130 static void (CDECL *pomp_set_lock)(omp_lock_t *lock);
131 static void (CDECL *pomp_set_nest_lock)(omp_nest_lock_t *lock);
132 static void (CDECL *pomp_set_nested)(int nested);
133 static void (CDECL *pomp_set_num_threads)(int num_threads);
134 static int (CDECL *pomp_test_lock)(omp_lock_t *lock);
135 static int (CDECL *pomp_test_nest_lock)(omp_nest_lock_t *lock);
136 static void (CDECL *pomp_unset_lock)(omp_lock_t *lock);
137 static void (CDECL *pomp_unset_nest_lock)(omp_nest_lock_t *lock);
/* Scheduling flags. NOTE(review): presumably passed as the 'flags' argument of
 * _vcomp_for_dynamic_init; the users are outside this part of the file. */
139 #define VCOMP_DYNAMIC_FLAGS_STATIC 0x01
140 #define VCOMP_DYNAMIC_FLAGS_CHUNKED 0x02
141 #define VCOMP_DYNAMIC_FLAGS_GUIDED 0x03
142 #define VCOMP_DYNAMIC_FLAGS_INCREMENT 0x40
/* Reduction operator selectors. NOTE(review): presumably passed as the 'flags'
 * argument of the _vcomp_reduction_* functions - confirm at the call sites. */
144 #define VCOMP_REDUCTION_FLAGS_ADD 0x100
145 #define VCOMP_REDUCTION_FLAGS_MUL 0x200
146 #define VCOMP_REDUCTION_FLAGS_AND 0x300
147 #define VCOMP_REDUCTION_FLAGS_OR 0x400
148 #define VCOMP_REDUCTION_FLAGS_XOR 0x500
149 #define VCOMP_REDUCTION_FLAGS_BOOL_AND 0x600
150 #define VCOMP_REDUCTION_FLAGS_BOOL_OR 0x700
/* ARCH is the processorArchitecture attribute value for the CPU this test is
 * compiled for. It is only needed to build the manifest text and is undefined
 * again right after it. */
152 #ifdef __i386__
153 #define ARCH "x86"
154 #elif defined(__x86_64__)
155 #define ARCH "amd64"
156 #elif defined __arm__
157 #define ARCH "arm"
158 #elif defined __aarch64__
159 #define ARCH "arm64"
160 #else
161 #define ARCH "none"
162 #endif
/* Manifest written to a temporary file by create_vcomp_manifest(). It declares
 * the test assembly "Wine.vcomp.Test" and a dependency on the
 * Microsoft.VC80.OpenMP assembly, version 8.0.50608.0, for the same
 * architecture. */
164 static const char vcomp_manifest[] =
165 "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
166 "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\" manifestVersion=\"1.0\">\n"
167 " <assemblyIdentity\n"
168 " type=\"win32\"\n"
169 " name=\"Wine.vcomp.Test\"\n"
170 " version=\"1.0.0.0\"\n"
171 " processorArchitecture=\"" ARCH "\"\n"
172 " />\n"
173 "<description>Wine vcomp test suite</description>\n"
174 "<dependency>\n"
175 " <dependentAssembly>\n"
176 " <assemblyIdentity\n"
177 " type=\"win32\"\n"
178 " name=\"Microsoft.VC80.OpenMP\"\n"
179 " version=\"8.0.50608.0\"\n"
180 " processorArchitecture=\"" ARCH "\"\n"
181 " publicKeyToken=\"1fc8b3b9a1e18e3b\"\n"
182 " />\n"
183 " </dependentAssembly>\n"
184 "</dependency>\n"
185 "</assembly>\n";
187 #undef ARCH
189 static const char *debugstr_longlong(ULONGLONG ll)
191 static char str[17];
192 if (sizeof(ll) > sizeof(unsigned long) && ll >> 32)
193 sprintf(str, "%lx%08lx", (unsigned long)(ll >> 32), (unsigned long)ll);
194 else
195 sprintf(str, "%lx", (unsigned long)ll);
196 return str;
199 static void create_vcomp_manifest(void)
201 char temp_path[MAX_PATH];
202 HMODULE kernel32;
203 DWORD written;
204 ACTCTXA ctx;
205 HANDLE file;
207 kernel32 = GetModuleHandleA("kernel32.dll");
208 pCreateActCtxA = (void *)GetProcAddress(kernel32, "CreateActCtxA");
209 pActivateActCtx = (void *)GetProcAddress(kernel32, "ActivateActCtx");
210 pDeactivateActCtx = (void *)GetProcAddress(kernel32, "DeactivateActCtx");
211 pReleaseActCtx = (void *)GetProcAddress(kernel32, "ReleaseActCtx");
212 if (!pCreateActCtxA) return;
214 if (!GetTempPathA(sizeof(temp_path), temp_path) ||
215 !GetTempFileNameA(temp_path, "vcomp", 0, vcomp_manifest_file))
217 ok(0, "failed to create manifest file\n");
218 return;
221 file = CreateFileA(vcomp_manifest_file, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
222 if (file == INVALID_HANDLE_VALUE)
224 ok(0, "failed to open manifest file\n");
225 return;
228 if (!WriteFile(file, vcomp_manifest, sizeof(vcomp_manifest) - 1, &written, NULL))
229 written = 0;
230 CloseHandle(file);
232 if (written != sizeof(vcomp_manifest) - 1)
234 ok(0, "failed to write manifest file\n");
235 DeleteFileA(vcomp_manifest_file);
236 return;
239 memset(&ctx, 0, sizeof(ctx));
240 ctx.cbSize = sizeof(ctx);
241 ctx.lpSource = vcomp_manifest_file;
242 vcomp_actctx_hctx = pCreateActCtxA(&ctx);
243 if (!vcomp_actctx_hctx)
245 ok(0, "failed to create activation context\n");
246 DeleteFileA(vcomp_manifest_file);
247 return;
250 if (!pActivateActCtx(vcomp_actctx_hctx, &vcomp_actctx_cookie))
252 win_skip("failed to activate context\n");
253 pReleaseActCtx(vcomp_actctx_hctx);
254 DeleteFileA(vcomp_manifest_file);
255 vcomp_actctx_hctx = NULL;
259 static void release_vcomp(void)
261 if (vcomp_handle)
262 FreeLibrary(vcomp_handle);
264 if (vcomp_actctx_hctx)
266 pDeactivateActCtx(0, vcomp_actctx_cookie);
267 pReleaseActCtx(vcomp_actctx_hctx);
268 DeleteFileA(vcomp_manifest_file);
272 #define VCOMP_GET_PROC(func) \
273 do \
275 p ## func = (void *)GetProcAddress(vcomp_handle, #func); \
276 if (!p ## func) trace("Failed to get address for %s\n", #func); \
278 while (0)
280 static BOOL init_vcomp(void)
282 create_vcomp_manifest();
284 vcomp_handle = LoadLibraryA("vcomp.dll");
285 if (!vcomp_handle)
287 win_skip("vcomp.dll not installed\n");
288 release_vcomp();
289 return FALSE;
292 VCOMP_GET_PROC(_vcomp_atomic_add_i1);
293 VCOMP_GET_PROC(_vcomp_atomic_add_i2);
294 VCOMP_GET_PROC(_vcomp_atomic_add_i4);
295 VCOMP_GET_PROC(_vcomp_atomic_add_i8);
296 VCOMP_GET_PROC(_vcomp_atomic_add_r4);
297 VCOMP_GET_PROC(_vcomp_atomic_add_r8);
298 VCOMP_GET_PROC(_vcomp_atomic_and_i1);
299 VCOMP_GET_PROC(_vcomp_atomic_and_i2);
300 VCOMP_GET_PROC(_vcomp_atomic_and_i4);
301 VCOMP_GET_PROC(_vcomp_atomic_and_i8);
302 VCOMP_GET_PROC(_vcomp_atomic_div_i1);
303 VCOMP_GET_PROC(_vcomp_atomic_div_i2);
304 VCOMP_GET_PROC(_vcomp_atomic_div_i4);
305 VCOMP_GET_PROC(_vcomp_atomic_div_i8);
306 VCOMP_GET_PROC(_vcomp_atomic_div_r4);
307 VCOMP_GET_PROC(_vcomp_atomic_div_r8);
308 VCOMP_GET_PROC(_vcomp_atomic_div_ui1);
309 VCOMP_GET_PROC(_vcomp_atomic_div_ui2);
310 VCOMP_GET_PROC(_vcomp_atomic_div_ui4);
311 VCOMP_GET_PROC(_vcomp_atomic_div_ui8);
312 VCOMP_GET_PROC(_vcomp_atomic_mul_i1);
313 VCOMP_GET_PROC(_vcomp_atomic_mul_i2);
314 VCOMP_GET_PROC(_vcomp_atomic_mul_i4);
315 VCOMP_GET_PROC(_vcomp_atomic_mul_i8);
316 VCOMP_GET_PROC(_vcomp_atomic_mul_r4);
317 VCOMP_GET_PROC(_vcomp_atomic_mul_r8);
318 VCOMP_GET_PROC(_vcomp_atomic_or_i1);
319 VCOMP_GET_PROC(_vcomp_atomic_or_i2);
320 VCOMP_GET_PROC(_vcomp_atomic_or_i4);
321 VCOMP_GET_PROC(_vcomp_atomic_or_i8);
322 VCOMP_GET_PROC(_vcomp_atomic_shl_i1);
323 VCOMP_GET_PROC(_vcomp_atomic_shl_i2);
324 VCOMP_GET_PROC(_vcomp_atomic_shl_i4);
325 VCOMP_GET_PROC(_vcomp_atomic_shl_i8);
326 VCOMP_GET_PROC(_vcomp_atomic_shr_i1);
327 VCOMP_GET_PROC(_vcomp_atomic_shr_i2);
328 VCOMP_GET_PROC(_vcomp_atomic_shr_i4);
329 VCOMP_GET_PROC(_vcomp_atomic_shr_i8);
330 VCOMP_GET_PROC(_vcomp_atomic_shr_ui1);
331 VCOMP_GET_PROC(_vcomp_atomic_shr_ui2);
332 VCOMP_GET_PROC(_vcomp_atomic_shr_ui4);
333 VCOMP_GET_PROC(_vcomp_atomic_shr_ui8);
334 VCOMP_GET_PROC(_vcomp_atomic_sub_i1);
335 VCOMP_GET_PROC(_vcomp_atomic_sub_i2);
336 VCOMP_GET_PROC(_vcomp_atomic_sub_i4);
337 VCOMP_GET_PROC(_vcomp_atomic_sub_i8);
338 VCOMP_GET_PROC(_vcomp_atomic_sub_r4);
339 VCOMP_GET_PROC(_vcomp_atomic_sub_r8);
340 VCOMP_GET_PROC(_vcomp_atomic_xor_i1);
341 VCOMP_GET_PROC(_vcomp_atomic_xor_i2);
342 VCOMP_GET_PROC(_vcomp_atomic_xor_i4);
343 VCOMP_GET_PROC(_vcomp_atomic_xor_i8);
344 VCOMP_GET_PROC(_vcomp_barrier);
345 VCOMP_GET_PROC(_vcomp_enter_critsect);
346 VCOMP_GET_PROC(_vcomp_flush);
347 VCOMP_GET_PROC(_vcomp_for_dynamic_init);
348 VCOMP_GET_PROC(_vcomp_for_dynamic_next);
349 VCOMP_GET_PROC(_vcomp_for_static_end);
350 VCOMP_GET_PROC(_vcomp_for_static_init);
351 VCOMP_GET_PROC(_vcomp_for_static_simple_init);
352 VCOMP_GET_PROC(_vcomp_fork);
353 VCOMP_GET_PROC(_vcomp_get_thread_num);
354 VCOMP_GET_PROC(_vcomp_leave_critsect);
355 VCOMP_GET_PROC(_vcomp_master_begin);
356 VCOMP_GET_PROC(_vcomp_master_end);
357 VCOMP_GET_PROC(_vcomp_reduction_i1);
358 VCOMP_GET_PROC(_vcomp_reduction_i2);
359 VCOMP_GET_PROC(_vcomp_reduction_i4);
360 VCOMP_GET_PROC(_vcomp_reduction_i8);
361 VCOMP_GET_PROC(_vcomp_reduction_r4);
362 VCOMP_GET_PROC(_vcomp_reduction_r8);
363 VCOMP_GET_PROC(_vcomp_reduction_u1);
364 VCOMP_GET_PROC(_vcomp_reduction_u2);
365 VCOMP_GET_PROC(_vcomp_reduction_u4);
366 VCOMP_GET_PROC(_vcomp_reduction_u8);
367 VCOMP_GET_PROC(_vcomp_sections_init);
368 VCOMP_GET_PROC(_vcomp_sections_next);
369 VCOMP_GET_PROC(_vcomp_set_num_threads);
370 VCOMP_GET_PROC(_vcomp_single_begin);
371 VCOMP_GET_PROC(_vcomp_single_end);
372 VCOMP_GET_PROC(omp_destroy_lock);
373 VCOMP_GET_PROC(omp_destroy_nest_lock);
374 VCOMP_GET_PROC(omp_get_max_threads);
375 VCOMP_GET_PROC(omp_get_nested);
376 VCOMP_GET_PROC(omp_get_num_threads);
377 VCOMP_GET_PROC(omp_get_thread_num);
378 VCOMP_GET_PROC(omp_in_parallel);
379 VCOMP_GET_PROC(omp_init_lock);
380 VCOMP_GET_PROC(omp_init_nest_lock);
381 VCOMP_GET_PROC(omp_set_lock);
382 VCOMP_GET_PROC(omp_set_nest_lock);
383 VCOMP_GET_PROC(omp_set_nested);
384 VCOMP_GET_PROC(omp_set_num_threads);
385 VCOMP_GET_PROC(omp_test_lock);
386 VCOMP_GET_PROC(omp_test_nest_lock);
387 VCOMP_GET_PROC(omp_unset_lock);
388 VCOMP_GET_PROC(omp_unset_nest_lock);
390 return TRUE;
393 #undef VCOMP_GET_PROC
395 static void CDECL num_threads_cb2(int parallel, LONG *count)
397 int is_parallel = pomp_in_parallel();
398 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
400 InterlockedIncrement(count);
403 static void CDECL num_threads_cb(BOOL nested, int parallel, int nested_threads, LONG *count)
405 int is_parallel, num_threads, thread_num;
406 LONG thread_count;
408 InterlockedIncrement(count);
409 p_vcomp_barrier();
411 num_threads = pomp_get_num_threads();
412 ok(num_threads == *count, "expected num_threads == %d, got %d\n", *count, num_threads);
413 thread_num = pomp_get_thread_num();
414 ok(thread_num >= 0 && thread_num < num_threads,
415 "expected thread_num in range [0, %d], got %d\n", num_threads - 1, thread_num);
416 ok(thread_num == p_vcomp_get_thread_num(),
417 "expected _vcomp_get_thread_num to return the same value\n");
419 is_parallel = pomp_in_parallel();
420 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
422 thread_count = 0;
423 p_vcomp_fork(TRUE, 2, num_threads_cb2, TRUE, &thread_count);
424 if (nested)
425 ok(thread_count == nested_threads, "expected %d threads, got %d\n", nested_threads, thread_count);
426 else
427 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
429 is_parallel = pomp_in_parallel();
430 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
432 thread_count = 0;
433 p_vcomp_fork(FALSE, 2, num_threads_cb2, parallel, &thread_count);
434 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
436 is_parallel = pomp_in_parallel();
437 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
439 p_vcomp_set_num_threads(4);
440 thread_count = 0;
441 p_vcomp_fork(TRUE, 2, num_threads_cb2, TRUE, &thread_count);
442 if (nested)
443 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
444 else
445 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
447 is_parallel = pomp_in_parallel();
448 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
451 static void test_omp_get_num_threads(BOOL nested)
453 int is_nested, is_parallel, max_threads, num_threads, thread_num;
454 LONG thread_count;
456 ok(pomp_get_thread_num != p_vcomp_get_thread_num,
457 "expected omp_get_thread_num != _vcomp_get_thread_num\n");
459 pomp_set_nested(nested);
460 is_nested = pomp_get_nested();
461 ok(is_nested == nested, "expected %d, got %d\n", nested, is_nested);
463 max_threads = pomp_get_max_threads();
464 ok(max_threads >= 1, "expected max_threads >= 1, got %d\n", max_threads);
465 thread_num = pomp_get_thread_num();
466 ok(thread_num == 0, "expected thread_num == 0, got %d\n", thread_num);
468 is_parallel = pomp_in_parallel();
469 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
471 num_threads = pomp_get_num_threads();
472 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
473 thread_count = 0;
474 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, max_threads, &thread_count);
475 ok(thread_count == max_threads, "expected %d threads, got %d\n", max_threads, thread_count);
477 is_parallel = pomp_in_parallel();
478 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
480 num_threads = pomp_get_num_threads();
481 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
482 thread_count = 0;
483 p_vcomp_fork(FALSE, 4, num_threads_cb, TRUE, FALSE, max_threads, &thread_count);
484 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
486 is_parallel = pomp_in_parallel();
487 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
489 pomp_set_num_threads(1);
490 num_threads = pomp_get_num_threads();
491 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
492 thread_count = 0;
493 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 1, &thread_count);
494 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
496 is_parallel = pomp_in_parallel();
497 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
499 pomp_set_num_threads(2);
500 num_threads = pomp_get_num_threads();
501 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
502 thread_count = 0;
503 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 2, &thread_count);
504 ok(thread_count == 2, "expected 2 threads, got %d\n", thread_count);
506 pomp_set_num_threads(4);
507 num_threads = pomp_get_num_threads();
508 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
509 thread_count = 0;
510 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
511 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
513 p_vcomp_set_num_threads(8);
514 num_threads = pomp_get_num_threads();
515 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
516 thread_count = 0;
517 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
518 ok(thread_count == 8, "expected 8 threads, got %d\n", thread_count);
519 thread_count = 0;
520 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
521 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
523 p_vcomp_set_num_threads(0);
524 num_threads = pomp_get_num_threads();
525 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
526 thread_count = 0;
527 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
528 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
530 pomp_set_num_threads(0);
531 num_threads = pomp_get_num_threads();
532 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
533 thread_count = 0;
534 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
535 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
537 pomp_set_num_threads(max_threads);
538 pomp_set_nested(FALSE);
541 static void CDECL fork_ptr_cb(LONG *a, LONG *b, LONG *c, LONG *d, LONG *e)
543 InterlockedIncrement(a);
544 InterlockedIncrement(b);
545 InterlockedIncrement(c);
546 InterlockedIncrement(d);
547 InterlockedIncrement(e);
550 static void CDECL fork_uintptr_cb(UINT_PTR a, UINT_PTR b, UINT_PTR c, UINT_PTR d, UINT_PTR e)
552 ok(a == 1, "expected a == 1, got %p\n", (void *)a);
553 ok(b == MAXUINT_PTR - 2, "expected b == MAXUINT_PTR - 2, got %p\n", (void *)b);
554 ok(c == 3, "expected c == 3, got %p\n", (void *)c);
555 ok(d == MAXUINT_PTR - 4, "expected d == MAXUINT_PTR - 4, got %p\n", (void *)d);
556 ok(e == 5, "expected e == 5, got %p\n", (void *)e);
#ifdef __i386__
/* Fork callback (32-bit x86 builds only) verifying that five float arguments
 * arrive with the values 1.5 ... 5.5, within a tolerance of 0.0001. */
static void CDECL fork_float_cb(float a, float b, float c, float d, float e)
{
    ok(a > 1.4999 && a < 1.5001, "expected a == 1.5, got %f\n", a);
    ok(b > 2.4999 && b < 2.5001, "expected b == 2.5, got %f\n", b);
    ok(c > 3.4999 && c < 3.5001, "expected c == 3.5, got %f\n", c);
    ok(d > 4.4999 && d < 4.5001, "expected d == 4.5, got %f\n", d);
    ok(e > 5.4999 && e < 5.5001, "expected e == 5.5, got %f\n", e);
}
#endif
570 static void test_vcomp_fork(void)
572 LONG a, b, c, d, e;
573 int max_threads = pomp_get_max_threads();
574 pomp_set_num_threads(4);
576 a = 0; b = 1; c = 2; d = 3; e = 4;
577 p_vcomp_fork(FALSE, 5, fork_ptr_cb, &a, &b, &c, &d, &e);
578 ok(a == 1, "expected a == 1, got %d\n", a);
579 ok(b == 2, "expected b == 2, got %d\n", b);
580 ok(c == 3, "expected c == 3, got %d\n", c);
581 ok(d == 4, "expected d == 4, got %d\n", d);
582 ok(e == 5, "expected e == 5, got %d\n", e);
584 a = 0; b = 1; c = 2; d = 3; e = 4;
585 p_vcomp_fork(TRUE, 5, fork_ptr_cb, &a, &b, &c, &d, &e);
586 ok(a == 4, "expected a == 4, got %d\n", a);
587 ok(b == 5, "expected b == 5, got %d\n", b);
588 ok(c == 6, "expected c == 6, got %d\n", c);
589 ok(d == 7, "expected d == 7, got %d\n", d);
590 ok(e == 8, "expected e == 8, got %d\n", e);
592 p_vcomp_fork(TRUE, 5, fork_uintptr_cb, (UINT_PTR)1, (UINT_PTR)(MAXUINT_PTR - 2),
593 (UINT_PTR)3, (UINT_PTR)(MAXUINT_PTR - 4), (UINT_PTR)5);
595 #ifdef __i386__
597 void (CDECL *func)(BOOL, int, void *, float, float, float, float, float) = (void *)p_vcomp_fork;
598 func(TRUE, 5, fork_float_cb, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f);
600 #else
601 skip("skipping float test on non-x86\n");
602 #endif
604 pomp_set_num_threads(max_threads);
607 static void CDECL section_cb(LONG *a, LONG *b, LONG *c)
609 int i;
611 p_vcomp_sections_init(20);
612 while ((i = p_vcomp_sections_next()) != -1)
614 InterlockedIncrement(a);
615 Sleep(1);
618 p_vcomp_sections_init(30);
619 while ((i = p_vcomp_sections_next()) != -1)
621 InterlockedIncrement(b);
622 Sleep(1);
625 p_vcomp_sections_init(40);
626 while ((i = p_vcomp_sections_next()) != -1)
628 InterlockedIncrement(c);
629 Sleep(1);
633 static void test_vcomp_sections_init(void)
635 LONG a, b, c;
636 int max_threads = pomp_get_max_threads();
637 int i;
639 if (0)
641 /* calling _vcomp_sections_next without prior _vcomp_sections_init
642 * returns uninitialized memory on Windows. */
643 i = p_vcomp_sections_next();
644 ok(i == -1, "expected -1, got %d\n", i);
647 a = b = c = 0;
648 section_cb(&a, &b, &c);
649 ok(a == 20, "expected a == 20, got %d\n", a);
650 ok(b == 30, "expected b == 30, got %d\n", b);
651 ok(c == 40, "expected c == 40, got %d\n", c);
653 for (i = 1; i <= 4; i++)
655 pomp_set_num_threads(i);
657 a = b = c = 0;
658 p_vcomp_fork(TRUE, 3, section_cb, &a, &b, &c);
659 ok(a == 20, "expected a == 20, got %d\n", a);
660 ok(b == 30, "expected b == 30, got %d\n", b);
661 ok(c == 40, "expected c == 40, got %d\n", c);
663 a = b = c = 0;
664 p_vcomp_fork(FALSE, 3, section_cb, &a, &b, &c);
665 ok(a == 20, "expected a == 20, got %d\n", a);
666 ok(b == 30, "expected b == 30, got %d\n", b);
667 ok(c == 40, "expected c == 40, got %d\n", c);
670 pomp_set_num_threads(max_threads);
673 static void my_for_static_simple_init(BOOL dynamic, unsigned int first, unsigned int last, int step,
674 BOOL increment, unsigned int *begin, unsigned int *end)
676 unsigned int iterations, per_thread, remaining;
677 int num_threads = pomp_get_num_threads();
678 int thread_num = pomp_get_thread_num();
680 if (!dynamic && num_threads == 1)
682 *begin = first;
683 *end = last;
684 return;
687 if (step <= 0)
689 *begin = 0;
690 *end = increment ? -1 : 1;
691 return;
694 if (increment)
695 iterations = 1 + (last - first) / step;
696 else
698 iterations = 1 + (first - last) / step;
699 step *= -1;
702 per_thread = iterations / num_threads;
703 remaining = iterations - per_thread * num_threads;
705 if (thread_num < remaining)
706 per_thread++;
707 else if (per_thread)
708 first += remaining * step;
709 else
711 *begin = first;
712 *end = first - step;
713 return;
716 *begin = first + per_thread * thread_num * step;
717 *end = *begin + (per_thread - 1) * step;
721 static void CDECL for_static_simple_cb(void)
723 static const struct
725 unsigned int first;
726 unsigned int last;
727 int step;
729 tests[] =
731 { 0, 0, 1 }, /* 0 */
732 { 0, 1, 1 },
733 { 0, 2, 1 },
734 { 0, 3, 1 },
735 { 0, 100, 0 },
736 { 0, 100, 1 },
737 { 0, 100, 2 },
738 { 0, 100, 3 },
739 { 0, 100, -1 },
740 { 0, 100, -2 },
741 { 0, 100, -3 }, /* 10 */
742 { 0, 100, 10 },
743 { 0, 100, 50 },
744 { 0, 100, 100 },
745 { 0, 100, 150 },
746 { 0, 0x80000000, 1 },
747 { 0, 0xfffffffe, 1 },
748 { 0, 0xffffffff, 1 },
749 { 50, 50, 0 },
750 { 50, 50, 1 },
751 { 50, 50, 2 }, /* 20 */
752 { 50, 50, 3 },
753 { 50, 50, -1 },
754 { 50, 50, -2 },
755 { 50, 50, -3 },
756 { 100, 200, 1 },
757 { 100, 200, 5 },
758 { 100, 200, 10 },
759 { 100, 200, 50 },
760 { 100, 200, 100 },
761 { 100, 200, 150 }, /* 30 */
763 int num_threads = pomp_get_num_threads();
764 int thread_num = pomp_get_thread_num();
765 int i;
767 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
769 unsigned int my_begin, my_end, begin, end;
771 begin = end = 0xdeadbeef;
772 my_for_static_simple_init(FALSE, tests[i].first, tests[i].last, tests[i].step, FALSE, &my_begin, &my_end);
773 p_vcomp_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, FALSE, &begin, &end);
775 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
776 i, thread_num, num_threads, my_begin, begin);
777 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
778 i, thread_num, num_threads, my_end, end);
780 p_vcomp_for_static_end();
781 p_vcomp_barrier();
783 begin = end = 0xdeadbeef;
784 my_for_static_simple_init(FALSE, tests[i].first, tests[i].last, tests[i].step, TRUE, &my_begin, &my_end);
785 p_vcomp_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, TRUE, &begin, &end);
787 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
788 i, thread_num, num_threads, my_begin, begin);
789 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
790 i, thread_num, num_threads, my_end, end);
792 p_vcomp_for_static_end();
793 p_vcomp_barrier();
795 if (tests[i].first == tests[i].last) continue;
797 begin = end = 0xdeadbeef;
798 my_for_static_simple_init(FALSE, tests[i].last, tests[i].first, tests[i].step, FALSE, &my_begin, &my_end);
799 p_vcomp_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, FALSE, &begin, &end);
801 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
802 i, thread_num, num_threads, my_begin, begin);
803 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
804 i, thread_num, num_threads, my_end, end);
806 p_vcomp_for_static_end();
807 p_vcomp_barrier();
809 begin = end = 0xdeadbeef;
810 my_for_static_simple_init(FALSE, tests[i].last, tests[i].first, tests[i].step, TRUE, &my_begin, &my_end);
811 p_vcomp_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, TRUE, &begin, &end);
813 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
814 i, thread_num, num_threads, my_begin, begin);
815 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
816 i, thread_num, num_threads, my_end, end);
818 p_vcomp_for_static_end();
819 p_vcomp_barrier();
823 static void test_vcomp_for_static_simple_init(void)
825 int max_threads = pomp_get_max_threads();
826 int i;
828 for_static_simple_cb();
830 for (i = 1; i <= 4; i++)
832 pomp_set_num_threads(i);
833 p_vcomp_fork(TRUE, 0, for_static_simple_cb);
834 p_vcomp_fork(FALSE, 0, for_static_simple_cb);
837 pomp_set_num_threads(max_threads);
840 #define VCOMP_FOR_STATIC_BROKEN_LOOP 1
841 #define VCOMP_FOR_STATIC_BROKEN_NEXT 2
843 static DWORD CDECL my_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
844 int *begin, int *end, int *next, int *lastchunk)
846 unsigned int iterations, num_chunks, per_thread, remaining;
847 int num_threads = pomp_get_num_threads();
848 int thread_num = pomp_get_thread_num();
850 if (num_threads == 1 && chunksize != 1)
852 *loops = 1;
853 *begin = first;
854 *end = last;
855 *next = 0;
856 *lastchunk = first;
857 return 0;
860 if (first == last)
862 *loops = !thread_num;
863 if (!thread_num)
865 /* The value in *next on Windows is either uninitialized, or contains
866 * garbage. The value shouldn't matter for *loops <= 1, so no need to
867 * reproduce that. */
868 *begin = first;
869 *end = last;
870 *next = 0;
871 *lastchunk = first;
873 return thread_num ? 0 : VCOMP_FOR_STATIC_BROKEN_NEXT;
876 if (step <= 0)
878 /* The total number of iterations depends on the number of threads here,
879 * which doesn't make any sense. This is most likely a bug in the Windows
880 * implementation. */
881 return VCOMP_FOR_STATIC_BROKEN_LOOP;
884 if (first < last)
885 iterations = 1 + (last - first) / step;
886 else
888 iterations = 1 + (first - last) / step;
889 step *= -1;
892 if (chunksize < 1)
893 chunksize = 1;
895 num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
896 per_thread = num_chunks / num_threads;
897 remaining = num_chunks - per_thread * num_threads;
899 *loops = per_thread + (thread_num < remaining);
900 *begin = first + thread_num * chunksize * step;
901 *end = *begin + (chunksize - 1) * step;
902 *next = chunksize * num_threads * step;
903 *lastchunk = first + (num_chunks - 1) * chunksize * step;
904 return 0;
907 static void CDECL for_static_cb(void)
909 static const struct
911 int first;
912 int last;
913 int step;
914 int chunksize;
916 tests[] =
918 { 0, 0, 1, 1 }, /* 0 */
919 { 0, 1, 1, 1 },
920 { 0, 2, 1, 1 },
921 { 0, 3, 1, 1 },
922 { 0, 100, 1, 0 },
923 { 0, 100, 1, 1 },
924 { 0, 100, 1, 5 },
925 { 0, 100, 1, 10 },
926 { 0, 100, 1, 50 },
927 { 0, 100, 1, 100 },
928 { 0, 100, 1, 150 }, /* 10 */
929 { 0, 100, 3, 0 },
930 { 0, 100, 3, 1 },
931 { 0, 100, 3, 5 },
932 { 0, 100, 3, 10 },
933 { 0, 100, 3, 50 },
934 { 0, 100, 3, 100 },
935 { 0, 100, 3, 150 },
936 { 0, 100, 5, 1 },
937 { 0, 100, -3, 0 },
938 { 0, 100, -3, 1 }, /* 20 */
939 { 0, 100, -3, 5 },
940 { 0, 100, -3, 10 },
941 { 0, 100, -3, 50 },
942 { 0, 100, -3, 100 },
943 { 0, 100, -3, 150 },
944 { 0, 100, 10, 1 },
945 { 0, 100, 50, 1 },
946 { 0, 100, 100, 1 },
947 { 0, 100, 150, 1 },
948 { 0, 0x10000000, 1, 123 }, /* 30 */
949 { 0, 0x20000000, 1, 123 },
950 { 0, 0x40000000, 1, 123 },
951 { 0, -0x80000000, 1, 123 },
952 { 50, 50, 1, 1 },
953 { 50, 50, 1, 2 },
954 { 50, 50, 1, -1 },
955 { 50, 50, 1, -2 },
956 { 50, 50, 2, 1 },
957 { 50, 50, 3, 1 },
958 { 100, 200, 3, 1 }, /* 40 */
959 { 100, 200, 3, -1 },
960 { 0x7ffffffe, -0x80000000, 1, 123 },
961 { 0x7fffffff, -0x80000000, 1, 123 },
963 int num_threads = pomp_get_num_threads();
964 int thread_num = pomp_get_thread_num();
965 int i;
967 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
969 int my_begin, my_end, my_next, my_lastchunk;
970 int begin, end, next, lastchunk;
971 unsigned int my_loops, loops;
972 DWORD broken_flags;
974 my_loops = my_begin = my_end = my_next = my_lastchunk = 0xdeadbeef;
975 loops = begin = end = next = lastchunk = 0xdeadbeef;
976 broken_flags = my_for_static_init(tests[i].first, tests[i].last, tests[i].step, tests[i].chunksize,
977 &my_loops, &my_begin, &my_end, &my_next, &my_lastchunk);
978 p_vcomp_for_static_init(tests[i].first, tests[i].last, tests[i].step, tests[i].chunksize,
979 &loops, &begin, &end, &next, &lastchunk);
981 if (broken_flags & VCOMP_FOR_STATIC_BROKEN_LOOP)
983 ok(loops == 0 || loops == 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
984 i, thread_num, num_threads, loops);
986 else
988 ok(loops == my_loops, "test %d, thread %d/%d: expected loops == %u, got %u\n",
989 i, thread_num, num_threads, my_loops, loops);
990 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %d, got %d\n",
991 i, thread_num, num_threads, my_begin, begin);
992 ok(end == my_end, "test %d, thread %d/%d: expected end == %d, got %d\n",
993 i, thread_num, num_threads, my_end, end);
994 ok(next == my_next || broken(broken_flags & VCOMP_FOR_STATIC_BROKEN_NEXT),
995 "test %d, thread %d/%d: expected next == %d, got %d\n", i, thread_num, num_threads, my_next, next);
996 ok(lastchunk == my_lastchunk, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
997 i, thread_num, num_threads, my_lastchunk, lastchunk);
1000 p_vcomp_for_static_end();
1001 p_vcomp_barrier();
1003 if (tests[i].first == tests[i].last) continue;
1005 my_loops = my_begin = my_end = my_next = my_lastchunk = 0xdeadbeef;
1006 loops = begin = end = next = lastchunk = 0xdeadbeef;
1007 broken_flags = my_for_static_init(tests[i].last, tests[i].first, tests[i].step, tests[i].chunksize,
1008 &my_loops, &my_begin, &my_end, &my_next, &my_lastchunk);
1009 p_vcomp_for_static_init(tests[i].last, tests[i].first, tests[i].step, tests[i].chunksize,
1010 &loops, &begin, &end, &next, &lastchunk);
1012 if (broken_flags & VCOMP_FOR_STATIC_BROKEN_LOOP)
1014 ok(loops == 0 || loops == 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
1015 i, thread_num, num_threads, loops);
1017 else
1019 ok(loops == my_loops, "test %d, thread %d/%d: expected loops == %u, got %u\n",
1020 i, thread_num, num_threads, my_loops, loops);
1021 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %d, got %d\n",
1022 i, thread_num, num_threads, my_begin, begin);
1023 ok(end == my_end, "test %d, thread %d/%d: expected end == %d, got %d\n",
1024 i, thread_num, num_threads, my_end, end);
1025 ok(next == my_next || broken(broken_flags & VCOMP_FOR_STATIC_BROKEN_NEXT),
1026 "test %d, thread %d/%d: expected next == %d, got %d\n", i, thread_num, num_threads, my_next, next);
1027 ok(lastchunk == my_lastchunk, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
1028 i, thread_num, num_threads, my_lastchunk, lastchunk);
1031 p_vcomp_for_static_end();
1032 p_vcomp_barrier();
1036 #undef VCOMP_FOR_STATIC_BROKEN_LOOP
1037 #undef VCOMP_FOR_STATIC_BROKEN_NEXT
1039 static void test_vcomp_for_static_init(void)
1041 int max_threads = pomp_get_max_threads();
1042 int i;
1044 for_static_cb();
1046 for (i = 1; i <= 4; i++)
1048 pomp_set_num_threads(i);
1049 p_vcomp_fork(TRUE, 0, for_static_cb);
1050 p_vcomp_fork(FALSE, 0, for_static_cb);
1053 pomp_set_num_threads(max_threads);
1056 static void CDECL for_dynamic_static_cb(void)
1058 unsigned int my_begin, my_end, begin, end;
1059 int ret;
1061 begin = end = 0xdeadbeef;
1062 my_for_static_simple_init(TRUE, 0, 1000, 7, TRUE, &my_begin, &my_end);
1063 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_STATIC | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 1);
1064 ret = p_vcomp_for_dynamic_next(&begin, &end);
1065 ok(ret == TRUE, "expected ret == TRUE, got %d\n", ret);
1066 ok(begin == my_begin, "expected begin == %u, got %u\n", my_begin, begin);
1067 ok(end == my_end, "expected end == %u, got %u\n", my_end, end);
1068 ret = p_vcomp_for_dynamic_next(&begin, &end);
1069 ok(ret == FALSE, "expected ret == FALSE, got %d\n", ret);
1071 begin = end = 0xdeadbeef;
1072 my_for_static_simple_init(TRUE, 1000, 0, 7, FALSE, &my_begin, &my_end);
1073 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_STATIC, 1000, 0, 7, 1);
1074 ret = p_vcomp_for_dynamic_next(&begin, &end);
1075 ok(ret == TRUE, "expected ret == TRUE, got %d\n", ret);
1076 ok(begin == my_begin, "expected begin == %u, got %u\n", my_begin, begin);
1077 ok(end == my_end, "expected end == %u, got %u\n", my_end, end);
1078 ret = p_vcomp_for_dynamic_next(&begin, &end);
1079 ok(ret == FALSE, "expected ret == FALSE, got %d\n", ret);
1081 begin = end = 0xdeadbeef;
1082 my_for_static_simple_init(TRUE, 0, 1000, 7, TRUE, &my_begin, &my_end);
1083 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_STATIC | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 5);
1084 ret = p_vcomp_for_dynamic_next(&begin, &end);
1085 ok(ret == TRUE, "expected ret == TRUE, got %d\n", ret);
1086 ok(begin == my_begin, "expected begin == %u, got %u\n", my_begin, begin);
1087 ok(end == my_end, "expected end == %u, got %u\n", my_end, end);
1088 ret = p_vcomp_for_dynamic_next(&begin, &end);
1089 ok(ret == FALSE, "expected ret == FALSE, got %d\n", ret);
1091 begin = end = 0xdeadbeef;
1092 my_for_static_simple_init(TRUE, 1000, 0, 7, FALSE, &my_begin, &my_end);
1093 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_STATIC, 1000, 0, 7, 5);
1094 ret = p_vcomp_for_dynamic_next(&begin, &end);
1095 ok(ret == TRUE, "expected ret == TRUE, got %d\n", ret);
1096 ok(begin == my_begin, "expected begin == %u, got %u\n", my_begin, begin);
1097 ok(end == my_end, "expected end == %u, got %u\n", my_end, end);
1098 ret = p_vcomp_for_dynamic_next(&begin, &end);
1099 ok(ret == FALSE, "expected ret == FALSE, got %d\n", ret);
1102 static void CDECL for_dynamic_chunked_cb(LONG *a, LONG *b, LONG *c, LONG *d)
1104 unsigned int begin, end;
1106 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_CHUNKED | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 1);
1107 while (p_vcomp_for_dynamic_next(&begin, &end))
1109 if (begin == 994) ok(end == 1000, "expected end == 1000, got %u\n", end);
1110 else ok(begin == end, "expected begin == end, got %u and %u\n", begin, end);
1111 InterlockedExchangeAdd(a, begin);
1114 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_CHUNKED, 1000, 0, 7, 1);
1115 while (p_vcomp_for_dynamic_next(&begin, &end))
1117 if (begin == 6) ok(end == 0, "expected end == 0, got %u\n", end);
1118 else ok(begin == end, "expected begin == end, got %u and %u\n", begin, end);
1119 InterlockedExchangeAdd(b, begin);
1122 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_CHUNKED | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 5);
1123 while (p_vcomp_for_dynamic_next(&begin, &end))
1125 if (begin == 980) ok(end == 1000, "expected end == 1000, got %u\n", end);
1126 else ok(begin + 28 == end, "expected begin + 28 == end, got %u and %u\n", begin + 28, end);
1127 InterlockedExchangeAdd(c, begin);
1130 p_vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_CHUNKED, 1000, 0, 7, 5);
1131 while (p_vcomp_for_dynamic_next(&begin, &end))
1133 if (begin == 20) ok(end == 0, "expected end == 0, got %u\n", end);
1134 else ok(begin - 28 == end, "expected begin - 28 == end, got %u and %u\n", begin - 28, end);
1135 InterlockedExchangeAdd(d, begin);
1139 static void CDECL for_dynamic_guided_cb(unsigned int flags, LONG *a, LONG *b, LONG *c, LONG *d)
1141 int num_threads = pomp_get_num_threads();
1142 unsigned int begin, end;
1144 p_vcomp_for_dynamic_init(flags | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 1);
1145 while (p_vcomp_for_dynamic_next(&begin, &end))
1147 ok(num_threads != 1 || (begin == 0 && end == 1000),
1148 "expected begin == 0 and end == 1000, got %u and %u\n", begin, end);
1149 InterlockedExchangeAdd(a, begin);
1152 p_vcomp_for_dynamic_init(flags, 1000, 0, 7, 1);
1153 while (p_vcomp_for_dynamic_next(&begin, &end))
1155 ok(num_threads != 1 || (begin == 1000 && end == 0),
1156 "expected begin == 1000 and end == 0, got %u and %u\n", begin, end);
1157 InterlockedExchangeAdd(b, begin);
1160 p_vcomp_for_dynamic_init(flags | VCOMP_DYNAMIC_FLAGS_INCREMENT, 0, 1000, 7, 5);
1161 while (p_vcomp_for_dynamic_next(&begin, &end))
1163 ok(num_threads != 1 || (begin == 0 && end == 1000),
1164 "expected begin == 0 and end == 1000, got %u and %u\n", begin, end);
1165 InterlockedExchangeAdd(c, begin);
1168 p_vcomp_for_dynamic_init(flags, 1000, 0, 7, 5);
1169 while (p_vcomp_for_dynamic_next(&begin, &end))
1171 ok(num_threads != 1 || (begin == 1000 && end == 0),
1172 "expected begin == 1000 and end == 0, got %u and %u\n", begin, end);
1173 InterlockedExchangeAdd(d, begin);
1177 static void test_vcomp_for_dynamic_init(void)
1179 static const int guided_a[] = {0, 6041, 9072, 11179};
1180 static const int guided_b[] = {1000, 1959, 2928, 3821};
1181 static const int guided_c[] = {0, 4067, 6139, 7273};
1182 static const int guided_d[] = {1000, 1933, 2861, 3727};
1183 LONG a, b, c, d;
1184 int max_threads = pomp_get_max_threads();
1185 int i;
1187 /* test static scheduling */
1188 for_dynamic_static_cb();
1190 for (i = 1; i <= 4; i++)
1192 pomp_set_num_threads(i);
1193 p_vcomp_fork(TRUE, 0, for_dynamic_static_cb);
1194 p_vcomp_fork(FALSE, 0, for_dynamic_static_cb);
1197 /* test chunked scheduling */
1198 a = b = c = d = 0;
1199 for_dynamic_chunked_cb(&a, &b, &c, &d);
1200 ok(a == 71071, "expected a == 71071, got %d\n", a);
1201 ok(b == 71929, "expected b == 71929, got %d\n", b);
1202 ok(c == 14210, "expected c == 14210, got %d\n", c);
1203 ok(d == 14790, "expected d == 14790, got %d\n", d);
1205 for (i = 1; i <= 4; i++)
1207 pomp_set_num_threads(i);
1209 a = b = c = d = 0;
1210 p_vcomp_fork(TRUE, 4, for_dynamic_chunked_cb, &a, &b, &c, &d);
1211 ok(a == 71071, "expected a == 71071, got %d\n", a);
1212 ok(b == 71929, "expected b == 71929, got %d\n", b);
1213 ok(c == 14210, "expected c == 14210, got %d\n", c);
1214 ok(d == 14790, "expected d == 14790, got %d\n", d);
1216 a = b = c = d = 0;
1217 p_vcomp_fork(FALSE, 4, for_dynamic_chunked_cb, &a, &b, &c, &d);
1218 ok(a == 71071, "expected a == 71071, got %d\n", a);
1219 ok(b == 71929, "expected b == 71929, got %d\n", b);
1220 ok(c == 14210, "expected c == 14210, got %d\n", c);
1221 ok(d == 14790, "expected d == 14790, got %d\n", d);
1224 /* test guided scheduling */
1225 a = b = c = d = 0;
1226 for_dynamic_guided_cb(VCOMP_DYNAMIC_FLAGS_GUIDED, &a, &b, &c, &d);
1227 ok(a == guided_a[0], "expected a == %d, got %d\n", guided_a[0], a);
1228 ok(b == guided_b[0], "expected b == %d, got %d\n", guided_b[0], b);
1229 ok(c == guided_c[0], "expected c == %d, got %d\n", guided_c[0], c);
1230 ok(d == guided_d[0], "expected d == %d, got %d\n", guided_d[0], d);
1232 for (i = 1; i <= 4; i++)
1234 pomp_set_num_threads(i);
1236 a = b = c = d = 0;
1237 p_vcomp_fork(TRUE, 5, for_dynamic_guided_cb, VCOMP_DYNAMIC_FLAGS_GUIDED, &a, &b, &c, &d);
1238 ok(a == guided_a[i - 1], "expected a == %d, got %d\n", guided_a[i - 1], a);
1239 ok(b == guided_b[i - 1], "expected b == %d, got %d\n", guided_b[i - 1], b);
1240 ok(c == guided_c[i - 1], "expected c == %d, got %d\n", guided_c[i - 1], c);
1241 ok(d == guided_d[i - 1], "expected d == %d, got %d\n", guided_d[i - 1], d);
1243 a = b = c = d = 0;
1244 p_vcomp_fork(FALSE, 5, for_dynamic_guided_cb, VCOMP_DYNAMIC_FLAGS_GUIDED, &a, &b, &c, &d);
1245 ok(a == guided_a[0], "expected a == %d, got %d\n", guided_a[0], a);
1246 ok(b == guided_b[0], "expected b == %d, got %d\n", guided_b[0], b);
1247 ok(c == guided_c[0], "expected c == %d, got %d\n", guided_c[0], c);
1248 ok(d == guided_d[0], "expected d == %d, got %d\n", guided_d[0], d);
1251 /* test with empty flags */
1252 a = b = c = d = 0;
1253 for_dynamic_guided_cb(0, &a, &b, &c, &d);
1254 ok(a == guided_a[0], "expected a == %d, got %d\n", guided_a[0], a);
1255 ok(b == guided_b[0], "expected b == %d, got %d\n", guided_b[0], b);
1256 ok(c == guided_c[0], "expected c == %d, got %d\n", guided_c[0], c);
1257 ok(d == guided_d[0], "expected d == %d, got %d\n", guided_d[0], d);
1259 for (i = 1; i <= 4; i++)
1261 pomp_set_num_threads(i);
1263 a = b = c = d = 0;
1264 p_vcomp_fork(TRUE, 5, for_dynamic_guided_cb, 0, &a, &b, &c, &d);
1265 ok(a == guided_a[i - 1], "expected a == %d, got %d\n", guided_a[i - 1], a);
1266 ok(b == guided_b[i - 1], "expected b == %d, got %d\n", guided_b[i - 1], b);
1267 ok(c == guided_c[i - 1], "expected c == %d, got %d\n", guided_c[i - 1], c);
1268 ok(d == guided_d[i - 1], "expected d == %d, got %d\n", guided_d[i - 1], d);
1270 a = b = c = d = 0;
1271 p_vcomp_fork(FALSE, 5, for_dynamic_guided_cb, 0, &a, &b, &c, &d);
1272 ok(a == guided_a[0], "expected a == %d, got %d\n", guided_a[0], a);
1273 ok(b == guided_b[0], "expected b == %d, got %d\n", guided_b[0], b);
1274 ok(c == guided_c[0], "expected c == %d, got %d\n", guided_c[0], c);
1275 ok(d == guided_d[0], "expected d == %d, got %d\n", guided_d[0], d);
1278 pomp_set_num_threads(max_threads);
1281 static void CDECL master_cb(HANDLE semaphore)
1283 int num_threads = pomp_get_num_threads();
1284 int thread_num = pomp_get_thread_num();
1286 if (p_vcomp_master_begin())
1288 ok(thread_num == 0, "expected thread_num == 0, got %d\n", thread_num);
1289 if (num_threads >= 2)
1291 DWORD result = WaitForSingleObject(semaphore, 1000);
1292 ok(result == WAIT_OBJECT_0, "WaitForSingleObject returned %u\n", result);
1294 p_vcomp_master_end();
1297 if (thread_num == 1)
1298 ReleaseSemaphore(semaphore, 1, NULL);
1301 static void test_vcomp_master_begin(void)
1303 int max_threads = pomp_get_max_threads();
1304 HANDLE semaphore;
1305 int i;
1307 semaphore = CreateSemaphoreA(NULL, 0, 1, NULL);
1308 ok(semaphore != NULL, "CreateSemaphoreA failed %u\n", GetLastError());
1310 master_cb(semaphore);
1312 for (i = 1; i <= 4; i++)
1314 pomp_set_num_threads(i);
1315 p_vcomp_fork(TRUE, 1, master_cb, semaphore);
1316 p_vcomp_fork(FALSE, 1, master_cb, semaphore);
1319 CloseHandle(semaphore);
1320 pomp_set_num_threads(max_threads);
1323 static void CDECL single_cb(int flags, HANDLE semaphore)
1325 int num_threads = pomp_get_num_threads();
1327 if (p_vcomp_single_begin(flags))
1329 if (num_threads >= 2)
1331 DWORD result = WaitForSingleObject(semaphore, 1000);
1332 ok(result == WAIT_OBJECT_0, "WaitForSingleObject returned %u\n", result);
1335 p_vcomp_single_end();
1337 if (p_vcomp_single_begin(flags))
1339 if (num_threads >= 2)
1340 ReleaseSemaphore(semaphore, 1, NULL);
1342 p_vcomp_single_end();
1345 static void test_vcomp_single_begin(void)
1347 int max_threads = pomp_get_max_threads();
1348 HANDLE semaphore;
1349 int i;
1351 semaphore = CreateSemaphoreA(NULL, 0, 1, NULL);
1352 ok(semaphore != NULL, "CreateSemaphoreA failed %u\n", GetLastError());
1354 single_cb(0, semaphore);
1355 single_cb(1, semaphore);
1357 for (i = 1; i <= 4; i++)
1359 pomp_set_num_threads(i);
1360 p_vcomp_fork(TRUE, 2, single_cb, 0, semaphore);
1361 p_vcomp_fork(TRUE, 2, single_cb, 1, semaphore);
1362 p_vcomp_fork(FALSE, 2, single_cb, 0, semaphore);
1363 p_vcomp_fork(FALSE, 2, single_cb, 1, semaphore);
1366 CloseHandle(semaphore);
1367 pomp_set_num_threads(max_threads);
1370 static void CDECL critsect_cb(LONG *a)
1372 static CRITICAL_SECTION *critsect;
1373 LONG tmp;
1375 p_vcomp_enter_critsect(&critsect);
1376 tmp = *a;
1377 Sleep(50);
1378 *a = tmp + 1;
1379 p_vcomp_leave_critsect(critsect);
1381 ok(critsect != NULL, "expected critsect != NULL\n");
1383 EnterCriticalSection(critsect);
1384 tmp = *a;
1385 Sleep(50);
1386 *a = tmp + 1;
1387 LeaveCriticalSection(critsect);
1390 static void test_vcomp_enter_critsect(void)
1392 int max_threads = pomp_get_max_threads();
1393 LONG a;
1394 int i;
1396 a = 0;
1397 critsect_cb(&a);
1398 ok(a == 2, "expected a == 2, got %d\n", a);
1400 for (i = 1; i <= 4; i++)
1402 pomp_set_num_threads(i);
1404 a = 0;
1405 p_vcomp_fork(TRUE, 1, critsect_cb, &a);
1406 ok(a == 2 * i, "expected a == %d, got %d\n", 2 * i, a);
1408 a = 0;
1409 p_vcomp_fork(FALSE, 1, critsect_cb, &a);
1410 ok(a == 2, "expected a == 2, got %d\n", a);
1413 pomp_set_num_threads(max_threads);
/* Calls _vcomp_flush three times in a row; there is nothing to assert. */
static void test_vcomp_flush(void)
{
    int call;

    for (call = 0; call < 3; call++)
        p_vcomp_flush();
}
1423 static void test_omp_init_lock(void)
1425 omp_lock_t lock;
1426 int ret;
1428 pomp_init_lock(&lock);
1430 /* test omp_set_lock */
1431 pomp_set_lock(&lock);
1432 pomp_unset_lock(&lock);
1434 /* test omp_test_lock */
1435 ret = pomp_test_lock(&lock);
1436 ok(ret == 1, "expected ret == 1, got %d\n", ret);
1437 ret = pomp_test_lock(&lock);
1438 ok(ret == 0, "expected ret == 0, got %d\n", ret);
1439 pomp_unset_lock(&lock);
1441 /* test with EnterCriticalSection */
1442 EnterCriticalSection(lock);
1443 ret = pomp_test_lock(&lock);
1444 todo_wine
1445 ok(ret == 1, "expected ret == 1, got %d\n", ret);
1446 if (ret)
1448 ret = pomp_test_lock(&lock);
1449 ok(ret == 0, "expected ret == 0, got %d\n", ret);
1450 pomp_unset_lock(&lock);
1452 LeaveCriticalSection(lock);
1454 pomp_destroy_lock(&lock);
1457 static void test_omp_init_nest_lock(void)
1459 omp_nest_lock_t lock;
1460 int ret;
1462 ok(pomp_init_nest_lock == pomp_init_lock, "expected omp_init_nest_lock == %p, got %p\n",
1463 pomp_init_lock, pomp_init_nest_lock);
1464 ok(pomp_destroy_nest_lock == pomp_destroy_lock, "expected omp_destroy_nest_lock == %p, got %p\n",
1465 pomp_destroy_lock, pomp_destroy_nest_lock);
1467 pomp_init_nest_lock(&lock);
1469 /* test omp_set_nest_lock */
1470 pomp_set_nest_lock(&lock);
1471 pomp_set_nest_lock(&lock);
1472 pomp_unset_nest_lock(&lock);
1473 pomp_unset_nest_lock(&lock);
1475 /* test omp_test_nest_lock */
1476 ret = pomp_test_nest_lock(&lock);
1477 ok(ret == 1, "expected ret == 1, got %d\n", ret);
1478 ret = pomp_test_nest_lock(&lock);
1479 ok(ret == 2, "expected ret == 2, got %d\n", ret);
1480 ret = pomp_test_nest_lock(&lock);
1481 ok(ret == 3, "expected ret == 3, got %d\n", ret);
1482 pomp_unset_nest_lock(&lock);
1483 pomp_unset_nest_lock(&lock);
1484 pomp_unset_nest_lock(&lock);
1486 /* test with EnterCriticalSection */
1487 EnterCriticalSection(lock);
1488 ret = pomp_test_nest_lock(&lock);
1489 todo_wine
1490 ok(ret == 1, "expected ret == 1, got %d\n", ret);
1491 pomp_unset_nest_lock(&lock);
1492 LeaveCriticalSection(lock);
1494 pomp_destroy_nest_lock(&lock);
1497 static void test_atomic_integer8(void)
1499 struct
1501 void (CDECL *func)(char *, char);
1502 char v1, v2, expected;
1504 tests1[] =
1506 { p_vcomp_atomic_add_i1, 0x11, 0x77, -0x78 },
1507 { p_vcomp_atomic_and_i1, 0x11, 0x77, 0x11 },
1508 { p_vcomp_atomic_div_i1, 0x77, 0x11, 7 },
1509 { p_vcomp_atomic_div_i1, 0x77, -0x11, -7 },
1510 { p_vcomp_atomic_mul_i1, 0x11, 0x77, -0x19 },
1511 { p_vcomp_atomic_mul_i1, 0x11, -0x77, 0x19 },
1512 { p_vcomp_atomic_or_i1, 0x11, 0x77, 0x77 },
1513 { p_vcomp_atomic_sub_i1, 0x11, 0x77, -0x66 },
1514 { p_vcomp_atomic_xor_i1, 0x11, 0x77, 0x66 },
1516 struct
1518 void (CDECL *func)(char *, unsigned int);
1519 char v1;
1520 unsigned int v2;
1521 char expected;
1523 tests2[] =
1525 { p_vcomp_atomic_shl_i1, 0x11, 3, -0x78 },
1526 { p_vcomp_atomic_shl_i1, -0x11, 3, 0x78 },
1527 { p_vcomp_atomic_shr_i1, 0x11, 3, 2 },
1528 { p_vcomp_atomic_shr_i1, -0x11, 3, -3 },
1529 #if defined(__i386__) || defined(__x86_64__)
1530 { p_vcomp_atomic_shl_i1, 0x11, 11, 0 },
1531 { p_vcomp_atomic_shl_i1, 0x11, 19, 0 },
1532 { p_vcomp_atomic_shl_i1, 0x11, 35, -0x78 },
1533 { p_vcomp_atomic_shr_i1, 0x11, 11, 0 },
1534 { p_vcomp_atomic_shr_i1, 0x11, 19, 0 },
1535 { p_vcomp_atomic_shr_i1, 0x11, 35, 2 },
1536 #endif
1538 struct
1540 void (CDECL *func)(unsigned char *, unsigned char);
1541 unsigned char v1, v2, expected;
1543 tests3[] =
1545 { p_vcomp_atomic_div_ui1, 0x77, 0x11, 7 },
1546 { p_vcomp_atomic_div_ui1, 0x77, 0xef, 0 },
1548 struct
1550 void (CDECL *func)(unsigned char *, unsigned int);
1551 unsigned char v1;
1552 unsigned int v2;
1553 unsigned char expected;
1555 tests4[] =
1557 { p_vcomp_atomic_shr_ui1, 0x11, 3, 2 },
1558 { p_vcomp_atomic_shr_ui1, 0xef, 3, 0x1d },
1559 #if defined(__i386__) || defined(__x86_64__)
1560 { p_vcomp_atomic_shr_ui1, 0x11, 11, 0 },
1561 { p_vcomp_atomic_shr_ui1, 0x11, 19, 0 },
1562 { p_vcomp_atomic_shr_ui1, 0x11, 35, 2 },
1563 #endif
1565 int i;
1567 for (i = 0; i < sizeof(tests1)/sizeof(tests1[0]); i++)
1569 char val = tests1[i].v1;
1570 tests1[i].func(&val, tests1[i].v2);
1571 ok(val == tests1[i].expected, "test %d: expected val == %d, got %d\n", i, tests1[i].expected, val);
1573 for (i = 0; i < sizeof(tests2)/sizeof(tests2[0]); i++)
1575 char val = tests2[i].v1;
1576 tests2[i].func(&val, tests2[i].v2);
1577 ok(val == tests2[i].expected, "test %d: expected val == %d, got %d\n", i, tests2[i].expected, val);
1579 for (i = 0; i < sizeof(tests3)/sizeof(tests3[0]); i++)
1581 unsigned char val = tests3[i].v1;
1582 tests3[i].func(&val, tests3[i].v2);
1583 ok(val == tests3[i].expected, "test %d: expected val == %u, got %u\n", i, tests3[i].expected, val);
1585 for (i = 0; i < sizeof(tests4)/sizeof(tests4[0]); i++)
1587 unsigned char val = tests4[i].v1;
1588 tests4[i].func(&val, tests4[i].v2);
1589 ok(val == tests4[i].expected, "test %d: expected val == %u, got %u\n", i, tests4[i].expected, val);
1593 static void test_atomic_integer16(void)
1595 struct
1597 void (CDECL *func)(short *, short);
1598 short v1, v2, expected;
1600 tests1[] =
1602 { p_vcomp_atomic_add_i2, 0x1122, 0x7766, -0x7778 },
1603 { p_vcomp_atomic_and_i2, 0x1122, 0x7766, 0x1122 },
1604 { p_vcomp_atomic_div_i2, 0x7766, 0x1122, 6 },
1605 { p_vcomp_atomic_div_i2, 0x7766, -0x1122, -6 },
1606 { p_vcomp_atomic_mul_i2, 0x1122, 0x7766, -0x5e74 },
1607 { p_vcomp_atomic_mul_i2, 0x1122, -0x7766, 0x5e74 },
1608 { p_vcomp_atomic_or_i2, 0x1122, 0x7766, 0x7766 },
1609 { p_vcomp_atomic_sub_i2, 0x1122, 0x7766, -0x6644 },
1610 { p_vcomp_atomic_xor_i2, 0x1122, 0x7766, 0x6644 },
1612 struct
1614 void (CDECL *func)(short *, unsigned int);
1615 short v1;
1616 unsigned int v2;
1617 short expected;
1619 tests2[] =
1621 { p_vcomp_atomic_shl_i2, 0x1122, 3, -0x76f0 },
1622 { p_vcomp_atomic_shl_i2, -0x1122, 3, 0x76f0 },
1623 { p_vcomp_atomic_shr_i2, 0x1122, 3, 0x224 },
1624 { p_vcomp_atomic_shr_i2, -0x1122, 3, -0x225 },
1625 #if defined(__i386__) || defined(__x86_64__)
1626 { p_vcomp_atomic_shl_i2, 0x1122, 19, 0 },
1627 { p_vcomp_atomic_shl_i2, 0x1122, 35, -0x76f0 },
1628 { p_vcomp_atomic_shr_i2, 0x1122, 19, 0 },
1629 { p_vcomp_atomic_shr_i2, 0x1122, 35, 0x224 },
1630 #endif
1632 struct
1634 void (CDECL *func)(unsigned short *, unsigned short);
1635 unsigned short v1, v2, expected;
1637 tests3[] =
1639 { p_vcomp_atomic_div_ui2, 0x7766, 0x1122, 6 },
1640 { p_vcomp_atomic_div_ui2, 0x7766, 0xeede, 0 },
1642 struct
1644 void (CDECL *func)(unsigned short *, unsigned int);
1645 unsigned short v1;
1646 unsigned int v2;
1647 unsigned short expected;
1649 tests4[] =
1651 { p_vcomp_atomic_shr_ui2, 0x1122, 3, 0x224 },
1652 { p_vcomp_atomic_shr_ui2, 0xeede, 3, 0x1ddb },
1653 #if defined(__i386__) || defined(__x86_64__)
1654 { p_vcomp_atomic_shr_ui2, 0x1122, 19, 0 },
1655 { p_vcomp_atomic_shr_ui2, 0x1122, 35, 0x224 },
1656 #endif
1658 int i;
1660 for (i = 0; i < sizeof(tests1)/sizeof(tests1[0]); i++)
1662 short val = tests1[i].v1;
1663 tests1[i].func(&val, tests1[i].v2);
1664 ok(val == tests1[i].expected, "test %d: expected val == %d, got %d\n", i, tests1[i].expected, val);
1666 for (i = 0; i < sizeof(tests2)/sizeof(tests2[0]); i++)
1668 short val = tests2[i].v1;
1669 tests2[i].func(&val, tests2[i].v2);
1670 ok(val == tests2[i].expected, "test %d: expected val == %d, got %d\n", i, tests2[i].expected, val);
1672 for (i = 0; i < sizeof(tests3)/sizeof(tests3[0]); i++)
1674 unsigned short val = tests3[i].v1;
1675 tests3[i].func(&val, tests3[i].v2);
1676 ok(val == tests3[i].expected, "test %d: expected val == %u, got %u\n", i, tests3[i].expected, val);
1678 for (i = 0; i < sizeof(tests4)/sizeof(tests4[0]); i++)
1680 unsigned short val = tests4[i].v1;
1681 tests4[i].func(&val, tests4[i].v2);
1682 ok(val == tests4[i].expected, "test %d: expected val == %u, got %u\n", i, tests4[i].expected, val);
1686 static void test_atomic_integer32(void)
1688 struct
1690 void (CDECL *func)(int *, int);
1691 int v1, v2, expected;
1693 tests1[] =
1695 { p_vcomp_atomic_add_i4, 0x11223344, 0x77665544, -0x77777778 },
1696 { p_vcomp_atomic_and_i4, 0x11223344, 0x77665544, 0x11221144 },
1697 { p_vcomp_atomic_div_i4, 0x77665544, 0x11223344, 6 },
1698 { p_vcomp_atomic_div_i4, 0x77665544, -0x11223344, -6 },
1699 { p_vcomp_atomic_mul_i4, 0x11223344, 0x77665544, -0xecccdf0 },
1700 { p_vcomp_atomic_mul_i4, 0x11223344, -0x77665544, 0xecccdf0 },
1701 { p_vcomp_atomic_or_i4, 0x11223344, 0x77665544, 0x77667744 },
1702 { p_vcomp_atomic_shl_i4, 0x11223344, 3, -0x76ee65e0 },
1703 { p_vcomp_atomic_shl_i4, -0x11223344, 3, 0x76ee65e0 },
1704 { p_vcomp_atomic_shr_i4, 0x11223344, 3, 0x2244668 },
1705 { p_vcomp_atomic_shr_i4, -0x11223344, 3, -0x2244669 },
1706 { p_vcomp_atomic_sub_i4, 0x11223344, 0x77665544, -0x66442200 },
1707 { p_vcomp_atomic_xor_i4, 0x11223344, 0x77665544, 0x66446600 },
1708 #if defined(__i386__) || defined(__x86_64__)
1709 { p_vcomp_atomic_shl_i4, 0x11223344, 35, -0x76ee65e0 },
1710 { p_vcomp_atomic_shr_i4, 0x11223344, 35, 0x2244668 },
1711 #endif
1713 struct
1715 void (CDECL *func)(unsigned int *, unsigned int);
1716 unsigned int v1, v2, expected;
1718 tests2[] =
1720 { p_vcomp_atomic_div_ui4, 0x77665544, 0x11223344, 6 },
1721 { p_vcomp_atomic_div_ui4, 0x77665544, 0xeeddccbc, 0 },
1722 { p_vcomp_atomic_shr_ui4, 0x11223344, 3, 0x2244668 },
1723 { p_vcomp_atomic_shr_ui4, 0xeeddccbc, 3, 0x1ddbb997 },
1724 #if defined(__i386__) || defined(__x86_64__)
1725 { p_vcomp_atomic_shr_ui4, 0x11223344, 35, 0x2244668 },
1726 #endif
1728 int i;
1730 for (i = 0; i < sizeof(tests1)/sizeof(tests1[0]); i++)
1732 int val = tests1[i].v1;
1733 tests1[i].func(&val, tests1[i].v2);
1734 ok(val == tests1[i].expected, "test %d: expected val == %d, got %d\n", i, tests1[i].expected, val);
1736 for (i = 0; i < sizeof(tests2)/sizeof(tests2[0]); i++)
1738 unsigned int val = tests2[i].v1;
1739 tests2[i].func(&val, tests2[i].v2);
1740 ok(val == tests2[i].expected, "test %d: expected val == %u, got %u\n", i, tests2[i].expected, val);
1744 static void test_atomic_integer64(void)
1746 struct
1748 void (CDECL *func)(LONG64 *, LONG64);
1749 LONG64 v1, v2, expected;
1751 tests1[] =
1753 { p_vcomp_atomic_add_i8, 0x1122334455667788, 0x7766554433221100, -0x7777777777777778 },
1754 { p_vcomp_atomic_and_i8, 0x1122334455667788, 0x7766554433221100, 0x1122114411221100 },
1755 { p_vcomp_atomic_div_i8, 0x7766554433221100, 0x1122334455667788, 6 },
1756 { p_vcomp_atomic_div_i8, 0x7766554433221100, -0x1122334455667788, -6 },
1757 { p_vcomp_atomic_mul_i8, 0x1122334455667788, 0x7766554433221100, 0x3e963337c6000800 },
1758 { p_vcomp_atomic_mul_i8, 0x1122334455667788, -0x7766554433221100, 0xc169ccc839fff800 },
1759 { p_vcomp_atomic_or_i8, 0x1122334455667788, 0x7766554433221100, 0x7766774477667788 },
1760 { p_vcomp_atomic_sub_i8, 0x1122334455667788, 0x7766554433221100, -0x664421ffddbb9978 },
1761 { p_vcomp_atomic_xor_i8, 0x1122334455667788, 0x7766554433221100, 0x6644660066446688 },
1763 struct
1765 void (CDECL *func)(LONG64 *, unsigned int);
1766 LONG64 v1;
1767 unsigned int v2;
1768 LONG64 expected;
1769 BOOL todo;
1771 tests2[] =
1773 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 3, -0x76ee65dd54cc43c0 },
1774 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 60, 0x8000000000000000 },
1775 { p_vcomp_atomic_shl_i8, -0x1122334455667788, 3, 0x76ee65dd54cc43c0 },
1776 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 3, 0x22446688aaccef1 },
1777 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 60, 1 },
1778 { p_vcomp_atomic_shr_i8, -0x1122334455667788, 3, -0x22446688aaccef1 },
1779 #if defined(__i386__)
1780 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 64, 0, TRUE },
1781 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 67, 0, TRUE },
1782 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 64, 0, TRUE },
1783 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 67, 0, TRUE },
1784 #elif defined(__x86_64__)
1785 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 64, 0x1122334455667788 },
1786 { p_vcomp_atomic_shl_i8, 0x1122334455667788, 67, -0x76ee65dd54cc43c0 },
1787 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 64, 0x1122334455667788 },
1788 { p_vcomp_atomic_shr_i8, 0x1122334455667788, 67, 0x22446688aaccef1 },
1789 #endif
1791 struct
1793 void (CDECL *func)(ULONG64 *, ULONG64);
1794 ULONG64 v1, v2, expected;
1796 tests3[] =
1798 { p_vcomp_atomic_div_ui8, 0x7766554455667788, 0x1122334433221100, 6 },
1799 { p_vcomp_atomic_div_ui8, 0x7766554455667788, 0xeeddccbbaa998878, 0 },
1801 struct
1803 void (CDECL *func)(ULONG64 *, unsigned int);
1804 ULONG64 v1;
1805 unsigned int v2;
1806 ULONG64 expected;
1807 BOOL todo;
1809 tests4[] =
1811 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 3, 0x22446688aaccef1 },
1812 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 60, 1 },
1813 { p_vcomp_atomic_shr_ui8, 0xeeddccbbaa998878, 3, 0x1ddbb9977553310f },
1814 #if defined(__i386__)
1815 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 64, 0, TRUE },
1816 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 67, 0, TRUE },
1817 #elif defined(__x86_64__)
1818 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 64, 0x1122334455667788 },
1819 { p_vcomp_atomic_shr_ui8, 0x1122334455667788, 67, 0x22446688aaccef1 },
1820 #endif
1822 int i;
1824 for (i = 0; i < sizeof(tests1)/sizeof(tests1[0]); i++)
1826 LONG64 val = tests1[i].v1;
1827 tests1[i].func(&val, tests1[i].v2);
1828 ok(val == tests1[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
1830 for (i = 0; i < sizeof(tests2)/sizeof(tests2[0]); i++)
1832 LONG64 val = tests2[i].v1;
1833 tests2[i].func(&val, tests2[i].v2);
1834 todo_wine_if(tests2[i].todo)
1835 ok(val == tests2[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
1837 for (i = 0; i < sizeof(tests3)/sizeof(tests3[0]); i++)
1839 ULONG64 val = tests3[i].v1;
1840 tests3[i].func(&val, tests3[i].v2);
1841 ok(val == tests3[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
1843 for (i = 0; i < sizeof(tests4)/sizeof(tests4[0]); i++)
1845 ULONG64 val = tests4[i].v1;
1846 tests4[i].func(&val, tests4[i].v2);
1847 todo_wine_if(tests4[i].todo)
1848 ok(val == tests4[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
1852 static void test_atomic_float(void)
1854 struct
1856 void (CDECL *func)(float *, float);
1857 float v1, v2, expected;
1859 tests[] =
1861 { p_vcomp_atomic_add_r4, 42.0, 17.0, 42.0 + 17.0 },
1862 { p_vcomp_atomic_div_r4, 42.0, 17.0, 42.0 / 17.0 },
1863 { p_vcomp_atomic_mul_r4, 42.0, 17.0, 42.0 * 17.0 },
1864 { p_vcomp_atomic_sub_r4, 42.0, 17.0, 42.0 - 17.0 },
1866 int i;
1868 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1870 float val = tests[i].v1;
1871 tests[i].func(&val, tests[i].v2);
1872 ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001,
1873 "test %d: expected val == %f, got %f\n", i, tests[i].expected, val);
1877 static void test_atomic_double(void)
1879 struct
1881 void (CDECL *func)(double *, double);
1882 double v1, v2, expected;
1884 tests[] =
1886 { p_vcomp_atomic_add_r8, 42.0, 17.0, 42.0 + 17.0 },
1887 { p_vcomp_atomic_div_r8, 42.0, 17.0, 42.0 / 17.0 },
1888 { p_vcomp_atomic_mul_r8, 42.0, 17.0, 42.0 * 17.0 },
1889 { p_vcomp_atomic_sub_r8, 42.0, 17.0, 42.0 - 17.0 },
1891 int i;
1893 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1895 double val = tests[i].v1;
1896 tests[i].func(&val, tests[i].v2);
1897 ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001,
1898 "test %d: expected val == %f, got %f\n", i, tests[i].expected, val);
1902 static void test_reduction_integer8(void)
1904 static const struct
1906 unsigned int flags;
1907 char v1, v2, expected;
1909 tests[] =
1911 { 0x000, 0x11, 0x77, -0x78 },
1912 { VCOMP_REDUCTION_FLAGS_ADD, 0x11, 0x77, -0x78 },
1913 { VCOMP_REDUCTION_FLAGS_MUL, 0x11, 0x77, -0x19 },
1914 { VCOMP_REDUCTION_FLAGS_MUL, 0x11, -0x77, 0x19 },
1915 { VCOMP_REDUCTION_FLAGS_AND, 0x11, 0x77, 0x11 },
1916 { VCOMP_REDUCTION_FLAGS_OR, 0x11, 0x77, 0x77 },
1917 { VCOMP_REDUCTION_FLAGS_XOR, 0x11, 0x77, 0x66 },
1918 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 },
1919 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 },
1920 { 0x800, 0, 2, 1 },
1921 { 0x900, 0, 2, 1 },
1922 { 0xa00, 0, 2, 1 },
1923 { 0xb00, 0, 2, 1 },
1924 { 0xc00, 0, 2, 1 },
1925 { 0xd00, 0, 2, 1 },
1926 { 0xe00, 0, 2, 1 },
1927 { 0xf00, 0, 2, 1 },
1929 int i;
1931 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1933 char val = tests[i].v1;
1934 p_vcomp_reduction_i1(tests[i].flags, &val, tests[i].v2);
1935 ok(val == tests[i].expected, "test %d: expected val == %d, got %d\n", i, tests[i].expected, val);
1937 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1939 unsigned char val = tests[i].v1;
1940 p_vcomp_reduction_u1(tests[i].flags, &val, tests[i].v2);
1941 ok(val == (unsigned char)tests[i].expected,
1942 "test %d: expected val == %u, got %u\n", i, (unsigned char)tests[i].expected, val);
1946 static void test_reduction_integer16(void)
1948 static const struct
1950 unsigned int flags;
1951 short v1, v2, expected;
1953 tests[] =
1955 { 0x000, 0x1122, 0x7766, -0x7778 },
1956 { VCOMP_REDUCTION_FLAGS_ADD, 0x1122, 0x7766, -0x7778 },
1957 { VCOMP_REDUCTION_FLAGS_MUL, 0x1122, 0x7766, -0x5e74 },
1958 { VCOMP_REDUCTION_FLAGS_MUL, 0x1122, -0x7766, 0x5e74 },
1959 { VCOMP_REDUCTION_FLAGS_AND, 0x1122, 0x7766, 0x1122 },
1960 { VCOMP_REDUCTION_FLAGS_OR, 0x1122, 0x7766, 0x7766 },
1961 { VCOMP_REDUCTION_FLAGS_XOR, 0x1122, 0x7766, 0x6644 },
1962 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 },
1963 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 },
1964 { 0x800, 0, 2, 1 },
1965 { 0x900, 0, 2, 1 },
1966 { 0xa00, 0, 2, 1 },
1967 { 0xb00, 0, 2, 1 },
1968 { 0xc00, 0, 2, 1 },
1969 { 0xd00, 0, 2, 1 },
1970 { 0xe00, 0, 2, 1 },
1971 { 0xf00, 0, 2, 1 },
1973 int i;
1975 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1977 short val = tests[i].v1;
1978 p_vcomp_reduction_i2(tests[i].flags, &val, tests[i].v2);
1979 ok(val == tests[i].expected, "test %d: expected val == %d, got %d\n", i, tests[i].expected, val);
1981 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
1983 unsigned short val = tests[i].v1;
1984 p_vcomp_reduction_u2(tests[i].flags, &val, tests[i].v2);
1985 ok(val == (unsigned short)tests[i].expected,
1986 "test %d: expected val == %u, got %u\n", i, (unsigned short)tests[i].expected, val);
1990 static void CDECL reduction_cb(int *a, int *b)
1992 p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD, a, 1);
1993 p_vcomp_reduction_i4(VCOMP_REDUCTION_FLAGS_ADD | 0xfffff0ff, b, 1);
1996 static void test_reduction_integer32(void)
1998 static const struct
2000 unsigned int flags;
2001 int v1, v2, expected;
2003 tests[] =
2005 { 0x000, 0x11223344, 0x77665544, -0x77777778 },
2006 { VCOMP_REDUCTION_FLAGS_ADD, 0x11223344, 0x77665544, -0x77777778 },
2007 { VCOMP_REDUCTION_FLAGS_MUL, 0x11223344, 0x77665544, -0xecccdf0 },
2008 { VCOMP_REDUCTION_FLAGS_MUL, 0x11223344, -0x77665544, 0xecccdf0 },
2009 { VCOMP_REDUCTION_FLAGS_AND, 0x11223344, 0x77665544, 0x11221144 },
2010 { VCOMP_REDUCTION_FLAGS_OR, 0x11223344, 0x77665544, 0x77667744 },
2011 { VCOMP_REDUCTION_FLAGS_XOR, 0x11223344, 0x77665544, 0x66446600 },
2012 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0, 0, 0 },
2013 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0, 2, 0 },
2014 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 0, 0 },
2015 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 },
2016 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2, 0, 0 },
2017 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2, 2, 1 },
2018 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 0, 0 },
2019 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 },
2020 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1, 0, 1 },
2021 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1, 2, 1 },
2022 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2, 0, 2 },
2023 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2, 2, 2 },
2024 { 0x800, 0, 2, 1 },
2025 { 0x900, 0, 2, 1 },
2026 { 0xa00, 0, 2, 1 },
2027 { 0xb00, 0, 2, 1 },
2028 { 0xc00, 0, 2, 1 },
2029 { 0xd00, 0, 2, 1 },
2030 { 0xe00, 0, 2, 1 },
2031 { 0xf00, 0, 2, 1 },
2033 int max_threads = pomp_get_max_threads();
2034 int a, b, i;
2036 a = b = 42;
2037 reduction_cb(&a, &b);
2038 ok(a == 43, "expected a == 43, got %d\n", a);
2039 ok(b == 43, "expected b == 43, got %d\n", b);
2041 for (i = 1; i <= 4; i++)
2043 pomp_set_num_threads(i);
2045 a = b = 42;
2046 p_vcomp_fork(TRUE, 2, reduction_cb, &a, &b);
2047 ok(a == 42 + i, "expected a == %d, got %d\n", 42 + i, a);
2048 ok(b == 42 + i, "expected b == %d, got %d\n", 42 + i, b);
2050 a = b = 42;
2051 p_vcomp_fork(FALSE, 2, reduction_cb, &a, &b);
2052 ok(a == 43, "expected a == 43, got %d\n", a);
2053 ok(b == 43, "expected b == 43, got %d\n", b);
2056 pomp_set_num_threads(max_threads);
2058 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2060 int val = tests[i].v1;
2061 p_vcomp_reduction_i4(tests[i].flags, &val, tests[i].v2);
2062 ok(val == tests[i].expected, "test %d: expected val == %d, got %d\n", i, tests[i].expected, val);
2064 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2066 unsigned int val = tests[i].v1;
2067 p_vcomp_reduction_u4(tests[i].flags, &val, tests[i].v2);
2068 ok(val == tests[i].expected, "test %d: expected val == %u, got %u\n", i, tests[i].expected, val);
2072 static void test_reduction_integer64(void)
2074 static const struct
2076 unsigned int flags;
2077 LONG64 v1, v2, expected;
2079 tests[] =
2081 { 0x000, 0x1122334455667788, 0x7766554433221100, -0x7777777777777778 },
2082 { VCOMP_REDUCTION_FLAGS_ADD, 0x1122334455667788, 0x7766554433221100, -0x7777777777777778 },
2083 { VCOMP_REDUCTION_FLAGS_MUL, 0x1122334455667788, 0x7766554433221100, 0x3e963337c6000800 },
2084 { VCOMP_REDUCTION_FLAGS_MUL, 0x1122334455667788, -0x7766554433221100, 0xc169ccc839fff800 },
2085 { VCOMP_REDUCTION_FLAGS_AND, 0x1122334455667788, 0x7766554433221100, 0x1122114411221100 },
2086 { VCOMP_REDUCTION_FLAGS_OR, 0x1122334455667788, 0x7766554433221100, 0x7766774477667788 },
2087 { VCOMP_REDUCTION_FLAGS_XOR, 0x1122334455667788, 0x7766554433221100, 0x6644660066446688 },
2088 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1, 2, 1 },
2089 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0, 2, 1 },
2090 { 0x800, 0, 2, 1 },
2091 { 0x900, 0, 2, 1 },
2092 { 0xa00, 0, 2, 1 },
2093 { 0xb00, 0, 2, 1 },
2094 { 0xc00, 0, 2, 1 },
2095 { 0xd00, 0, 2, 1 },
2096 { 0xe00, 0, 2, 1 },
2097 { 0xf00, 0, 2, 1 },
2099 int i;
2101 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2103 LONG64 val = tests[i].v1;
2104 p_vcomp_reduction_i8(tests[i].flags, &val, tests[i].v2);
2105 ok(val == tests[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
2107 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2109 ULONG64 val = tests[i].v1;
2110 p_vcomp_reduction_u8(tests[i].flags, &val, tests[i].v2);
2111 ok(val == tests[i].expected, "test %d: unexpectedly got %s\n", i, debugstr_longlong(val));
2115 static void test_reduction_float_double(void)
2117 static const struct
2119 unsigned int flags;
2120 float v1, v2, expected;
2122 tests[] =
2124 { 0x000, 42.0, 17.0, 42.0 + 17.0 },
2125 { VCOMP_REDUCTION_FLAGS_ADD, 42.0, 17.0, 42.0 + 17.0 },
2126 { VCOMP_REDUCTION_FLAGS_MUL, 42.0, 17.0, 42.0 * 17.0 },
2127 { 0x300, 0.0, 2.0, 1.0 },
2128 { 0x400, 0.0, 2.0, 1.0 },
2129 { 0x500, 0.0, 2.0, 1.0 },
2130 { VCOMP_REDUCTION_FLAGS_BOOL_AND, -0.0, 1.0, 0.0 },
2131 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0.0, 0.0, 0.0 },
2132 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 0.0, 2.0, 0.0 },
2133 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, -0.0, 0.0 },
2134 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 0.0, 0.0 },
2135 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 1.0e-5, 1.0 },
2136 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 1.0, 2.0, 1.0 },
2137 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2.0, 0.0, 0.0 },
2138 { VCOMP_REDUCTION_FLAGS_BOOL_AND, 2.0, 2.0, 1.0 },
2139 { VCOMP_REDUCTION_FLAGS_BOOL_OR, -0.0, 0.0, 0.0 },
2140 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, -0.0, 0.0 },
2141 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 0.0, 0.0 },
2142 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 1.0e-5, 1.0 },
2143 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 0.0, 2.0, 1.0 },
2144 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1.0, 0.0, 1.0 },
2145 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 1.0, 2.0, 1.0 },
2146 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2.0, 0.0, 2.0 },
2147 { VCOMP_REDUCTION_FLAGS_BOOL_OR, 2.0, 2.0, 2.0 },
2148 { 0x800, 0.0, 2.0, 1.0 },
2149 { 0x900, 0.0, 2.0, 1.0 },
2150 { 0xa00, 0.0, 2.0, 1.0 },
2151 { 0xb00, 0.0, 2.0, 1.0 },
2152 { 0xc00, 0.0, 2.0, 1.0 },
2153 { 0xd00, 0.0, 2.0, 1.0 },
2154 { 0xe00, 0.0, 2.0, 1.0 },
2155 { 0xf00, 0.0, 2.0, 1.0 },
2157 int i;
2159 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2161 float val = tests[i].v1;
2162 p_vcomp_reduction_r4(tests[i].flags, &val, tests[i].v2);
2163 ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001,
2164 "test %d: expected val == %f, got %f\n", i, tests[i].expected, val);
2166 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
2168 double val = tests[i].v1;
2169 p_vcomp_reduction_r8(tests[i].flags, &val, tests[i].v2);
2170 ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001,
2171 "test %d: expected val == %f, got %f\n", i, tests[i].expected, val);
2175 START_TEST(vcomp)
2177 if (!init_vcomp())
2178 return;
2180 test_omp_get_num_threads(FALSE);
2181 test_omp_get_num_threads(TRUE);
2182 test_vcomp_fork();
2183 test_vcomp_sections_init();
2184 test_vcomp_for_static_simple_init();
2185 test_vcomp_for_static_init();
2186 test_vcomp_for_dynamic_init();
2187 test_vcomp_master_begin();
2188 test_vcomp_single_begin();
2189 test_vcomp_enter_critsect();
2190 test_vcomp_flush();
2191 test_omp_init_lock();
2192 test_omp_init_nest_lock();
2193 test_atomic_integer8();
2194 test_atomic_integer16();
2195 test_atomic_integer32();
2196 test_atomic_integer64();
2197 test_atomic_float();
2198 test_atomic_double();
2199 test_reduction_integer8();
2200 test_reduction_integer16();
2201 test_reduction_integer32();
2202 test_reduction_integer64();
2203 test_reduction_float_double();
2205 release_vcomp();