vcomp/tests: Add tests for atomic float functions.
[wine.git] / dlls / vcomp / tests / vcomp.c
blob5be6965ef83ac3dcb2632ef8bac67b433dbe6499
/*
 * Unit test suite for vcomp
 *
 * Copyright 2012 Dan Kegel
 * Copyright 2015 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
22 #include "wine/test.h"
24 static char vcomp_manifest_file[MAX_PATH];
25 static HANDLE vcomp_actctx_hctx;
26 static ULONG_PTR vcomp_actctx_cookie;
27 static HMODULE vcomp_handle;
29 static HANDLE (WINAPI *pCreateActCtxA)(ACTCTXA*);
30 static BOOL (WINAPI *pActivateActCtx)(HANDLE, ULONG_PTR*);
31 static BOOL (WINAPI *pDeactivateActCtx)(DWORD, ULONG_PTR);
32 static VOID (WINAPI *pReleaseActCtx)(HANDLE);
34 static void (CDECL *p_vcomp_atomic_add_i4)(int *dest, int val);
35 static void (CDECL *p_vcomp_atomic_add_r4)(float *dest, float val);
36 static void (CDECL *p_vcomp_atomic_and_i4)(int *dest, int val);
37 static void (CDECL *p_vcomp_atomic_div_i4)(int *dest, int val);
38 static void (CDECL *p_vcomp_atomic_div_r4)(float *dest, float val);
39 static void (CDECL *p_vcomp_atomic_div_ui4)(unsigned int *dest, unsigned int val);
40 static void (CDECL *p_vcomp_atomic_mul_i4)(int *dest, int val);
41 static void (CDECL *p_vcomp_atomic_mul_r4)(float *dest, float val);
42 static void (CDECL *p_vcomp_atomic_or_i4)(int *dest, int val);
43 static void (CDECL *p_vcomp_atomic_shl_i4)(int *dest, int val);
44 static void (CDECL *p_vcomp_atomic_shr_i4)(int *dest, int val);
45 static void (CDECL *p_vcomp_atomic_shr_ui4)(unsigned int *dest, unsigned int val);
46 static void (CDECL *p_vcomp_atomic_sub_i4)(int *dest, int val);
47 static void (CDECL *p_vcomp_atomic_sub_r4)(float *dest, float val);
48 static void (CDECL *p_vcomp_atomic_xor_i4)(int *dest, int val);
49 static void (CDECL *p_vcomp_barrier)(void);
50 static void (CDECL *p_vcomp_for_static_end)(void);
51 static void (CDECL *p_vcomp_for_static_init)(int first, int last, int step, int chunksize, unsigned int *loops,
52 int *begin, int *end, int *next, int *lastchunk);
53 static void (CDECL *p_vcomp_for_static_simple_init)(unsigned int first, unsigned int last, int step,
54 BOOL increment, unsigned int *begin, unsigned int *end);
55 static void (WINAPIV *p_vcomp_fork)(BOOL ifval, int nargs, void *wrapper, ...);
56 static void (CDECL *p_vcomp_sections_init)(int n);
57 static int (CDECL *p_vcomp_sections_next)(void);
58 static void (CDECL *p_vcomp_set_num_threads)(int num_threads);
59 static int (CDECL *pomp_get_max_threads)(void);
60 static int (CDECL *pomp_get_nested)(void);
61 static int (CDECL *pomp_get_num_threads)(void);
62 static int (CDECL *pomp_get_thread_num)(void);
63 static int (CDECL *pomp_in_parallel)(void);
64 static void (CDECL *pomp_set_nested)(int nested);
65 static void (CDECL *pomp_set_num_threads)(int num_threads);
/* processorArchitecture value pasted into the manifest below. */
#if defined(__i386__)
# define ARCH "x86"
#elif defined(__x86_64__)
# define ARCH "amd64"
#else
# define ARCH "none"
#endif

/* Manifest declaring a dependency on the Microsoft.VC80.OpenMP assembly. */
static const char vcomp_manifest[] =
    "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
    "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\" manifestVersion=\"1.0\">\n"
    " <assemblyIdentity\n"
    " type=\"win32\"\n"
    " name=\"Wine.vcomp.Test\"\n"
    " version=\"1.0.0.0\"\n"
    " processorArchitecture=\"" ARCH "\"\n"
    " />\n"
    "<description>Wine vcomp test suite</description>\n"
    "<dependency>\n"
    " <dependentAssembly>\n"
    " <assemblyIdentity\n"
    " type=\"win32\"\n"
    " name=\"Microsoft.VC80.OpenMP\"\n"
    " version=\"8.0.50608.0\"\n"
    " processorArchitecture=\"" ARCH "\"\n"
    " publicKeyToken=\"1fc8b3b9a1e18e3b\"\n"
    " />\n"
    " </dependentAssembly>\n"
    "</dependency>\n"
    "</assembly>\n";

#undef ARCH
100 static void create_vcomp_manifest(void)
102 char temp_path[MAX_PATH];
103 HMODULE kernel32;
104 DWORD written;
105 ACTCTXA ctx;
106 HANDLE file;
108 kernel32 = GetModuleHandleA("kernel32.dll");
109 pCreateActCtxA = (void *)GetProcAddress(kernel32, "CreateActCtxA");
110 pActivateActCtx = (void *)GetProcAddress(kernel32, "ActivateActCtx");
111 pDeactivateActCtx = (void *)GetProcAddress(kernel32, "DeactivateActCtx");
112 pReleaseActCtx = (void *)GetProcAddress(kernel32, "ReleaseActCtx");
113 if (!pCreateActCtxA) return;
115 if (!GetTempPathA(sizeof(temp_path), temp_path) ||
116 !GetTempFileNameA(temp_path, "vcomp", 0, vcomp_manifest_file))
118 ok(0, "failed to create manifest file\n");
119 return;
122 file = CreateFileA(vcomp_manifest_file, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, 0, NULL);
123 if (file == INVALID_HANDLE_VALUE)
125 ok(0, "failed to open manifest file\n");
126 return;
129 if (!WriteFile(file, vcomp_manifest, sizeof(vcomp_manifest) - 1, &written, NULL))
130 written = 0;
131 CloseHandle(file);
133 if (written != sizeof(vcomp_manifest) - 1)
135 ok(0, "failed to write manifest file\n");
136 DeleteFileA(vcomp_manifest_file);
137 return;
140 memset(&ctx, 0, sizeof(ctx));
141 ctx.cbSize = sizeof(ctx);
142 ctx.lpSource = vcomp_manifest_file;
143 vcomp_actctx_hctx = pCreateActCtxA(&ctx);
144 if (!vcomp_actctx_hctx)
146 ok(0, "failed to create activation context\n");
147 DeleteFileA(vcomp_manifest_file);
148 return;
151 if (!pActivateActCtx(vcomp_actctx_hctx, &vcomp_actctx_cookie))
153 win_skip("failed to activate context\n");
154 pReleaseActCtx(vcomp_actctx_hctx);
155 DeleteFileA(vcomp_manifest_file);
156 vcomp_actctx_hctx = NULL;
160 static void release_vcomp(void)
162 if (vcomp_handle)
163 FreeLibrary(vcomp_handle);
165 if (vcomp_actctx_hctx)
167 pDeactivateActCtx(0, vcomp_actctx_cookie);
168 pReleaseActCtx(vcomp_actctx_hctx);
169 DeleteFileA(vcomp_manifest_file);
173 #define VCOMP_GET_PROC(func) \
174 do \
176 p ## func = (void *)GetProcAddress(vcomp_handle, #func); \
177 if (!p ## func) trace("Failed to get address for %s\n", #func); \
179 while (0)
181 static BOOL init_vcomp(void)
183 create_vcomp_manifest();
185 vcomp_handle = LoadLibraryA("vcomp.dll");
186 if (!vcomp_handle)
188 win_skip("vcomp.dll not installed\n");
189 release_vcomp();
190 return FALSE;
193 VCOMP_GET_PROC(_vcomp_atomic_add_i4);
194 VCOMP_GET_PROC(_vcomp_atomic_add_r4);
195 VCOMP_GET_PROC(_vcomp_atomic_and_i4);
196 VCOMP_GET_PROC(_vcomp_atomic_div_i4);
197 VCOMP_GET_PROC(_vcomp_atomic_div_r4);
198 VCOMP_GET_PROC(_vcomp_atomic_div_ui4);
199 VCOMP_GET_PROC(_vcomp_atomic_mul_i4);
200 VCOMP_GET_PROC(_vcomp_atomic_mul_r4);
201 VCOMP_GET_PROC(_vcomp_atomic_or_i4);
202 VCOMP_GET_PROC(_vcomp_atomic_shl_i4);
203 VCOMP_GET_PROC(_vcomp_atomic_shr_i4);
204 VCOMP_GET_PROC(_vcomp_atomic_shr_ui4);
205 VCOMP_GET_PROC(_vcomp_atomic_sub_i4);
206 VCOMP_GET_PROC(_vcomp_atomic_sub_r4);
207 VCOMP_GET_PROC(_vcomp_atomic_xor_i4);
208 VCOMP_GET_PROC(_vcomp_barrier);
209 VCOMP_GET_PROC(_vcomp_for_static_end);
210 VCOMP_GET_PROC(_vcomp_for_static_init);
211 VCOMP_GET_PROC(_vcomp_for_static_simple_init);
212 VCOMP_GET_PROC(_vcomp_fork);
213 VCOMP_GET_PROC(_vcomp_sections_init);
214 VCOMP_GET_PROC(_vcomp_sections_next);
215 VCOMP_GET_PROC(_vcomp_set_num_threads);
216 VCOMP_GET_PROC(omp_get_max_threads);
217 VCOMP_GET_PROC(omp_get_nested);
218 VCOMP_GET_PROC(omp_get_num_threads);
219 VCOMP_GET_PROC(omp_get_thread_num);
220 VCOMP_GET_PROC(omp_in_parallel);
221 VCOMP_GET_PROC(omp_set_nested);
222 VCOMP_GET_PROC(omp_set_num_threads);
224 return TRUE;
227 #undef VCOMP_GET_PROC
229 static void CDECL num_threads_cb2(int parallel, LONG *count)
231 int is_parallel = pomp_in_parallel();
232 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
234 InterlockedIncrement(count);
237 static void CDECL num_threads_cb(BOOL nested, int parallel, int nested_threads, LONG *count)
239 int is_parallel, num_threads, thread_num;
240 LONG thread_count;
242 InterlockedIncrement(count);
243 p_vcomp_barrier();
245 num_threads = pomp_get_num_threads();
246 ok(num_threads == *count, "expected num_threads == %d, got %d\n", *count, num_threads);
247 thread_num = pomp_get_thread_num();
248 ok(thread_num >= 0 && thread_num < num_threads,
249 "expected thread_num in range [0, %d], got %d\n", num_threads - 1, thread_num);
251 is_parallel = pomp_in_parallel();
252 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
254 thread_count = 0;
255 p_vcomp_fork(TRUE, 2, num_threads_cb2, TRUE, &thread_count);
256 if (nested)
257 ok(thread_count == nested_threads, "expected %d threads, got %d\n", nested_threads, thread_count);
258 else
259 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
261 is_parallel = pomp_in_parallel();
262 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
264 thread_count = 0;
265 p_vcomp_fork(FALSE, 2, num_threads_cb2, parallel, &thread_count);
266 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
268 is_parallel = pomp_in_parallel();
269 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
271 p_vcomp_set_num_threads(4);
272 thread_count = 0;
273 p_vcomp_fork(TRUE, 2, num_threads_cb2, TRUE, &thread_count);
274 if (nested)
275 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
276 else
277 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
279 is_parallel = pomp_in_parallel();
280 ok(is_parallel == parallel, "expected %d, got %d\n", parallel, is_parallel);
283 static void test_omp_get_num_threads(BOOL nested)
285 int is_nested, is_parallel, max_threads, num_threads, thread_num;
286 LONG thread_count;
288 pomp_set_nested(nested);
289 is_nested = pomp_get_nested();
290 ok(is_nested == nested, "expected %d, got %d\n", nested, is_nested);
292 max_threads = pomp_get_max_threads();
293 ok(max_threads >= 1, "expected max_threads >= 1, got %d\n", max_threads);
294 thread_num = pomp_get_thread_num();
295 ok(thread_num == 0, "expected thread_num == 0, got %d\n", thread_num);
297 is_parallel = pomp_in_parallel();
298 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
300 num_threads = pomp_get_num_threads();
301 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
302 thread_count = 0;
303 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, max_threads, &thread_count);
304 ok(thread_count == max_threads, "expected %d threads, got %d\n", max_threads, thread_count);
306 is_parallel = pomp_in_parallel();
307 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
309 num_threads = pomp_get_num_threads();
310 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
311 thread_count = 0;
312 p_vcomp_fork(FALSE, 4, num_threads_cb, TRUE, FALSE, max_threads, &thread_count);
313 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
315 is_parallel = pomp_in_parallel();
316 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
318 pomp_set_num_threads(1);
319 num_threads = pomp_get_num_threads();
320 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
321 thread_count = 0;
322 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 1, &thread_count);
323 ok(thread_count == 1, "expected 1 thread, got %d\n", thread_count);
325 is_parallel = pomp_in_parallel();
326 ok(is_parallel == FALSE, "expected FALSE, got %d\n", is_parallel);
328 pomp_set_num_threads(2);
329 num_threads = pomp_get_num_threads();
330 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
331 thread_count = 0;
332 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 2, &thread_count);
333 ok(thread_count == 2, "expected 2 threads, got %d\n", thread_count);
335 pomp_set_num_threads(4);
336 num_threads = pomp_get_num_threads();
337 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
338 thread_count = 0;
339 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
340 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
342 p_vcomp_set_num_threads(8);
343 num_threads = pomp_get_num_threads();
344 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
345 thread_count = 0;
346 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
347 ok(thread_count == 8, "expected 8 threads, got %d\n", thread_count);
348 thread_count = 0;
349 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
350 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
352 p_vcomp_set_num_threads(0);
353 num_threads = pomp_get_num_threads();
354 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
355 thread_count = 0;
356 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
357 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
359 pomp_set_num_threads(0);
360 num_threads = pomp_get_num_threads();
361 ok(num_threads == 1, "expected num_threads == 1, got %d\n", num_threads);
362 thread_count = 0;
363 p_vcomp_fork(TRUE, 4, num_threads_cb, nested, TRUE, 4, &thread_count);
364 ok(thread_count == 4, "expected 4 threads, got %d\n", thread_count);
366 pomp_set_num_threads(max_threads);
367 pomp_set_nested(FALSE);
370 static void CDECL fork_ptr_cb(LONG *a, LONG *b, LONG *c, LONG *d, LONG *e)
372 InterlockedIncrement(a);
373 InterlockedIncrement(b);
374 InterlockedIncrement(c);
375 InterlockedIncrement(d);
376 InterlockedIncrement(e);
379 static void CDECL fork_uintptr_cb(UINT_PTR a, UINT_PTR b, UINT_PTR c, UINT_PTR d, UINT_PTR e)
381 ok(a == 1, "expected a == 1, got %p\n", (void *)a);
382 ok(b == MAXUINT_PTR - 2, "expected b == MAXUINT_PTR - 2, got %p\n", (void *)b);
383 ok(c == 3, "expected c == 3, got %p\n", (void *)c);
384 ok(d == MAXUINT_PTR - 4, "expected d == MAXUINT_PTR - 4, got %p\n", (void *)d);
385 ok(e == 5, "expected e == 5, got %p\n", (void *)e);
388 static void CDECL fork_float_cb(float a, float b, float c, float d, float e)
390 ok(1.4999 < a && a < 1.5001, "expected a == 1.5, got %f\n", a);
391 ok(2.4999 < b && b < 2.5001, "expected b == 2.5, got %f\n", b);
392 ok(3.4999 < c && c < 3.5001, "expected c == 3.5, got %f\n", c);
393 ok(4.4999 < d && d < 4.5001, "expected d == 4.5, got %f\n", d);
394 ok(5.4999 < e && e < 5.5001, "expected e == 5.5, got %f\n", e);
397 static void test_vcomp_fork(void)
399 LONG a, b, c, d, e;
400 int max_threads = pomp_get_max_threads();
401 pomp_set_num_threads(4);
403 a = 0; b = 1; c = 2; d = 3; e = 4;
404 p_vcomp_fork(FALSE, 5, fork_ptr_cb, &a, &b, &c, &d, &e);
405 ok(a == 1, "expected a == 1, got %d\n", a);
406 ok(b == 2, "expected b == 2, got %d\n", b);
407 ok(c == 3, "expected c == 3, got %d\n", c);
408 ok(d == 4, "expected d == 4, got %d\n", d);
409 ok(e == 5, "expected e == 5, got %d\n", e);
411 a = 0; b = 1; c = 2; d = 3; e = 4;
412 p_vcomp_fork(TRUE, 5, fork_ptr_cb, &a, &b, &c, &d, &e);
413 ok(a == 4, "expected a == 4, got %d\n", a);
414 ok(b == 5, "expected b == 5, got %d\n", b);
415 ok(c == 6, "expected c == 6, got %d\n", c);
416 ok(d == 7, "expected d == 7, got %d\n", d);
417 ok(e == 8, "expected e == 8, got %d\n", e);
419 p_vcomp_fork(TRUE, 5, fork_uintptr_cb, (UINT_PTR)1, (UINT_PTR)(MAXUINT_PTR - 2),
420 (UINT_PTR)3, (UINT_PTR)(MAXUINT_PTR - 4), (UINT_PTR)5);
422 if (sizeof(int) < sizeof(void *))
423 skip("skipping float test on x86_64\n");
424 else
426 void (CDECL *func)(BOOL, int, void *, float, float, float, float, float) = (void *)p_vcomp_fork;
427 func(TRUE, 5, fork_float_cb, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f);
430 pomp_set_num_threads(max_threads);
433 static void CDECL section_cb(LONG *a, LONG *b, LONG *c)
435 int i;
437 p_vcomp_sections_init(20);
438 while ((i = p_vcomp_sections_next()) != -1)
440 InterlockedIncrement(a);
441 Sleep(1);
444 p_vcomp_sections_init(30);
445 while ((i = p_vcomp_sections_next()) != -1)
447 InterlockedIncrement(b);
448 Sleep(1);
451 p_vcomp_sections_init(40);
452 while ((i = p_vcomp_sections_next()) != -1)
454 InterlockedIncrement(c);
455 Sleep(1);
459 static void test_vcomp_sections_init(void)
461 LONG a, b, c;
462 int max_threads = pomp_get_max_threads();
463 int i;
465 if (0)
467 /* calling _vcomp_sections_next without prior _vcomp_sections_init
468 * returns uninitialized memory on Windows. */
469 i = p_vcomp_sections_next();
470 ok(i == -1, "expected -1, got %d\n", i);
473 a = b = c = 0;
474 section_cb(&a, &b, &c);
475 ok(a == 20, "expected a == 20, got %d\n", a);
476 ok(b == 30, "expected b == 30, got %d\n", b);
477 ok(c == 40, "expected c == 40, got %d\n", c);
479 for (i = 1; i <= 4; i++)
481 pomp_set_num_threads(i);
483 a = b = c = 0;
484 p_vcomp_fork(TRUE, 3, section_cb, &a, &b, &c);
485 ok(a == 20, "expected a == 20, got %d\n", a);
486 ok(b == 30, "expected b == 30, got %d\n", b);
487 ok(c == 40, "expected c == 40, got %d\n", c);
489 a = b = c = 0;
490 p_vcomp_fork(FALSE, 3, section_cb, &a, &b, &c);
491 ok(a == 20, "expected a == 20, got %d\n", a);
492 ok(b == 30, "expected b == 30, got %d\n", b);
493 ok(c == 40, "expected c == 40, got %d\n", c);
496 pomp_set_num_threads(max_threads);
499 static void my_for_static_simple_init(unsigned int first, unsigned int last, int step,
500 BOOL increment, unsigned int *begin, unsigned int *end)
502 unsigned int iterations, per_thread, remaining;
503 int num_threads = pomp_get_num_threads();
504 int thread_num = pomp_get_thread_num();
506 if (num_threads == 1)
508 *begin = first;
509 *end = last;
510 return;
513 if (step <= 0)
515 *begin = 0;
516 *end = increment ? -1 : 1;
517 return;
520 if (increment)
521 iterations = 1 + (last - first) / step;
522 else
524 iterations = 1 + (first - last) / step;
525 step *= -1;
528 per_thread = iterations / num_threads;
529 remaining = iterations - per_thread * num_threads;
531 if (thread_num < remaining)
532 per_thread++;
533 else if (per_thread)
534 first += remaining * step;
535 else
537 *begin = first;
538 *end = first - step;
539 return;
542 *begin = first + per_thread * thread_num * step;
543 *end = *begin + (per_thread - 1) * step;
547 static void CDECL for_static_simple_cb(void)
549 static const struct
551 unsigned int first;
552 unsigned int last;
553 int step;
555 tests[] =
557 { 0, 0, 1 }, /* 0 */
558 { 0, 1, 1 },
559 { 0, 2, 1 },
560 { 0, 3, 1 },
561 { 0, 100, 0 },
562 { 0, 100, 1 },
563 { 0, 100, 2 },
564 { 0, 100, 3 },
565 { 0, 100, -1 },
566 { 0, 100, -2 },
567 { 0, 100, -3 }, /* 10 */
568 { 0, 100, 10 },
569 { 0, 100, 50 },
570 { 0, 100, 100 },
571 { 0, 100, 150 },
572 { 0, 0x80000000, 1 },
573 { 0, 0xfffffffe, 1 },
574 { 0, 0xffffffff, 1 },
575 { 50, 50, 0 },
576 { 50, 50, 1 },
577 { 50, 50, 2 }, /* 20 */
578 { 50, 50, 3 },
579 { 50, 50, -1 },
580 { 50, 50, -2 },
581 { 50, 50, -3 },
582 { 100, 200, 1 },
583 { 100, 200, 5 },
584 { 100, 200, 10 },
585 { 100, 200, 50 },
586 { 100, 200, 100 },
587 { 100, 200, 150 }, /* 30 */
589 int num_threads = pomp_get_num_threads();
590 int thread_num = pomp_get_thread_num();
591 int i;
593 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
595 unsigned int my_begin, my_end, begin, end;
597 begin = end = 0xdeadbeef;
598 my_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, FALSE, &my_begin, &my_end);
599 p_vcomp_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, FALSE, &begin, &end);
601 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
602 i, thread_num, num_threads, my_begin, end);
603 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
604 i, thread_num, num_threads, my_end, end);
606 p_vcomp_for_static_end();
607 p_vcomp_barrier();
609 begin = end = 0xdeadbeef;
610 my_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, TRUE, &my_begin, &my_end);
611 p_vcomp_for_static_simple_init(tests[i].first, tests[i].last, tests[i].step, TRUE, &begin, &end);
613 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
614 i, thread_num, num_threads, my_begin, end);
615 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
616 i, thread_num, num_threads, my_end, end);
618 p_vcomp_for_static_end();
619 p_vcomp_barrier();
621 if (tests[i].first == tests[i].last) continue;
623 begin = end = 0xdeadbeef;
624 my_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, FALSE, &my_begin, &my_end);
625 p_vcomp_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, FALSE, &begin, &end);
627 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
628 i, thread_num, num_threads, my_begin, end);
629 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
630 i, thread_num, num_threads, my_end, end);
632 p_vcomp_for_static_end();
633 p_vcomp_barrier();
635 begin = end = 0xdeadbeef;
636 my_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, TRUE, &my_begin, &my_end);
637 p_vcomp_for_static_simple_init(tests[i].last, tests[i].first, tests[i].step, TRUE, &begin, &end);
639 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %u, got %u\n",
640 i, thread_num, num_threads, my_begin, end);
641 ok(end == my_end, "test %d, thread %d/%d: expected end == %u, got %u\n",
642 i, thread_num, num_threads, my_end, end);
644 p_vcomp_for_static_end();
645 p_vcomp_barrier();
649 static void test_vcomp_for_static_simple_init(void)
651 int max_threads = pomp_get_max_threads();
652 int i;
654 for_static_simple_cb();
656 for (i = 1; i <= 4; i++)
658 pomp_set_num_threads(i);
659 p_vcomp_fork(TRUE, 0, for_static_simple_cb);
660 p_vcomp_fork(FALSE, 0, for_static_simple_cb);
663 pomp_set_num_threads(max_threads);
666 #define VCOMP_FOR_STATIC_BROKEN_LOOP 1
667 #define VCOMP_FOR_STATIC_BROKEN_NEXT 2
669 DWORD CDECL my_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
670 int *begin, int *end, int *next, int *lastchunk)
672 unsigned int iterations, num_chunks, per_thread, remaining;
673 int num_threads = pomp_get_num_threads();
674 int thread_num = pomp_get_thread_num();
676 if (num_threads == 1 && chunksize != 1)
678 *loops = 1;
679 *begin = first;
680 *end = last;
681 *next = 0;
682 *lastchunk = first;
683 return 0;
686 if (first == last)
688 *loops = !thread_num;
689 if (!thread_num)
691 /* The value in *next on Windows is either uninitialized, or contains
692 * garbage. The value shouldn't matter for *loops <= 1, so no need to
693 * reproduce that. */
694 *begin = first;
695 *end = last;
696 *next = 0;
697 *lastchunk = first;
699 return thread_num ? 0 : VCOMP_FOR_STATIC_BROKEN_NEXT;
702 if (step <= 0)
704 /* The total number of iterations depends on the number of threads here,
705 * which doesn't make any sense. This is most likely a bug in the Windows
706 * implementation. */
707 return VCOMP_FOR_STATIC_BROKEN_LOOP;
710 if (first < last)
711 iterations = 1 + (last - first) / step;
712 else
714 iterations = 1 + (first - last) / step;
715 step *= -1;
718 if (chunksize < 1)
719 chunksize = 1;
721 num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
722 per_thread = num_chunks / num_threads;
723 remaining = num_chunks - per_thread * num_threads;
725 *loops = per_thread + (thread_num < remaining);
726 *begin = first + thread_num * chunksize * step;
727 *end = *begin + (chunksize - 1) * step;
728 *next = chunksize * num_threads * step;
729 *lastchunk = first + (num_chunks - 1) * chunksize * step;
730 return 0;
733 static void CDECL for_static_cb(void)
735 static const struct
737 int first;
738 int last;
739 int step;
740 int chunksize;
742 tests[] =
744 { 0, 0, 1, 1 }, /* 0 */
745 { 0, 1, 1, 1 },
746 { 0, 2, 1, 1 },
747 { 0, 3, 1, 1 },
748 { 0, 100, 1, 0 },
749 { 0, 100, 1, 1 },
750 { 0, 100, 1, 5 },
751 { 0, 100, 1, 10 },
752 { 0, 100, 1, 50 },
753 { 0, 100, 1, 100 },
754 { 0, 100, 1, 150 }, /* 10 */
755 { 0, 100, 3, 0 },
756 { 0, 100, 3, 1 },
757 { 0, 100, 3, 5 },
758 { 0, 100, 3, 10 },
759 { 0, 100, 3, 50 },
760 { 0, 100, 3, 100 },
761 { 0, 100, 3, 150 },
762 { 0, 100, 5, 1 },
763 { 0, 100, -3, 0 },
764 { 0, 100, -3, 1 }, /* 20 */
765 { 0, 100, -3, 5 },
766 { 0, 100, -3, 10 },
767 { 0, 100, -3, 50 },
768 { 0, 100, -3, 100 },
769 { 0, 100, -3, 150 },
770 { 0, 100, 10, 1 },
771 { 0, 100, 50, 1 },
772 { 0, 100, 100, 1 },
773 { 0, 100, 150, 1 },
774 { 0, 0x10000000, 1, 123 }, /* 30 */
775 { 0, 0x20000000, 1, 123 },
776 { 0, 0x40000000, 1, 123 },
777 { 0, -0x80000000, 1, 123 },
778 { 50, 50, 1, 1 },
779 { 50, 50, 1, 2 },
780 { 50, 50, 1, -1 },
781 { 50, 50, 1, -2 },
782 { 50, 50, 2, 1 },
783 { 50, 50, 3, 1 },
784 { 100, 200, 3, 1 }, /* 40 */
785 { 100, 200, 3, -1 },
786 { 0x7ffffffe, -0x80000000, 1, 123 },
787 { 0x7fffffff, -0x80000000, 1, 123 },
789 int num_threads = pomp_get_num_threads();
790 int thread_num = pomp_get_thread_num();
791 int i;
793 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
795 int my_begin, my_end, my_next, my_lastchunk;
796 int begin, end, next, lastchunk;
797 unsigned int my_loops, loops;
798 DWORD broken_flags;
800 my_loops = my_begin = my_end = my_next = my_lastchunk = 0xdeadbeef;
801 loops = begin = end = next = lastchunk = 0xdeadbeef;
802 broken_flags = my_for_static_init(tests[i].first, tests[i].last, tests[i].step, tests[i].chunksize,
803 &my_loops, &my_begin, &my_end, &my_next, &my_lastchunk);
804 p_vcomp_for_static_init(tests[i].first, tests[i].last, tests[i].step, tests[i].chunksize,
805 &loops, &begin, &end, &next, &lastchunk);
807 if (broken_flags & VCOMP_FOR_STATIC_BROKEN_LOOP)
809 ok(loops == 0 || loops == 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
810 i, thread_num, num_threads, loops);
812 else
814 ok(loops == my_loops, "test %d, thread %d/%d: expected loops == %u, got %u\n",
815 i, thread_num, num_threads, my_loops, loops);
816 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %d, got %d\n",
817 i, thread_num, num_threads, my_begin, begin);
818 ok(end == my_end, "test %d, thread %d/%d: expected end == %d, got %d\n",
819 i, thread_num, num_threads, my_end, end);
820 ok(next == my_next || broken(broken_flags & VCOMP_FOR_STATIC_BROKEN_NEXT),
821 "test %d, thread %d/%d: expected next == %d, got %d\n", i, thread_num, num_threads, my_next, next);
822 ok(lastchunk == my_lastchunk, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
823 i, thread_num, num_threads, my_lastchunk, lastchunk);
826 p_vcomp_for_static_end();
827 p_vcomp_barrier();
829 if (tests[i].first == tests[i].last) continue;
831 my_loops = my_begin = my_end = my_next = my_lastchunk = 0xdeadbeef;
832 loops = begin = end = next = lastchunk = 0xdeadbeef;
833 broken_flags = my_for_static_init(tests[i].last, tests[i].first, tests[i].step, tests[i].chunksize,
834 &my_loops, &my_begin, &my_end, &my_next, &my_lastchunk);
835 p_vcomp_for_static_init(tests[i].last, tests[i].first, tests[i].step, tests[i].chunksize,
836 &loops, &begin, &end, &next, &lastchunk);
838 if (broken_flags & VCOMP_FOR_STATIC_BROKEN_LOOP)
840 ok(loops == 0 || loops == 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
841 i, thread_num, num_threads, loops);
843 else
845 ok(loops == my_loops, "test %d, thread %d/%d: expected loops == %u, got %u\n",
846 i, thread_num, num_threads, my_loops, loops);
847 ok(begin == my_begin, "test %d, thread %d/%d: expected begin == %d, got %d\n",
848 i, thread_num, num_threads, my_begin, begin);
849 ok(end == my_end, "test %d, thread %d/%d: expected end == %d, got %d\n",
850 i, thread_num, num_threads, my_end, end);
851 ok(next == my_next || broken(broken_flags & VCOMP_FOR_STATIC_BROKEN_NEXT),
852 "test %d, thread %d/%d: expected next == %d, got %d\n", i, thread_num, num_threads, my_next, next);
853 ok(lastchunk == my_lastchunk, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
854 i, thread_num, num_threads, my_lastchunk, lastchunk);
857 p_vcomp_for_static_end();
858 p_vcomp_barrier();
862 #undef VCOMP_FOR_STATIC_BROKEN_LOOP
863 #undef VCOMP_FOR_STATIC_BROKEN_NEXT
865 static void test_vcomp_for_static_init(void)
867 int max_threads = pomp_get_max_threads();
868 int i;
870 for_static_cb();
872 for (i = 1; i <= 4; i++)
874 pomp_set_num_threads(i);
875 p_vcomp_fork(TRUE, 0, for_static_cb);
876 p_vcomp_fork(FALSE, 0, for_static_cb);
879 pomp_set_num_threads(max_threads);
882 static void test_atomic_integer32(void)
884 struct
886 void (CDECL *func)(int *, int);
887 int v1, v2, expected;
889 tests1[] =
891 { p_vcomp_atomic_add_i4, 0x11223344, 0x77665544, -0x77777778 },
892 { p_vcomp_atomic_and_i4, 0x11223344, 0x77665544, 0x11221144 },
893 { p_vcomp_atomic_div_i4, 0x77665544, 0x11223344, 6 },
894 { p_vcomp_atomic_div_i4, 0x77665544, -0x11223344, -6 },
895 { p_vcomp_atomic_mul_i4, 0x11223344, 0x77665544, -0xecccdf0 },
896 { p_vcomp_atomic_mul_i4, 0x11223344, -0x77665544, 0xecccdf0 },
897 { p_vcomp_atomic_or_i4, 0x11223344, 0x77665544, 0x77667744 },
898 { p_vcomp_atomic_shl_i4, 0x11223344, 3, -0x76ee65e0 },
899 { p_vcomp_atomic_shl_i4, 0x11223344, 35, -0x76ee65e0 },
900 { p_vcomp_atomic_shl_i4, -0x11223344, 3, 0x76ee65e0 },
901 { p_vcomp_atomic_shr_i4, 0x11223344, 3, 0x2244668 },
902 { p_vcomp_atomic_shr_i4, 0x11223344, 35, 0x2244668 },
903 { p_vcomp_atomic_shr_i4, -0x11223344, 3, -0x2244669 },
904 { p_vcomp_atomic_sub_i4, 0x11223344, 0x77665544, -0x66442200 },
905 { p_vcomp_atomic_xor_i4, 0x11223344, 0x77665544, 0x66446600 },
907 struct
909 void (CDECL *func)(unsigned int *, unsigned int);
910 unsigned int v1, v2, expected;
912 tests2[] =
914 { p_vcomp_atomic_div_ui4, 0x77665544, 0x11223344, 6 },
915 { p_vcomp_atomic_div_ui4, 0x77665544, 0xeeddccbc, 0 },
916 { p_vcomp_atomic_shr_ui4, 0x11223344, 3, 0x2244668 },
917 { p_vcomp_atomic_shr_ui4, 0x11223344, 35, 0x2244668 },
918 { p_vcomp_atomic_shr_ui4, 0xeeddccbc, 3, 0x1ddbb997 },
920 int i;
922 for (i = 0; i < sizeof(tests1)/sizeof(tests1[0]); i++)
924 int val = tests1[i].v1;
925 tests1[i].func(&val, tests1[i].v2);
926 ok(val == tests1[i].expected, "test %d: expected val == %d, got %d\n", i, tests1[i].expected, val);
928 for (i = 0; i < sizeof(tests2)/sizeof(tests2[0]); i++)
930 unsigned int val = tests2[i].v1;
931 tests2[i].func(&val, tests2[i].v2);
932 ok(val == tests2[i].expected, "test %d: expected val == %u, got %u\n", i, tests2[i].expected, val);
936 static void test_atomic_float(void)
938 struct
940 void (CDECL *func)(float *, float);
941 float v1, v2, expected;
943 tests[] =
945 { p_vcomp_atomic_add_r4, 42.0, 17.0, 42.0 + 17.0 },
946 { p_vcomp_atomic_div_r4, 42.0, 17.0, 42.0 / 17.0 },
947 { p_vcomp_atomic_mul_r4, 42.0, 17.0, 42.0 * 17.0 },
948 { p_vcomp_atomic_sub_r4, 42.0, 17.0, 42.0 - 17.0 },
950 int i;
952 for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++)
954 float val = tests[i].v1;
955 tests[i].func(&val, tests[i].v2);
956 ok(tests[i].expected - 0.001 < val && val < tests[i].expected + 0.001,
957 "test %d: expected val == %f, got %f\n", i, tests[i].expected, val);
961 START_TEST(vcomp)
963 if (!init_vcomp())
964 return;
966 test_omp_get_num_threads(FALSE);
967 test_omp_get_num_threads(TRUE);
968 test_vcomp_fork();
969 test_vcomp_sections_init();
970 test_vcomp_for_static_simple_init();
971 test_vcomp_for_static_init();
972 test_atomic_integer32();
973 test_atomic_float();
975 release_vcomp();