2 * Unit test suite for vcomp
4 * Copyright 2012 Dan Kegel
5 * Copyright 2015 Sebastian Lackner
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 #include "wine/test.h"
24 static char vcomp_manifest_file
[MAX_PATH
];
25 static HANDLE vcomp_actctx_hctx
;
26 static ULONG_PTR vcomp_actctx_cookie
;
27 static HMODULE vcomp_handle
;
29 static HANDLE (WINAPI
*pCreateActCtxA
)(ACTCTXA
*);
30 static BOOL (WINAPI
*pActivateActCtx
)(HANDLE
, ULONG_PTR
*);
31 static BOOL (WINAPI
*pDeactivateActCtx
)(DWORD
, ULONG_PTR
);
32 static VOID (WINAPI
*pReleaseActCtx
)(HANDLE
);
34 static void (CDECL
*p_vcomp_atomic_add_i4
)(int *dest
, int val
);
35 static void (CDECL
*p_vcomp_atomic_add_r4
)(float *dest
, float val
);
36 static void (CDECL
*p_vcomp_atomic_and_i4
)(int *dest
, int val
);
37 static void (CDECL
*p_vcomp_atomic_div_i4
)(int *dest
, int val
);
38 static void (CDECL
*p_vcomp_atomic_div_r4
)(float *dest
, float val
);
39 static void (CDECL
*p_vcomp_atomic_div_ui4
)(unsigned int *dest
, unsigned int val
);
40 static void (CDECL
*p_vcomp_atomic_mul_i4
)(int *dest
, int val
);
41 static void (CDECL
*p_vcomp_atomic_mul_r4
)(float *dest
, float val
);
42 static void (CDECL
*p_vcomp_atomic_or_i4
)(int *dest
, int val
);
43 static void (CDECL
*p_vcomp_atomic_shl_i4
)(int *dest
, int val
);
44 static void (CDECL
*p_vcomp_atomic_shr_i4
)(int *dest
, int val
);
45 static void (CDECL
*p_vcomp_atomic_shr_ui4
)(unsigned int *dest
, unsigned int val
);
46 static void (CDECL
*p_vcomp_atomic_sub_i4
)(int *dest
, int val
);
47 static void (CDECL
*p_vcomp_atomic_sub_r4
)(float *dest
, float val
);
48 static void (CDECL
*p_vcomp_atomic_xor_i4
)(int *dest
, int val
);
49 static void (CDECL
*p_vcomp_barrier
)(void);
50 static void (CDECL
*p_vcomp_for_static_end
)(void);
51 static void (CDECL
*p_vcomp_for_static_init
)(int first
, int last
, int step
, int chunksize
, unsigned int *loops
,
52 int *begin
, int *end
, int *next
, int *lastchunk
);
53 static void (CDECL
*p_vcomp_for_static_simple_init
)(unsigned int first
, unsigned int last
, int step
,
54 BOOL increment
, unsigned int *begin
, unsigned int *end
);
55 static void (WINAPIV
*p_vcomp_fork
)(BOOL ifval
, int nargs
, void *wrapper
, ...);
56 static void (CDECL
*p_vcomp_sections_init
)(int n
);
57 static int (CDECL
*p_vcomp_sections_next
)(void);
58 static void (CDECL
*p_vcomp_set_num_threads
)(int num_threads
);
59 static int (CDECL
*pomp_get_max_threads
)(void);
60 static int (CDECL
*pomp_get_nested
)(void);
61 static int (CDECL
*pomp_get_num_threads
)(void);
62 static int (CDECL
*pomp_get_thread_num
)(void);
63 static int (CDECL
*pomp_in_parallel
)(void);
64 static void (CDECL
*pomp_set_nested
)(int nested
);
65 static void (CDECL
*pomp_set_num_threads
)(int num_threads
);
69 #elif defined(__x86_64__)
/* Text of the application manifest that create_vcomp_manifest() writes to a
 * temporary file. It names the test assembly "Wine.vcomp.Test" and references
 * the "Microsoft.VC80.OpenMP" assembly; ARCH is pasted in as the
 * processorArchitecture value of both identities. */
75 static const char vcomp_manifest
[] =
76 "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n"
77 "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\" manifestVersion=\"1.0\">\n"
78 " <assemblyIdentity\n"
80 " name=\"Wine.vcomp.Test\"\n"
81 " version=\"1.0.0.0\"\n"
82 " processorArchitecture=\"" ARCH
"\"\n"
84 "<description>Wine vcomp test suite</description>\n"
86 " <dependentAssembly>\n"
87 " <assemblyIdentity\n"
89 " name=\"Microsoft.VC80.OpenMP\"\n"
90 " version=\"8.0.50608.0\"\n"
91 " processorArchitecture=\"" ARCH
"\"\n"
92 " publicKeyToken=\"1fc8b3b9a1e18e3b\"\n"
94 " </dependentAssembly>\n"
100 static void create_vcomp_manifest(void)
102 char temp_path
[MAX_PATH
];
108 kernel32
= GetModuleHandleA("kernel32.dll");
109 pCreateActCtxA
= (void *)GetProcAddress(kernel32
, "CreateActCtxA");
110 pActivateActCtx
= (void *)GetProcAddress(kernel32
, "ActivateActCtx");
111 pDeactivateActCtx
= (void *)GetProcAddress(kernel32
, "DeactivateActCtx");
112 pReleaseActCtx
= (void *)GetProcAddress(kernel32
, "ReleaseActCtx");
113 if (!pCreateActCtxA
) return;
115 if (!GetTempPathA(sizeof(temp_path
), temp_path
) ||
116 !GetTempFileNameA(temp_path
, "vcomp", 0, vcomp_manifest_file
))
118 ok(0, "failed to create manifest file\n");
122 file
= CreateFileA(vcomp_manifest_file
, GENERIC_WRITE
, 0, NULL
, CREATE_ALWAYS
, 0, NULL
);
123 if (file
== INVALID_HANDLE_VALUE
)
125 ok(0, "failed to open manifest file\n");
129 if (!WriteFile(file
, vcomp_manifest
, sizeof(vcomp_manifest
) - 1, &written
, NULL
))
133 if (written
!= sizeof(vcomp_manifest
) - 1)
135 ok(0, "failed to write manifest file\n");
136 DeleteFileA(vcomp_manifest_file
);
140 memset(&ctx
, 0, sizeof(ctx
));
141 ctx
.cbSize
= sizeof(ctx
);
142 ctx
.lpSource
= vcomp_manifest_file
;
143 vcomp_actctx_hctx
= pCreateActCtxA(&ctx
);
144 if (!vcomp_actctx_hctx
)
146 ok(0, "failed to create activation context\n");
147 DeleteFileA(vcomp_manifest_file
);
151 if (!pActivateActCtx(vcomp_actctx_hctx
, &vcomp_actctx_cookie
))
153 win_skip("failed to activate context\n");
154 pReleaseActCtx(vcomp_actctx_hctx
);
155 DeleteFileA(vcomp_manifest_file
);
156 vcomp_actctx_hctx
= NULL
;
160 static void release_vcomp(void)
163 FreeLibrary(vcomp_handle
);
165 if (vcomp_actctx_hctx
)
167 pDeactivateActCtx(0, vcomp_actctx_cookie
);
168 pReleaseActCtx(vcomp_actctx_hctx
);
169 DeleteFileA(vcomp_manifest_file
);
173 #define VCOMP_GET_PROC(func) \
176 p ## func = (void *)GetProcAddress(vcomp_handle, #func); \
177 if (!p ## func) trace("Failed to get address for %s\n", #func); \
181 static BOOL
init_vcomp(void)
183 create_vcomp_manifest();
185 vcomp_handle
= LoadLibraryA("vcomp.dll");
188 win_skip("vcomp.dll not installed\n");
193 VCOMP_GET_PROC(_vcomp_atomic_add_i4
);
194 VCOMP_GET_PROC(_vcomp_atomic_add_r4
);
195 VCOMP_GET_PROC(_vcomp_atomic_and_i4
);
196 VCOMP_GET_PROC(_vcomp_atomic_div_i4
);
197 VCOMP_GET_PROC(_vcomp_atomic_div_r4
);
198 VCOMP_GET_PROC(_vcomp_atomic_div_ui4
);
199 VCOMP_GET_PROC(_vcomp_atomic_mul_i4
);
200 VCOMP_GET_PROC(_vcomp_atomic_mul_r4
);
201 VCOMP_GET_PROC(_vcomp_atomic_or_i4
);
202 VCOMP_GET_PROC(_vcomp_atomic_shl_i4
);
203 VCOMP_GET_PROC(_vcomp_atomic_shr_i4
);
204 VCOMP_GET_PROC(_vcomp_atomic_shr_ui4
);
205 VCOMP_GET_PROC(_vcomp_atomic_sub_i4
);
206 VCOMP_GET_PROC(_vcomp_atomic_sub_r4
);
207 VCOMP_GET_PROC(_vcomp_atomic_xor_i4
);
208 VCOMP_GET_PROC(_vcomp_barrier
);
209 VCOMP_GET_PROC(_vcomp_for_static_end
);
210 VCOMP_GET_PROC(_vcomp_for_static_init
);
211 VCOMP_GET_PROC(_vcomp_for_static_simple_init
);
212 VCOMP_GET_PROC(_vcomp_fork
);
213 VCOMP_GET_PROC(_vcomp_sections_init
);
214 VCOMP_GET_PROC(_vcomp_sections_next
);
215 VCOMP_GET_PROC(_vcomp_set_num_threads
);
216 VCOMP_GET_PROC(omp_get_max_threads
);
217 VCOMP_GET_PROC(omp_get_nested
);
218 VCOMP_GET_PROC(omp_get_num_threads
);
219 VCOMP_GET_PROC(omp_get_thread_num
);
220 VCOMP_GET_PROC(omp_in_parallel
);
221 VCOMP_GET_PROC(omp_set_nested
);
222 VCOMP_GET_PROC(omp_set_num_threads
);
227 #undef VCOMP_GET_PROC
229 static void CDECL
num_threads_cb2(int parallel
, LONG
*count
)
231 int is_parallel
= pomp_in_parallel();
232 ok(is_parallel
== parallel
, "expected %d, got %d\n", parallel
, is_parallel
);
234 InterlockedIncrement(count
);
237 static void CDECL
num_threads_cb(BOOL nested
, int parallel
, int nested_threads
, LONG
*count
)
239 int is_parallel
, num_threads
, thread_num
;
242 InterlockedIncrement(count
);
245 num_threads
= pomp_get_num_threads();
246 ok(num_threads
== *count
, "expected num_threads == %d, got %d\n", *count
, num_threads
);
247 thread_num
= pomp_get_thread_num();
248 ok(thread_num
>= 0 && thread_num
< num_threads
,
249 "expected thread_num in range [0, %d], got %d\n", num_threads
- 1, thread_num
);
251 is_parallel
= pomp_in_parallel();
252 ok(is_parallel
== parallel
, "expected %d, got %d\n", parallel
, is_parallel
);
255 p_vcomp_fork(TRUE
, 2, num_threads_cb2
, TRUE
, &thread_count
);
257 ok(thread_count
== nested_threads
, "expected %d threads, got %d\n", nested_threads
, thread_count
);
259 ok(thread_count
== 1, "expected 1 thread, got %d\n", thread_count
);
261 is_parallel
= pomp_in_parallel();
262 ok(is_parallel
== parallel
, "expected %d, got %d\n", parallel
, is_parallel
);
265 p_vcomp_fork(FALSE
, 2, num_threads_cb2
, parallel
, &thread_count
);
266 ok(thread_count
== 1, "expected 1 thread, got %d\n", thread_count
);
268 is_parallel
= pomp_in_parallel();
269 ok(is_parallel
== parallel
, "expected %d, got %d\n", parallel
, is_parallel
);
271 p_vcomp_set_num_threads(4);
273 p_vcomp_fork(TRUE
, 2, num_threads_cb2
, TRUE
, &thread_count
);
275 ok(thread_count
== 4, "expected 4 threads, got %d\n", thread_count
);
277 ok(thread_count
== 1, "expected 1 thread, got %d\n", thread_count
);
279 is_parallel
= pomp_in_parallel();
280 ok(is_parallel
== parallel
, "expected %d, got %d\n", parallel
, is_parallel
);
283 static void test_omp_get_num_threads(BOOL nested
)
285 int is_nested
, is_parallel
, max_threads
, num_threads
, thread_num
;
288 pomp_set_nested(nested
);
289 is_nested
= pomp_get_nested();
290 ok(is_nested
== nested
, "expected %d, got %d\n", nested
, is_nested
);
292 max_threads
= pomp_get_max_threads();
293 ok(max_threads
>= 1, "expected max_threads >= 1, got %d\n", max_threads
);
294 thread_num
= pomp_get_thread_num();
295 ok(thread_num
== 0, "expected thread_num == 0, got %d\n", thread_num
);
297 is_parallel
= pomp_in_parallel();
298 ok(is_parallel
== FALSE
, "expected FALSE, got %d\n", is_parallel
);
300 num_threads
= pomp_get_num_threads();
301 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
303 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, max_threads
, &thread_count
);
304 ok(thread_count
== max_threads
, "expected %d threads, got %d\n", max_threads
, thread_count
);
306 is_parallel
= pomp_in_parallel();
307 ok(is_parallel
== FALSE
, "expected FALSE, got %d\n", is_parallel
);
309 num_threads
= pomp_get_num_threads();
310 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
312 p_vcomp_fork(FALSE
, 4, num_threads_cb
, TRUE
, FALSE
, max_threads
, &thread_count
);
313 ok(thread_count
== 1, "expected 1 thread, got %d\n", thread_count
);
315 is_parallel
= pomp_in_parallel();
316 ok(is_parallel
== FALSE
, "expected FALSE, got %d\n", is_parallel
);
318 pomp_set_num_threads(1);
319 num_threads
= pomp_get_num_threads();
320 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
322 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 1, &thread_count
);
323 ok(thread_count
== 1, "expected 1 thread, got %d\n", thread_count
);
325 is_parallel
= pomp_in_parallel();
326 ok(is_parallel
== FALSE
, "expected FALSE, got %d\n", is_parallel
);
328 pomp_set_num_threads(2);
329 num_threads
= pomp_get_num_threads();
330 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
332 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 2, &thread_count
);
333 ok(thread_count
== 2, "expected 2 threads, got %d\n", thread_count
);
335 pomp_set_num_threads(4);
336 num_threads
= pomp_get_num_threads();
337 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
339 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 4, &thread_count
);
340 ok(thread_count
== 4, "expected 4 threads, got %d\n", thread_count
);
342 p_vcomp_set_num_threads(8);
343 num_threads
= pomp_get_num_threads();
344 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
346 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 4, &thread_count
);
347 ok(thread_count
== 8, "expected 8 threads, got %d\n", thread_count
);
349 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 4, &thread_count
);
350 ok(thread_count
== 4, "expected 4 threads, got %d\n", thread_count
);
352 p_vcomp_set_num_threads(0);
353 num_threads
= pomp_get_num_threads();
354 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
356 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 4, &thread_count
);
357 ok(thread_count
== 4, "expected 4 threads, got %d\n", thread_count
);
359 pomp_set_num_threads(0);
360 num_threads
= pomp_get_num_threads();
361 ok(num_threads
== 1, "expected num_threads == 1, got %d\n", num_threads
);
363 p_vcomp_fork(TRUE
, 4, num_threads_cb
, nested
, TRUE
, 4, &thread_count
);
364 ok(thread_count
== 4, "expected 4 threads, got %d\n", thread_count
);
366 pomp_set_num_threads(max_threads
);
367 pomp_set_nested(FALSE
);
370 static void CDECL
fork_ptr_cb(LONG
*a
, LONG
*b
, LONG
*c
, LONG
*d
, LONG
*e
)
372 InterlockedIncrement(a
);
373 InterlockedIncrement(b
);
374 InterlockedIncrement(c
);
375 InterlockedIncrement(d
);
376 InterlockedIncrement(e
);
379 static void CDECL
fork_uintptr_cb(UINT_PTR a
, UINT_PTR b
, UINT_PTR c
, UINT_PTR d
, UINT_PTR e
)
381 ok(a
== 1, "expected a == 1, got %p\n", (void *)a
);
382 ok(b
== MAXUINT_PTR
- 2, "expected b == MAXUINT_PTR - 2, got %p\n", (void *)b
);
383 ok(c
== 3, "expected c == 3, got %p\n", (void *)c
);
384 ok(d
== MAXUINT_PTR
- 4, "expected d == MAXUINT_PTR - 4, got %p\n", (void *)d
);
385 ok(e
== 5, "expected e == 5, got %p\n", (void *)e
);
388 static void CDECL
fork_float_cb(float a
, float b
, float c
, float d
, float e
)
390 ok(1.4999 < a
&& a
< 1.5001, "expected a == 1.5, got %f\n", a
);
391 ok(2.4999 < b
&& b
< 2.5001, "expected b == 2.5, got %f\n", b
);
392 ok(3.4999 < c
&& c
< 3.5001, "expected c == 3.5, got %f\n", c
);
393 ok(4.4999 < d
&& d
< 4.5001, "expected d == 4.5, got %f\n", d
);
394 ok(5.4999 < e
&& e
< 5.5001, "expected e == 5.5, got %f\n", e
);
397 static void test_vcomp_fork(void)
400 int max_threads
= pomp_get_max_threads();
401 pomp_set_num_threads(4);
403 a
= 0; b
= 1; c
= 2; d
= 3; e
= 4;
404 p_vcomp_fork(FALSE
, 5, fork_ptr_cb
, &a
, &b
, &c
, &d
, &e
);
405 ok(a
== 1, "expected a == 1, got %d\n", a
);
406 ok(b
== 2, "expected b == 2, got %d\n", b
);
407 ok(c
== 3, "expected c == 3, got %d\n", c
);
408 ok(d
== 4, "expected d == 4, got %d\n", d
);
409 ok(e
== 5, "expected e == 5, got %d\n", e
);
411 a
= 0; b
= 1; c
= 2; d
= 3; e
= 4;
412 p_vcomp_fork(TRUE
, 5, fork_ptr_cb
, &a
, &b
, &c
, &d
, &e
);
413 ok(a
== 4, "expected a == 4, got %d\n", a
);
414 ok(b
== 5, "expected b == 5, got %d\n", b
);
415 ok(c
== 6, "expected c == 6, got %d\n", c
);
416 ok(d
== 7, "expected d == 7, got %d\n", d
);
417 ok(e
== 8, "expected e == 8, got %d\n", e
);
419 p_vcomp_fork(TRUE
, 5, fork_uintptr_cb
, (UINT_PTR
)1, (UINT_PTR
)(MAXUINT_PTR
- 2),
420 (UINT_PTR
)3, (UINT_PTR
)(MAXUINT_PTR
- 4), (UINT_PTR
)5);
422 if (sizeof(int) < sizeof(void *))
423 skip("skipping float test on x86_64\n");
426 void (CDECL
*func
)(BOOL
, int, void *, float, float, float, float, float) = (void *)p_vcomp_fork
;
427 func(TRUE
, 5, fork_float_cb
, 1.5f
, 2.5f
, 3.5f
, 4.5f
, 5.5f
);
430 pomp_set_num_threads(max_threads
);
/* Callback running three consecutive sections constructs of 20, 30 and 40
 * sections.  Each value handed out by _vcomp_sections_next (until it returns
 * -1) is counted in *a, *b and *c respectively. */
433 static void CDECL
section_cb(LONG
*a
, LONG
*b
, LONG
*c
)
/* first construct: 20 sections, counted in *a */
437 p_vcomp_sections_init(20);
438 while ((i
= p_vcomp_sections_next()) != -1)
440 InterlockedIncrement(a
);
/* second construct: 30 sections, counted in *b */
444 p_vcomp_sections_init(30);
445 while ((i
= p_vcomp_sections_next()) != -1)
447 InterlockedIncrement(b
);
/* third construct: 40 sections, counted in *c */
451 p_vcomp_sections_init(40);
452 while ((i
= p_vcomp_sections_next()) != -1)
454 InterlockedIncrement(c
);
459 static void test_vcomp_sections_init(void)
462 int max_threads
= pomp_get_max_threads();
467 /* calling _vcomp_sections_next without prior _vcomp_sections_init
468 * returns uninitialized memory on Windows. */
469 i
= p_vcomp_sections_next();
470 ok(i
== -1, "expected -1, got %d\n", i
);
474 section_cb(&a
, &b
, &c
);
475 ok(a
== 20, "expected a == 20, got %d\n", a
);
476 ok(b
== 30, "expected b == 30, got %d\n", b
);
477 ok(c
== 40, "expected c == 40, got %d\n", c
);
479 for (i
= 1; i
<= 4; i
++)
481 pomp_set_num_threads(i
);
484 p_vcomp_fork(TRUE
, 3, section_cb
, &a
, &b
, &c
);
485 ok(a
== 20, "expected a == 20, got %d\n", a
);
486 ok(b
== 30, "expected b == 30, got %d\n", b
);
487 ok(c
== 40, "expected c == 40, got %d\n", c
);
490 p_vcomp_fork(FALSE
, 3, section_cb
, &a
, &b
, &c
);
491 ok(a
== 20, "expected a == 20, got %d\n", a
);
492 ok(b
== 30, "expected b == 30, got %d\n", b
);
493 ok(c
== 40, "expected c == 40, got %d\n", c
);
496 pomp_set_num_threads(max_threads
);
/* Reference computation of the *begin / *end values that for_static_simple_cb
 * compares against the results of _vcomp_for_static_simple_init.  The result
 * depends on the calling thread's team size and thread number. */
499 static void my_for_static_simple_init(unsigned int first
, unsigned int last
, int step
,
500 BOOL increment
, unsigned int *begin
, unsigned int *end
)
502 unsigned int iterations
, per_thread
, remaining
;
503 int num_threads
= pomp_get_num_threads();
504 int thread_num
= pomp_get_thread_num();
506 if (num_threads
== 1)
516 *end
= increment
? -1 : 1;
/* number of loop iterations; which of the two formulas applies is decided by
 * a condition not shown on these lines */
521 iterations
= 1 + (last
- first
) / step
;
524 iterations
= 1 + (first
- last
) / step
;
/* whole iterations per thread, and what is left over after the division */
528 per_thread
= iterations
/ num_threads
;
529 remaining
= iterations
- per_thread
* num_threads
;
531 if (thread_num
< remaining
)
534 first
+= remaining
* step
;
/* this thread's range: per_thread iterations starting thread_num blocks
 * after first */
542 *begin
= first
+ per_thread
* thread_num
* step
;
543 *end
= *begin
+ (per_thread
- 1) * step
;
/* Callback comparing _vcomp_for_static_simple_init against the reference
 * my_for_static_simple_init for every tests[] entry.  Each entry is tried
 * with increment FALSE and TRUE, and then (unless first == last) again with
 * first and last swapped.  Every call is paired with _vcomp_for_static_end.
 * Failure messages report the test index, the thread number / team size, the
 * expected value and the value actually returned. */
547 static void CDECL
for_static_simple_cb(void)
567 { 0, 100, -3 }, /* 10 */
572 { 0, 0x80000000, 1 },
573 { 0, 0xfffffffe, 1 },
574 { 0, 0xffffffff, 1 },
577 { 50, 50, 2 }, /* 20 */
587 { 100, 200, 150 }, /* 30 */
589 int num_threads
= pomp_get_num_threads();
590 int thread_num
= pomp_get_thread_num();
593 for (i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++)
595 unsigned int my_begin
, my_end
, begin
, end
;
/* (first, last), increment == FALSE; the outputs are poisoned beforehand */
597 begin
= end
= 0xdeadbeef;
598 my_for_static_simple_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, FALSE
, &my_begin
, &my_end
);
599 p_vcomp_for_static_simple_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, FALSE
, &begin
, &end
);
601 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %u, got %u\n",
602 i
, thread_num
, num_threads
, my_begin
, begin
);
603 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %u, got %u\n",
604 i
, thread_num
, num_threads
, my_end
, end
);
606 p_vcomp_for_static_end();
/* (first, last), increment == TRUE */
609 begin
= end
= 0xdeadbeef;
610 my_for_static_simple_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, TRUE
, &my_begin
, &my_end
);
611 p_vcomp_for_static_simple_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, TRUE
, &begin
, &end
);
613 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %u, got %u\n",
614 i
, thread_num
, num_threads
, my_begin
, begin
);
615 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %u, got %u\n",
616 i
, thread_num
, num_threads
, my_end
, end
);
618 p_vcomp_for_static_end();
/* swapping first and last would repeat the same test */
621 if (tests
[i
].first
== tests
[i
].last
) continue;
/* (last, first), increment == FALSE */
623 begin
= end
= 0xdeadbeef;
624 my_for_static_simple_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, FALSE
, &my_begin
, &my_end
);
625 p_vcomp_for_static_simple_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, FALSE
, &begin
, &end
);
627 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %u, got %u\n",
628 i
, thread_num
, num_threads
, my_begin
, begin
);
629 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %u, got %u\n",
630 i
, thread_num
, num_threads
, my_end
, end
);
632 p_vcomp_for_static_end();
/* (last, first), increment == TRUE */
635 begin
= end
= 0xdeadbeef;
636 my_for_static_simple_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, TRUE
, &my_begin
, &my_end
);
637 p_vcomp_for_static_simple_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, TRUE
, &begin
, &end
);
639 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %u, got %u\n",
640 i
, thread_num
, num_threads
, my_begin
, begin
);
641 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %u, got %u\n",
642 i
, thread_num
, num_threads
, my_end
, end
);
644 p_vcomp_for_static_end();
649 static void test_vcomp_for_static_simple_init(void)
651 int max_threads
= pomp_get_max_threads();
654 for_static_simple_cb();
656 for (i
= 1; i
<= 4; i
++)
658 pomp_set_num_threads(i
);
659 p_vcomp_fork(TRUE
, 0, for_static_simple_cb
);
660 p_vcomp_fork(FALSE
, 0, for_static_simple_cb
);
663 pomp_set_num_threads(max_threads
);
/* Flags returned by my_for_static_init for cases that for_static_cb must
 * treat specially:
 *   VCOMP_FOR_STATIC_BROKEN_LOOP  only a loose check of *loops is possible
 *   VCOMP_FOR_STATIC_BROKEN_NEXT  a mismatch in *next is tolerated via broken() */
666 #define VCOMP_FOR_STATIC_BROKEN_LOOP 1
667 #define VCOMP_FOR_STATIC_BROKEN_NEXT 2
/* Reference computation of the values that for_static_cb compares against
 * the results of _vcomp_for_static_init.  The result depends on the calling
 * thread's team size and thread number.  Returns a combination of the
 * VCOMP_FOR_STATIC_BROKEN_* flags.  File-local helper, hence static like
 * my_for_static_simple_init. */
669 static DWORD CDECL
my_for_static_init(int first
, int last
, int step
, int chunksize
, unsigned int *loops
,
670 int *begin
, int *end
, int *next
, int *lastchunk
)
672 unsigned int iterations
, num_chunks
, per_thread
, remaining
;
673 int num_threads
= pomp_get_num_threads();
674 int thread_num
= pomp_get_thread_num();
676 if (num_threads
== 1 && chunksize
!= 1)
/* only thread 0 gets a loop in this case */
688 *loops
= !thread_num
;
691 /* The value in *next on Windows is either uninitialized, or contains
692 * garbage. The value shouldn't matter for *loops <= 1, so no need to
699 return thread_num
? 0 : VCOMP_FOR_STATIC_BROKEN_NEXT
;
704 /* The total number of iterations depends on the number of threads here,
705 * which doesn't make any sense. This is most likely a bug in the Windows
707 return VCOMP_FOR_STATIC_BROKEN_LOOP
;
711 iterations
= 1 + (last
- first
) / step
;
714 iterations
= 1 + (first
- last
) / step
;
/* round up to whole chunks; the 64-bit intermediate keeps
 * iterations + chunksize - 1 from wrapping */
721 num_chunks
= ((DWORD64
)iterations
+ chunksize
- 1) / chunksize
;
722 per_thread
= num_chunks
/ num_threads
;
723 remaining
= num_chunks
- per_thread
* num_threads
;
/* the first `remaining` threads take one extra chunk */
725 *loops
= per_thread
+ (thread_num
< remaining
);
726 *begin
= first
+ thread_num
* chunksize
* step
;
727 *end
= *begin
+ (chunksize
- 1) * step
;
/* distance from one chunk of a thread to its next one */
728 *next
= chunksize
* num_threads
* step
;
729 *lastchunk
= first
+ (num_chunks
- 1) * chunksize
* step
;
/* Callback comparing _vcomp_for_static_init against the reference
 * my_for_static_init for every tests[] entry, first as (first, last) and
 * then, unless first == last, with the two swapped.  Every call is paired
 * with _vcomp_for_static_end.  When the reference reports
 * VCOMP_FOR_STATIC_BROKEN_LOOP, *loops is only checked to be 0 or 1; a
 * mismatch in *next is accepted via broken() when
 * VCOMP_FOR_STATIC_BROKEN_NEXT is set. */
733 static void CDECL
for_static_cb(void)
744 { 0, 0, 1, 1 }, /* 0 */
754 { 0, 100, 1, 150 }, /* 10 */
764 { 0, 100, -3, 1 }, /* 20 */
774 { 0, 0x10000000, 1, 123 }, /* 30 */
775 { 0, 0x20000000, 1, 123 },
776 { 0, 0x40000000, 1, 123 },
777 { 0, -0x80000000, 1, 123 },
784 { 100, 200, 3, 1 }, /* 40 */
786 { 0x7ffffffe, -0x80000000, 1, 123 },
787 { 0x7fffffff, -0x80000000, 1, 123 },
789 int num_threads
= pomp_get_num_threads();
790 int thread_num
= pomp_get_thread_num();
793 for (i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++)
795 int my_begin
, my_end
, my_next
, my_lastchunk
;
796 int begin
, end
, next
, lastchunk
;
797 unsigned int my_loops
, loops
;
/* (first, last); all outputs are poisoned so untouched values compare equal */
800 my_loops
= my_begin
= my_end
= my_next
= my_lastchunk
= 0xdeadbeef;
801 loops
= begin
= end
= next
= lastchunk
= 0xdeadbeef;
802 broken_flags
= my_for_static_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, tests
[i
].chunksize
,
803 &my_loops
, &my_begin
, &my_end
, &my_next
, &my_lastchunk
);
804 p_vcomp_for_static_init(tests
[i
].first
, tests
[i
].last
, tests
[i
].step
, tests
[i
].chunksize
,
805 &loops
, &begin
, &end
, &next
, &lastchunk
);
807 if (broken_flags
& VCOMP_FOR_STATIC_BROKEN_LOOP
)
809 ok(loops
== 0 || loops
== 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
810 i
, thread_num
, num_threads
, loops
);
/* NOTE(review): the lines between the check above and the ones below are not
 * shown; presumably the exact comparisons form the else branch - confirm */
814 ok(loops
== my_loops
, "test %d, thread %d/%d: expected loops == %u, got %u\n",
815 i
, thread_num
, num_threads
, my_loops
, loops
);
816 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %d, got %d\n",
817 i
, thread_num
, num_threads
, my_begin
, begin
);
818 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %d, got %d\n",
819 i
, thread_num
, num_threads
, my_end
, end
);
820 ok(next
== my_next
|| broken(broken_flags
& VCOMP_FOR_STATIC_BROKEN_NEXT
),
821 "test %d, thread %d/%d: expected next == %d, got %d\n", i
, thread_num
, num_threads
, my_next
, next
);
822 ok(lastchunk
== my_lastchunk
, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
823 i
, thread_num
, num_threads
, my_lastchunk
, lastchunk
);
826 p_vcomp_for_static_end();
/* swapping first and last would repeat the same test */
829 if (tests
[i
].first
== tests
[i
].last
) continue;
/* (last, first) */
831 my_loops
= my_begin
= my_end
= my_next
= my_lastchunk
= 0xdeadbeef;
832 loops
= begin
= end
= next
= lastchunk
= 0xdeadbeef;
833 broken_flags
= my_for_static_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, tests
[i
].chunksize
,
834 &my_loops
, &my_begin
, &my_end
, &my_next
, &my_lastchunk
);
835 p_vcomp_for_static_init(tests
[i
].last
, tests
[i
].first
, tests
[i
].step
, tests
[i
].chunksize
,
836 &loops
, &begin
, &end
, &next
, &lastchunk
);
838 if (broken_flags
& VCOMP_FOR_STATIC_BROKEN_LOOP
)
840 ok(loops
== 0 || loops
== 1, "test %d, thread %d/%d: expected loops == 0 or 1, got %u\n",
841 i
, thread_num
, num_threads
, loops
);
845 ok(loops
== my_loops
, "test %d, thread %d/%d: expected loops == %u, got %u\n",
846 i
, thread_num
, num_threads
, my_loops
, loops
);
847 ok(begin
== my_begin
, "test %d, thread %d/%d: expected begin == %d, got %d\n",
848 i
, thread_num
, num_threads
, my_begin
, begin
);
849 ok(end
== my_end
, "test %d, thread %d/%d: expected end == %d, got %d\n",
850 i
, thread_num
, num_threads
, my_end
, end
);
851 ok(next
== my_next
|| broken(broken_flags
& VCOMP_FOR_STATIC_BROKEN_NEXT
),
852 "test %d, thread %d/%d: expected next == %d, got %d\n", i
, thread_num
, num_threads
, my_next
, next
);
853 ok(lastchunk
== my_lastchunk
, "test %d, thread %d/%d: expected lastchunk == %d, got %d\n",
854 i
, thread_num
, num_threads
, my_lastchunk
, lastchunk
);
857 p_vcomp_for_static_end();
/* the flags are only meaningful to my_for_static_init and for_static_cb */
862 #undef VCOMP_FOR_STATIC_BROKEN_LOOP
863 #undef VCOMP_FOR_STATIC_BROKEN_NEXT
865 static void test_vcomp_for_static_init(void)
867 int max_threads
= pomp_get_max_threads();
872 for (i
= 1; i
<= 4; i
++)
874 pomp_set_num_threads(i
);
875 p_vcomp_fork(TRUE
, 0, for_static_cb
);
876 p_vcomp_fork(FALSE
, 0, for_static_cb
);
879 pomp_set_num_threads(max_threads
);
882 static void test_atomic_integer32(void)
886 void (CDECL
*func
)(int *, int);
887 int v1
, v2
, expected
;
891 { p_vcomp_atomic_add_i4
, 0x11223344, 0x77665544, -0x77777778 },
892 { p_vcomp_atomic_and_i4
, 0x11223344, 0x77665544, 0x11221144 },
893 { p_vcomp_atomic_div_i4
, 0x77665544, 0x11223344, 6 },
894 { p_vcomp_atomic_div_i4
, 0x77665544, -0x11223344, -6 },
895 { p_vcomp_atomic_mul_i4
, 0x11223344, 0x77665544, -0xecccdf0 },
896 { p_vcomp_atomic_mul_i4
, 0x11223344, -0x77665544, 0xecccdf0 },
897 { p_vcomp_atomic_or_i4
, 0x11223344, 0x77665544, 0x77667744 },
898 { p_vcomp_atomic_shl_i4
, 0x11223344, 3, -0x76ee65e0 },
899 { p_vcomp_atomic_shl_i4
, 0x11223344, 35, -0x76ee65e0 },
900 { p_vcomp_atomic_shl_i4
, -0x11223344, 3, 0x76ee65e0 },
901 { p_vcomp_atomic_shr_i4
, 0x11223344, 3, 0x2244668 },
902 { p_vcomp_atomic_shr_i4
, 0x11223344, 35, 0x2244668 },
903 { p_vcomp_atomic_shr_i4
, -0x11223344, 3, -0x2244669 },
904 { p_vcomp_atomic_sub_i4
, 0x11223344, 0x77665544, -0x66442200 },
905 { p_vcomp_atomic_xor_i4
, 0x11223344, 0x77665544, 0x66446600 },
909 void (CDECL
*func
)(unsigned int *, unsigned int);
910 unsigned int v1
, v2
, expected
;
914 { p_vcomp_atomic_div_ui4
, 0x77665544, 0x11223344, 6 },
915 { p_vcomp_atomic_div_ui4
, 0x77665544, 0xeeddccbc, 0 },
916 { p_vcomp_atomic_shr_ui4
, 0x11223344, 3, 0x2244668 },
917 { p_vcomp_atomic_shr_ui4
, 0x11223344, 35, 0x2244668 },
918 { p_vcomp_atomic_shr_ui4
, 0xeeddccbc, 3, 0x1ddbb997 },
922 for (i
= 0; i
< sizeof(tests1
)/sizeof(tests1
[0]); i
++)
924 int val
= tests1
[i
].v1
;
925 tests1
[i
].func(&val
, tests1
[i
].v2
);
926 ok(val
== tests1
[i
].expected
, "test %d: expected val == %d, got %d\n", i
, tests1
[i
].expected
, val
);
928 for (i
= 0; i
< sizeof(tests2
)/sizeof(tests2
[0]); i
++)
930 unsigned int val
= tests2
[i
].v1
;
931 tests2
[i
].func(&val
, tests2
[i
].v2
);
932 ok(val
== tests2
[i
].expected
, "test %d: expected val == %u, got %u\n", i
, tests2
[i
].expected
, val
);
936 static void test_atomic_float(void)
940 void (CDECL
*func
)(float *, float);
941 float v1
, v2
, expected
;
945 { p_vcomp_atomic_add_r4
, 42.0, 17.0, 42.0 + 17.0 },
946 { p_vcomp_atomic_div_r4
, 42.0, 17.0, 42.0 / 17.0 },
947 { p_vcomp_atomic_mul_r4
, 42.0, 17.0, 42.0 * 17.0 },
948 { p_vcomp_atomic_sub_r4
, 42.0, 17.0, 42.0 - 17.0 },
952 for (i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++)
954 float val
= tests
[i
].v1
;
955 tests
[i
].func(&val
, tests
[i
].v2
);
956 ok(tests
[i
].expected
- 0.001 < val
&& val
< tests
[i
].expected
+ 0.001,
957 "test %d: expected val == %f, got %f\n", i
, tests
[i
].expected
, val
);
966 test_omp_get_num_threads(FALSE
);
967 test_omp_get_num_threads(TRUE
);
969 test_vcomp_sections_init();
970 test_vcomp_for_static_simple_init();
971 test_vcomp_for_static_init();
972 test_atomic_integer32();