 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#include "wine/debug.h"
#include "wine/list.h"

WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

#define MAX_VECT_PARALLEL_CALLBACK_ARGS 128
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;
static HMODULE vcomp_module;
static int     vcomp_max_threads;
static int     vcomp_num_threads;
static int     vcomp_num_procs;
static BOOL    vcomp_nested_fork = FALSE;
static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };
#define VCOMP_DYNAMIC_FLAGS_STATIC    0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED   0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED    0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT 0x40
struct vcomp_thread_data
{
    struct vcomp_team_data *team;
    struct vcomp_task_data *task;
    int                     thread_num;
    BOOL                    parallel;
    int                     fork_threads;

    /* only used for concurrent tasks */
    struct list             entry;
    CONDITION_VARIABLE      cond;

    unsigned int            single;
    unsigned int            section;
    unsigned int            dynamic;
    unsigned int            dynamic_type;
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};

struct vcomp_team_data
{
    CONDITION_VARIABLE      cond;
    int                     num_threads;
    int                     finished_threads;

    /* callback arguments */
    int                     nargs;
    void                   *wrapper;
    va_list                 valist;

    unsigned int            barrier;
    int                     barrier_count;
};

struct vcomp_task_data
{
    unsigned int            single;
    unsigned int            section;
    int                     num_sections;
    int                     section_index;

    unsigned int            dynamic;
    unsigned int            dynamic_first;
    unsigned int            dynamic_last;
    unsigned int            dynamic_iterations;
    int                     dynamic_step;
    unsigned int            dynamic_chunksize;
};
static void **ptr_from_va_list(va_list valist)
{
    return *(void ***)&valist;
}

static void copy_va_list_data(void **args, va_list valist, int args_count)
{
    int i;

    for (i = 0; i < args_count; ++i)
        args[i] = va_arg(valist, void *);
}
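/* The two helpers above rely on the assumption that every variadic argument is
 * pointer-sized, so a va_list can be viewed as a flat array of void pointers.
 * A minimal usage sketch (illustrative only; example_pack_args is not part of
 * vcomp): */
#if 0
static void example_pack_args(int count, ...)
{
    void *packed[8];
    va_list valist;

    va_start(valist, count);
    copy_va_list_data(packed, valist, count);  /* packed[i] now holds the i-th argument */
    va_end(valist);
}
#endif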
#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   __ASM_SEH(".seh_pushreg %rbp\n\t")
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   __ASM_SEH(".seh_setframe %rbp,0\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   __ASM_SEH(".seh_pushreg %rsi\n\t")
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   __ASM_SEH(".seh_pushreg %rdi\n\t")
                   __ASM_SEH(".seh_endprologue\n\t")
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "push {r4, r5, LR}\n\t"
                   "subeq SP, SP, #4\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "4:\tpop {r0-r3}\n\t"
#elif defined(__aarch64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "stp x29, x30, [SP,#-16]!\n\t"
                   "1:\ttbz w8, #3, 2f\n\t"
                   "2:\tsub x10, x29, x8\n\t"
                   "3:\tldr x0, [x2], #8\n\t"
                   "str x0, [x10], #8\n\t"
                   "subs w1, w1, #1\n\t"
                   "ldp x0, x1, [sp], #16\n\t"
                   "ldp x2, x3, [sp], #16\n\t"
                   "ldp x4, x5, [sp], #16\n\t"
                   "ldp x6, x7, [sp], #16\n"
                   "ldp x29, x30, [SP], #16\n\t"
#else

static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}
#else  /* __GNUC__ */

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}
static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}
static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}
static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}
static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#endif  /* __GNUC__ */
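/* All the _vcomp_atomic_* helpers below follow the same lock-free pattern:
 * snapshot the old value, compute the new one, and retry the compare-exchange
 * until no other thread has modified the destination in between.  A condensed
 * sketch of that pattern using only the Win32 interlocked API (illustrative;
 * an "atomic max" operation is not part of vcomp): */
#if 0
static void example_atomic_max_i4(int *dest, int val)
{
    int old;
    do old = *dest;  /* snapshot the current value */
    while (InterlockedCompareExchange((LONG *)dest, max(old, val), old) != old);  /* retry on interference */
}
#endif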
static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
{
    return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
}

static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
{
    TlsSetValue(vcomp_context_tls, thread_data);
}
static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single = 0;
    data->task.section = 0;
    data->task.dynamic = 0;

    thread_data = &data->thread;
    thread_data->team = NULL;
    thread_data->task = &data->task;
    thread_data->thread_num = 0;
    thread_data->parallel = FALSE;
    thread_data->fork_threads = 0;
    thread_data->single = 1;
    thread_data->section = 1;
    thread_data->dynamic = 1;
    thread_data->dynamic_type = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}
static void vcomp_free_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    if (!thread_data) return;

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
}
void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(signed char *dest, signed char val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i1(signed char *dest, unsigned int val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    InterlockedExchangeAdd((LONG *)dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    InterlockedExchangeAdd((LONG *)dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange((LONG *)dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r4(float *dest, float val)
    *(float *)&new = *(float *)&old + val;
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
    *(float *)&new = *(float *)&old / val;
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
    *(float *)&new = *(float *)&old * val;
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
    *(float *)&new = *(float *)&old - val;
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
    *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
    *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    while (InterlockedCompareExchange((LONG *)dest, new, old) != old);

void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = *(double *)&old + val;
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = *(double *)&old / val;
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = *(double *)&old * val;
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = *(double *)&old - val;
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
    old = *(LONG64 *)dest;
    *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);

void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
int CDECL omp_get_dynamic(void)

int CDECL omp_get_max_threads(void)
{
    return vcomp_max_threads;
}

int CDECL omp_get_nested(void)
{
    return vcomp_nested_fork;
}

int CDECL omp_get_num_procs(void)
{
    return vcomp_num_procs;
}

int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    return team_data ? team_data->num_threads : 1;
}

int CDECL omp_get_thread_num(void)
{
    return vcomp_init_thread_data()->thread_num;
}

int CDECL _vcomp_get_thread_num(void)
{
    return vcomp_init_thread_data()->thread_num;
}

/* Time in seconds since "some time in the past" */
double CDECL omp_get_wtime(void)
{
    return GetTickCount() / 1000.0;
}
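/* Because the reference point is unspecified, only the difference between two
 * omp_get_wtime() readings is meaningful.  A minimal usage sketch (illustrative
 * only; do_work is a hypothetical callback): */
#if 0
static void example_time_region(void (*do_work)(void))
{
    double start = omp_get_wtime();
    do_work();
    TRACE("region took %f seconds\n", omp_get_wtime() - start);
}
#endif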
void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}

void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
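/* Sketch of where _vcomp_barrier fits: code compiled from "#pragma omp barrier"
 * calls it once per team thread, and no thread continues until the whole team
 * has arrived (illustrative; the exact compiler-generated sequence is not
 * reproduced here): */
#if 0
static void CDECL example_team_body(int *per_thread_results)
{
    per_thread_results[_vcomp_get_thread_num()] = 1;  /* phase 1: each thread writes its slot */
    _vcomp_barrier();                                 /* wait for the whole team               */
    /* phase 2: it is now safe to read the other threads' results */
}
#endif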
void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}

int CDECL _vcomp_master_begin(void)
{
    return !vcomp_init_thread_data()->thread_num;
}

void CDECL _vcomp_master_end(void)
{
    /* nothing to do here */
}
int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

void CDECL _vcomp_single_end(void)
{
    /* nothing to do here */
}
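/* Sketch of the call sequence a "#pragma omp single" block reduces to: exactly
 * one thread of the team gets a nonzero return from _vcomp_single_begin and runs
 * the body, and (unless "nowait" was used) the team then synchronizes.  This is
 * an assumption about the compiler-emitted pattern, shown only to illustrate how
 * the two functions above are meant to be paired: */
#if 0
    if (_vcomp_single_begin(0))
    {
        /* body executed by a single thread of the team */
    }
    _vcomp_single_end();
    _vcomp_barrier();
#endif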
void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section = thread_data->section;
        task_data->num_sections = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}
int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)

        *end = increment ? -1 : 1;

        iterations = 1 + (last - first) / step;

        iterations = 1 + (first - last) / step;

    per_thread = iterations / num_threads;
    remaining = iterations - per_thread * num_threads;

    if (thread_num < remaining)

    else if (per_thread)
        first += remaining * step;

        *end = first - step;

    *begin = first + per_thread * thread_num * step;
    *end = *begin + (per_thread - 1) * step;
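/* Worked example of the static partitioning above (values computed from the
 * arithmetic, not from a test run), assuming the usual rule that the first
 * "remaining" threads take one extra iteration:
 *   first = 0, last = 102, step = 1, increment = TRUE, 4 threads
 *   iterations = 103, per_thread = 25, remaining = 3
 *     thread 0: *begin =  0, *end = 25   (26 iterations)
 *     thread 1: *begin = 26, *end = 51   (26 iterations)
 *     thread 2: *begin = 52, *end = 77   (26 iterations)
 *     thread 3: *begin = 78, *end = 102  (25 iterations)
 */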
void CDECL _vcomp_for_static_simple_init_i8(ULONG64 first, ULONG64 last, LONG64 step,
                                            BOOL increment, ULONG64 *begin, ULONG64 *end)
    ULONG64 iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%s, %s, %s, %x, %p, %p)\n", wine_dbgstr_longlong(first), wine_dbgstr_longlong(last),
          wine_dbgstr_longlong(step), increment, begin, end);

    if (num_threads == 1)

        *end = increment ? -1 : 1;

        iterations = 1 + (last - first) / step;

        iterations = 1 + (first - last) / step;

    per_thread = iterations / num_threads;
    remaining = iterations - per_thread * num_threads;

    if (thread_num < remaining)

    else if (per_thread)
        first += remaining * step;

        *end = first - step;

    *begin = first + per_thread * thread_num * step;
    *end = *begin + (per_thread - 1) * step;
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    int no_begin, no_lastchunk;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

        lastchunk = &no_lastchunk;

    if (num_threads == 1 && chunksize != 1)

        *loops = !thread_num;

        iterations = 1 + (last - first) / step;

        iterations = 1 + (first - last) / step;

    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining = num_chunks - per_thread * num_threads;

    *loops = per_thread + (thread_num < remaining);
    *begin = first + thread_num * chunksize * step;
    *end = *begin + (chunksize - 1) * step;
    *next = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
void CDECL _vcomp_for_static_init_i8(LONG64 first, LONG64 last, LONG64 step, LONG64 chunksize, ULONG64 *loops,
                                     LONG64 *begin, LONG64 *end, LONG64 *next, LONG64 *lastchunk)
    ULONG64 iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    LONG64 no_begin, no_lastchunk;

    TRACE("(%s, %s, %s, %s, %p, %p, %p, %p, %p)\n",
          wine_dbgstr_longlong(first), wine_dbgstr_longlong(last),
          wine_dbgstr_longlong(step), wine_dbgstr_longlong(chunksize),
          loops, begin, end, next, lastchunk);

        lastchunk = &no_lastchunk;

    if (num_threads == 1 && chunksize != 1)

        *loops = !thread_num;

        iterations = 1 + (last - first) / step;

        iterations = 1 + (first - last) / step;

    num_chunks = iterations / chunksize;
    if (iterations % chunksize) num_chunks++;
    per_thread = num_chunks / num_threads;
    remaining = num_chunks - per_thread * num_threads;

    *loops = per_thread + (thread_num < remaining);
    *begin = first + thread_num * chunksize * step;
    *end = *begin + (chunksize - 1) * step;
    *next = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
void CDECL _vcomp_for_static_end(void)
{
    /* nothing to do here */
}
void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

        thread_data->dynamic_type = 0;

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;

        iterations = 1 + (first - last) / step;

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)

        per_thread = iterations / num_threads;
        remaining = iterations - per_thread * num_threads;

        if (thread_num < remaining)

        else if (per_thread)
            first += remaining * step;

            thread_data->dynamic_type = 0;

        thread_data->dynamic_type = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end = thread_data->dynamic_begin + (per_thread - 1) * step;

        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)

            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)

            task_data->dynamic = thread_data->dynamic;
            task_data->dynamic_first = first;
            task_data->dynamic_last = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step = step;
            task_data->dynamic_chunksize = chunksize;

        LeaveCriticalSection(&vcomp_section);
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)

        *begin = thread_data->dynamic_begin;
        *end = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;

    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)

        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)

            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)

                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;

            *begin = task_data->dynamic_first;
            *end = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;

        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
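/* Sketch of how the two functions above are consumed for a dynamically
 * scheduled loop: the shared state is initialized once per thread and each
 * thread then repeatedly asks for the next chunk until none is left.  This is
 * an assumed usage pattern, shown for illustration only: */
#if 0
static void example_dynamic_loop(unsigned int first, unsigned int last)
{
    unsigned int begin, end, i;

    _vcomp_for_dynamic_init(VCOMP_DYNAMIC_FLAGS_CHUNKED | VCOMP_DYNAMIC_FLAGS_INCREMENT,
                            first, last, 1, 4 /* chunk size */);
    while (_vcomp_for_dynamic_next(&begin, &end))
        for (i = begin; i <= end; i++)
        {
            /* loop body for iteration i */
        }
}
#endif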
int CDECL omp_in_parallel(void)
{
    return vcomp_init_thread_data()->parallel;
}
static DWORD WINAPI _vcomp_fork_worker(void *param)
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);

        struct vcomp_team_data *team = thread_data->team;

        LeaveCriticalSection(&vcomp_section);
        _vcomp_fork_call_wrapper(team->wrapper, team->nargs, ptr_from_va_list(team->valist));
        EnterCriticalSection(&vcomp_section);

        thread_data->team = NULL;
        list_remove(&thread_data->entry);
        list_add_tail(&vcomp_idle_threads, &thread_data->entry);
        if (++team->finished_threads >= team->num_threads)
            WakeAllConditionVariable(&team->cond);

        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)

    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)

    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;

        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads = 1;
    team_data.finished_threads = 0;
    team_data.nargs = nargs;
    team_data.wrapper = wrapper;
    va_start(team_data.valist, wrapper);
    team_data.barrier = 0;
    team_data.barrier_count = 0;

    task_data.single = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    thread_data.team = &team_data;
    thread_data.task = &task_data;
    thread_data.thread_num = 0;
    thread_data.parallel = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single = 1;
    thread_data.section = 1;
    thread_data.dynamic = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)

        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))

            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads++;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;

            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)

            struct vcomp_thread_data *data;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));

            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;

            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);

                HeapFree(GetProcessHeap(), 0, data);

            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);

        LeaveCriticalSection(&vcomp_section);

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, ptr_from_va_list(team_data.valist));
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)

        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));

    va_end(team_data.valist);
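/* Sketch of how a compiled "#pragma omp parallel" region reaches _vcomp_fork:
 * the compiler outlines the region into a CDECL wrapper taking pointer-sized
 * arguments and passes it here together with the captured values (hypothetical
 * wrapper shown for illustration; the real one is emitted by the compiler): */
#if 0
static void CDECL example_parallel_body(int *counter)
{
    _vcomp_atomic_add_i4(counter, 1);  /* runs once on every thread of the team */
}

static void example_parallel(void)
{
    int counter = 0;
    _vcomp_fork(TRUE, 1, example_parallel_body, &counter);
    /* counter now equals the number of threads that executed the region */
}
#endif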
static CRITICAL_SECTION *alloc_critsect(void)
{
    CRITICAL_SECTION *critsect;
    if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
    {
        ERR("could not allocate critical section\n");
        ExitProcess(1);
    }

    InitializeCriticalSection(critsect);
    critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
    return critsect;
}

static void destroy_critsect(CRITICAL_SECTION *critsect)
{
    if (!critsect) return;
    critsect->DebugInfo->Spare[0] = 0;
    DeleteCriticalSection(critsect);
    HeapFree(GetProcessHeap(), 0, critsect);
}
void CDECL omp_init_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    *lock = alloc_critsect();
}

void CDECL omp_destroy_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    destroy_critsect(*lock);
}

void CDECL omp_set_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
    {
        ERR("omp_set_lock called while holding lock %p\n", *lock);
        ExitProcess(1);
    }

    EnterCriticalSection(*lock);
}

void CDECL omp_unset_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
        return 0;

    return TryEnterCriticalSection(*lock);
}

void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    EnterCriticalSection(*lock);
}

void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
}
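/* Typical use of the OpenMP lock API implemented above (illustrative sketch;
 * example_locked_increment is not part of vcomp): */
#if 0
static void example_locked_increment(omp_lock_t *lock, int *shared)
{
    omp_set_lock(lock);    /* blocks; must not already be held by this thread */
    (*shared)++;
    omp_unset_lock(lock);
}
#endif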
void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
{
    TRACE("(%p)\n", critsect);

    if (!*critsect)
    {
        CRITICAL_SECTION *new_critsect = alloc_critsect();
        if (InterlockedCompareExchangePointer((void **)critsect, new_critsect, NULL) != NULL)
            destroy_critsect(new_critsect);  /* someone beat us to it */
    }

    EnterCriticalSection(*critsect);
}

void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
{
    TRACE("(%p)\n", critsect);
    LeaveCriticalSection(critsect);
}
static unsigned int get_step_count(int start, int end, int range_offset, int step)
{
    int range = end - start + step - range_offset;

    if (step < 0)
        return (unsigned)-range / -step;
    else
        return (unsigned)range / step;
}
static void CDECL c2vectparallel_wrapper(int start, int end, int step, int end_included, BOOL dynamic_distribution,
                                         int volatile *dynamic_start, void *function, int nargs, va_list valist)
    void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
    unsigned int step_count, steps_per_call, remainder;
    int thread_count = omp_get_num_threads();
    int curr_start, curr_end, range_offset;
    int thread = _vcomp_get_thread_num();

    copy_va_list_data(&wrapper_args[2], valist, nargs - 2);

    step_sign = step > 0 ? 1 : -1;
    range_offset = step_sign * !end_included;

    if (dynamic_distribution)

        int next_start, new_start, end_value;

        start = *dynamic_start;
        end_value = end + !!end_included * step;
        while (start != end_value)

            step_count = get_step_count(start, end, range_offset, step);

            curr_end = start + (step_count + thread_count - 1) / thread_count * step

            if ((curr_end - end) * step_sign > 0)

                next_start = end_value;

                next_start = curr_end - range_offset;

            if ((new_start = InterlockedCompareExchange((LONG volatile*)dynamic_start, next_start, start)) != start)

            wrapper_args[0] = (void *)(ULONG_PTR)start;
            wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
            _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
            start = *dynamic_start;

    step_count = get_step_count(start, end, range_offset, step);

    /* According to the tests, native vcomp still makes extra calls with an
     * empty range from excessive threads under certain conditions, for
     * unclear reasons. */
    if (thread >= step_count && (end_included || (step != 1 && step != -1)))

    steps_per_call = step_count / thread_count;
    remainder = step_count % thread_count;

    if (thread < remainder)

        curr_start = thread * (steps_per_call + 1);
        curr_end = curr_start + steps_per_call + 1;

    else if (thread < step_count)

        curr_start = remainder + steps_per_call * thread;
        curr_end = curr_start + steps_per_call;

        curr_start = curr_end = 0;

    curr_start = start + curr_start * step;
    curr_end = start + (curr_end - 1) * step + range_offset;

    wrapper_args[0] = (void *)(ULONG_PTR)curr_start;
    wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
    _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
C2VectParallel(int start
, int end
, int step
, BOOL end_included
, int thread_count
,
1902 BOOL dynamic_distribution
, void *function
, int nargs
, ...)
1904 struct vcomp_thread_data
*thread_data
;
1905 int volatile dynamic_start
;
1906 int prev_thread_count
;
1909 TRACE("start %d, end %d, step %d, end_included %d, thread_count %d, dynamic_distribution %#x,"
1910 " function %p, nargs %d.\n", start
, end
, step
, end_included
, thread_count
,
1911 dynamic_distribution
, function
, nargs
);
1913 if (nargs
> MAX_VECT_PARALLEL_CALLBACK_ARGS
)
1915 FIXME("Number of arguments %u exceeds supported maximum %u"
1916 " (not calling the loop code, expect problems).\n",
1917 nargs
, MAX_VECT_PARALLEL_CALLBACK_ARGS
);
1921 va_start(valist
, nargs
);
1923 /* This expression can result in integer overflow. According to the tests,
1924 * native vcomp runs the function as a single thread both for empty range
1925 * and (end - start) not fitting the integer range. */
1926 if ((step
> 0 && end
< start
) || (step
< 0 && end
> start
)
1927 || (end
- start
) / step
< 2 || thread_count
< 0)
1929 void *wrapper_args
[MAX_VECT_PARALLEL_CALLBACK_ARGS
];
1931 wrapper_args
[0] = (void *)(ULONG_PTR
)start
;
1932 wrapper_args
[1] = (void *)(ULONG_PTR
)end
;
1933 copy_va_list_data(&wrapper_args
[2], valist
, nargs
- 2);
1934 _vcomp_fork_call_wrapper(function
, nargs
, wrapper_args
);
1939 thread_data
= vcomp_init_thread_data();
1940 prev_thread_count
= thread_data
->fork_threads
;
1941 thread_data
->fork_threads
= thread_count
;
1943 dynamic_start
= start
;
1945 _vcomp_fork(TRUE
, 9, c2vectparallel_wrapper
, start
, end
, step
, end_included
, dynamic_distribution
,
1946 &dynamic_start
, function
, nargs
, valist
);
1948 thread_data
->fork_threads
= prev_thread_count
;
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
    TRACE("(%p, %ld, %p)\n", instance, reason, reserved);

    case DLL_PROCESS_ATTACH:

        SYSTEM_INFO sysinfo;

        if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)

            ERR("Failed to allocate TLS index\n");

        GetSystemInfo(&sysinfo);
        vcomp_module = instance;
        vcomp_max_threads = sysinfo.dwNumberOfProcessors;
        vcomp_num_threads = sysinfo.dwNumberOfProcessors;
        vcomp_num_procs = sysinfo.dwNumberOfProcessors;

    case DLL_PROCESS_DETACH:

        if (reserved) break;
        if (vcomp_context_tls != TLS_OUT_OF_INDEXES)

            vcomp_free_thread_data();
            TlsFree(vcomp_context_tls);

    case DLL_THREAD_DETACH:

        vcomp_free_thread_data();