/*
 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#include "config.h"
#include "wine/port.h"

#include <stdarg.h>
#include <assert.h>

#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "wine/debug.h"
#include "wine/list.h"
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;
static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;
static HMODULE vcomp_module;
static int     vcomp_max_threads;
static int     vcomp_num_threads;
static BOOL    vcomp_nested_fork = FALSE;

static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };
#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED     0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40
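
/* The scheduling type is encoded in the low bits of the flags passed to
 * _vcomp_for_dynamic_init below; VCOMP_DYNAMIC_FLAGS_INCREMENT is OR'ed in
 * when the loop counter runs upwards rather than downwards. */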
struct vcomp_thread_data
{
    struct vcomp_team_data  *team;
    struct vcomp_task_data  *task;
    int                     thread_num;
    BOOL                    parallel;
    int                     fork_threads;

    /* only used for concurrent tasks */
    struct list             entry;
    CONDITION_VARIABLE      cond;

    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_type;
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};
struct vcomp_team_data
{
    CONDITION_VARIABLE      cond;
    int                     num_threads;
    int                     finished_threads;

    /* callback arguments */
    int                     nargs;
    void                    *wrapper;
    __ms_va_list            valist;

    /* barrier */
    unsigned int            barrier;
    int                     barrier_count;
};
struct vcomp_task_data
{
    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;
    int                     num_sections;
    int                     section_index;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_first;
    unsigned int            dynamic_last;
    unsigned int            dynamic_iterations;
    int                     dynamic_step;
    unsigned int            dynamic_chunksize;
};
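
/* The single/section/dynamic counters implement a simple generation scheme:
 * each thread bumps its private counter when it reaches a construct, and the
 * first thread whose counter gets ahead of the shared task counter claims
 * the work (see _vcomp_single_begin and _vcomp_sections_init below). */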
#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "cld\n\t"
                   "rep; movsl\n\t"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )
#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"
                   "movq $4,%rcx\n\t"
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"
                   "rep; movsq\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret" )
#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )
#elif defined(__aarch64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "stp x29, x30, [SP,#-16]!\n\t"
                   "mov x29, SP\n\t"
                   "mov x9, x0\n\t"
                   "mov w10, w1\n\t"
                   "mov x11, x2\n\t"
                   "ldr w12, [x11, #24]\n\t"
                   "ldr x13, [x11, #8]\n\t"
                   "ldr x0, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x1, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x2, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x3, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x4, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x5, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x6, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x7, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "add x13, x13, w12, sxtw\n\t"
                   "subs w12, w10, #8\n\t"
                   "b.le 2f\n\t"
                   "lsl w12, w12, #3\n\t"
                   "sub SP, SP, w12, sxtw\n\t"
                   "tbz w12, #3, 1f\n\t"
                   "sub SP, SP, #8\n\t"
                   "1: sub w12, w12, #8\n\t"
                   "ldr x14, [x13, w12, sxtw]\n\t"
                   "str x14, [SP, w12, sxtw]\n\t"
                   "cbnz w12, 1b\n\t"
                   "2: blr x9\n\t"
                   "mov SP, x29\n\t"
                   "ldp x29, x30, [SP], #16\n\t"
                   "ret" )
#else

static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
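
/* _vcomp_fork_call_wrapper() spills the variadic arguments captured by
 * _vcomp_fork() back into a regular call. Behaviour is roughly equivalent
 * to the following sketch, assuming exactly two pointer-sized arguments
 * (the assembly versions above handle any argument count):
 *
 *   void (CDECL *fn)(void *, void *) = wrapper;
 *   fn(((void **)args)[0], ((void **)args)[1]);
 */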
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

#else
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif
#endif  /* __GNUC__ */
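
/* The Win32 Interlocked* API only covers operands of at least 32 bits, so
 * the 8- and 16-bit helpers above are provided by inline assembly on x86,
 * by GCC __sync builtins where available, and otherwise by falling back to
 * vcomp_section as a global lock. */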
static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
{
    return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
}

static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
{
    TlsSetValue(vcomp_context_tls, thread_data);
}
static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data   task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single   = 0;
    data->task.section  = 0;
    data->task.dynamic  = 0;

    thread_data = &data->thread;
    thread_data->team           = NULL;
    thread_data->task           = &data->task;
    thread_data->thread_num     = 0;
    thread_data->parallel       = FALSE;
    thread_data->fork_threads   = 0;
    thread_data->single         = 1;
    thread_data->section        = 1;
    thread_data->dynamic        = 1;
    thread_data->dynamic_type   = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}
static void vcomp_free_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    if (!thread_data) return;

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
}
void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
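
/* Bits 8-11 of the reduction flags select the operator: 0/1 add, 2 multiply,
 * 3 and, 4 or, 5 xor, 6 boolean and, 7 boolean or. For example, flags 0x200
 * dispatch to _vcomp_atomic_mul_i1. The same encoding is used by the i2, i4,
 * i8, r4 and r8 variants below. */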
void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}
int CDECL omp_get_dynamic(void)
{
    TRACE("stub\n");
    return 0;
}

int CDECL omp_get_max_threads(void)
{
    TRACE("()\n");
    return vcomp_max_threads;
}

int CDECL omp_get_nested(void)
{
    TRACE("()\n");
    return vcomp_nested_fork;
}

int CDECL omp_get_num_procs(void)
{
    TRACE("stub\n");
    return 1;
}

int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    TRACE("()\n");
    return team_data ? team_data->num_threads : 1;
}

int CDECL omp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

int CDECL _vcomp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

/* Time in seconds since "some time in the past" */
double CDECL omp_get_wtime(void)
{
    return GetTickCount() / 1000.0;
}
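
/* Note that GetTickCount() only has millisecond granularity and wraps around
 * roughly every 49.7 days; OpenMP only requires a per-thread monotonic clock,
 * so this is adequate for timing parallel regions. */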
void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}
void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
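
/* The barrier counts arrivals; the last thread to arrive resets the count
 * and bumps the generation number, which releases everyone waiting on the
 * condition variable. Waiting on the generation (rather than the count)
 * keeps a spurious wakeup from letting a thread through early. */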
void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}
int CDECL _vcomp_master_begin(void)
{
    TRACE("()\n");
    return !vcomp_init_thread_data()->thread_num;
}

void CDECL _vcomp_master_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

void CDECL _vcomp_single_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section       = thread_data->section;
        task_data->num_sections  = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}
int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
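
/* A minimal sketch of the call sequence a compiler emits for
 * "#pragma omp sections" with two sections (the exact emitted shape is an
 * assumption; the entry points are the ones above):
 *
 *   int i;
 *   _vcomp_sections_init(2);
 *   while ((i = _vcomp_sections_next()) != -1)
 *   {
 *       if (i == 0) section0();
 *       else        section1();
 *   }
 */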
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        *begin = first;
        *end   = last;
        return;
    }

    if (step <= 0)
    {
        *begin = 0;
        *end   = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    per_thread = iterations / num_threads;
    remaining  = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;
    else if (per_thread)
        first += remaining * step;
    else
    {
        *begin = first;
        *end   = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end   = *begin + (per_thread - 1) * step;
}
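
/* Worked example: first=0, last=99, step=1, increment=TRUE on a team of 4
 * gives iterations=100, per_thread=25, remaining=0, so thread n receives
 * *begin=25*n and *end=25*n+24, i.e. four contiguous blocks of 25. */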
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    int no_begin, no_lastchunk;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (!begin)
    {
        begin = &no_begin;
        lastchunk = &no_lastchunk;
    }

    if (num_threads == 1 && chunksize != 1)
    {
        *loops      = 1;
        *begin      = first;
        *end        = last;
        *next       = 0;
        *lastchunk  = first;
        return;
    }

    if (first == last)
    {
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin      = first;
            *end        = last;
            *next       = 0;
            *lastchunk  = first;
        }
        return;
    }

    if (step <= 0)
    {
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (chunksize < 1)
        chunksize = 1;

    num_chunks  = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread  = num_chunks / num_threads;
    remaining   = num_chunks - per_thread * num_threads;

    *loops      = per_thread + (thread_num < remaining);
    *begin      = first + thread_num * chunksize * step;
    *end        = *begin + (chunksize - 1) * step;
    *next       = chunksize * num_threads * step;
    *lastchunk  = first + (num_chunks - 1) * chunksize * step;
}
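
/* Worked example: first=0, last=9, step=1, chunksize=2 on a team of 2 gives
 * iterations=10 and num_chunks=5. Thread 0 gets *loops=3, *begin=0, *end=1,
 * *next=4, *lastchunk=8; thread 1 gets *loops=2, *begin=2, *end=3, *next=4.
 * The chunks therefore interleave as 0:[0-1] 1:[2-3] 0:[4-5] 1:[6-7] 0:[8-9]. */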
void CDECL _vcomp_for_static_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        per_thread = iterations / num_threads;
        remaining  = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type   = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin  = first + per_thread * thread_num * step;
        thread_data->dynamic_end    = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            task_data->dynamic              = thread_data->dynamic;
            task_data->dynamic_first        = first;
            task_data->dynamic_last         = last;
            task_data->dynamic_iterations   = iterations;
            task_data->dynamic_step         = step;
            task_data->dynamic_chunksize    = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        *begin = thread_data->dynamic_begin;
        *end   = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end   = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first      += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}
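
/* Guided scheduling hands out ceil(remaining / num_threads) iterations per
 * request while plenty of work is left, then falls back to the requested
 * chunksize. Example: 100 remaining iterations on 4 threads with chunksize 1
 * yield a first grab of (100 + 3) / 4 = 25 iterations. */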
int CDECL omp_in_parallel(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->parallel;
}
static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
}
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads       = 1;
    team_data.finished_threads  = 0;
    team_data.nargs             = nargs;
    team_data.wrapper           = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier           = 0;
    team_data.barrier_count     = 0;

    task_data.single            = 0;
    task_data.section           = 0;
    task_data.dynamic           = 0;

    thread_data.team            = &team_data;
    thread_data.task            = &task_data;
    thread_data.thread_num      = 0;
    thread_data.parallel        = ifval || prev_thread_data->parallel;
    thread_data.fork_threads    = 0;
    thread_data.single          = 1;
    thread_data.section         = 1;
    thread_data.dynamic         = 1;
    thread_data.dynamic_type    = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team          = &team_data;
            data->task          = &task_data;
            data->thread_num    = team_data.num_threads++;
            data->parallel      = thread_data.parallel;
            data->fork_threads  = 0;
            data->single        = 1;
            data->section       = 1;
            data->dynamic       = 1;
            data->dynamic_type  = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;

            data->team          = &team_data;
            data->task          = &task_data;
            data->thread_num    = team_data.num_threads;
            data->parallel      = thread_data.parallel;
            data->fork_threads  = 0;
            data->single        = 1;
            data->section       = 1;
            data->dynamic       = 1;
            data->dynamic_type  = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}
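
/* Sketch of the code a compiler generates for "#pragma omp parallel",
 * assuming one shared variable passed by address (the wrapper name and
 * argument layout are illustrative, not a description of any particular
 * compiler's output):
 *
 *   static void CDECL parallel_body(int *shared) { ...runs in every thread... }
 *   ...
 *   _vcomp_fork(TRUE, 1, parallel_body, &shared_var);
 *
 * The forking thread becomes thread 0 of the new team and runs the body
 * itself via _vcomp_fork_call_wrapper. */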
static CRITICAL_SECTION *alloc_critsect(void)
{
    CRITICAL_SECTION *critsect;
    if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
    {
        ERR("could not allocate critical section\n");
        ExitProcess(1);
    }

    InitializeCriticalSection(critsect);
    critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
    return critsect;
}
static void destroy_critsect(CRITICAL_SECTION *critsect)
{
    if (!critsect) return;
    critsect->DebugInfo->Spare[0] = 0;
    DeleteCriticalSection(critsect);
    HeapFree(GetProcessHeap(), 0, critsect);
}
void CDECL omp_init_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    *lock = alloc_critsect();
}

void CDECL omp_destroy_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    destroy_critsect(*lock);
}
void CDECL omp_set_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
    {
        ERR("omp_set_lock called while holding lock %p\n", *lock);
        ExitProcess(1);
    }

    EnterCriticalSection(*lock);
}
void CDECL omp_unset_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}
int CDECL omp_test_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
        return 0;

    return TryEnterCriticalSection(*lock);
}
void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    EnterCriticalSection(*lock);
}

void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}
int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
}
void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
{
    TRACE("(%p)\n", critsect);

    if (!*critsect)
    {
        CRITICAL_SECTION *new_critsect = alloc_critsect();
        if (interlocked_cmpxchg_ptr((void **)critsect, new_critsect, NULL) != NULL)
            destroy_critsect(new_critsect);  /* someone beat us to it */
    }

    EnterCriticalSection(*critsect);
}
void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
{
    TRACE("(%p)\n", critsect);
    LeaveCriticalSection(critsect);
}
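
/* _vcomp_enter_critsect/_vcomp_leave_critsect back "#pragma omp critical".
 * The critical section is allocated lazily on first entry; the
 * interlocked_cmpxchg_ptr above resolves the race when several threads hit
 * an uninitialized critical section at the same time, and the loser frees
 * its extra allocation. */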
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module      = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}