vcomp: Implement _vcomp_reduction_r8 and add tests.
dlls/vcomp/main.c

/*
 * vcomp implementation
 *
 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include <stdarg.h>
#include <assert.h>

#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "wine/debug.h"
#include "wine/list.h"

WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;
static HMODULE vcomp_module;
static int     vcomp_max_threads;
static int     vcomp_num_threads;
static BOOL    vcomp_nested_fork = FALSE;

static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };

#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED     0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40
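
/* Interpretation of the scheduling flags (inferred from
 * _vcomp_for_dynamic_init below): the low bits select the schedule type
 * (static, chunked or guided), and VCOMP_DYNAMIC_FLAGS_INCREMENT marks a
 * loop that counts upwards from 'first' to 'last'; without it the loop
 * counts downwards. */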

struct vcomp_thread_data
{
    struct vcomp_team_data  *team;
    struct vcomp_task_data  *task;
    int                     thread_num;
    BOOL                    parallel;
    int                     fork_threads;

    /* only used for concurrent tasks */
    struct list             entry;
    CONDITION_VARIABLE      cond;

    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_type;
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};

struct vcomp_team_data
{
    CONDITION_VARIABLE      cond;
    int                     num_threads;
    int                     finished_threads;

    /* callback arguments */
    int                     nargs;
    void                    *wrapper;
    __ms_va_list            valist;

    /* barrier */
    unsigned int            barrier;
    int                     barrier_count;
};

struct vcomp_task_data
{
    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;
    int                     num_sections;
    int                     section_index;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_first;
    unsigned int            dynamic_last;
    unsigned int            dynamic_iterations;
    int                     dynamic_step;
    unsigned int            dynamic_chunksize;
};
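
/* _vcomp_fork_call_wrapper invokes 'wrapper' with the nargs pointer-sized
 * arguments packed into 'args'. Since the argument count is only known at
 * run time, each architecture has to copy the arguments onto its own stack
 * (and, on x86-64 and ARM, into the register argument slots) before making
 * the call, hence the per-architecture assembly below. */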

#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "cld\n\t"
                   "rep; movsl\n"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )

#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"
                   "movq $4,%rcx\n\t"
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"
                   "rep; movsq\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret")

#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )

#else

static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
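
/* The interlocked helpers used elsewhere in this file only come in 32- and
 * 64-bit variants, so 8- and 16-bit compare-exchange and fetch-add helpers
 * are provided here: as lock-prefixed instructions on x86 with GCC, via the
 * __sync builtins where available, and otherwise serialized through
 * vcomp_section as a fallback. */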

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

#else  /* __GNUC__ */

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#endif  /* __GNUC__ */

static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
{
    return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
}

static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
{
    TlsSetValue(vcomp_context_tls, thread_data);
}

static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data   task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single           = 0;
    data->task.section          = 0;
    data->task.dynamic          = 0;

    thread_data = &data->thread;
    thread_data->team           = NULL;
    thread_data->task           = &data->task;
    thread_data->thread_num     = 0;
    thread_data->parallel       = FALSE;
    thread_data->fork_threads   = 0;
    thread_data->single         = 1;
    thread_data->section        = 1;
    thread_data->dynamic        = 1;
    thread_data->dynamic_type   = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}

static void vcomp_free_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    if (!thread_data) return;

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
}

void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}
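
/* Reduction dispatch: bits 8-11 of 'flags' select the operation. Indices 0
 * and 1 both map to addition, followed by multiplication, bitwise and, or,
 * xor, and the logical and/or variants; out-of-range values are clamped to
 * the last table entry. */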

void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}
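
/* _vcomp_reduction_r8 mirrors the integer dispatchers above. The bitwise
 * and/or/xor slots have no floating-point equivalent and fall through to
 * the boolean-or helper. As a hypothetical example of how this entry point
 * is reached: for "#pragma omp parallel for reduction(+: sum)" over
 * doubles, the compiler would merge each thread's partial result with a
 * call like _vcomp_reduction_r8(0x100, &sum, partial), 0x100 selecting
 * addition under the encoding described above. */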

void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

int CDECL omp_get_dynamic(void)
{
    TRACE("stub\n");
    return 0;
}

int CDECL omp_get_max_threads(void)
{
    TRACE("()\n");
    return vcomp_max_threads;
}

int CDECL omp_get_nested(void)
{
    TRACE("stub\n");
    return vcomp_nested_fork;
}

int CDECL omp_get_num_procs(void)
{
    TRACE("stub\n");
    return 1;
}

int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    TRACE("()\n");
    return team_data ? team_data->num_threads : 1;
}

int CDECL omp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

int CDECL _vcomp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

/* Time in seconds since "some time in the past" */
double CDECL omp_get_wtime(void)
{
    return GetTickCount() / 1000.0;
}

void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}

void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}
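
/* The barrier is a generation counter: the last thread to arrive resets
 * barrier_count, increments team_data->barrier and wakes the others, while
 * earlier arrivals sleep until the generation number changes. This also
 * makes the barrier immediately reusable for the next synchronization
 * point. */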

void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}

void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}

int CDECL _vcomp_master_begin(void)
{
    TRACE("()\n");
    return !vcomp_init_thread_data()->thread_num;
}

void CDECL _vcomp_master_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
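
/* 'single' (like 'section' and 'dynamic' below) pairs a per-thread counter
 * with a per-task counter: each thread increments its own counter on entry,
 * and only a thread that moves ahead of the task counter claims the
 * construct. The signed difference keeps the comparison correct across
 * counter wrap-around. */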

int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

void CDECL _vcomp_single_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}

void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section       = thread_data->section;
        task_data->num_sections  = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}

int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
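
/* Static loop scheduling divides the iteration space evenly at init time
 * with no further coordination; the remainder always goes to the lowest
 * thread numbers. For example, 10 iterations on 4 threads give threads 0
 * and 1 three iterations each and threads 2 and 3 two each. */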

void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        *begin = first;
        *end   = last;
        return;
    }

    if (step <= 0)
    {
        *begin = 0;
        *end   = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    per_thread = iterations / num_threads;
    remaining  = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;
    else if (per_thread)
        first += remaining * step;
    else
    {
        *begin = first;
        *end   = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end   = *begin + (per_thread - 1) * step;
}

void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (num_threads == 1 && chunksize != 1)
    {
        *loops      = 1;
        *begin      = first;
        *end        = last;
        *next       = 0;
        *lastchunk  = first;
        return;
    }

    if (first == last)
    {
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin      = first;
            *end        = last;
            *next       = 0;
            *lastchunk  = first;
        }
        return;
    }

    if (step <= 0)
    {
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (chunksize < 1)
        chunksize = 1;

    num_chunks  = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread  = num_chunks / num_threads;
    remaining   = num_chunks - per_thread * num_threads;

    *loops      = per_thread + (thread_num < remaining);
    *begin      = first + thread_num * chunksize * step;
    *end        = *begin + (chunksize - 1) * step;
    *next       = chunksize * num_threads * step;
    *lastchunk  = first + (num_chunks - 1) * chunksize * step;
}

void CDECL _vcomp_for_static_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
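
/* Dynamic and guided scheduling hand out work on demand: chunked mode
 * grants a fixed chunksize per request, while guided mode grants roughly
 * (remaining iterations / num_threads), shrinking as the loop drains. The
 * shared counters live in the task data and are protected by
 * vcomp_section. */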

void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        per_thread = iterations / num_threads;
        remaining  = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type   = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin  = first + per_thread * thread_num * step;
        thread_data->dynamic_end    = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            task_data->dynamic              = thread_data->dynamic;
            task_data->dynamic_first        = first;
            task_data->dynamic_last         = last;
            task_data->dynamic_iterations   = iterations;
            task_data->dynamic_step         = step;
            task_data->dynamic_chunksize    = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}

int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        *begin = thread_data->dynamic_begin;
        *end   = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end   = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first      += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}

int CDECL omp_in_parallel(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->parallel;
}

static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
    return 0;
}
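
/* _vcomp_fork implements "#pragma omp parallel": the forking thread first
 * recruits workers from vcomp_idle_threads, then spawns new ones until the
 * team is complete, runs the wrapper itself as thread 0, and finally sleeps
 * on the team condition variable until all members have finished. Workers
 * idle for more than 5 seconds terminate themselves; each holds a module
 * reference so the DLL cannot be unloaded underneath them. */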

void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads       = 1;
    team_data.finished_threads  = 0;
    team_data.nargs             = nargs;
    team_data.wrapper           = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier           = 0;
    team_data.barrier_count     = 0;

    task_data.single            = 0;
    task_data.section           = 0;
    task_data.dynamic           = 0;

    thread_data.team            = &team_data;
    thread_data.task            = &task_data;
    thread_data.thread_num      = 0;
    thread_data.parallel        = ifval || prev_thread_data->parallel;
    thread_data.fork_threads    = 0;
    thread_data.single          = 1;
    thread_data.section         = 1;
    thread_data.dynamic         = 1;
    thread_data.dynamic_type    = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team          = &team_data;
            data->task          = &task_data;
            data->thread_num    = team_data.num_threads++;
            data->parallel      = thread_data.parallel;
            data->fork_threads  = 0;
            data->single        = 1;
            data->section       = 1;
            data->dynamic       = 1;
            data->dynamic_type  = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;

            data->team          = &team_data;
            data->task          = &task_data;
            data->thread_num    = team_data.num_threads;
            data->parallel      = thread_data.parallel;
            data->fork_threads  = 0;
            data->single        = 1;
            data->section       = 1;
            data->dynamic       = 1;
            data->dynamic_type  = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}

static CRITICAL_SECTION *alloc_critsect(void)
{
    CRITICAL_SECTION *critsect;
    if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
    {
        ERR("could not allocate critical section\n");
        ExitProcess(1);
    }

    InitializeCriticalSection(critsect);
    critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
    return critsect;
}

static void destroy_critsect(CRITICAL_SECTION *critsect)
{
    if (!critsect) return;
    critsect->DebugInfo->Spare[0] = 0;
    DeleteCriticalSection(critsect);
    HeapFree(GetProcessHeap(), 0, critsect);
}

void CDECL omp_init_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    *lock = alloc_critsect();
}

void CDECL omp_destroy_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    destroy_critsect(*lock);
}

void CDECL omp_set_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
    {
        ERR("omp_set_lock called while holding lock %p\n", *lock);
        ExitProcess(1);
    }

    EnterCriticalSection(*lock);
}

void CDECL omp_unset_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
        return 0;

    return TryEnterCriticalSection(*lock);
}

void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    EnterCriticalSection(*lock);
}

void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
}
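
/* Named critical sections are allocated lazily on first entry; if two
 * threads race to create the same one, the interlocked_cmpxchg_ptr lets one
 * allocation win and the loser frees its copy before locking the winner's. */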

void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
{
    TRACE("(%p)\n", critsect);

    if (!*critsect)
    {
        CRITICAL_SECTION *new_critsect = alloc_critsect();
        if (interlocked_cmpxchg_ptr((void **)critsect, new_critsect, NULL) != NULL)
            destroy_critsect(new_critsect);  /* someone beat us to it */
    }

    EnterCriticalSection(*critsect);
}

void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
{
    TRACE("(%p)\n", critsect);
    LeaveCriticalSection(critsect);
}

BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module      = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}