/*
 * vcomp implementation
 *
 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "config.h"
#include "wine/port.h"

#include <stdarg.h>
#include <assert.h>

#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "wine/debug.h"
#include "wine/list.h"

WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;
static HMODULE vcomp_module;
static int     vcomp_max_threads;
static int     vcomp_num_threads;
static BOOL    vcomp_nested_fork = FALSE;

static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };

#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED    0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40

struct vcomp_thread_data
{
    struct vcomp_team_data  *team;
    struct vcomp_task_data  *task;
    int                     thread_num;
    BOOL                    parallel;
    int                     fork_threads;

    /* only used for concurrent tasks */
    struct list             entry;
    CONDITION_VARIABLE      cond;

    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_type;
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};

struct vcomp_team_data
{
    CONDITION_VARIABLE      cond;
    int                     num_threads;
    int                     finished_threads;

    /* callback arguments */
    int                     nargs;
    void                    *wrapper;
    __ms_va_list            valist;

    /* barrier */
    unsigned int            barrier;
    int                     barrier_count;
};

struct vcomp_task_data
{
    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;
    int                     num_sections;
    int                     section_index;

    /* dynamic */
    unsigned int            dynamic;
    unsigned int            dynamic_first;
    unsigned int            dynamic_last;
    unsigned int            dynamic_iterations;
    int                     dynamic_step;
    unsigned int            dynamic_chunksize;
};

#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "cld\n\t"
                   "rep; movsl\n"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )

#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"
                   "movq $4,%rcx\n\t"
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"
                   "rep; movsq\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret")

#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )

#elif defined(__aarch64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "stp x29, x30, [SP,#-16]!\n\t"
                   "mov x29, SP\n\t"
                   "mov x9, x0\n\t"
                   "cbz w1, 2f\n\t"
                   "mov w10, w1\n\t"
                   "mov x11, x2\n\t"
                   "ldr w12, [x11, #24]\n\t"
                   "ldr x13, [x11, #8]\n\t"
                   "ldr x0, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x1, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x2, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x3, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x4, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x5, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x6, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "ldr x7, [x13, w12, sxtw]\n\t"
                   "add w12, w12, #8\n\t"
                   "add x13, x13, w12, sxtw\n\t"
                   "subs w12, w10, #8\n\t"
                   "b.le 2f\n\t"
                   "ldr x11, [x11]\n\t"
                   "lsl w12, w12, #3\n\t"
                   "sub SP, SP, w12, sxtw\n\t"
                   "tbz w12, #3, 1f\n\t"
                   "sub SP, SP, #8\n\t"
                   "1: sub w12, w12, #8\n\t"
                   "ldr x14, [x13, w12, sxtw]\n\t"
                   "str x14, [SP, w12, sxtw]\n\t"
                   "cbnz w12, 1b\n\t"
                   "2: blr x9\n\t"
                   "mov SP, x29\n\t"
                   "ldp x29, x30, [SP], #16\n\t"
                   "ret\n" )

#else

static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif

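/*
 * Each assembly variant above implements the same helper: the
 * compiler-generated wrapper expects the captured variables as positional
 * pointer-sized arguments, so _vcomp_fork_call_wrapper copies "nargs" slots
 * from the __ms_va_list onto a freshly aligned stack area (and, where the
 * calling convention requires it, into the first argument registers) before
 * calling "wrapper".  A rough, non-portable C sketch of the intent, shown
 * here only as illustration and assuming at most two arguments:
 *
 *     typedef void (CDECL *wrapper_0)(void);
 *     typedef void (CDECL *wrapper_1)(void *);
 *     typedef void (CDECL *wrapper_2)(void *, void *);
 *
 *     void sketch(void *wrapper, int nargs, void **args)
 *     {
 *         if (nargs == 0)      ((wrapper_0)wrapper)();
 *         else if (nargs == 1) ((wrapper_1)wrapper)(args[0]);
 *         else                 ((wrapper_2)wrapper)(args[0], args[1]);
 *     }
 */
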
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

#else  /* __GNUC__ */

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#endif  /* __GNUC__ */

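/*
 * The 8- and 16-bit helpers above fill the gap left by the Win32
 * Interlocked* family, which only covers 32- and 64-bit operands.  All of
 * the atomic arithmetic entry points below rely on the same lock-free
 * retry pattern: read the old value, compute the result, and repeat the
 * compare-and-swap until no other thread modified the destination in
 * between, i.e. for any operator OP:
 *
 *     do old = *dest;
 *     while (interlocked_cmpxchg8(dest, old OP val, old) != old);
 */
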
static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
{
    return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
}

static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
{
    TlsSetValue(vcomp_context_tls, thread_data);
}

static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data   task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single  = 0;
    data->task.section = 0;
    data->task.dynamic = 0;

    thread_data = &data->thread;
    thread_data->team         = NULL;
    thread_data->task         = &data->task;
    thread_data->thread_num   = 0;
    thread_data->parallel     = FALSE;
    thread_data->fork_threads = 0;
    thread_data->single       = 1;
    thread_data->section      = 1;
    thread_data->dynamic      = 1;
    thread_data->dynamic_type = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}

static void vcomp_free_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    if (!thread_data) return;

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
}

void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

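/*
 * The reduction entry points encode the operation in bits 8-11 of the
 * flags argument; with the table above, 0x000/0x100 select addition,
 * 0x200 multiplication, 0x300 bitwise and, 0x400 bitwise or, 0x500
 * bitwise xor, and 0x600/0x700 the logical variants.  Out-of-range values
 * are clamped to the last entry.  The i2/i4/i8 and floating-point
 * reductions below follow the same layout.
 */
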
void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (interlocked_cmpxchg((int *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    interlocked_xchg_add(dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (interlocked_cmpxchg(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (interlocked_cmpxchg64(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, sizeof(funcs)/sizeof(funcs[0]) - 1);
    funcs[op](dest, val);
}

int CDECL omp_get_dynamic(void)
{
    TRACE("stub\n");
    return 0;
}

int CDECL omp_get_max_threads(void)
{
    TRACE("()\n");
    return vcomp_max_threads;
}

int CDECL omp_get_nested(void)
{
    TRACE("stub\n");
    return vcomp_nested_fork;
}

int CDECL omp_get_num_procs(void)
{
    TRACE("stub\n");
    return 1;
}

int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    TRACE("()\n");
    return team_data ? team_data->num_threads : 1;
}

int CDECL omp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

int CDECL _vcomp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

/* Time in seconds since "some time in the past" */
double CDECL omp_get_wtime(void)
{
    return GetTickCount() / 1000.0;
}

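/*
 * Note that GetTickCount has millisecond granularity and wraps after about
 * 49.7 days, which still satisfies the loose "seconds since some point in
 * the past" contract above; a higher-resolution implementation would
 * presumably use QueryPerformanceCounter instead.
 */
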
void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}

void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}

void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}

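/*
 * The barrier above uses a generation counter rather than a simple flag:
 * the last thread to arrive increments team_data->barrier and resets the
 * arrival count, while earlier arrivals sleep until the generation they
 * observed changes.  This keeps back-to-back barriers from confusing a
 * thread that is still waking up from the previous one.
 */
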
void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}

int CDECL _vcomp_master_begin(void)
{
    TRACE("()\n");
    return !vcomp_init_thread_data()->thread_num;
}

void CDECL _vcomp_master_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}

int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

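/*
 * _vcomp_single_begin needs no per-construct bookkeeping: every thread
 * advances its private "single" counter for each construct it reaches, and
 * only the first thread to push the shared task counter forward executes
 * the block.  The signed difference keeps the comparison correct across
 * counter wraparound.  The "sections" and "dynamic" constructs below reuse
 * the same scheme.
 */
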
void CDECL _vcomp_single_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}

void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section       = thread_data->section;
        task_data->num_sections  = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}

int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}

void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        *begin = first;
        *end   = last;
        return;
    }

    if (step <= 0)
    {
        *begin = 0;
        *end   = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    per_thread = iterations / num_threads;
    remaining  = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;
    else if (per_thread)
        first += remaining * step;
    else
    {
        *begin = first;
        *end   = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end   = *begin + (per_thread - 1) * step;
}

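/*
 * A worked example of the partitioning above: 10 iterations (first=0,
 * last=9, step=1, increment=TRUE) over 4 threads yield per_thread=2 and
 * remaining=2, so threads 0 and 1 each take three iterations ([0,2] and
 * [3,5]) while threads 2 and 3 take two ([6,7] and [8,9]).
 */
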
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    int no_begin, no_lastchunk;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (!begin)
    {
        begin     = &no_begin;
        lastchunk = &no_lastchunk;
    }

    if (num_threads == 1 && chunksize != 1)
    {
        *loops     = 1;
        *begin     = first;
        *end       = last;
        *next      = 0;
        *lastchunk = first;
        return;
    }

    if (first == last)
    {
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin     = first;
            *end       = last;
            *next      = 0;
            *lastchunk = first;
        }
        return;
    }

    if (step <= 0)
    {
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (chunksize < 1)
        chunksize = 1;

    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining  = num_chunks - per_thread * num_threads;

    *loops     = per_thread + (thread_num < remaining);
    *begin     = first + thread_num * chunksize * step;
    *end       = *begin + (chunksize - 1) * step;
    *next      = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
}

void CDECL _vcomp_for_static_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}

void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        per_thread = iterations / num_threads;
        remaining  = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type  = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end   = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            task_data->dynamic            = thread_data->dynamic;
            task_data->dynamic_first      = first;
            task_data->dynamic_last       = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step       = step;
            task_data->dynamic_chunksize  = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}

int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        *begin = thread_data->dynamic_begin;
        *end   = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end   = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first      += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}

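/*
 * For guided scheduling the chunk handed out above shrinks as the loop
 * drains: while many iterations remain, each caller takes roughly
 * remaining/num_threads of them, falling back to the plain chunksize for
 * the tail.  Chunked (dynamic) scheduling always hands out exactly
 * chunksize iterations until the range is exhausted.
 */
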
int CDECL omp_in_parallel(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->parallel;
}

static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
    return 0;
}

void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads      = 1;
    team_data.finished_threads = 0;
    team_data.nargs            = nargs;
    team_data.wrapper          = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier          = 0;
    team_data.barrier_count    = 0;

    task_data.single  = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    thread_data.team         = &team_data;
    thread_data.task         = &task_data;
    thread_data.thread_num   = 0;
    thread_data.parallel     = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single       = 1;
    thread_data.section      = 1;
    thread_data.dynamic      = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team         = &team_data;
            data->task         = &task_data;
            data->thread_num   = team_data.num_threads++;
            data->parallel     = thread_data.parallel;
            data->fork_threads = 0;
            data->single       = 1;
            data->section      = 1;
            data->dynamic      = 1;
            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;

            data->team         = &team_data;
            data->task         = &task_data;
            data->thread_num   = team_data.num_threads;
            data->parallel     = thread_data.parallel;
            data->fork_threads = 0;
            data->single       = 1;
            data->section      = 1;
            data->dynamic      = 1;
            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}

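/*
 * _vcomp_fork is the entry point the compiler emits for a parallel region.
 * As a rough illustration (the wrapper name and capture layout here are
 * hypothetical), source such as
 *
 *     #pragma omp parallel
 *     { sum += f(x); }
 *
 * is lowered to a call along the lines of
 *
 *     _vcomp_fork(TRUE, 2, parallel_wrapper, &sum, x);
 *
 * where parallel_wrapper receives the captured variables as positional
 * arguments and is executed once by every thread of the team via
 * _vcomp_fork_call_wrapper.
 */
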
static CRITICAL_SECTION *alloc_critsect(void)
{
    CRITICAL_SECTION *critsect;
    if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
    {
        ERR("could not allocate critical section\n");
        ExitProcess(1);
    }

    InitializeCriticalSection(critsect);
    critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
    return critsect;
}

static void destroy_critsect(CRITICAL_SECTION *critsect)
{
    if (!critsect) return;
    critsect->DebugInfo->Spare[0] = 0;
    DeleteCriticalSection(critsect);
    HeapFree(GetProcessHeap(), 0, critsect);
}

void CDECL omp_init_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    *lock = alloc_critsect();
}

void CDECL omp_destroy_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    destroy_critsect(*lock);
}

void CDECL omp_set_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
    {
        ERR("omp_set_lock called while holding lock %p\n", *lock);
        ExitProcess(1);
    }

    EnterCriticalSection(*lock);
}

void CDECL omp_unset_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
        return 0;

    return TryEnterCriticalSection(*lock);
}

void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    EnterCriticalSection(*lock);
}

void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
}

void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
{
    TRACE("(%p)\n", critsect);

    if (!*critsect)
    {
        CRITICAL_SECTION *new_critsect = alloc_critsect();
        if (interlocked_cmpxchg_ptr((void **)critsect, new_critsect, NULL) != NULL)
            destroy_critsect(new_critsect);  /* someone beat us to it */
    }

    EnterCriticalSection(*critsect);
}

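/*
 * Critical sections for "#pragma omp critical" are created lazily: the
 * first thread to arrive allocates one, and the interlocked pointer
 * exchange above makes the loser of a racing allocation free its copy, so
 * concurrent first entries still agree on a single lock.
 */
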
void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
{
    TRACE("(%p)\n", critsect);
    LeaveCriticalSection(critsect);
}

BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module      = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}