msvcrt: Remove non-needed defines from msvcrt.h.
[wine.git] / dlls / vcomp / main.c
blob3c6d48d41cc042153df51f8c9206a9a78fe68596
1 /*
3 * vcomp implementation
5 * Copyright 2011 Austin English
6 * Copyright 2012 Dan Kegel
7 * Copyright 2015-2016 Sebastian Lackner
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 #include <stdarg.h>
25 #include <assert.h>
27 #include "windef.h"
28 #include "winbase.h"
29 #include "winternl.h"
30 #include "wine/debug.h"
31 #include "wine/list.h"
32 #include "wine/asm.h"
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

/* OpenMP lock types: both plain and nestable locks are backed by a
   heap-allocated critical section (a CRITICAL_SECTION supports recursive
   entry natively, so one representation serves both). */
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

/* Pool of parked worker threads available for reuse by a later fork. */
static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;  /* TLS slot holding the per-thread data */
static HMODULE vcomp_module;            /* self-reference, keeps the DLL pinned while workers run */
static int     vcomp_max_threads;       /* upper bound reported by omp_get_max_threads() */
static int     vcomp_num_threads;       /* default team size for new parallel regions */
static BOOL    vcomp_nested_fork = FALSE;  /* omp_set_nested() state */

/* Single global lock protecting all shared vcomp state (idle-thread list,
   team bookkeeping, task counters). Declared first so critsect_debug can
   point at it; defined with its initializer below. */
static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };

/* Scheduling kind encoded in the flags passed to _vcomp_for_dynamic_init(). */
#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED     0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40  /* loop counts upwards */
/* Per-thread state, stored in the vcomp_context_tls TLS slot. */
struct vcomp_thread_data
{
    struct vcomp_team_data  *team;        /* team of the current parallel region (NULL outside one) */
    struct vcomp_task_data  *task;        /* shared task state this thread participates in */
    int                     thread_num;   /* omp_get_thread_num() value within the team */
    BOOL                    parallel;     /* currently executing inside a parallel region */
    int                     fork_threads; /* team size requested via _vcomp_set_num_threads(), 0 = default */

    /* only used for concurrent tasks */
    struct list             entry;        /* link in vcomp_idle_threads while parked */
    CONDITION_VARIABLE      cond;         /* signalled to wake this worker */

    /* single: per-thread generation counter matched against the task's */
    unsigned int            single;

    /* section: per-thread generation counter matched against the task's */
    unsigned int            section;

    /* dynamic: per-thread generation counter and cached loop bounds */
    unsigned int            dynamic;
    unsigned int            dynamic_type;   /* VCOMP_DYNAMIC_FLAGS_* kind, 0 = no active loop */
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};
/* State shared by all threads of one parallel region (one "team"). */
struct vcomp_team_data
{
    CONDITION_VARIABLE      cond;             /* broadcast when the barrier releases */
    int                     num_threads;      /* team size */
    int                     finished_threads; /* threads done with the region body */

    /* callback arguments: forwarded by _vcomp_fork_call_wrapper to the
       compiler-generated parallel-region function */
    int                     nargs;
    void                    *wrapper;
    __ms_va_list            valist;

    /* barrier: generation counter plus count of threads waiting on it */
    unsigned int            barrier;
    int                     barrier_count;
};
/* Work-sharing state shared by a team; the generation counters here are
   compared against the per-thread copies to decide which thread claims a
   given construct. */
struct vcomp_task_data
{
    /* single */
    unsigned int            single;

    /* section */
    unsigned int            section;
    int                     num_sections;
    int                     section_index;   /* next unclaimed section */

    /* dynamic: current dynamically-scheduled loop */
    unsigned int            dynamic;
    unsigned int            dynamic_first;
    unsigned int            dynamic_last;
    unsigned int            dynamic_iterations;  /* iterations still unclaimed */
    int                     dynamic_step;
    unsigned int            dynamic_chunksize;
};
/* _vcomp_fork_call_wrapper: invoke the compiler-generated parallel-region
 * function `wrapper` with `nargs` pointer-sized arguments taken from `args`.
 * The arguments are copied onto a freshly aligned stack area so the callee
 * sees them as if it had been called directly with a variable argument list.
 * One hand-written assembly variant per supported architecture. */
#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
/* i386: copy nargs dwords to a 16-byte aligned stack block, then call. */
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"          /* edx = nargs * 4 bytes; skip copy if zero */
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"        /* 16-byte align the argument area */
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "cld\n\t"
                   "rep; movsl\n"              /* copy the arguments from the va_list */
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )

#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
/* x86_64 (Win64 ABI): always reserve at least 4 slots (home space), copy the
 * arguments, then load the first four into rcx/rdx/r8/r9 as the ABI requires. */
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_SEH(".seh_pushreg %rbp\n\t")
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_SEH(".seh_setframe %rbp,0\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_SEH(".seh_pushreg %rsi\n\t")
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_SEH(".seh_pushreg %rdi\n\t")
                   __ASM_SEH(".seh_endprologue\n\t")
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"        /* rax = wrapper */
                   "movq $4,%rcx\n\t"
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"      /* rcx = max(nargs, 4) */
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"        /* 16-byte align */
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"
                   "rep; movsq\n\t"            /* copy arguments */
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret")

#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
/* ARM (AAPCS): copy the arguments onto the stack (8-byte aligned), then pop
 * up to the first four into r0-r3 before branching to the wrapper. */
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"        /* r3 = nargs * 4 */
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"      /* keep SP 8-byte aligned */
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )

#elif defined(__aarch64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
/* AArch64: reserve at least 64 bytes (rounded to 16), copy the arguments,
 * then load the first eight into x0-x7 before calling. */
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "stp x29, x30, [SP,#-16]!\n\t"
                   "mov x29, SP\n\t"
                   "mov x9, x0\n\t"            /* x9 = wrapper */
                   "cbz w1, 4f\n\t"
                   "lsl w8, w1, #3\n\t"        /* w8 = nargs * 8 */
                   "cmp w8, #64\n\t"
                   "b.ge 1f\n\t"
                   "mov w8, #64\n"
                   "1:\ttbz w8, #3, 2f\n\t"
                   "add w8, w8, #8\n"          /* round up to 16-byte multiple */
                   "2:\tsub x10, x29, x8\n\t"
                   "mov sp, x10\n"
                   "3:\tldr x0, [x2], #8\n\t"
                   "str x0, [x10], #8\n\t"
                   "subs w1, w1, #1\n\t"
                   "b.ne 3b\n\t"
                   "ldp x0, x1, [sp], #16\n\t"
                   "ldp x2, x3, [sp], #16\n\t"
                   "ldp x4, x5, [sp], #16\n\t"
                   "ldp x6, x7, [sp], #16\n"
                   "4:\tblr x9\n\t"
                   "mov SP, x29\n\t"
                   "ldp x29, x30, [SP], #16\n\t"
                   "ret\n" )

#else

/* Unsupported architecture: parallel regions cannot be dispatched. */
static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
/* 8- and 16-bit interlocked primitives. Win32 only provides 32/64-bit
 * Interlocked* functions, so the byte/word atomics needed by the _i1/_i2
 * entry points are implemented here: inline lock-prefixed instructions on
 * x86, GCC __sync builtins where available, and otherwise an emulation
 * under the global vcomp_section lock. */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

#else  /* __GNUC__ */

/* Non-x86 GCC: use the __sync builtins when the target supports them. */
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
/* Last resort: serialize through the global lock. Not lock-free, but the
 * same lock also guards all other fallback atomics, so it stays coherent. */
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#endif  /* __GNUC__ */
369 static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
371 return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
374 static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
376 TlsSetValue(vcomp_context_tls, thread_data);
/* Return this thread's vcomp context, lazily allocating and initializing it
 * on first use. The thread and its implicit (serial) task are allocated as
 * one heap block. On allocation failure the process is terminated: every
 * exported entry point relies on this never returning NULL. */
static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data   task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single  = 0;
    data->task.section = 0;
    data->task.dynamic = 0;

    thread_data = &data->thread;
    thread_data->team         = NULL;        /* not inside a parallel region */
    thread_data->task         = &data->task;
    thread_data->thread_num   = 0;           /* master thread of the implicit team */
    thread_data->parallel     = FALSE;
    thread_data->fork_threads = 0;
    /* generation counters start one ahead of the task's zeroed counters */
    thread_data->single       = 1;
    thread_data->section      = 1;
    thread_data->dynamic      = 1;
    thread_data->dynamic_type = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}
414 static void vcomp_free_thread_data(void)
416 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
417 if (!thread_data) return;
419 HeapFree(GetProcessHeap(), 0, thread_data);
420 vcomp_set_thread_data(NULL);
/* Atomic operations on 8-bit values, used by #pragma omp atomic/reduction.
 * Except for add/sub (which map to xadd), each op is a compare-and-swap
 * loop: read the old value, compute the new one, retry if another thread
 * changed *dest in between. */
void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(signed char *dest, signed char val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

/* signed variant: arithmetic shift right */
void CDECL _vcomp_atomic_shr_i1(signed char *dest, unsigned int val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

/* unsigned variant: logical shift right */
void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}

/* logical AND for reduction(&&:...) */
static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

/* logical OR for reduction(||:...); keeps a nonzero *dest unchanged */
static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}

/* Dispatch a reduction: bits 8-11 of flags select the operator
 * (+, +, *, &, |, ^, &&, ||); out-of-range codes clamp to the last entry. */
void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Atomic operations on 16-bit values; same CAS-loop pattern as the 8-bit
 * family above. */
void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

/* signed variant: arithmetic shift right */
void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

/* unsigned variant: logical shift right */
void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

/* logical AND for reduction(&&:...) */
static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

/* logical OR for reduction(||:...) */
static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

/* Operator dispatch; see _vcomp_reduction_i1 for the encoding. */
void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Atomic operations on 32-bit values, built on the Win32 Interlocked* API. */
void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    InterlockedExchangeAdd(dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((int *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old << val, old) != old);
}

/* signed variant: arithmetic shift right */
void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old >> val, old) != old);
}

/* unsigned variant: logical shift right */
void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((int *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    InterlockedExchangeAdd(dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old ^ val, old) != old);
}

/* logical AND for reduction(&&:...) */
static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old && val, old) != old);
}

/* logical OR for reduction(||:...) */
static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old ? old : (val != 0), old) != old);
}

/* Operator dispatch; see _vcomp_reduction_i1 for the encoding. */
void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Atomic operations on 64-bit values. Even add/sub use a CAS loop here,
 * since InterlockedExchangeAdd64 may not be available on all targets. */
void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old << val, old) != old);
}

/* signed variant: arithmetic shift right */
void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old >> val, old) != old);
}

/* unsigned variant: logical shift right */
void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ^ val, old) != old);
}

/* logical AND for reduction(&&:...) */
static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old && val, old) != old);
}

/* logical OR for reduction(||:...) */
static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ? old : (val != 0), old) != old);
}

/* Operator dispatch; see _vcomp_reduction_i1 for the encoding. */
void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Atomic operations on 32-bit floats: the float is type-punned to an int so
 * the update can go through InterlockedCompareExchange.
 * NOTE(review): the *(float *)&old punning relies on the compiler allowing
 * cross-type access through casted pointers (e.g. building with
 * -fno-strict-aliasing) — confirm against the project's build flags. */
void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

/* logical AND for reduction(&&:...): result is 0.0 or 1.0 */
static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

/* logical OR for reduction(||:...): a nonzero *dest is kept as-is */
static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

/* Operator dispatch; the bitwise slots (&, |, ^) have no float equivalent
 * and fall through to the boolean-or handler. */
void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Atomic operations on 64-bit doubles; same punning scheme as the r4 family,
 * through LONG64 and InterlockedCompareExchange64. */
void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

/* logical AND for reduction(&&:...): result is 0.0 or 1.0 */
static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

/* logical OR for reduction(||:...): a nonzero *dest is kept as-is */
static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

/* Operator dispatch; bitwise slots map to boolean-or as in _vcomp_reduction_r4. */
void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
/* Dynamic thread-count adjustment is never enabled (stub). */
int CDECL omp_get_dynamic(void)
{
    TRACE("stub\n");
    return 0;
}

/* Maximum number of threads available for a parallel region. */
int CDECL omp_get_max_threads(void)
{
    TRACE("()\n");
    return vcomp_max_threads;
}

/* Whether nested parallelism is enabled (set via omp_set_nested). */
int CDECL omp_get_nested(void)
{
    TRACE("stub\n");
    return vcomp_nested_fork;
}

/* Stub: always reports a single processor. */
int CDECL omp_get_num_procs(void)
{
    TRACE("stub\n");
    return 1;
}

/* Size of the current team, or 1 outside a parallel region. */
int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    TRACE("()\n");
    return team_data ? team_data->num_threads : 1;
}

/* Index of the calling thread within its team (0 = master). */
int CDECL omp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

/* Compiler-internal alias of omp_get_thread_num(). */
int CDECL _vcomp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}
1012 /* Time in seconds since "some time in the past" */
1013 double CDECL omp_get_wtime(void)
1015 return GetTickCount() / 1000.0;
/* Stub: dynamic adjustment requests are accepted and ignored. */
void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

/* Enable or disable nested parallel regions. */
void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

/* Set the default team size for subsequent parallel regions;
 * non-positive values are ignored. */
void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}

/* Stub: no explicit memory flush is performed. */
void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}
/* Team barrier. The last thread to arrive bumps the generation counter and
 * wakes everyone; earlier arrivals sleep on the team's condition variable
 * until the generation changes (guards against spurious wakeups). Outside a
 * parallel region there is no team and the barrier is a no-op. */
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        /* last arrival: open the barrier and reset for the next use */
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
/* Request a specific team size for this thread's next fork;
 * non-positive values are ignored. */
void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}

/* #pragma omp master: only thread 0 of the team executes the block. */
int CDECL _vcomp_master_begin(void)
{
    TRACE("()\n");
    return !vcomp_init_thread_data()->thread_num;
}

/* End of a master block; master has no implied barrier. */
void CDECL _vcomp_master_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
/* #pragma omp single: exactly one thread of the team executes the block.
 * Each thread advances its private generation counter; the first one whose
 * counter gets ahead of the task's shared counter claims the construct. */
int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    /* signed comparison so the unsigned counters may wrap safely */
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

/* End of a single block; the compiler emits any needed barrier separately. */
void CDECL _vcomp_single_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
/* #pragma omp sections: the first thread to enter (generation counter ahead
 * of the task's, same scheme as _vcomp_single_begin) publishes the section
 * count and resets the claim index for the team. */
void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section = thread_data->section;
        task_data->num_sections = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}

/* Claim the next unexecuted section; returns its index, or -1 when all
 * sections of the current generation have been handed out. */
int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
/* Static (unchunked) loop scheduling: divide the iteration space evenly
 * across the team and hand this thread its [*begin, *end] range. `increment`
 * selects counting direction; all bounds arithmetic is unsigned so the
 * compiler-generated loop can cover the full range. */
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        /* serial execution: the whole range belongs to this thread */
        *begin = first;
        *end = last;
        return;
    }

    if (step <= 0)
    {
        /* invalid step: produce an empty range (end before begin) */
        *begin = 0;
        *end = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;  /* normalize to a signed downward step */
    }

    per_thread = iterations / num_threads;
    remaining  = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;               /* first `remaining` threads take one extra */
    else if (per_thread)
        first += remaining * step;  /* skip over the extra iterations */
    else
    {
        /* more threads than iterations: this thread gets an empty range */
        *begin = first;
        *end = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end = *begin + (per_thread - 1) * step;
}
/* Static chunked loop scheduling: chunks of `chunksize` iterations are dealt
 * round-robin to the team. Outputs: *loops = number of chunks for this
 * thread, *begin/*end = bounds of its first chunk, *next = stride to the
 * following chunk, *lastchunk = start of the final chunk of the loop. */
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    int no_begin, no_lastchunk;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (!begin)
    {
        /* caller not interested in these outputs; redirect to locals */
        begin = &no_begin;
        lastchunk = &no_lastchunk;
    }

    if (num_threads == 1 && chunksize != 1)
    {
        /* serial execution: one chunk covering the whole loop */
        *loops = 1;
        *begin = first;
        *end = last;
        *next = 0;
        *lastchunk = first;
        return;
    }

    if (first == last)
    {
        /* single iteration: give it to the master thread only */
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin = first;
            *end = last;
            *next = 0;
            *lastchunk = first;
        }
        return;
    }

    if (step <= 0)
    {
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;  /* normalize to a downward step */
    }

    if (chunksize < 1)
        chunksize = 1;

    /* DWORD64 intermediate avoids overflow of iterations + chunksize - 1 */
    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining  = num_chunks - per_thread * num_threads;

    *loops = per_thread + (thread_num < remaining);
    *begin = first + thread_num * chunksize * step;
    *end = *begin + (chunksize - 1) * step;
    *next = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
}

/* End of a static for; any implied barrier is emitted separately. */
void CDECL _vcomp_for_static_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
/* Initialize a dynamically scheduled for loop for the calling thread.
 *
 * flags carries a VCOMP_DYNAMIC_FLAGS_* scheduling type, optionally OR'ed
 * with VCOMP_DYNAMIC_FLAGS_INCREMENT for ascending loops.  first/last are
 * the inclusive loop bounds, step the (positive) stride, chunksize the
 * scheduling granularity.  The per-thread slice (static type) or the
 * shared task state (chunked/guided types) is stored for later retrieval
 * by _vcomp_for_dynamic_next. */
void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        /* Invalid stride: mark the loop as having no work for this thread. */
        thread_data->dynamic_type = 0;
        return;
    }

    /* Normalize to an iteration count; for descending loops negate step so
     * the arithmetic below advances in the right direction. */
    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* Static type: partition iterations evenly; the first 'remaining'
         * threads take one extra iteration. */
        per_thread = iterations / num_threads;
        remaining = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            /* More threads than iterations; this thread gets none. */
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        /* Only the first thread to reach this generation (signed wraparound
         * comparison) publishes the shared loop description. */
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            task_data->dynamic = thread_data->dynamic;
            task_data->dynamic_first = first;
            task_data->dynamic_last = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step = step;
            task_data->dynamic_chunksize = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}
/* Fetch the next range of iterations for a dynamically scheduled loop.
 *
 * Fills *begin/*end with the inclusive bounds of the claimed range and
 * returns nonzero, or returns 0 when this thread has no more work. */
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* Static type: the whole slice was precomputed; hand it out once. */
        *begin = thread_data->dynamic_begin;
        *end = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        /* Only claim work if the shared state still belongs to the loop
         * generation this thread joined, and iterations remain. */
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            /* Guided: while plenty of work remains, take a proportionally
             * larger share (remaining / num_threads, rounded up). */
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first += iterations * task_data->dynamic_step;
            /* Pin the final range exactly to the declared last bound. */
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}
1389 int CDECL omp_in_parallel(void)
1391 TRACE("()\n");
1392 return vcomp_init_thread_data()->parallel;
/* Entry point of a pooled worker thread created by _vcomp_fork.
 *
 * The worker repeatedly executes the team's wrapper whenever it has been
 * assigned a team, then parks itself on the idle list.  It exits after
 * sitting idle for 5 seconds without being claimed by a new team, freeing
 * its own thread data and releasing the module reference taken by the
 * spawning code in _vcomp_fork. */
static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            /* Run the parallel region outside the global lock. */
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            /* Detach from the team and rejoin the idle pool; wake the
             * forking thread once every team member has finished. */
            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        /* Wait for new work; give up after an idle timeout, but only if no
         * team was assigned while the wait was returning. */
        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    /* Self-cleanup: this thread's data was heap-allocated by _vcomp_fork. */
    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
    return 0;
}
/* Start a parallel region.
 *
 * ifval is the evaluated OpenMP 'if' clause; when FALSE (or when nested
 * inside another region and nested forking is disabled) the region runs
 * on the calling thread alone.  nargs variadic arguments are forwarded to
 * 'wrapper', which is invoked once per team thread via
 * _vcomp_fork_call_wrapper.  Idle pooled workers are reused before new
 * threads are spawned; the caller itself acts as thread 0 and blocks
 * until every team member has finished. */
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    /* Nested parallel regions are serialized unless explicitly enabled. */
    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads = 1;          /* the forking thread itself */
    team_data.finished_threads = 0;
    team_data.nargs = nargs;
    team_data.wrapper = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier = 0;
    team_data.barrier_count = 0;

    task_data.single = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    /* Stack-allocated thread data for thread 0 (the caller). */
    thread_data.team = &team_data;
    thread_data.task = &task_data;
    thread_data.thread_num = 0;
    thread_data.parallel = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single = 1;
    thread_data.section = 1;
    thread_data.dynamic = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads++;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;               /* best effort: run with fewer threads */

            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            /* Pin the module so it can't unload while the worker runs; the
             * worker releases it via FreeLibraryAndExitThread on exit. */
            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    /* Thread 0 runs the region body itself. */
    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        /* Count ourselves as finished and wait for the rest of the team. */
        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}
1562 static CRITICAL_SECTION *alloc_critsect(void)
1564 CRITICAL_SECTION *critsect;
1565 if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
1567 ERR("could not allocate critical section\n");
1568 ExitProcess(1);
1571 InitializeCriticalSection(critsect);
1572 critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
1573 return critsect;
1576 static void destroy_critsect(CRITICAL_SECTION *critsect)
1578 if (!critsect) return;
1579 critsect->DebugInfo->Spare[0] = 0;
1580 DeleteCriticalSection(critsect);
1581 HeapFree(GetProcessHeap(), 0, critsect);
1584 void CDECL omp_init_lock(omp_lock_t *lock)
1586 TRACE("(%p)\n", lock);
1587 *lock = alloc_critsect();
1590 void CDECL omp_destroy_lock(omp_lock_t *lock)
1592 TRACE("(%p)\n", lock);
1593 destroy_critsect(*lock);
1596 void CDECL omp_set_lock(omp_lock_t *lock)
1598 TRACE("(%p)\n", lock);
1600 if (RtlIsCriticalSectionLockedByThread(*lock))
1602 ERR("omp_set_lock called while holding lock %p\n", *lock);
1603 ExitProcess(1);
1606 EnterCriticalSection(*lock);
1609 void CDECL omp_unset_lock(omp_lock_t *lock)
1611 TRACE("(%p)\n", lock);
1612 LeaveCriticalSection(*lock);
1615 int CDECL omp_test_lock(omp_lock_t *lock)
1617 TRACE("(%p)\n", lock);
1619 if (RtlIsCriticalSectionLockedByThread(*lock))
1620 return 0;
1622 return TryEnterCriticalSection(*lock);
1625 void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
1627 TRACE("(%p)\n", lock);
1628 EnterCriticalSection(*lock);
1631 void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
1633 TRACE("(%p)\n", lock);
1634 LeaveCriticalSection(*lock);
1637 int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
1639 TRACE("(%p)\n", lock);
1640 return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
1643 void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
1645 TRACE("(%p)\n", critsect);
1647 if (!*critsect)
1649 CRITICAL_SECTION *new_critsect = alloc_critsect();
1650 if (InterlockedCompareExchangePointer((void **)critsect, new_critsect, NULL) != NULL)
1651 destroy_critsect(new_critsect); /* someone beat us to it */
1654 EnterCriticalSection(*critsect);
1657 void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
1659 TRACE("(%p)\n", critsect);
1660 LeaveCriticalSection(critsect);
/* DLL entry point: set up and tear down per-process/per-thread state.
 *
 * On process attach a TLS slot for the per-thread data is allocated and
 * the default thread counts are taken from the processor count.  Thread
 * and process detach free the calling thread's data; on process detach
 * the TLS slot is released as well (skipped at process termination, when
 * 'reserved' is non-NULL). */
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            /* Skip cleanup during process termination. */
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}