/*
 * Extraction metadata (not part of the original source):
 *   commit subject: "ntdll: Fix up instruction pointer in context for EXCEPTION_BREAKPOINT on x86_64."
 *   [wine.git] / dlls / vcomp / main.c
 *   blob 446b83da21a452116b614f8f474e34ab7a1826a8
 */
/*
 * vcomp implementation
 *
 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#include "config.h"
#include "wine/port.h"

#include <stdarg.h>
#include <assert.h>

#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "wine/debug.h"
#include "wine/list.h"
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

/* OpenMP locks are represented as pointers to lazily allocated critical sections. */
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

/* Worker threads that have finished their team park themselves on this list for reuse. */
static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES; /* TLS slot holding the per-thread data */
static HMODULE vcomp_module;                           /* our own module handle, used to pin the DLL for workers */
static int     vcomp_max_threads;                      /* value returned by omp_get_max_threads() */
static int     vcomp_num_threads;                      /* default team size for new parallel regions */
static BOOL    vcomp_nested_fork = FALSE;              /* omp_set_nested() state */

/* Global lock protecting team/task bookkeeping and the idle-thread list. */
static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };

/* Scheduling-type flags passed to _vcomp_for_dynamic_init(). */
#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED     0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40  /* loop counts upwards when set */
/* Per-thread state, stored in the vcomp_context_tls TLS slot. */
struct vcomp_thread_data
{
    struct vcomp_team_data      *team;          /* team this thread currently belongs to (NULL when idle) */
    struct vcomp_task_data      *task;          /* shared task state for the current region */
    int                         thread_num;     /* omp_get_thread_num() value within the team */
    BOOL                        parallel;       /* TRUE while executing inside a parallel region */
    int                         fork_threads;   /* team size requested via _vcomp_set_num_threads (0 = default) */

    /* only used for concurrent tasks */
    struct list                 entry;          /* link in team list or vcomp_idle_threads */
    CONDITION_VARIABLE          cond;           /* signalled to wake an idle worker */

    /* single */
    unsigned int                single;         /* generation counter for "single" blocks seen by this thread */

    /* section */
    unsigned int                section;        /* generation counter for "sections" blocks */

    /* dynamic */
    unsigned int                dynamic;        /* generation counter for dynamic loops */
    unsigned int                dynamic_type;   /* VCOMP_DYNAMIC_FLAGS_* scheduling type, 0 = none */
    unsigned int                dynamic_begin;  /* precomputed range for static scheduling */
    unsigned int                dynamic_end;
};
/* State shared by all threads of one parallel region; lives on the forking thread's stack. */
struct vcomp_team_data
{
    CONDITION_VARIABLE  cond;               /* signalled when all threads have finished or at barriers */
    int                 num_threads;        /* total threads in the team */
    int                 finished_threads;   /* threads done with the region's work */

    /* callback arguments */
    int                 nargs;              /* number of pointer-sized arguments */
    void                *wrapper;           /* user callback invoked by each team member */
    __ms_va_list        valist;             /* the arguments themselves */

    /* barrier */
    unsigned int        barrier;            /* barrier generation counter */
    int                 barrier_count;      /* threads currently waiting at the barrier */
};
/* Work-sharing bookkeeping shared by a team; protected by vcomp_section. */
struct vcomp_task_data
{
    /* single */
    unsigned int        single;             /* generation of the last claimed "single" block */

    /* section */
    unsigned int        section;            /* generation of the current "sections" block */
    int                 num_sections;       /* total sections in the block */
    int                 section_index;      /* next section to hand out */

    /* dynamic */
    unsigned int        dynamic;            /* generation of the current dynamic loop */
    unsigned int        dynamic_first;      /* next iteration value to hand out */
    unsigned int        dynamic_last;       /* final iteration value */
    unsigned int        dynamic_iterations; /* iterations not yet handed out */
    int                 dynamic_step;       /* signed iteration step */
    unsigned int        dynamic_chunksize;  /* iterations per chunk */
};
#if defined(__i386__)

/* Copy nargs 32-bit arguments from the __ms_va_list onto a freshly 16-byte
 * aligned stack area (shll $2 = *4; andl $~15 = align; rep movsl = copy),
 * then call the cdecl wrapper.  With zero arguments the copy is skipped. */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "cld\n\t"
                   "rep; movsl\n"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )
#elif defined(__x86_64__)

/* Copy max(nargs, 4) qwords from the __ms_va_list onto a 16-byte aligned
 * stack area, load the first four into %rcx/%rdx/%r8/%r9 as the Win64
 * calling convention requires (leaving them duplicated in the shadow space),
 * then call the wrapper. */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"
                   "movq $4,%rcx\n\t"
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"
                   "rep; movsq\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret")
#elif defined(__arm__)

/* Copy the arguments onto an 8-byte aligned stack area (the extra "subeq"
 * pads odd argument counts), pop up to the first four back into r0-r3 as the
 * AAPCS requires, then call the wrapper via blx. */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )
#else

/* Fallback for unsupported architectures: parallel regions cannot invoke the
 * user callback, so only log an error. */
static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
241 static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
243 return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
246 static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
248 TlsSetValue(vcomp_context_tls, thread_data);
251 static struct vcomp_thread_data *vcomp_init_thread_data(void)
253 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
254 struct
256 struct vcomp_thread_data thread;
257 struct vcomp_task_data task;
258 } *data;
260 if (thread_data) return thread_data;
261 if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
263 ERR("could not create thread data\n");
264 ExitProcess(1);
267 data->task.single = 0;
268 data->task.section = 0;
269 data->task.dynamic = 0;
271 thread_data = &data->thread;
272 thread_data->team = NULL;
273 thread_data->task = &data->task;
274 thread_data->thread_num = 0;
275 thread_data->parallel = FALSE;
276 thread_data->fork_threads = 0;
277 thread_data->single = 1;
278 thread_data->section = 1;
279 thread_data->dynamic = 1;
280 thread_data->dynamic_type = 0;
282 vcomp_set_thread_data(thread_data);
283 return thread_data;
286 static void vcomp_free_thread_data(void)
288 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
289 if (!thread_data) return;
291 HeapFree(GetProcessHeap(), 0, thread_data);
292 vcomp_set_thread_data(NULL);
295 void CDECL _vcomp_atomic_add_i4(int *dest, int val)
297 interlocked_xchg_add(dest, val);
300 void CDECL _vcomp_atomic_and_i4(int *dest, int val)
302 int old;
303 do old = *dest; while (interlocked_cmpxchg(dest, old & val, old) != old);
306 void CDECL _vcomp_atomic_div_i4(int *dest, int val)
308 int old;
309 do old = *dest; while (interlocked_cmpxchg(dest, old / val, old) != old);
312 void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
314 unsigned int old;
315 do old = *dest; while (interlocked_cmpxchg((int *)dest, old / val, old) != old);
318 void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
320 int old;
321 do old = *dest; while (interlocked_cmpxchg(dest, old * val, old) != old);
324 void CDECL _vcomp_atomic_or_i4(int *dest, int val)
326 int old;
327 do old = *dest; while (interlocked_cmpxchg(dest, old | val, old) != old);
330 void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
332 int old;
333 do old = *dest; while (interlocked_cmpxchg(dest, old << val, old) != old);
336 void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
338 int old;
339 do old = *dest; while (interlocked_cmpxchg(dest, old >> val, old) != old);
342 void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
344 unsigned int old;
345 do old = *dest; while (interlocked_cmpxchg((int *)dest, old >> val, old) != old);
348 void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
350 interlocked_xchg_add(dest, -val);
353 void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
355 int old;
356 do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
359 void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
361 LONG64 old;
362 do old = *dest; while (interlocked_cmpxchg64(dest, old + val, old) != old);
365 void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
367 LONG64 old;
368 do old = *dest; while (interlocked_cmpxchg64(dest, old & val, old) != old);
371 void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
373 LONG64 old;
374 do old = *dest; while (interlocked_cmpxchg64(dest, old / val, old) != old);
377 void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
379 ULONG64 old;
380 do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old / val, old) != old);
383 void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
385 LONG64 old;
386 do old = *dest; while (interlocked_cmpxchg64(dest, old * val, old) != old);
389 void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
391 LONG64 old;
392 do old = *dest; while (interlocked_cmpxchg64(dest, old | val, old) != old);
395 void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
397 LONG64 old;
398 do old = *dest; while (interlocked_cmpxchg64(dest, old << val, old) != old);
401 void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
403 LONG64 old;
404 do old = *dest; while (interlocked_cmpxchg64(dest, old >> val, old) != old);
407 void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
409 ULONG64 old;
410 do old = *dest; while (interlocked_cmpxchg64((LONG64 *)dest, old >> val, old) != old);
413 void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
415 LONG64 old;
416 do old = *dest; while (interlocked_cmpxchg64(dest, old - val, old) != old);
419 void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
421 LONG64 old;
422 do old = *dest; while (interlocked_cmpxchg64(dest, old ^ val, old) != old);
/* Atomic float helpers: the float is re-read as its 32-bit bit pattern, the
 * arithmetic is done on a float view of that pattern, and the result is
 * committed with a compare-and-swap on the integer representation.  The
 * pointer-punning matches the other CAS loops in this file (Wine builds with
 * strict aliasing disabled). */

void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (interlocked_cmpxchg((int *)dest, new, old) != old);
}
/* Atomic double helpers: same bit-pattern CAS scheme as the float versions,
 * using the 64-bit interlocked_cmpxchg64. */

void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
}
513 int CDECL omp_get_dynamic(void)
515 TRACE("stub\n");
516 return 0;
519 int CDECL omp_get_max_threads(void)
521 TRACE("()\n");
522 return vcomp_max_threads;
525 int CDECL omp_get_nested(void)
527 TRACE("stub\n");
528 return vcomp_nested_fork;
531 int CDECL omp_get_num_procs(void)
533 TRACE("stub\n");
534 return 1;
537 int CDECL omp_get_num_threads(void)
539 struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
540 TRACE("()\n");
541 return team_data ? team_data->num_threads : 1;
544 int CDECL omp_get_thread_num(void)
546 TRACE("()\n");
547 return vcomp_init_thread_data()->thread_num;
550 int CDECL _vcomp_get_thread_num(void)
552 TRACE("()\n");
553 return vcomp_init_thread_data()->thread_num;
556 /* Time in seconds since "some time in the past" */
557 double CDECL omp_get_wtime(void)
559 return GetTickCount() / 1000.0;
562 void CDECL omp_set_dynamic(int val)
564 TRACE("(%d): stub\n", val);
567 void CDECL omp_set_nested(int nested)
569 TRACE("(%d)\n", nested);
570 vcomp_nested_fork = (nested != 0);
573 void CDECL omp_set_num_threads(int num_threads)
575 TRACE("(%d)\n", num_threads);
576 if (num_threads >= 1)
577 vcomp_num_threads = num_threads;
580 void CDECL _vcomp_flush(void)
582 TRACE("(): stub\n");
/* Team barrier.  Uses a generation counter: the last thread to arrive bumps
 * team_data->barrier and wakes everyone; earlier arrivals sleep until the
 * generation changes.  A no-op outside a parallel region. */
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        /* last thread in: start a new generation and release the others */
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        /* wait until the generation changes (guards against spurious wakeups) */
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
610 void CDECL _vcomp_set_num_threads(int num_threads)
612 TRACE("(%d)\n", num_threads);
613 if (num_threads >= 1)
614 vcomp_init_thread_data()->fork_threads = num_threads;
617 int CDECL _vcomp_master_begin(void)
619 TRACE("()\n");
620 return !vcomp_init_thread_data()->thread_num;
623 void CDECL _vcomp_master_end(void)
625 TRACE("()\n");
626 /* nothing to do here */
629 int CDECL _vcomp_single_begin(int flags)
631 struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
632 struct vcomp_task_data *task_data = thread_data->task;
633 int ret = FALSE;
635 TRACE("(%x): semi-stub\n", flags);
637 EnterCriticalSection(&vcomp_section);
638 thread_data->single++;
639 if ((int)(thread_data->single - task_data->single) > 0)
641 task_data->single = thread_data->single;
642 ret = TRUE;
644 LeaveCriticalSection(&vcomp_section);
646 return ret;
649 void CDECL _vcomp_single_end(void)
651 TRACE("()\n");
652 /* nothing to do here */
655 void CDECL _vcomp_sections_init(int n)
657 struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
658 struct vcomp_task_data *task_data = thread_data->task;
660 TRACE("(%d)\n", n);
662 EnterCriticalSection(&vcomp_section);
663 thread_data->section++;
664 if ((int)(thread_data->section - task_data->section) > 0)
666 task_data->section = thread_data->section;
667 task_data->num_sections = n;
668 task_data->section_index = 0;
670 LeaveCriticalSection(&vcomp_section);
673 int CDECL _vcomp_sections_next(void)
675 struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
676 struct vcomp_task_data *task_data = thread_data->task;
677 int i = -1;
679 TRACE("()\n");
681 EnterCriticalSection(&vcomp_section);
682 if (thread_data->section == task_data->section &&
683 task_data->section_index != task_data->num_sections)
685 i = task_data->section_index++;
687 LeaveCriticalSection(&vcomp_section);
688 return i;
/* Unchunked static scheduling: divide the iteration range [first..last]
 * (counting up when increment is nonzero, down otherwise) as evenly as
 * possible among the team, giving the first `remaining` threads one extra
 * iteration.  Writes this thread's inclusive sub-range to *begin/*end; a
 * thread with no work receives an empty range (*end before *begin).
 * Arithmetic is unsigned, so wraparound is well-defined. */
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        /* single thread: take the whole range */
        *begin = first;
        *end = last;
        return;
    }

    if (step <= 0)
    {
        /* invalid step: hand out an empty range */
        *begin = 0;
        *end = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;  /* normalize to a signed downward step */
    }

    per_thread = iterations / num_threads;
    remaining = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;                   /* this thread takes one extra iteration */
    else if (per_thread)
        first += remaining * step;      /* skip past the extra iterations */
    else
    {
        /* more threads than iterations: empty range for this thread */
        *begin = first;
        *end = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end = *begin + (per_thread - 1) * step;
}
/* Chunked static scheduling: split the range into chunks of `chunksize`
 * iterations assigned round-robin to the team.  Outputs, per thread:
 *   *loops     - number of chunks this thread executes
 *   *begin/*end- inclusive bounds of its first chunk
 *   *next      - stride to add to begin/end to reach its next chunk
 *   *lastchunk - start of the overall final chunk (which may be short) */
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (num_threads == 1 && chunksize != 1)
    {
        /* single thread: one chunk covering everything */
        *loops = 1;
        *begin = first;
        *end = last;
        *next = 0;
        *lastchunk = first;
        return;
    }

    if (first == last)
    {
        /* single iteration: thread 0 does it */
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin = first;
            *end = last;
            *next = 0;
            *lastchunk = first;
        }
        return;
    }

    if (step <= 0)
    {
        /* invalid step: no work for anyone */
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;  /* normalize to a downward step */
    }

    if (chunksize < 1)
        chunksize = 1;

    /* DWORD64 intermediate avoids overflow in the rounded-up division */
    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining = num_chunks - per_thread * num_threads;

    *loops = per_thread + (thread_num < remaining);
    *begin = first + thread_num * chunksize * step;
    *end = *begin + (chunksize - 1) * step;
    *next = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
}
805 void CDECL _vcomp_for_static_end(void)
807 TRACE("()\n");
808 /* nothing to do here */
/* Initialize a dynamically scheduled loop.  The low bits of `flags` select
 * the scheduling type (static/chunked/guided, see VCOMP_DYNAMIC_FLAGS_*);
 * VCOMP_DYNAMIC_FLAGS_INCREMENT means the loop counts upwards.  For static
 * scheduling this thread's fixed sub-range is precomputed; for chunked and
 * guided the first thread of a generation publishes the shared loop state
 * that _vcomp_for_dynamic_next() hands out under vcomp_section. */
void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        /* invalid step: mark this thread's loop as inactive */
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;  /* normalize to a downward step */
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* same even split as _vcomp_for_static_simple_init */
        per_thread = iterations / num_threads;
        remaining = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            /* more threads than iterations: nothing for this thread */
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type  = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end   = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            /* first thread of this generation publishes the shared state */
            task_data->dynamic            = thread_data->dynamic;
            task_data->dynamic_first      = first;
            task_data->dynamic_last       = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step       = step;
            task_data->dynamic_chunksize  = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}
/* Hand out the next batch of iterations of a dynamic loop.  Returns nonzero
 * and fills *begin/*end (inclusive) when work was obtained, zero when the
 * loop is exhausted or inactive for this thread. */
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* static: the whole precomputed range in one shot */
        *begin = thread_data->dynamic_begin;
        *end = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            /* chunked: at most one chunk per call */
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                /* guided: hand out a 1/num_threads share while plenty remains */
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;  /* pin the final batch to the exact last value */
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}
926 int CDECL omp_in_parallel(void)
928 TRACE("()\n");
929 return vcomp_init_thread_data()->parallel;
/* Worker thread main loop.  Executes the team callback whenever assigned a
 * team, then parks on the idle list; exits after 5 seconds without work.
 * The thread holds a reference on the DLL (taken in _vcomp_fork) which it
 * releases via FreeLibraryAndExitThread. */
static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            /* run the region's callback outside the global lock */
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            /* move ourselves from the team list back to the idle list */
            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        /* wait for new work; give up after an idle timeout with no team */
        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
    return 0;  /* not reached; keeps the compiler happy */
}
/* Start a parallel region: build a team, recruit idle workers (spawning new
 * ones as needed), run the callback on the forking thread itself, then wait
 * until every team member has finished.  When ifval is FALSE, or nesting is
 * disabled and we are already parallel, the region runs on a single thread.
 * team/task state lives on this stack frame; workers reference it only while
 * the team is alive, i.e. until the finished_threads wait below completes. */
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads = 1;          /* the forking thread itself */
    team_data.finished_threads = 0;
    team_data.nargs = nargs;
    team_data.wrapper = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier = 0;
    team_data.barrier_count = 0;

    task_data.single = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    /* the forking thread becomes thread 0 of the new team */
    thread_data.team = &team_data;
    thread_data.task = &task_data;
    thread_data.thread_num = 0;
    thread_data.parallel = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single = 1;
    thread_data.section = 1;
    thread_data.dynamic = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads++;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;           /* best effort: run with fewer threads */

            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            /* pin the DLL so it cannot unload while the worker is running;
             * the worker releases the reference when it exits */
            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;  /* the requested team size applies once */

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        /* count ourselves done and wait for the rest of the team */
        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}
1099 static CRITICAL_SECTION *alloc_critsect(void)
1101 CRITICAL_SECTION *critsect;
1102 if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
1104 ERR("could not allocate critical section\n");
1105 ExitProcess(1);
1108 InitializeCriticalSection(critsect);
1109 critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
1110 return critsect;
1113 static void destroy_critsect(CRITICAL_SECTION *critsect)
1115 if (!critsect) return;
1116 critsect->DebugInfo->Spare[0] = 0;
1117 DeleteCriticalSection(critsect);
1118 HeapFree(GetProcessHeap(), 0, critsect);
1121 void CDECL omp_init_lock(omp_lock_t *lock)
1123 TRACE("(%p)\n", lock);
1124 *lock = alloc_critsect();
1127 void CDECL omp_destroy_lock(omp_lock_t *lock)
1129 TRACE("(%p)\n", lock);
1130 destroy_critsect(*lock);
1133 void CDECL omp_set_lock(omp_lock_t *lock)
1135 TRACE("(%p)\n", lock);
1137 if (RtlIsCriticalSectionLockedByThread(*lock))
1139 ERR("omp_set_lock called while holding lock %p\n", *lock);
1140 ExitProcess(1);
1143 EnterCriticalSection(*lock);
1146 void CDECL omp_unset_lock(omp_lock_t *lock)
1148 TRACE("(%p)\n", lock);
1149 LeaveCriticalSection(*lock);
1152 int CDECL omp_test_lock(omp_lock_t *lock)
1154 TRACE("(%p)\n", lock);
1156 if (RtlIsCriticalSectionLockedByThread(*lock))
1157 return 0;
1159 return TryEnterCriticalSection(*lock);
1162 void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
1164 TRACE("(%p)\n", lock);
1165 EnterCriticalSection(*lock);
1168 void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
1170 TRACE("(%p)\n", lock);
1171 LeaveCriticalSection(*lock);
1174 int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
1176 TRACE("(%p)\n", lock);
1177 return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
1180 void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
1182 TRACE("(%p)\n", critsect);
1184 if (!*critsect)
1186 CRITICAL_SECTION *new_critsect = alloc_critsect();
1187 if (interlocked_cmpxchg_ptr((void **)critsect, new_critsect, NULL) != NULL)
1188 destroy_critsect(new_critsect); /* someone beat us to it */
1191 EnterCriticalSection(*critsect);
1194 void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
1196 TRACE("(%p)\n", critsect);
1197 LeaveCriticalSection(critsect);
/* DLL entry point: allocate the TLS slot and capture the processor count on
 * attach; release per-thread and global state on detach. */
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_WINE_PREATTACH:
            return FALSE;    /* prefer native version */

        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module      = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            if (reserved) break;  /* process is terminating; skip cleanup */
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}