msi: Correctly parse double quotes in the token value.
[wine/multimedia.git] / dlls / vcomp / main.c
blobd4421557836edbd3328513484b602fd339c22c3d
1 /*
3 * vcomp implementation
5 * Copyright 2011 Austin English
6 * Copyright 2012 Dan Kegel
7 * Copyright 2015 Sebastian Lackner
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 #include "config.h"
25 #include "wine/port.h"
27 #include <stdarg.h>
28 #include <assert.h>
30 #include "windef.h"
31 #include "winbase.h"
32 #include "wine/debug.h"
33 #include "wine/list.h"
WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

/* OpenMP lock types are backed by heap-allocated critical sections. */
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;

/* Pool of idle worker threads kept alive for reuse by later forks. */
static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD vcomp_context_tls = TLS_OUT_OF_INDEXES; /* TLS slot holding the per-thread data */
static HMODULE vcomp_module;                         /* this DLL, pinned while workers run */
static int vcomp_max_threads;                        /* value returned by omp_get_max_threads() */
static int vcomp_num_threads;                        /* default team size for new parallel regions */
static BOOL vcomp_nested_fork = FALSE;               /* allow nested parallelism (omp_set_nested) */
/* Global lock protecting all shared vcomp state (team/task counters, the
 * idle thread list, barrier bookkeeping). */
static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };
/* Scheduling type encoded in the flags argument of _vcomp_for_dynamic_init(). */
#define VCOMP_DYNAMIC_FLAGS_STATIC      0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED     0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED      0x03
/* Flag bit set when the loop counts upwards. */
#define VCOMP_DYNAMIC_FLAGS_INCREMENT   0x40
/* Per-thread state, stored in TLS.  One instance exists for the main thread
 * and for every pooled worker thread. */
struct vcomp_thread_data
{
    struct vcomp_team_data      *team;          /* team currently executed (NULL outside parallel regions) */
    struct vcomp_task_data      *task;          /* shared task state of that team */
    int                         thread_num;     /* value returned by omp_get_thread_num() */
    BOOL                        parallel;       /* inside a parallel region? */
    int                         fork_threads;   /* count requested via _vcomp_set_num_threads(), 0 = default */

    /* only used for concurrent tasks */
    struct list                 entry;          /* link in team list or vcomp_idle_threads */
    CONDITION_VARIABLE          cond;           /* signalled when a new team is assigned */

    /* single */
    unsigned int                single;         /* per-thread generation counter for single blocks */

    /* section */
    unsigned int                section;        /* per-thread generation counter for sections blocks */

    /* dynamic */
    unsigned int                dynamic;        /* per-thread generation counter for dynamic loops */
    unsigned int                dynamic_type;   /* VCOMP_DYNAMIC_FLAGS_* scheduling type, 0 = none left */
    unsigned int                dynamic_begin;  /* precomputed range for static scheduling */
    unsigned int                dynamic_end;
};
/* State shared by all threads of one fork/join team; lives on the stack of
 * the forking thread for the duration of the parallel region. */
struct vcomp_team_data
{
    CONDITION_VARIABLE  cond;               /* signalled when the last thread finishes */
    int                 num_threads;        /* total threads in the team */
    int                 finished_threads;   /* threads done with the parallel region */

    /* callback arguments */
    int                 nargs;              /* number of pointer-sized arguments */
    void                *wrapper;           /* user callback run by each team thread */
    __ms_va_list        valist;             /* arguments forwarded to the wrapper */

    /* barrier */
    unsigned int        barrier;            /* generation counter, bumped when everyone arrives */
    int                 barrier_count;      /* threads currently waiting at the barrier */
};
/* Work-sharing state shared by the threads of a team.  The per-thread
 * generation counters in vcomp_thread_data are compared against these to
 * decide which thread initialises each construct. */
struct vcomp_task_data
{
    /* single */
    unsigned int        single;             /* generation of the last claimed single block */

    /* section */
    unsigned int        section;            /* generation of the current sections block */
    int                 num_sections;       /* total sections in the current block */
    int                 section_index;      /* next section to hand out */

    /* dynamic */
    unsigned int        dynamic;            /* generation of the current dynamic loop */
    unsigned int        dynamic_first;      /* next iteration value to hand out */
    unsigned int        dynamic_last;       /* final iteration value of the loop */
    unsigned int        dynamic_iterations; /* iterations not yet handed out */
    int                 dynamic_step;       /* signed step between iterations */
    unsigned int        dynamic_chunksize;  /* minimum chunk handed to a thread */
};
#if defined(__i386__)

/* Copy nargs pointer-sized arguments from the va_list onto a fresh,
 * 16-byte-aligned stack area and tail into the user callback. */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"     /* edx = nargs */
                   "movl %esp,%edi\n\t"
                   "shll $2,%edx\n\t"           /* edx = nargs * 4 bytes; skip copy if zero */
                   "jz 1f\n\t"
                   "subl %edx,%edi\n\t"
                   "andl $~15,%edi\n\t"         /* keep the stack 16-byte aligned */
                   "movl %edi,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"     /* esi = args */
                   "cld\n\t"
                   "rep; movsl\n"               /* copy the arguments onto the new stack */
                   "1:\tcall *8(%ebp)\n\t"      /* invoke wrapper */
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )

#elif defined(__x86_64__)

/* Win64 ABI: at least four qword argument slots are copied, and the first
 * four arguments are also loaded into rcx/rdx/r8/r9 before the call. */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"         /* rax = wrapper */
                   "movq $4,%rcx\n\t"           /* rcx = max(4, nargs) slots */
                   "cmp %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"         /* keep the stack 16-byte aligned */
                   "movq %rsp,%rdi\n\t"
                   "movq %r8,%rsi\n\t"          /* rsi = args */
                   "rep; movsq\n\t"             /* copy the argument slots */
                   "movq 0(%rsp),%rcx\n\t"      /* load register arguments */
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret")

#elif defined(__arm__)

/* First four arguments go in r0-r3, the rest are copied onto the stack
 * (padded to keep 8-byte alignment). */
extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   ".arm\n\t"
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"             /* r4 = wrapper */
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"         /* r3 = nargs * 4 */
                   "cmp r3, #0\n\t"
                   "beq 5f\n\t"
                   "sub SP, SP, r3\n\t"
                   "tst r1, #1\n\t"
                   "subeq SP, SP, #4\n\t"       /* pad for 8-byte stack alignment */
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "cmp r1, #1\n\t"             /* pop the first arguments into r0-r3 */
                   "bgt 2f\n\t"
                   "pop {r0}\n\t"
                   "b 5f\n\t"
                   "2:\tcmp r1, #2\n\t"
                   "bgt 3f\n\t"
                   "pop {r0-r1}\n\t"
                   "b 5f\n\t"
                   "3:\tcmp r1, #3\n\t"
                   "bgt 4f\n\t"
                   "pop {r0-r2}\n\t"
                   "b 5f\n\t"
                   "4:\tpop {r0-r3}\n\t"
                   "5:\tblx r4\n\t"             /* invoke wrapper */
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )

#else

/* Fallback for unsupported architectures: parallel regions cannot run. */
static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
240 static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
242 return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
245 static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
247 TlsSetValue(vcomp_context_tls, thread_data);
/* Return the calling thread's vcomp data, creating and initialising it on
 * first use.  The thread and task records are allocated in a single heap
 * block; allocation failure is fatal (ExitProcess). */
static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single = 0;
    data->task.section = 0;
    data->task.dynamic = 0;

    /* outside any parallel region the thread is its own one-member team */
    thread_data = &data->thread;
    thread_data->team = NULL;
    thread_data->task = &data->task;
    thread_data->thread_num = 0;
    thread_data->parallel = FALSE;
    thread_data->fork_threads = 0;
    thread_data->single = 1;
    thread_data->section = 1;
    thread_data->dynamic = 1;
    thread_data->dynamic_type = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}
285 static void vcomp_free_thread_data(void)
287 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
288 if (!thread_data) return;
290 HeapFree(GetProcessHeap(), 0, thread_data);
291 vcomp_set_thread_data(NULL);
294 void CDECL _vcomp_atomic_add_i4(int *dest, int val)
296 interlocked_xchg_add(dest, val);
299 void CDECL _vcomp_atomic_and_i4(int *dest, int val)
301 int old;
302 do old = *dest; while (interlocked_cmpxchg(dest, old & val, old) != old);
305 void CDECL _vcomp_atomic_div_i4(int *dest, int val)
307 int old;
308 do old = *dest; while (interlocked_cmpxchg(dest, old / val, old) != old);
311 void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
313 unsigned int old;
314 do old = *dest; while (interlocked_cmpxchg((int *)dest, old / val, old) != old);
317 void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
319 int old;
320 do old = *dest; while (interlocked_cmpxchg(dest, old * val, old) != old);
323 void CDECL _vcomp_atomic_or_i4(int *dest, int val)
325 int old;
326 do old = *dest; while (interlocked_cmpxchg(dest, old | val, old) != old);
329 void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
331 int old;
332 do old = *dest; while (interlocked_cmpxchg(dest, old << val, old) != old);
335 void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
337 int old;
338 do old = *dest; while (interlocked_cmpxchg(dest, old >> val, old) != old);
341 void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
343 unsigned int old;
344 do old = *dest; while (interlocked_cmpxchg((int *)dest, old >> val, old) != old);
347 void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
349 interlocked_xchg_add(dest, -val);
352 void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
354 int old;
355 do old = *dest; while (interlocked_cmpxchg(dest, old ^ val, old) != old);
358 void CDECL _vcomp_atomic_add_r4(float *dest, float val)
360 int old, new;
363 old = *(int *)dest;
364 *(float *)&new = *(float *)&old + val;
366 while (interlocked_cmpxchg((int *)dest, new, old) != old);
369 void CDECL _vcomp_atomic_div_r4(float *dest, float val)
371 int old, new;
374 old = *(int *)dest;
375 *(float *)&new = *(float *)&old / val;
377 while (interlocked_cmpxchg((int *)dest, new, old) != old);
380 void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
382 int old, new;
385 old = *(int *)dest;
386 *(float *)&new = *(float *)&old * val;
388 while (interlocked_cmpxchg((int *)dest, new, old) != old);
391 void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
393 int old, new;
396 old = *(int *)dest;
397 *(float *)&new = *(float *)&old - val;
399 while (interlocked_cmpxchg((int *)dest, new, old) != old);
402 void CDECL _vcomp_atomic_add_r8(double *dest, double val)
404 LONG64 old, new;
407 old = *(LONG64 *)dest;
408 *(double *)&new = *(double *)&old + val;
410 while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
413 void CDECL _vcomp_atomic_div_r8(double *dest, double val)
415 LONG64 old, new;
418 old = *(LONG64 *)dest;
419 *(double *)&new = *(double *)&old / val;
421 while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
424 void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
426 LONG64 old, new;
429 old = *(LONG64 *)dest;
430 *(double *)&new = *(double *)&old * val;
432 while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
435 void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
437 LONG64 old, new;
440 old = *(LONG64 *)dest;
441 *(double *)&new = *(double *)&old - val;
443 while (interlocked_cmpxchg64((LONG64 *)dest, new, old) != old);
446 int CDECL omp_get_dynamic(void)
448 TRACE("stub\n");
449 return 0;
452 int CDECL omp_get_max_threads(void)
454 TRACE("()\n");
455 return vcomp_max_threads;
458 int CDECL omp_get_nested(void)
460 TRACE("stub\n");
461 return vcomp_nested_fork;
464 int CDECL omp_get_num_procs(void)
466 TRACE("stub\n");
467 return 1;
470 int CDECL omp_get_num_threads(void)
472 struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
473 TRACE("()\n");
474 return team_data ? team_data->num_threads : 1;
477 int CDECL omp_get_thread_num(void)
479 TRACE("()\n");
480 return vcomp_init_thread_data()->thread_num;
483 /* Time in seconds since "some time in the past" */
484 double CDECL omp_get_wtime(void)
486 return GetTickCount() / 1000.0;
489 void CDECL omp_set_dynamic(int val)
491 TRACE("(%d): stub\n", val);
494 void CDECL omp_set_nested(int nested)
496 TRACE("(%d)\n", nested);
497 vcomp_nested_fork = (nested != 0);
500 void CDECL omp_set_num_threads(int num_threads)
502 TRACE("(%d)\n", num_threads);
503 if (num_threads >= 1)
504 vcomp_num_threads = num_threads;
507 void CDECL _vcomp_flush(void)
509 TRACE("(): stub\n");
/* Block until every thread of the current team has reached the barrier.
 * Uses a generation counter so threads from consecutive barriers cannot
 * confuse each other's wakeups.  No-op outside a parallel region. */
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        /* last thread in: open the barrier and wake the waiters */
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        /* wait until the generation changes (guards against spurious wakeups) */
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
537 void CDECL _vcomp_set_num_threads(int num_threads)
539 TRACE("(%d)\n", num_threads);
540 if (num_threads >= 1)
541 vcomp_init_thread_data()->fork_threads = num_threads;
544 int CDECL _vcomp_master_begin(void)
546 TRACE("()\n");
547 return !vcomp_init_thread_data()->thread_num;
550 void CDECL _vcomp_master_end(void)
552 TRACE("()\n");
553 /* nothing to do here */
/* Claim a 'single' construct: the first team thread to bump its per-thread
 * counter past the shared one executes the block (returns TRUE); the rest
 * skip it (returns FALSE).  The flags argument is currently ignored. */
int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        /* first arrival for this generation: this thread runs the block */
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}
576 void CDECL _vcomp_single_end(void)
578 TRACE("()\n");
579 /* nothing to do here */
/* Begin a 'sections' construct with n sections.  The first team thread to
 * reach it (per generation counter) initialises the shared section state. */
void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        /* first arrival: publish the section count and reset the cursor */
        task_data->section = thread_data->section;
        task_data->num_sections = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}
/* Hand out the next unexecuted section index, or -1 when the construct is
 * exhausted (or the thread is not in the current sections generation). */
int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
/* Compute the [*begin, *end] iteration sub-range of the calling thread for a
 * statically scheduled loop without an explicit chunk size.  Iterations are
 * divided as evenly as possible; the first 'remaining' threads get one
 * extra.  A thread with no work receives an empty range (*end before
 * *begin).  'increment' selects an upward-counting loop. */
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        /* single thread executes the whole range */
        *begin = first;
        *end = last;
        return;
    }

    if (step <= 0)
    {
        /* invalid step: return an empty range */
        *begin = 0;
        *end = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;     /* normalise to an upward-counting step */
    }

    per_thread = iterations / num_threads;
    remaining = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;                   /* this thread takes one extra iteration */
    else if (per_thread)
        first += remaining * step;      /* skip the extras handed to earlier threads */
    else
    {
        /* more threads than iterations: empty range for this thread */
        *begin = first;
        *end = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end = *begin + (per_thread - 1) * step;
}
/* Compute the chunked static schedule for the calling thread: *loops chunks
 * of 'chunksize' iterations, starting at *begin/*end and advancing by *next
 * between chunks.  *lastchunk is the start of the loop's final chunk so the
 * caller can detect (and trim) it. */
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (num_threads == 1 && chunksize != 1)
    {
        /* single thread: one chunk covering the whole range */
        *loops = 1;
        *begin = first;
        *end = last;
        *next = 0;
        *lastchunk = first;
        return;
    }

    if (first == last)
    {
        /* exactly one iteration: only thread 0 executes it */
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin = first;
            *end = last;
            *next = 0;
            *lastchunk = first;
        }
        return;
    }

    if (step <= 0)
    {
        /* invalid step: no work for anyone */
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;     /* normalise to an upward-counting step */
    }

    if (chunksize < 1)
        chunksize = 1;

    /* DWORD64 guards the rounding addition against 32-bit overflow */
    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining = num_chunks - per_thread * num_threads;

    *loops = per_thread + (thread_num < remaining);
    *begin = first + thread_num * chunksize * step;
    *end = *begin + (chunksize - 1) * step;
    *next = chunksize * num_threads * step;     /* round-robin stride between chunks */
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
}
732 void CDECL _vcomp_for_static_end(void)
734 TRACE("()\n");
735 /* nothing to do here */
/* Initialise a dynamically scheduled loop.  For STATIC scheduling the
 * thread's whole sub-range is precomputed here; for CHUNKED/GUIDED the
 * shared iteration pool is published (by the first thread per generation)
 * and chunks are later handed out by _vcomp_for_dynamic_next(). */
void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        /* invalid step: mark this thread as having no work */
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;     /* normalise to an upward-counting step */
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* same partitioning as _vcomp_for_static_simple_init */
        per_thread = iterations / num_threads;
        remaining = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            /* more threads than iterations: no work for this thread */
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            /* first arrival for this generation publishes the loop bounds */
            task_data->dynamic = thread_data->dynamic;
            task_data->dynamic_first = first;
            task_data->dynamic_last = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step = step;
            task_data->dynamic_chunksize = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}
/* Fetch the next chunk of a dynamically scheduled loop into [*begin, *end].
 * Returns non-zero while work remains.  For STATIC scheduling the whole
 * precomputed range is returned exactly once; for CHUNKED/GUIDED a chunk is
 * carved out of the shared iteration pool under the global lock. */
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        /* precomputed range; hand it out once, then report exhaustion */
        *begin = thread_data->dynamic_begin;
        *end = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                /* guided: hand out a 1/num_threads share while plenty remains */
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;     /* clamp the final chunk exactly */
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}
853 int CDECL omp_in_parallel(void)
855 TRACE("()\n");
856 return vcomp_init_thread_data()->parallel;
/* Entry point of pooled worker threads.  Runs the team callback whenever a
 * team is assigned, then parks itself on the idle list.  A worker that sits
 * idle for 5 seconds exits and unpins the DLL via FreeLibraryAndExitThread. */
static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            /* run the parallel region outside the global lock */
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
            EnterCriticalSection(&vcomp_section);

            /* move back to the idle pool and notify the forking thread */
            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        /* wait for new work; give up after an idle timeout with no team */
        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);  /* does not return */
    return 0;
}
/* Execute 'wrapper' with 'nargs' variadic arguments in parallel.  The
 * calling thread becomes thread 0; additional team members are taken from
 * the idle pool or spawned, and the caller blocks at the end until every
 * team thread has finished.  With ifval FALSE (or disallowed nesting) the
 * region runs serially on the calling thread. */
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    /* nested regions run serially unless omp_set_nested() enabled them */
    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads = 1;
    team_data.finished_threads = 0;
    team_data.nargs = nargs;
    team_data.wrapper = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier = 0;
    team_data.barrier_count = 0;

    task_data.single = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    /* the forking thread participates as team member 0 */
    thread_data.team = &team_data;
    thread_data.task = &task_data;
    thread_data.thread_num = 0;
    thread_data.parallel = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single = 1;
    thread_data.section = 1;
    thread_data.dynamic = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads++;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;   /* OOM: run with a smaller team */

            data->team = &team_data;
            data->task = &task_data;
            data->thread_num = team_data.num_threads;
            data->parallel = thread_data.parallel;
            data->fork_threads = 0;
            data->single = 1;
            data->section = 1;
            data->dynamic = 1;
            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            /* pin the DLL so it cannot unload while the worker is alive;
             * the worker releases the reference when it exits */
            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    /* thread 0 runs its share of the work directly */
    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;     /* _vcomp_set_num_threads only affects one fork */

    if (team_data.num_threads > 1)
    {
        /* join: wait until every team member has reported completion */
        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}
1026 static CRITICAL_SECTION *alloc_critsect(void)
1028 CRITICAL_SECTION *critsect;
1029 if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
1031 ERR("could not allocate critical section\n");
1032 ExitProcess(1);
1035 InitializeCriticalSection(critsect);
1036 critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
1037 return critsect;
1040 static void destroy_critsect(CRITICAL_SECTION *critsect)
1042 if (!critsect) return;
1043 critsect->DebugInfo->Spare[0] = 0;
1044 DeleteCriticalSection(critsect);
1045 HeapFree(GetProcessHeap(), 0, critsect);
1048 static BOOL critsect_is_locked(CRITICAL_SECTION *critsect)
1050 return critsect->OwningThread == ULongToHandle(GetCurrentThreadId()) &&
1051 critsect->RecursionCount;
1054 void CDECL omp_init_lock(omp_lock_t *lock)
1056 TRACE("(%p)\n", lock);
1057 *lock = alloc_critsect();
1060 void CDECL omp_destroy_lock(omp_lock_t *lock)
1062 TRACE("(%p)\n", lock);
1063 destroy_critsect(*lock);
1066 void CDECL omp_set_lock(omp_lock_t *lock)
1068 TRACE("(%p)\n", lock);
1070 if (critsect_is_locked(*lock))
1072 ERR("omp_set_lock called while holding lock %p\n", *lock);
1073 ExitProcess(1);
1076 EnterCriticalSection(*lock);
1079 void CDECL omp_unset_lock(omp_lock_t *lock)
1081 TRACE("(%p)\n", lock);
1082 LeaveCriticalSection(*lock);
1085 int CDECL omp_test_lock(omp_lock_t *lock)
1087 TRACE("(%p)\n", lock);
1089 if (critsect_is_locked(*lock))
1090 return 0;
1092 return TryEnterCriticalSection(*lock);
1095 void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
1097 TRACE("(%p)\n", lock);
1098 EnterCriticalSection(*lock);
1101 void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
1103 TRACE("(%p)\n", lock);
1104 LeaveCriticalSection(*lock);
1107 int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
1109 TRACE("(%p)\n", lock);
1110 return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
1113 void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
1115 TRACE("(%p)\n", critsect);
1117 if (!*critsect)
1119 CRITICAL_SECTION *new_critsect = alloc_critsect();
1120 if (interlocked_cmpxchg_ptr((void **)critsect, new_critsect, NULL) != NULL)
1121 destroy_critsect(new_critsect); /* someone beat us to it */
1124 EnterCriticalSection(*critsect);
1127 void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
1129 TRACE("(%p)\n", critsect);
1130 LeaveCriticalSection(critsect);
/* DLL entry point: allocates the TLS slot and seeds the default thread
 * counts from the processor count on attach; frees per-thread data on
 * thread/process detach. */
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_WINE_PREATTACH:
            return FALSE;    /* prefer native version */

        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            /* reserved != NULL means process termination: skip cleanup */
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}