vcomp: Fix handling of _vcomp_fork with ifval == FALSE.
[wine/multimedia.git] / dlls / vcomp / main.c
blob1dad029c601b6a6ae31d67b9c05413aae762b24a
1 /*
3 * vcomp implementation
5 * Copyright 2011 Austin English
6 * Copyright 2012 Dan Kegel
7 * Copyright 2015 Sebastian Lackner
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 #include "config.h"
26 #include <stdarg.h>
27 #include <assert.h>
29 #include "windef.h"
30 #include "winbase.h"
31 #include "wine/debug.h"
32 #include "wine/list.h"
34 WINE_DEFAULT_DEBUG_CHANNEL(vcomp);
36 static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
37 static DWORD vcomp_context_tls = TLS_OUT_OF_INDEXES;
38 static HMODULE vcomp_module;
39 static int vcomp_max_threads;
40 static int vcomp_num_threads;
41 static BOOL vcomp_nested_fork = FALSE;
43 static RTL_CRITICAL_SECTION vcomp_section;
44 static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
46 0, 0, &vcomp_section,
47 { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
48 0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
50 static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };
52 struct vcomp_thread_data
54 struct vcomp_team_data *team;
55 struct vcomp_task_data *task;
56 int thread_num;
57 BOOL parallel;
58 int fork_threads;
60 /* only used for concurrent tasks */
61 struct list entry;
62 CONDITION_VARIABLE cond;
64 /* section */
65 unsigned int section;
68 struct vcomp_team_data
70 CONDITION_VARIABLE cond;
71 int num_threads;
72 int finished_threads;
74 /* callback arguments */
75 int nargs;
76 void *wrapper;
77 __ms_va_list valist;
79 /* barrier */
80 unsigned int barrier;
81 int barrier_count;
/* Work-sharing state for the sections construct (see _vcomp_sections_init/_next). */
struct vcomp_task_data
{
    /* section */
    unsigned int    section;        /* counter of the most recently initialised sections block */
    int             num_sections;   /* total number of sections in that block */
    int             section_index;  /* index handed out by the next _vcomp_sections_next() call */
};
/* _vcomp_fork_call_wrapper(wrapper, nargs, args): call 'wrapper' with 'nargs'
 * pointer-sized arguments copied from 'args'. Hand-written assembly exists
 * for i386 and x86_64; any other architecture only logs an error. */
92 #if defined(__i386__)
94 extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
95 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
/* set up a frame pointer and save %esi/%edi, which the copy below uses */
96 "pushl %ebp\n\t"
97 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
98 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
99 "movl %esp,%ebp\n\t"
100 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
101 "pushl %esi\n\t"
102 __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
103 "pushl %edi\n\t"
104 __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
/* %edx = nargs * 4 (bytes of arguments); skip the copy when nargs is 0 */
105 "movl 12(%ebp),%edx\n\t"
106 "movl %esp,%edi\n\t"
107 "shll $2,%edx\n\t"
108 "jz 1f\n\t"
/* reserve the argument area below the current stack, rounded down to 16 bytes */
109 "subl %edx,%edi\n\t"
110 "andl $~15,%edi\n\t"
111 "movl %edi,%esp\n\t"
/* copy nargs dwords from args (16(%ebp)) to the new stack top */
112 "movl 12(%ebp),%ecx\n\t"
113 "movl 16(%ebp),%esi\n\t"
114 "cld\n\t"
115 "rep; movsl\n"
/* call the wrapper (first argument, at 8(%ebp)) */
116 "1:\tcall *8(%ebp)\n\t"
/* drop the argument area and restore the saved registers */
117 "leal -8(%ebp),%esp\n\t"
118 "popl %edi\n\t"
119 __ASM_CFI(".cfi_same_value %edi\n\t")
120 "popl %esi\n\t"
121 __ASM_CFI(".cfi_same_value %esi\n\t")
122 "popl %ebp\n\t"
123 __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
124 __ASM_CFI(".cfi_same_value %ebp\n\t")
125 "ret" )
127 #elif defined(__x86_64__)
129 extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args);
130 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
/* set up a frame pointer and save %rsi/%rdi, which the copy below uses */
131 "pushq %rbp\n\t"
132 __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
133 __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
134 "movq %rsp,%rbp\n\t"
135 __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
136 "pushq %rsi\n\t"
137 __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
138 "pushq %rdi\n\t"
139 __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
/* keep the wrapper pointer in %rax; %rcx = max(nargs, 4) slots to copy */
140 "movq %rcx,%rax\n\t"
141 "movq $4,%rcx\n\t"
142 "cmp %rcx,%rdx\n\t"
143 "cmovgq %rdx,%rcx\n\t"
/* reserve %rcx * 8 bytes on the stack, aligned down to 16 bytes */
144 "leaq 0(,%rcx,8),%rdx\n\t"
145 "subq %rdx,%rsp\n\t"
146 "andq $~15,%rsp\n\t"
/* copy %rcx qwords from args (%r8) to the new stack top */
147 "movq %rsp,%rdi\n\t"
148 "movq %r8,%rsi\n\t"
149 "rep; movsq\n\t"
/* load the first four copied slots into the argument registers, then call */
150 "movq 0(%rsp),%rcx\n\t"
151 "movq 8(%rsp),%rdx\n\t"
152 "movq 16(%rsp),%r8\n\t"
153 "movq 24(%rsp),%r9\n\t"
154 "callq *%rax\n\t"
/* drop the argument area and restore the saved registers */
155 "leaq -16(%rbp),%rsp\n\t"
156 "popq %rdi\n\t"
157 __ASM_CFI(".cfi_same_value %rdi\n\t")
158 "popq %rsi\n\t"
159 __ASM_CFI(".cfi_same_value %rsi\n\t")
160 __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
161 "popq %rbp\n\t"
162 __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
163 __ASM_CFI(".cfi_same_value %rbp\n\t")
164 "ret")
166 #else
/* Fallback: the wrapper is never called, only an error is logged. */
168 static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, __ms_va_list args)
170 ERR("Not implemented for this architecture\n");
173 #endif
175 static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
177 return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
180 static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
182 TlsSetValue(vcomp_context_tls, thread_data);
185 static struct vcomp_thread_data *vcomp_init_thread_data(void)
187 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
188 struct
190 struct vcomp_thread_data thread;
191 struct vcomp_task_data task;
192 } *data;
194 if (thread_data) return thread_data;
195 if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
197 ERR("could not create thread data\n");
198 ExitProcess(1);
201 data->task.section = 0;
203 thread_data = &data->thread;
204 thread_data->team = NULL;
205 thread_data->task = &data->task;
206 thread_data->thread_num = 0;
207 thread_data->parallel = FALSE;
208 thread_data->fork_threads = 0;
209 thread_data->section = 1;
211 vcomp_set_thread_data(thread_data);
212 return thread_data;
215 static void vcomp_free_thread_data(void)
217 struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
218 if (!thread_data) return;
220 HeapFree(GetProcessHeap(), 0, thread_data);
221 vcomp_set_thread_data(NULL);
224 int CDECL omp_get_dynamic(void)
226 TRACE("stub\n");
227 return 0;
230 int CDECL omp_get_max_threads(void)
232 TRACE("()\n");
233 return vcomp_max_threads;
236 int CDECL omp_get_nested(void)
238 TRACE("stub\n");
239 return vcomp_nested_fork;
242 int CDECL omp_get_num_procs(void)
244 TRACE("stub\n");
245 return 1;
248 int CDECL omp_get_num_threads(void)
250 struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
251 TRACE("()\n");
252 return team_data ? team_data->num_threads : 1;
255 int CDECL omp_get_thread_num(void)
257 TRACE("()\n");
258 return vcomp_init_thread_data()->thread_num;
261 /* Time in seconds since "some time in the past" */
262 double CDECL omp_get_wtime(void)
264 return GetTickCount() / 1000.0;
267 void CDECL omp_set_dynamic(int val)
269 TRACE("(%d): stub\n", val);
272 void CDECL omp_set_nested(int nested)
274 TRACE("(%d)\n", nested);
275 vcomp_nested_fork = (nested != 0);
278 void CDECL omp_set_num_threads(int num_threads)
280 TRACE("(%d)\n", num_threads);
281 if (num_threads >= 1)
282 vcomp_num_threads = num_threads;
285 void CDECL _vcomp_barrier(void)
287 struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
289 TRACE("()\n");
291 if (!team_data)
292 return;
294 EnterCriticalSection(&vcomp_section);
295 if (++team_data->barrier_count >= team_data->num_threads)
297 team_data->barrier++;
298 team_data->barrier_count = 0;
299 WakeAllConditionVariable(&team_data->cond);
301 else
303 unsigned int barrier = team_data->barrier;
304 while (team_data->barrier == barrier)
305 SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
307 LeaveCriticalSection(&vcomp_section);
310 void CDECL _vcomp_set_num_threads(int num_threads)
312 TRACE("(%d)\n", num_threads);
313 if (num_threads >= 1)
314 vcomp_init_thread_data()->fork_threads = num_threads;
317 int CDECL _vcomp_single_begin(int flags)
319 TRACE("(%x): stub\n", flags);
320 return TRUE;
323 void CDECL _vcomp_single_end(void)
325 TRACE("stub\n");
328 void CDECL _vcomp_sections_init(int n)
330 struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
331 struct vcomp_task_data *task_data = thread_data->task;
333 TRACE("(%d)\n", n);
335 EnterCriticalSection(&vcomp_section);
336 thread_data->section++;
337 if ((int)(thread_data->section - task_data->section) > 0)
339 task_data->section = thread_data->section;
340 task_data->num_sections = n;
341 task_data->section_index = 0;
343 LeaveCriticalSection(&vcomp_section);
346 int CDECL _vcomp_sections_next(void)
348 struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
349 struct vcomp_task_data *task_data = thread_data->task;
350 int i = -1;
352 TRACE("()\n");
354 EnterCriticalSection(&vcomp_section);
355 if (thread_data->section == task_data->section &&
356 task_data->section_index != task_data->num_sections)
358 i = task_data->section_index++;
360 LeaveCriticalSection(&vcomp_section);
361 return i;
364 static DWORD WINAPI _vcomp_fork_worker(void *param)
366 struct vcomp_thread_data *thread_data = param;
367 vcomp_set_thread_data(thread_data);
369 TRACE("starting worker thread for %p\n", thread_data);
371 EnterCriticalSection(&vcomp_section);
372 for (;;)
374 struct vcomp_team_data *team = thread_data->team;
375 if (team != NULL)
377 LeaveCriticalSection(&vcomp_section);
378 _vcomp_fork_call_wrapper(team->wrapper, team->nargs, team->valist);
379 EnterCriticalSection(&vcomp_section);
381 thread_data->team = NULL;
382 list_remove(&thread_data->entry);
383 list_add_tail(&vcomp_idle_threads, &thread_data->entry);
384 if (++team->finished_threads >= team->num_threads)
385 WakeAllConditionVariable(&team->cond);
388 if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
389 GetLastError() == ERROR_TIMEOUT && !thread_data->team)
391 break;
394 list_remove(&thread_data->entry);
395 LeaveCriticalSection(&vcomp_section);
397 TRACE("terminating worker thread for %p\n", thread_data);
399 HeapFree(GetProcessHeap(), 0, thread_data);
400 vcomp_set_thread_data(NULL);
401 FreeLibraryAndExitThread(vcomp_module, 0);
402 return 0;
405 void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
407 struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
408 struct vcomp_thread_data thread_data;
409 struct vcomp_team_data team_data;
410 struct vcomp_task_data task_data;
411 int num_threads;
413 TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);
415 if (prev_thread_data->parallel && !vcomp_nested_fork)
416 ifval = FALSE;
418 if (!ifval)
419 num_threads = 1;
420 else if (prev_thread_data->fork_threads)
421 num_threads = prev_thread_data->fork_threads;
422 else
423 num_threads = vcomp_num_threads;
425 InitializeConditionVariable(&team_data.cond);
426 team_data.num_threads = 1;
427 team_data.finished_threads = 0;
428 team_data.nargs = nargs;
429 team_data.wrapper = wrapper;
430 __ms_va_start(team_data.valist, wrapper);
431 team_data.barrier = 0;
432 team_data.barrier_count = 0;
434 task_data.section = 0;
436 thread_data.team = &team_data;
437 thread_data.task = &task_data;
438 thread_data.thread_num = 0;
439 thread_data.parallel = ifval || prev_thread_data->parallel;
440 thread_data.fork_threads = 0;
441 thread_data.section = 1;
442 list_init(&thread_data.entry);
443 InitializeConditionVariable(&thread_data.cond);
445 if (num_threads > 1)
447 struct list *ptr;
448 EnterCriticalSection(&vcomp_section);
450 /* reuse existing threads (if any) */
451 while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
453 struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
454 data->team = &team_data;
455 data->task = &task_data;
456 data->thread_num = team_data.num_threads++;
457 data->parallel = thread_data.parallel;
458 data->fork_threads = 0;
459 data->section = 1;
460 list_remove(&data->entry);
461 list_add_tail(&thread_data.entry, &data->entry);
462 WakeAllConditionVariable(&data->cond);
465 /* spawn additional threads */
466 while (team_data.num_threads < num_threads)
468 struct vcomp_thread_data *data;
469 HMODULE module;
470 HANDLE thread;
472 data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
473 if (!data) break;
475 data->team = &team_data;
476 data->task = &task_data;
477 data->thread_num = team_data.num_threads;
478 data->parallel = thread_data.parallel;
479 data->fork_threads = 0;
480 data->section = 1;
481 InitializeConditionVariable(&data->cond);
483 thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
484 if (!thread)
486 HeapFree(GetProcessHeap(), 0, data);
487 break;
490 GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
491 (const WCHAR *)vcomp_module, &module);
492 team_data.num_threads++;
493 list_add_tail(&thread_data.entry, &data->entry);
494 CloseHandle(thread);
497 LeaveCriticalSection(&vcomp_section);
500 vcomp_set_thread_data(&thread_data);
501 _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, team_data.valist);
502 vcomp_set_thread_data(prev_thread_data);
503 prev_thread_data->fork_threads = 0;
505 if (team_data.num_threads > 1)
507 EnterCriticalSection(&vcomp_section);
509 team_data.finished_threads++;
510 while (team_data.finished_threads < team_data.num_threads)
511 SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);
513 LeaveCriticalSection(&vcomp_section);
514 assert(list_empty(&thread_data.entry));
517 __ms_va_end(team_data.valist);
520 BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
522 TRACE("(%p, %d, %p)\n", instance, reason, reserved);
524 switch (reason)
526 case DLL_WINE_PREATTACH:
527 return FALSE; /* prefer native version */
529 case DLL_PROCESS_ATTACH:
531 SYSTEM_INFO sysinfo;
533 if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
535 ERR("Failed to allocate TLS index\n");
536 return FALSE;
539 GetSystemInfo(&sysinfo);
540 vcomp_module = instance;
541 vcomp_max_threads = sysinfo.dwNumberOfProcessors;
542 vcomp_num_threads = sysinfo.dwNumberOfProcessors;
543 break;
546 case DLL_PROCESS_DETACH:
548 if (reserved) break;
549 if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
551 vcomp_free_thread_data();
552 TlsFree(vcomp_context_tls);
554 break;
557 case DLL_THREAD_DETACH:
559 vcomp_free_thread_data();
560 break;
564 return TRUE;