5 * Copyright 2011 Austin English
6 * Copyright 2012 Dan Kegel
7 * Copyright 2015 Sebastian Lackner
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "wine/debug.h"
32 #include "wine/list.h"
/* Debug channel used by the TRACE()/ERR() calls in this file. */
34 WINE_DEFAULT_DEBUG_CHANNEL(vcomp
);
/* Worker threads that finished their team's callback and wait for reuse.
 * Every access in this file happens while vcomp_section is held. */
36 static struct list vcomp_idle_threads
= LIST_INIT(vcomp_idle_threads
);
/* TLS slot holding the calling thread's struct vcomp_thread_data pointer;
 * TLS_OUT_OF_INDEXES until DllMain allocates it. */
37 static DWORD vcomp_context_tls
= TLS_OUT_OF_INDEXES
;
/* Module handle of this DLL, stored by DllMain and used when worker threads
 * are created and when they exit. */
38 static HMODULE vcomp_module
;
/* Value reported by omp_get_max_threads(); DllMain sets it to the processor count. */
39 static int vcomp_max_threads
;
/* Default team size for _vcomp_fork(); starts as the processor count and is
 * replaced by omp_set_num_threads(). */
40 static int vcomp_num_threads
;
/* Nested-parallelism flag written by omp_set_nested() and consulted by _vcomp_fork(). */
41 static BOOL vcomp_nested_fork
= FALSE
;
/* Statically initialised critical section (forward declaration, debug record,
 * definition) guarding the idle list and the team/task bookkeeping. */
43 static RTL_CRITICAL_SECTION vcomp_section
;
44 static RTL_CRITICAL_SECTION_DEBUG critsect_debug
=
47 { &critsect_debug
.ProcessLocksList
, &critsect_debug
.ProcessLocksList
},
48 0, 0, { (DWORD_PTR
)(__FILE__
": vcomp_section") }
50 static RTL_CRITICAL_SECTION vcomp_section
= { &critsect_debug
, -1, 0, 0, 0, 0 };
/* Per-thread state; a pointer to it is kept in the vcomp_context_tls slot.
 * Besides the members below, code in this file also uses thread_num, parallel,
 * fork_threads, entry and section. */
52 struct vcomp_thread_data
/* team the thread currently works for; NULL when it has none */
54 struct vcomp_team_data
*team
;
/* section bookkeeping; all threads of one team point at the same object */
55 struct vcomp_task_data
*task
;
60 /* only used for concurrent tasks */
/* an idle worker sleeps on cond; _vcomp_fork() wakes it after assigning a team */
62 CONDITION_VARIABLE cond
;
/* State shared by all threads of one parallel region; _vcomp_fork() keeps the
 * instance in a local variable. Other members used in this file: num_threads,
 * finished_threads, nargs, wrapper, valist, barrier and barrier_count. */
68 struct vcomp_team_data
/* signalled when a barrier is released and when the last worker has finished */
70 CONDITION_VARIABLE cond
;
74 /* callback arguments */
/* Sections state shared by the threads of a team: _vcomp_sections_init() and
 * _vcomp_sections_next() read and write its section, num_sections and
 * section_index members under vcomp_section. */
84 struct vcomp_task_data
/* i386 assembly thunk that calls wrapper (first stack argument) through an
 * %ebp-based frame, with %esi and %edi saved below the frame pointer.
 * NOTE(review): presumably nargs arguments are copied from args to the stack
 * before the call - confirm against the complete instruction sequence. */
94 extern void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
);
95 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper
,
97 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
98 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
100 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
102 __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
104 __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
/* 12(%ebp) is the second argument (nargs), 16(%ebp) the third (args) */
105 "movl 12(%ebp),%edx\n\t"
112 "movl 12(%ebp),%ecx\n\t"
113 "movl 16(%ebp),%esi\n\t"
/* indirect call through the first argument (wrapper) */
116 "1:\tcall *8(%ebp)\n\t"
/* point %esp back at the two saved registers below %ebp */
117 "leal -8(%ebp),%esp\n\t"
119 __ASM_CFI(".cfi_same_value %edi\n\t")
121 __ASM_CFI(".cfi_same_value %esi\n\t")
123 __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
124 __ASM_CFI(".cfi_same_value %ebp\n\t")
127 #elif defined(__x86_64__)
/* x86_64 assembly thunk with the same prototype as the i386 variant. It uses an
 * %rbp-based frame with %rsi and %rdi saved below the frame pointer.
 * NOTE(review): presumably the arguments are copied from args to the stack
 * before wrapper is called - confirm against the complete instruction sequence. */
129 extern void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
);
130 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper
,
132 __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
133 __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
135 __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
137 __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
139 __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
143 "cmovgq %rdx,%rcx\n\t"
/* %rdx = %rcx * 8, i.e. the byte size of %rcx eight-byte slots */
144 "leaq 0(,%rcx,8),%rdx\n\t"
/* load the first four stack slots into %rcx, %rdx, %r8 and %r9 */
150 "movq 0(%rsp),%rcx\n\t"
151 "movq 8(%rsp),%rdx\n\t"
152 "movq 16(%rsp),%r8\n\t"
153 "movq 24(%rsp),%r9\n\t"
/* point %rsp back at the two saved registers below %rbp */
155 "leaq -16(%rbp),%rsp\n\t"
157 __ASM_CFI(".cfi_same_value %rdi\n\t")
159 __ASM_CFI(".cfi_same_value %rsi\n\t")
160 __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
162 __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
163 __ASM_CFI(".cfi_same_value %rbp\n\t")
168 static void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
)
170 ERR("Not implemented for this architecture\n");
175 static inline struct vcomp_thread_data
*vcomp_get_thread_data(void)
177 return (struct vcomp_thread_data
*)TlsGetValue(vcomp_context_tls
);
180 static inline void vcomp_set_thread_data(struct vcomp_thread_data
*thread_data
)
182 TlsSetValue(vcomp_context_tls
, thread_data
);
/* Returns the calling thread's vcomp data. On first use it allocates a record
 * with default values and stores it in the thread's TLS slot. */
185 static struct vcomp_thread_data
*vcomp_init_thread_data(void)
187 struct vcomp_thread_data
*thread_data
= vcomp_get_thread_data();
/* thread and task data live in one heap block (see sizeof(*data) below) */
190 struct vcomp_thread_data thread
;
191 struct vcomp_task_data task
;
/* fast path: this thread already has its data */
194 if (thread_data
) return thread_data
;
195 if (!(data
= HeapAlloc(GetProcessHeap(), 0, sizeof(*data
))))
197 ERR("could not create thread data\n");
201 data
->task
.section
= 0;
/* defaults for a thread outside any team: no team, thread number 0, not parallel */
203 thread_data
= &data
->thread
;
204 thread_data
->team
= NULL
;
205 thread_data
->task
= &data
->task
;
206 thread_data
->thread_num
= 0;
207 thread_data
->parallel
= FALSE
;
208 thread_data
->fork_threads
= 0;
209 thread_data
->section
= 1;
/* publish the record so later vcomp_get_thread_data() calls find it */
211 vcomp_set_thread_data(thread_data
);
215 static void vcomp_free_thread_data(void)
217 struct vcomp_thread_data
*thread_data
= vcomp_get_thread_data();
218 if (!thread_data
) return;
220 HeapFree(GetProcessHeap(), 0, thread_data
);
221 vcomp_set_thread_data(NULL
);
224 int CDECL
omp_get_dynamic(void)
/* Returns vcomp_max_threads, which DllMain initialises to the number of processors. */
230 int CDECL
omp_get_max_threads(void)
233 return vcomp_max_threads
;
/* Returns the nested-parallelism flag: FALSE initially, otherwise whatever
 * omp_set_nested() stored last. */
236 int CDECL
omp_get_nested(void)
239 return vcomp_nested_fork
;
242 int CDECL
omp_get_num_procs(void)
/* Returns the size of the calling thread's current team, or 1 when it has no team. */
248 int CDECL
omp_get_num_threads(void)
/* vcomp_init_thread_data() creates the per-thread record if this is the first use */
250 struct vcomp_team_data
*team_data
= vcomp_init_thread_data()->team
;
252 return team_data
? team_data
->num_threads
: 1;
/* Returns the thread_num stored in the calling thread's vcomp data; a freshly
 * created record carries 0. */
255 int CDECL
omp_get_thread_num(void)
258 return vcomp_init_thread_data()->thread_num
;
261 /* Time in seconds since "some time in the past" */
262 double CDECL
omp_get_wtime(void)
264 return GetTickCount() / 1000.0;
267 void CDECL
omp_set_dynamic(int val
)
269 TRACE("(%d): stub\n", val
);
272 void CDECL
omp_set_nested(int nested
)
274 TRACE("(%d)\n", nested
);
275 vcomp_nested_fork
= (nested
!= 0);
278 void CDECL
omp_set_num_threads(int num_threads
)
280 TRACE("(%d)\n", num_threads
);
281 if (num_threads
>= 1)
282 vcomp_num_threads
= num_threads
;
/* Barrier for the calling thread's team. The thread that brings barrier_count
 * up to num_threads advances the barrier generation and wakes the team; the wait
 * loop below sleeps until that generation changes. */
285 void CDECL
_vcomp_barrier(void)
287 struct vcomp_team_data
*team_data
= vcomp_init_thread_data()->team
;
294 EnterCriticalSection(&vcomp_section
);
/* last arrival: bump the generation, reset the counter and wake all waiters */
295 if (++team_data
->barrier_count
>= team_data
->num_threads
)
297 team_data
->barrier
++;
298 team_data
->barrier_count
= 0;
299 WakeAllConditionVariable(&team_data
->cond
);
/* Remember the generation seen on arrival. The loop re-checks it after every
 * wakeup, since the team condition variable is also signalled when workers finish. */
303 unsigned int barrier
= team_data
->barrier
;
304 while (team_data
->barrier
== barrier
)
305 SleepConditionVariableCS(&team_data
->cond
, &vcomp_section
, INFINITE
);
307 LeaveCriticalSection(&vcomp_section
);
310 void CDECL
_vcomp_set_num_threads(int num_threads
)
312 TRACE("(%d)\n", num_threads
);
313 if (num_threads
>= 1)
314 vcomp_init_thread_data()->fork_threads
= num_threads
;
/* Stub, as its own trace message says: flags is only logged.
 * NOTE(review): confirm which value callers receive from this stub. */
317 int CDECL
_vcomp_single_begin(int flags
)
319 TRACE("(%x): stub\n", flags
);
323 void CDECL
_vcomp_single_end(void)
/* Begins a sections construct with n sections for the calling thread. Each
 * thread advances its own section counter; a thread whose counter is ahead of the
 * shared task counter publishes n and resets the shared section index. */
328 void CDECL
_vcomp_sections_init(int n
)
330 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
331 struct vcomp_task_data
*task_data
= thread_data
->task
;
335 EnterCriticalSection(&vcomp_section
);
336 thread_data
->section
++;
/* compare via the signed difference - presumably so the test survives counter wrap-around */
337 if ((int)(thread_data
->section
- task_data
->section
) > 0)
339 task_data
->section
= thread_data
->section
;
340 task_data
->num_sections
= n
;
341 task_data
->section_index
= 0;
343 LeaveCriticalSection(&vcomp_section
);
/* Claims the next unassigned section index of the current sections construct.
 * NOTE(review): confirm the value returned when no index is claimed. */
346 int CDECL
_vcomp_sections_next(void)
348 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
349 struct vcomp_task_data
*task_data
= thread_data
->task
;
354 EnterCriticalSection(&vcomp_section
);
/* claim an index only if this thread is in the same sections construct as the
 * shared task data and the index has not reached num_sections */
355 if (thread_data
->section
== task_data
->section
&&
356 task_data
->section_index
!= task_data
->num_sections
)
358 i
= task_data
->section_index
++;
360 LeaveCriticalSection(&vcomp_section
);
/* Thread procedure of a pooled worker. param is the worker's heap-allocated
 * struct vcomp_thread_data. The worker runs its team's callback, moves itself to
 * vcomp_idle_threads and sleeps on its own condition variable. A 5000 ms wait that
 * times out with no team assigned leads to the teardown code at the end. */
364 static DWORD WINAPI
_vcomp_fork_worker(void *param
)
366 struct vcomp_thread_data
*thread_data
= param
;
/* make the record reachable through vcomp_get_thread_data() on this thread */
367 vcomp_set_thread_data(thread_data
);
369 TRACE("starting worker thread for %p\n", thread_data
);
371 EnterCriticalSection(&vcomp_section
);
374 struct vcomp_team_data
*team
= thread_data
->team
;
/* the callback itself runs without holding vcomp_section */
377 LeaveCriticalSection(&vcomp_section
);
378 _vcomp_fork_call_wrapper(team
->wrapper
, team
->nargs
, team
->valist
);
379 EnterCriticalSection(&vcomp_section
);
/* leave the team and move from the team's list to the idle list */
381 thread_data
->team
= NULL
;
382 list_remove(&thread_data
->entry
);
383 list_add_tail(&vcomp_idle_threads
, &thread_data
->entry
);
/* the last thread to finish wakes the waiters on the team condition variable */
384 if (++team
->finished_threads
>= team
->num_threads
)
385 WakeAllConditionVariable(&team
->cond
);
/* idle wait: a timeout counts only if no team was assigned in the meantime */
388 if (!SleepConditionVariableCS(&thread_data
->cond
, &vcomp_section
, 5000) &&
389 GetLastError() == ERROR_TIMEOUT
&& !thread_data
->team
)
/* teardown: unlink from the idle list, free the record, clear TLS and exit */
394 list_remove(&thread_data
->entry
);
395 LeaveCriticalSection(&vcomp_section
);
397 TRACE("terminating worker thread for %p\n", thread_data
);
399 HeapFree(GetProcessHeap(), 0, thread_data
);
400 vcomp_set_thread_data(NULL
);
/* presumably releases the module reference _vcomp_fork() took via GetModuleHandleExW() */
401 FreeLibraryAndExitThread(vcomp_module
, 0);
/* Runs wrapper on a team of threads. The callback arguments (nargs, wrapper and
 * the variadic list) are stored in a team record on this stack frame. Idle workers
 * are reused first and further workers are spawned as needed. The calling thread
 * runs the callback itself as thread 0, then waits until every team member has
 * finished. */
405 void WINAPIV
_vcomp_fork(BOOL ifval
, int nargs
, void *wrapper
, ...)
407 struct vcomp_thread_data
*prev_thread_data
= vcomp_init_thread_data();
408 struct vcomp_thread_data thread_data
;
409 struct vcomp_team_data team_data
;
410 struct vcomp_task_data task_data
;
413 TRACE("(%d, %d, %p, ...)\n", ifval
, nargs
, wrapper
);
/* team size: a per-thread request made through _vcomp_set_num_threads() is used
 * when present, otherwise the global default vcomp_num_threads */
415 if (prev_thread_data
->parallel
&& !vcomp_nested_fork
)
420 else if (prev_thread_data
->fork_threads
)
421 num_threads
= prev_thread_data
->fork_threads
;
423 num_threads
= vcomp_num_threads
;
/* the team starts out with the calling thread only */
425 InitializeConditionVariable(&team_data
.cond
);
426 team_data
.num_threads
= 1;
427 team_data
.finished_threads
= 0;
428 team_data
.nargs
= nargs
;
429 team_data
.wrapper
= wrapper
;
430 __ms_va_start(team_data
.valist
, wrapper
);
431 team_data
.barrier
= 0;
432 team_data
.barrier_count
= 0;
434 task_data
.section
= 0;
/* temporary thread data for the calling thread while it runs inside the region */
436 thread_data
.team
= &team_data
;
437 thread_data
.task
= &task_data
;
438 thread_data
.thread_num
= 0;
439 thread_data
.parallel
= ifval
|| prev_thread_data
->parallel
;
440 thread_data
.fork_threads
= 0;
441 thread_data
.section
= 1;
/* thread_data.entry serves as the head of this team's list of workers */
442 list_init(&thread_data
.entry
);
443 InitializeConditionVariable(&thread_data
.cond
);
448 EnterCriticalSection(&vcomp_section
);
450 /* reuse existing threads (if any) */
451 while (team_data
.num_threads
< num_threads
&& (ptr
= list_head(&vcomp_idle_threads
)))
453 struct vcomp_thread_data
*data
= LIST_ENTRY(ptr
, struct vcomp_thread_data
, entry
);
454 data
->team
= &team_data
;
455 data
->task
= &task_data
;
456 data
->thread_num
= team_data
.num_threads
++;
457 data
->parallel
= thread_data
.parallel
;
458 data
->fork_threads
= 0;
/* move the worker from the idle list to the team list and wake it up */
460 list_remove(&data
->entry
);
461 list_add_tail(&thread_data
.entry
, &data
->entry
);
462 WakeAllConditionVariable(&data
->cond
);
465 /* spawn additional threads */
466 while (team_data
.num_threads
< num_threads
)
468 struct vcomp_thread_data
*data
;
472 data
= HeapAlloc(GetProcessHeap(), 0, sizeof(*data
));
475 data
->team
= &team_data
;
476 data
->task
= &task_data
;
477 data
->thread_num
= team_data
.num_threads
;
478 data
->parallel
= thread_data
.parallel
;
479 data
->fork_threads
= 0;
481 InitializeConditionVariable(&data
->cond
);
483 thread
= CreateThread(NULL
, 0, _vcomp_fork_worker
, data
, 0, NULL
);
/* NOTE(review): presumably the CreateThread() failure path - confirm */
486 HeapFree(GetProcessHeap(), 0, data
);
/* presumably takes a module reference for the new worker; the worker calls
 * FreeLibraryAndExitThread(vcomp_module, 0) when it terminates */
490 GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
,
491 (const WCHAR
*)vcomp_module
, &module
);
492 team_data
.num_threads
++;
493 list_add_tail(&thread_data
.entry
, &data
->entry
);
497 LeaveCriticalSection(&vcomp_section
);
/* run the callback on the calling thread with the team's thread data installed,
 * then restore the data that was active before the fork */
500 vcomp_set_thread_data(&thread_data
);
501 _vcomp_fork_call_wrapper(team_data
.wrapper
, team_data
.nargs
, team_data
.valist
);
502 vcomp_set_thread_data(prev_thread_data
);
/* a _vcomp_set_num_threads() request applies to a single fork only */
503 prev_thread_data
->fork_threads
= 0;
505 if (team_data
.num_threads
> 1)
507 EnterCriticalSection(&vcomp_section
);
/* count the calling thread as finished, then wait for the remaining members */
509 team_data
.finished_threads
++;
510 while (team_data
.finished_threads
< team_data
.num_threads
)
511 SleepConditionVariableCS(&team_data
.cond
, &vcomp_section
, INFINITE
);
513 LeaveCriticalSection(&vcomp_section
);
/* every worker unlinked itself from the team list when it finished */
514 assert(list_empty(&thread_data
.entry
));
517 __ms_va_end(team_data
.valist
);
520 BOOL WINAPI
DllMain(HINSTANCE instance
, DWORD reason
, LPVOID reserved
)
522 TRACE("(%p, %d, %p)\n", instance
, reason
, reserved
);
526 case DLL_WINE_PREATTACH
:
527 return FALSE
; /* prefer native version */
529 case DLL_PROCESS_ATTACH
:
533 if ((vcomp_context_tls
= TlsAlloc()) == TLS_OUT_OF_INDEXES
)
535 ERR("Failed to allocate TLS index\n");
539 GetSystemInfo(&sysinfo
);
540 vcomp_module
= instance
;
541 vcomp_max_threads
= sysinfo
.dwNumberOfProcessors
;
542 vcomp_num_threads
= sysinfo
.dwNumberOfProcessors
;
546 case DLL_PROCESS_DETACH
:
549 if (vcomp_context_tls
!= TLS_OUT_OF_INDEXES
)
551 vcomp_free_thread_data();
552 TlsFree(vcomp_context_tls
);
557 case DLL_THREAD_DETACH
:
559 vcomp_free_thread_data();