 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "wine/port.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(vcomp
);
38 typedef CRITICAL_SECTION
*omp_lock_t
;
39 typedef CRITICAL_SECTION
*omp_nest_lock_t
;
41 static struct list vcomp_idle_threads
= LIST_INIT(vcomp_idle_threads
);
42 static DWORD vcomp_context_tls
= TLS_OUT_OF_INDEXES
;
43 static HMODULE vcomp_module
;
44 static int vcomp_max_threads
;
45 static int vcomp_num_threads
;
46 static BOOL vcomp_nested_fork
= FALSE
;
48 static RTL_CRITICAL_SECTION vcomp_section
;
49 static RTL_CRITICAL_SECTION_DEBUG critsect_debug
=
52 { &critsect_debug
.ProcessLocksList
, &critsect_debug
.ProcessLocksList
},
53 0, 0, { (DWORD_PTR
)(__FILE__
": vcomp_section") }
55 static RTL_CRITICAL_SECTION vcomp_section
= { &critsect_debug
, -1, 0, 0, 0, 0 };
57 #define VCOMP_DYNAMIC_FLAGS_STATIC 0x01
58 #define VCOMP_DYNAMIC_FLAGS_CHUNKED 0x02
59 #define VCOMP_DYNAMIC_FLAGS_GUIDED 0x03
60 #define VCOMP_DYNAMIC_FLAGS_INCREMENT 0x40
62 struct vcomp_thread_data
64 struct vcomp_team_data
*team
;
65 struct vcomp_task_data
*task
;
70 /* only used for concurrent tasks */
72 CONDITION_VARIABLE cond
;
82 unsigned int dynamic_type
;
83 unsigned int dynamic_begin
;
84 unsigned int dynamic_end
;
87 struct vcomp_team_data
89 CONDITION_VARIABLE cond
;
93 /* callback arguments */
103 struct vcomp_task_data
109 unsigned int section
;
114 unsigned int dynamic
;
115 unsigned int dynamic_first
;
116 unsigned int dynamic_last
;
117 unsigned int dynamic_iterations
;
119 unsigned int dynamic_chunksize
;
/* Architecture-specific trampoline that re-creates a variadic call: it copies
 * `nargs` machine words from `args` onto the stack (and, on x86_64, into the
 * first four register slots) and then calls `wrapper`.
 * NOTE(review): this extract is garbled — original line numbers are fused into
 * the text and many interior assembly lines are missing. Do not modify without
 * consulting the original file; kept byte-identical here. */
122 #if defined(__i386__)
124 extern void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
);
125 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper
,
127 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
128 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
130 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
132 __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
134 __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
135 "movl 12(%ebp),%edx\n\t"
142 "movl 12(%ebp),%ecx\n\t"
143 "movl 16(%ebp),%esi\n\t"
146 "1:\tcall *8(%ebp)\n\t"
147 "leal -8(%ebp),%esp\n\t"
149 __ASM_CFI(".cfi_same_value %edi\n\t")
151 __ASM_CFI(".cfi_same_value %esi\n\t")
153 __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
154 __ASM_CFI(".cfi_same_value %ebp\n\t")
/* x86_64 variant: first four arguments go in rcx/rdx/r8/r9 per the MS ABI. */
157 #elif defined(__x86_64__)
159 extern void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
);
160 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper
,
162 __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
163 __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
165 __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
167 __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
169 __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
173 "cmovgq %rdx,%rcx\n\t"
174 "leaq 0(,%rcx,8),%rdx\n\t"
180 "movq 0(%rsp),%rcx\n\t"
181 "movq 8(%rsp),%rdx\n\t"
182 "movq 16(%rsp),%r8\n\t"
183 "movq 24(%rsp),%r9\n\t"
185 "leaq -16(%rbp),%rsp\n\t"
187 __ASM_CFI(".cfi_same_value %rdi\n\t")
189 __ASM_CFI(".cfi_same_value %rsi\n\t")
190 __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
192 __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
193 __ASM_CFI(".cfi_same_value %rbp\n\t")
/* ARM variant: arguments copied from the va_list onto the stack. */
196 #elif defined(__arm__)
198 extern void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
);
199 __ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper
,
201 "push {r4, r5, LR}\n\t"
209 "subeq SP, SP, #4\n\t"
210 "1:\tsub r3, r3, #4\n\t"
211 "ldr r0, [r2, r3]\n\t"
212 "str r0, [SP, r3]\n\t"
227 "4:\tpop {r0-r3}\n\t"
/* Fallback for unsupported architectures: report and do nothing. */
234 static void CDECL
_vcomp_fork_call_wrapper(void *wrapper
, int nargs
, __ms_va_list args
)
236 ERR("Not implemented for this architecture\n");
241 static inline struct vcomp_thread_data
*vcomp_get_thread_data(void)
243 return (struct vcomp_thread_data
*)TlsGetValue(vcomp_context_tls
);
246 static inline void vcomp_set_thread_data(struct vcomp_thread_data
*thread_data
)
248 TlsSetValue(vcomp_context_tls
, thread_data
);
251 static struct vcomp_thread_data
*vcomp_init_thread_data(void)
253 struct vcomp_thread_data
*thread_data
= vcomp_get_thread_data();
256 struct vcomp_thread_data thread
;
257 struct vcomp_task_data task
;
260 if (thread_data
) return thread_data
;
261 if (!(data
= HeapAlloc(GetProcessHeap(), 0, sizeof(*data
))))
263 ERR("could not create thread data\n");
267 data
->task
.single
= 0;
268 data
->task
.section
= 0;
269 data
->task
.dynamic
= 0;
271 thread_data
= &data
->thread
;
272 thread_data
->team
= NULL
;
273 thread_data
->task
= &data
->task
;
274 thread_data
->thread_num
= 0;
275 thread_data
->parallel
= FALSE
;
276 thread_data
->fork_threads
= 0;
277 thread_data
->single
= 1;
278 thread_data
->section
= 1;
279 thread_data
->dynamic
= 1;
280 thread_data
->dynamic_type
= 0;
282 vcomp_set_thread_data(thread_data
);
286 static void vcomp_free_thread_data(void)
288 struct vcomp_thread_data
*thread_data
= vcomp_get_thread_data();
289 if (!thread_data
) return;
291 HeapFree(GetProcessHeap(), 0, thread_data
);
292 vcomp_set_thread_data(NULL
);
295 void CDECL
_vcomp_atomic_add_i4(int *dest
, int val
)
297 interlocked_xchg_add(dest
, val
);
300 void CDECL
_vcomp_atomic_and_i4(int *dest
, int val
)
303 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
& val
, old
) != old
);
306 void CDECL
_vcomp_atomic_div_i4(int *dest
, int val
)
309 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
/ val
, old
) != old
);
312 void CDECL
_vcomp_atomic_div_ui4(unsigned int *dest
, unsigned int val
)
315 do old
= *dest
; while (interlocked_cmpxchg((int *)dest
, old
/ val
, old
) != old
);
318 void CDECL
_vcomp_atomic_mul_i4(int *dest
, int val
)
321 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
* val
, old
) != old
);
324 void CDECL
_vcomp_atomic_or_i4(int *dest
, int val
)
327 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
| val
, old
) != old
);
330 void CDECL
_vcomp_atomic_shl_i4(int *dest
, int val
)
333 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
<< val
, old
) != old
);
336 void CDECL
_vcomp_atomic_shr_i4(int *dest
, int val
)
339 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
>> val
, old
) != old
);
342 void CDECL
_vcomp_atomic_shr_ui4(unsigned int *dest
, unsigned int val
)
345 do old
= *dest
; while (interlocked_cmpxchg((int *)dest
, old
>> val
, old
) != old
);
348 void CDECL
_vcomp_atomic_sub_i4(int *dest
, int val
)
350 interlocked_xchg_add(dest
, -val
);
353 void CDECL
_vcomp_atomic_xor_i4(int *dest
, int val
)
356 do old
= *dest
; while (interlocked_cmpxchg(dest
, old
^ val
, old
) != old
);
359 void CDECL
_vcomp_atomic_add_i8(LONG64
*dest
, LONG64 val
)
362 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
+ val
, old
) != old
);
365 void CDECL
_vcomp_atomic_and_i8(LONG64
*dest
, LONG64 val
)
368 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
& val
, old
) != old
);
371 void CDECL
_vcomp_atomic_div_i8(LONG64
*dest
, LONG64 val
)
374 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
/ val
, old
) != old
);
377 void CDECL
_vcomp_atomic_div_ui8(ULONG64
*dest
, ULONG64 val
)
380 do old
= *dest
; while (interlocked_cmpxchg64((LONG64
*)dest
, old
/ val
, old
) != old
);
383 void CDECL
_vcomp_atomic_mul_i8(LONG64
*dest
, LONG64 val
)
386 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
* val
, old
) != old
);
389 void CDECL
_vcomp_atomic_or_i8(LONG64
*dest
, LONG64 val
)
392 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
| val
, old
) != old
);
395 void CDECL
_vcomp_atomic_shl_i8(LONG64
*dest
, unsigned int val
)
398 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
<< val
, old
) != old
);
401 void CDECL
_vcomp_atomic_shr_i8(LONG64
*dest
, unsigned int val
)
404 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
>> val
, old
) != old
);
407 void CDECL
_vcomp_atomic_shr_ui8(ULONG64
*dest
, unsigned int val
)
410 do old
= *dest
; while (interlocked_cmpxchg64((LONG64
*)dest
, old
>> val
, old
) != old
);
413 void CDECL
_vcomp_atomic_sub_i8(LONG64
*dest
, LONG64 val
)
416 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
- val
, old
) != old
);
419 void CDECL
_vcomp_atomic_xor_i8(LONG64
*dest
, LONG64 val
)
422 do old
= *dest
; while (interlocked_cmpxchg64(dest
, old
^ val
, old
) != old
);
425 void CDECL
_vcomp_atomic_add_r4(float *dest
, float val
)
431 *(float *)&new = *(float *)&old
+ val
;
433 while (interlocked_cmpxchg((int *)dest
, new, old
) != old
);
436 void CDECL
_vcomp_atomic_div_r4(float *dest
, float val
)
442 *(float *)&new = *(float *)&old
/ val
;
444 while (interlocked_cmpxchg((int *)dest
, new, old
) != old
);
447 void CDECL
_vcomp_atomic_mul_r4(float *dest
, float val
)
453 *(float *)&new = *(float *)&old
* val
;
455 while (interlocked_cmpxchg((int *)dest
, new, old
) != old
);
458 void CDECL
_vcomp_atomic_sub_r4(float *dest
, float val
)
464 *(float *)&new = *(float *)&old
- val
;
466 while (interlocked_cmpxchg((int *)dest
, new, old
) != old
);
469 void CDECL
_vcomp_atomic_add_r8(double *dest
, double val
)
474 old
= *(LONG64
*)dest
;
475 *(double *)&new = *(double *)&old
+ val
;
477 while (interlocked_cmpxchg64((LONG64
*)dest
, new, old
) != old
);
480 void CDECL
_vcomp_atomic_div_r8(double *dest
, double val
)
485 old
= *(LONG64
*)dest
;
486 *(double *)&new = *(double *)&old
/ val
;
488 while (interlocked_cmpxchg64((LONG64
*)dest
, new, old
) != old
);
491 void CDECL
_vcomp_atomic_mul_r8(double *dest
, double val
)
496 old
= *(LONG64
*)dest
;
497 *(double *)&new = *(double *)&old
* val
;
499 while (interlocked_cmpxchg64((LONG64
*)dest
, new, old
) != old
);
502 void CDECL
_vcomp_atomic_sub_r8(double *dest
, double val
)
507 old
= *(LONG64
*)dest
;
508 *(double *)&new = *(double *)&old
- val
;
510 while (interlocked_cmpxchg64((LONG64
*)dest
, new, old
) != old
);
513 int CDECL
omp_get_dynamic(void)
519 int CDECL
omp_get_max_threads(void)
522 return vcomp_max_threads
;
525 int CDECL
omp_get_nested(void)
528 return vcomp_nested_fork
;
531 int CDECL
omp_get_num_procs(void)
537 int CDECL
omp_get_num_threads(void)
539 struct vcomp_team_data
*team_data
= vcomp_init_thread_data()->team
;
541 return team_data
? team_data
->num_threads
: 1;
544 int CDECL
omp_get_thread_num(void)
547 return vcomp_init_thread_data()->thread_num
;
550 int CDECL
_vcomp_get_thread_num(void)
553 return vcomp_init_thread_data()->thread_num
;
556 /* Time in seconds since "some time in the past" */
557 double CDECL
omp_get_wtime(void)
559 return GetTickCount() / 1000.0;
562 void CDECL
omp_set_dynamic(int val
)
564 TRACE("(%d): stub\n", val
);
567 void CDECL
omp_set_nested(int nested
)
569 TRACE("(%d)\n", nested
);
570 vcomp_nested_fork
= (nested
!= 0);
573 void CDECL
omp_set_num_threads(int num_threads
)
575 TRACE("(%d)\n", num_threads
);
576 if (num_threads
>= 1)
577 vcomp_num_threads
= num_threads
;
580 void CDECL
_vcomp_flush(void)
585 void CDECL
_vcomp_barrier(void)
587 struct vcomp_team_data
*team_data
= vcomp_init_thread_data()->team
;
594 EnterCriticalSection(&vcomp_section
);
595 if (++team_data
->barrier_count
>= team_data
->num_threads
)
597 team_data
->barrier
++;
598 team_data
->barrier_count
= 0;
599 WakeAllConditionVariable(&team_data
->cond
);
603 unsigned int barrier
= team_data
->barrier
;
604 while (team_data
->barrier
== barrier
)
605 SleepConditionVariableCS(&team_data
->cond
, &vcomp_section
, INFINITE
);
607 LeaveCriticalSection(&vcomp_section
);
610 void CDECL
_vcomp_set_num_threads(int num_threads
)
612 TRACE("(%d)\n", num_threads
);
613 if (num_threads
>= 1)
614 vcomp_init_thread_data()->fork_threads
= num_threads
;
617 int CDECL
_vcomp_master_begin(void)
620 return !vcomp_init_thread_data()->thread_num
;
623 void CDECL
_vcomp_master_end(void)
626 /* nothing to do here */
629 int CDECL
_vcomp_single_begin(int flags
)
631 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
632 struct vcomp_task_data
*task_data
= thread_data
->task
;
635 TRACE("(%x): semi-stub\n", flags
);
637 EnterCriticalSection(&vcomp_section
);
638 thread_data
->single
++;
639 if ((int)(thread_data
->single
- task_data
->single
) > 0)
641 task_data
->single
= thread_data
->single
;
644 LeaveCriticalSection(&vcomp_section
);
649 void CDECL
_vcomp_single_end(void)
652 /* nothing to do here */
655 void CDECL
_vcomp_sections_init(int n
)
657 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
658 struct vcomp_task_data
*task_data
= thread_data
->task
;
662 EnterCriticalSection(&vcomp_section
);
663 thread_data
->section
++;
664 if ((int)(thread_data
->section
- task_data
->section
) > 0)
666 task_data
->section
= thread_data
->section
;
667 task_data
->num_sections
= n
;
668 task_data
->section_index
= 0;
670 LeaveCriticalSection(&vcomp_section
);
673 int CDECL
_vcomp_sections_next(void)
675 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
676 struct vcomp_task_data
*task_data
= thread_data
->task
;
681 EnterCriticalSection(&vcomp_section
);
682 if (thread_data
->section
== task_data
->section
&&
683 task_data
->section_index
!= task_data
->num_sections
)
685 i
= task_data
->section_index
++;
687 LeaveCriticalSection(&vcomp_section
);
691 void CDECL
_vcomp_for_static_simple_init(unsigned int first
, unsigned int last
, int step
,
692 BOOL increment
, unsigned int *begin
, unsigned int *end
)
694 unsigned int iterations
, per_thread
, remaining
;
695 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
696 struct vcomp_team_data
*team_data
= thread_data
->team
;
697 int num_threads
= team_data
? team_data
->num_threads
: 1;
698 int thread_num
= thread_data
->thread_num
;
700 TRACE("(%u, %u, %d, %u, %p, %p)\n", first
, last
, step
, increment
, begin
, end
);
702 if (num_threads
== 1)
712 *end
= increment
? -1 : 1;
717 iterations
= 1 + (last
- first
) / step
;
720 iterations
= 1 + (first
- last
) / step
;
724 per_thread
= iterations
/ num_threads
;
725 remaining
= iterations
- per_thread
* num_threads
;
727 if (thread_num
< remaining
)
730 first
+= remaining
* step
;
738 *begin
= first
+ per_thread
* thread_num
* step
;
739 *end
= *begin
+ (per_thread
- 1) * step
;
742 void CDECL
_vcomp_for_static_init(int first
, int last
, int step
, int chunksize
, unsigned int *loops
,
743 int *begin
, int *end
, int *next
, int *lastchunk
)
745 unsigned int iterations
, num_chunks
, per_thread
, remaining
;
746 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
747 struct vcomp_team_data
*team_data
= thread_data
->team
;
748 int num_threads
= team_data
? team_data
->num_threads
: 1;
749 int thread_num
= thread_data
->thread_num
;
751 TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
752 first
, last
, step
, chunksize
, loops
, begin
, end
, next
, lastchunk
);
754 if (num_threads
== 1 && chunksize
!= 1)
766 *loops
= !thread_num
;
784 iterations
= 1 + (last
- first
) / step
;
787 iterations
= 1 + (first
- last
) / step
;
794 num_chunks
= ((DWORD64
)iterations
+ chunksize
- 1) / chunksize
;
795 per_thread
= num_chunks
/ num_threads
;
796 remaining
= num_chunks
- per_thread
* num_threads
;
798 *loops
= per_thread
+ (thread_num
< remaining
);
799 *begin
= first
+ thread_num
* chunksize
* step
;
800 *end
= *begin
+ (chunksize
- 1) * step
;
801 *next
= chunksize
* num_threads
* step
;
802 *lastchunk
= first
+ (num_chunks
- 1) * chunksize
* step
;
805 void CDECL
_vcomp_for_static_end(void)
808 /* nothing to do here */
811 void CDECL
_vcomp_for_dynamic_init(unsigned int flags
, unsigned int first
, unsigned int last
,
812 int step
, unsigned int chunksize
)
814 unsigned int iterations
, per_thread
, remaining
;
815 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
816 struct vcomp_team_data
*team_data
= thread_data
->team
;
817 struct vcomp_task_data
*task_data
= thread_data
->task
;
818 int num_threads
= team_data
? team_data
->num_threads
: 1;
819 int thread_num
= thread_data
->thread_num
;
820 unsigned int type
= flags
& ~VCOMP_DYNAMIC_FLAGS_INCREMENT
;
822 TRACE("(%u, %u, %u, %d, %u)\n", flags
, first
, last
, step
, chunksize
);
826 thread_data
->dynamic_type
= 0;
830 if (flags
& VCOMP_DYNAMIC_FLAGS_INCREMENT
)
831 iterations
= 1 + (last
- first
) / step
;
834 iterations
= 1 + (first
- last
) / step
;
838 if (type
== VCOMP_DYNAMIC_FLAGS_STATIC
)
840 per_thread
= iterations
/ num_threads
;
841 remaining
= iterations
- per_thread
* num_threads
;
843 if (thread_num
< remaining
)
846 first
+= remaining
* step
;
849 thread_data
->dynamic_type
= 0;
853 thread_data
->dynamic_type
= VCOMP_DYNAMIC_FLAGS_STATIC
;
854 thread_data
->dynamic_begin
= first
+ per_thread
* thread_num
* step
;
855 thread_data
->dynamic_end
= thread_data
->dynamic_begin
+ (per_thread
- 1) * step
;
859 if (type
!= VCOMP_DYNAMIC_FLAGS_CHUNKED
&&
860 type
!= VCOMP_DYNAMIC_FLAGS_GUIDED
)
862 FIXME("unsupported flags %u\n", flags
);
863 type
= VCOMP_DYNAMIC_FLAGS_GUIDED
;
866 EnterCriticalSection(&vcomp_section
);
867 thread_data
->dynamic
++;
868 thread_data
->dynamic_type
= type
;
869 if ((int)(thread_data
->dynamic
- task_data
->dynamic
) > 0)
871 task_data
->dynamic
= thread_data
->dynamic
;
872 task_data
->dynamic_first
= first
;
873 task_data
->dynamic_last
= last
;
874 task_data
->dynamic_iterations
= iterations
;
875 task_data
->dynamic_step
= step
;
876 task_data
->dynamic_chunksize
= chunksize
;
878 LeaveCriticalSection(&vcomp_section
);
882 int CDECL
_vcomp_for_dynamic_next(unsigned int *begin
, unsigned int *end
)
884 struct vcomp_thread_data
*thread_data
= vcomp_init_thread_data();
885 struct vcomp_task_data
*task_data
= thread_data
->task
;
886 struct vcomp_team_data
*team_data
= thread_data
->team
;
887 int num_threads
= team_data
? team_data
->num_threads
: 1;
889 TRACE("(%p, %p)\n", begin
, end
);
891 if (thread_data
->dynamic_type
== VCOMP_DYNAMIC_FLAGS_STATIC
)
893 *begin
= thread_data
->dynamic_begin
;
894 *end
= thread_data
->dynamic_end
;
895 thread_data
->dynamic_type
= 0;
898 else if (thread_data
->dynamic_type
== VCOMP_DYNAMIC_FLAGS_CHUNKED
||
899 thread_data
->dynamic_type
== VCOMP_DYNAMIC_FLAGS_GUIDED
)
901 unsigned int iterations
= 0;
902 EnterCriticalSection(&vcomp_section
);
903 if (thread_data
->dynamic
== task_data
->dynamic
&&
904 task_data
->dynamic_iterations
!= 0)
906 iterations
= min(task_data
->dynamic_iterations
, task_data
->dynamic_chunksize
);
907 if (thread_data
->dynamic_type
== VCOMP_DYNAMIC_FLAGS_GUIDED
&&
908 task_data
->dynamic_iterations
> num_threads
* task_data
->dynamic_chunksize
)
910 iterations
= (task_data
->dynamic_iterations
+ num_threads
- 1) / num_threads
;
912 *begin
= task_data
->dynamic_first
;
913 *end
= task_data
->dynamic_first
+ (iterations
- 1) * task_data
->dynamic_step
;
914 task_data
->dynamic_iterations
-= iterations
;
915 task_data
->dynamic_first
+= iterations
* task_data
->dynamic_step
;
916 if (!task_data
->dynamic_iterations
)
917 *end
= task_data
->dynamic_last
;
919 LeaveCriticalSection(&vcomp_section
);
920 return iterations
!= 0;
926 int CDECL
omp_in_parallel(void)
929 return vcomp_init_thread_data()->parallel
;
932 static DWORD WINAPI
_vcomp_fork_worker(void *param
)
934 struct vcomp_thread_data
*thread_data
= param
;
935 vcomp_set_thread_data(thread_data
);
937 TRACE("starting worker thread for %p\n", thread_data
);
939 EnterCriticalSection(&vcomp_section
);
942 struct vcomp_team_data
*team
= thread_data
->team
;
945 LeaveCriticalSection(&vcomp_section
);
946 _vcomp_fork_call_wrapper(team
->wrapper
, team
->nargs
, team
->valist
);
947 EnterCriticalSection(&vcomp_section
);
949 thread_data
->team
= NULL
;
950 list_remove(&thread_data
->entry
);
951 list_add_tail(&vcomp_idle_threads
, &thread_data
->entry
);
952 if (++team
->finished_threads
>= team
->num_threads
)
953 WakeAllConditionVariable(&team
->cond
);
956 if (!SleepConditionVariableCS(&thread_data
->cond
, &vcomp_section
, 5000) &&
957 GetLastError() == ERROR_TIMEOUT
&& !thread_data
->team
)
962 list_remove(&thread_data
->entry
);
963 LeaveCriticalSection(&vcomp_section
);
965 TRACE("terminating worker thread for %p\n", thread_data
);
967 HeapFree(GetProcessHeap(), 0, thread_data
);
968 vcomp_set_thread_data(NULL
);
969 FreeLibraryAndExitThread(vcomp_module
, 0);
973 void WINAPIV
_vcomp_fork(BOOL ifval
, int nargs
, void *wrapper
, ...)
975 struct vcomp_thread_data
*prev_thread_data
= vcomp_init_thread_data();
976 struct vcomp_thread_data thread_data
;
977 struct vcomp_team_data team_data
;
978 struct vcomp_task_data task_data
;
981 TRACE("(%d, %d, %p, ...)\n", ifval
, nargs
, wrapper
);
983 if (prev_thread_data
->parallel
&& !vcomp_nested_fork
)
988 else if (prev_thread_data
->fork_threads
)
989 num_threads
= prev_thread_data
->fork_threads
;
991 num_threads
= vcomp_num_threads
;
993 InitializeConditionVariable(&team_data
.cond
);
994 team_data
.num_threads
= 1;
995 team_data
.finished_threads
= 0;
996 team_data
.nargs
= nargs
;
997 team_data
.wrapper
= wrapper
;
998 __ms_va_start(team_data
.valist
, wrapper
);
999 team_data
.barrier
= 0;
1000 team_data
.barrier_count
= 0;
1002 task_data
.single
= 0;
1003 task_data
.section
= 0;
1004 task_data
.dynamic
= 0;
1006 thread_data
.team
= &team_data
;
1007 thread_data
.task
= &task_data
;
1008 thread_data
.thread_num
= 0;
1009 thread_data
.parallel
= ifval
|| prev_thread_data
->parallel
;
1010 thread_data
.fork_threads
= 0;
1011 thread_data
.single
= 1;
1012 thread_data
.section
= 1;
1013 thread_data
.dynamic
= 1;
1014 thread_data
.dynamic_type
= 0;
1015 list_init(&thread_data
.entry
);
1016 InitializeConditionVariable(&thread_data
.cond
);
1018 if (num_threads
> 1)
1021 EnterCriticalSection(&vcomp_section
);
1023 /* reuse existing threads (if any) */
1024 while (team_data
.num_threads
< num_threads
&& (ptr
= list_head(&vcomp_idle_threads
)))
1026 struct vcomp_thread_data
*data
= LIST_ENTRY(ptr
, struct vcomp_thread_data
, entry
);
1027 data
->team
= &team_data
;
1028 data
->task
= &task_data
;
1029 data
->thread_num
= team_data
.num_threads
++;
1030 data
->parallel
= thread_data
.parallel
;
1031 data
->fork_threads
= 0;
1035 data
->dynamic_type
= 0;
1036 list_remove(&data
->entry
);
1037 list_add_tail(&thread_data
.entry
, &data
->entry
);
1038 WakeAllConditionVariable(&data
->cond
);
1041 /* spawn additional threads */
1042 while (team_data
.num_threads
< num_threads
)
1044 struct vcomp_thread_data
*data
;
1048 data
= HeapAlloc(GetProcessHeap(), 0, sizeof(*data
));
1051 data
->team
= &team_data
;
1052 data
->task
= &task_data
;
1053 data
->thread_num
= team_data
.num_threads
;
1054 data
->parallel
= thread_data
.parallel
;
1055 data
->fork_threads
= 0;
1059 data
->dynamic_type
= 0;
1060 InitializeConditionVariable(&data
->cond
);
1062 thread
= CreateThread(NULL
, 0, _vcomp_fork_worker
, data
, 0, NULL
);
1065 HeapFree(GetProcessHeap(), 0, data
);
1069 GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
,
1070 (const WCHAR
*)vcomp_module
, &module
);
1071 team_data
.num_threads
++;
1072 list_add_tail(&thread_data
.entry
, &data
->entry
);
1073 CloseHandle(thread
);
1076 LeaveCriticalSection(&vcomp_section
);
1079 vcomp_set_thread_data(&thread_data
);
1080 _vcomp_fork_call_wrapper(team_data
.wrapper
, team_data
.nargs
, team_data
.valist
);
1081 vcomp_set_thread_data(prev_thread_data
);
1082 prev_thread_data
->fork_threads
= 0;
1084 if (team_data
.num_threads
> 1)
1086 EnterCriticalSection(&vcomp_section
);
1088 team_data
.finished_threads
++;
1089 while (team_data
.finished_threads
< team_data
.num_threads
)
1090 SleepConditionVariableCS(&team_data
.cond
, &vcomp_section
, INFINITE
);
1092 LeaveCriticalSection(&vcomp_section
);
1093 assert(list_empty(&thread_data
.entry
));
1096 __ms_va_end(team_data
.valist
);
1099 static CRITICAL_SECTION
*alloc_critsect(void)
1101 CRITICAL_SECTION
*critsect
;
1102 if (!(critsect
= HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect
))))
1104 ERR("could not allocate critical section\n");
1108 InitializeCriticalSection(critsect
);
1109 critsect
->DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": critsect");
1113 static void destroy_critsect(CRITICAL_SECTION
*critsect
)
1115 if (!critsect
) return;
1116 critsect
->DebugInfo
->Spare
[0] = 0;
1117 DeleteCriticalSection(critsect
);
1118 HeapFree(GetProcessHeap(), 0, critsect
);
1121 void CDECL
omp_init_lock(omp_lock_t
*lock
)
1123 TRACE("(%p)\n", lock
);
1124 *lock
= alloc_critsect();
1127 void CDECL
omp_destroy_lock(omp_lock_t
*lock
)
1129 TRACE("(%p)\n", lock
);
1130 destroy_critsect(*lock
);
1133 void CDECL
omp_set_lock(omp_lock_t
*lock
)
1135 TRACE("(%p)\n", lock
);
1137 if (RtlIsCriticalSectionLockedByThread(*lock
))
1139 ERR("omp_set_lock called while holding lock %p\n", *lock
);
1143 EnterCriticalSection(*lock
);
1146 void CDECL
omp_unset_lock(omp_lock_t
*lock
)
1148 TRACE("(%p)\n", lock
);
1149 LeaveCriticalSection(*lock
);
1152 int CDECL
omp_test_lock(omp_lock_t
*lock
)
1154 TRACE("(%p)\n", lock
);
1156 if (RtlIsCriticalSectionLockedByThread(*lock
))
1159 return TryEnterCriticalSection(*lock
);
1162 void CDECL
omp_set_nest_lock(omp_nest_lock_t
*lock
)
1164 TRACE("(%p)\n", lock
);
1165 EnterCriticalSection(*lock
);
1168 void CDECL
omp_unset_nest_lock(omp_nest_lock_t
*lock
)
1170 TRACE("(%p)\n", lock
);
1171 LeaveCriticalSection(*lock
);
1174 int CDECL
omp_test_nest_lock(omp_nest_lock_t
*lock
)
1176 TRACE("(%p)\n", lock
);
1177 return TryEnterCriticalSection(*lock
) ? (*lock
)->RecursionCount
: 0;
1180 void CDECL
_vcomp_enter_critsect(CRITICAL_SECTION
**critsect
)
1182 TRACE("(%p)\n", critsect
);
1186 CRITICAL_SECTION
*new_critsect
= alloc_critsect();
1187 if (interlocked_cmpxchg_ptr((void **)critsect
, new_critsect
, NULL
) != NULL
)
1188 destroy_critsect(new_critsect
); /* someone beat us to it */
1191 EnterCriticalSection(*critsect
);
1194 void CDECL
_vcomp_leave_critsect(CRITICAL_SECTION
*critsect
)
1196 TRACE("(%p)\n", critsect
);
1197 LeaveCriticalSection(critsect
);
/* DLL entry point: allocates the TLS slot and derives the default thread
 * counts from the processor count on attach, and releases per-thread data on
 * detach. NOTE(review): this extract is garbled (fused line numbers, broken
 * wrapping) and the function is truncated at the end of the chunk (switch
 * braces / final return not visible); kept byte-identical here. */
1200 BOOL WINAPI
DllMain(HINSTANCE instance
, DWORD reason
, LPVOID reserved
)
1202 TRACE("(%p, %d, %p)\n", instance
, reason
, reserved
);
/* prefer the native OpenMP runtime when available */
1206 case DLL_WINE_PREATTACH
:
1207 return FALSE
; /* prefer native version */
1209 case DLL_PROCESS_ATTACH
:
1211 SYSTEM_INFO sysinfo
;
1213 if ((vcomp_context_tls
= TlsAlloc()) == TLS_OUT_OF_INDEXES
)
1215 ERR("Failed to allocate TLS index\n");
/* default and maximum team sizes follow the processor count */
1219 GetSystemInfo(&sysinfo
);
1220 vcomp_module
= instance
;
1221 vcomp_max_threads
= sysinfo
.dwNumberOfProcessors
;
1222 vcomp_num_threads
= sysinfo
.dwNumberOfProcessors
;
/* skip cleanup on process shutdown (reserved != NULL) */
1226 case DLL_PROCESS_DETACH
:
1228 if (reserved
) break;
1229 if (vcomp_context_tls
!= TLS_OUT_OF_INDEXES
)
1231 vcomp_free_thread_data();
1232 TlsFree(vcomp_context_tls
);
1237 case DLL_THREAD_DETACH
:
1239 vcomp_free_thread_data();