1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
12 // the thread's iteration space [32lsb, 32msb)
20 byte pad
[CacheLineSize
];
24 runtime_parforalloc(uint32 nthrmax
)
28 // The ParFor object is followed by CacheLineSize padding
29 // and then nthrmax ParForThread.
30 desc
= (ParFor
*)runtime_malloc(sizeof(ParFor
) + CacheLineSize
+ nthrmax
* sizeof(ParForThread
));
31 desc
->thr
= (ParForThread
*)((byte
*)(desc
+1) + CacheLineSize
);
32 desc
->nthrmax
= nthrmax
;
36 // For testing from Go
37 // func parforalloc2(nthrmax uint32) *ParFor
39 ParFor
*runtime_parforalloc2(uint32
)
40 __asm__ (GOSYM_PREFIX
"runtime.parforalloc2");
43 runtime_parforalloc2(uint32 nthrmax
)
45 return runtime_parforalloc(nthrmax
);
49 runtime_parforsetup(ParFor
*desc
, uint32 nthr
, uint32 n
, void *ctx
, bool wait
, void (*body
)(ParFor
*, uint32
))
53 if(desc
== nil
|| nthr
== 0 || nthr
> desc
->nthrmax
|| body
== nil
) {
54 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc
, nthr
, n
, body
);
55 runtime_throw("parfor: invalid args");
70 for(i
=0; i
<nthr
; i
++) {
71 begin
= (uint64
)n
*i
/ nthr
;
72 end
= (uint64
)n
*(i
+1) / nthr
;
73 desc
->thr
[i
].pos
= (uint64
)begin
| (((uint64
)end
)<<32);
77 // For testing from Go
78 // func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
80 void runtime_parforsetup2(ParFor
*, uint32
, uint32
, void *, bool, void *)
81 __asm__ (GOSYM_PREFIX
"runtime.parforsetup2");
84 runtime_parforsetup2(ParFor
*desc
, uint32 nthr
, uint32 n
, void *ctx
, bool wait
, void *body
)
86 runtime_parforsetup(desc
, nthr
, n
, ctx
, wait
, (void(*)(ParFor
*, uint32
))body
);
90 runtime_parfordo(ParFor
*desc
)
93 uint32 tid
, begin
, end
, begin2
, try, victim
, i
;
94 uint64
*mypos
, *victimpos
, pos
, newpos
;
95 void (*body
)(ParFor
*, uint32
);
98 // Obtain 0-based thread index.
99 tid
= runtime_xadd(&desc
->thrseq
, 1) - 1;
100 if(tid
>= desc
->nthr
) {
101 runtime_printf("tid=%d nthr=%d\n", tid
, desc
->nthr
);
102 runtime_throw("parfor: invalid tid");
105 // If single-threaded, just execute the for serially.
107 for(i
=0; i
<desc
->cnt
; i
++)
113 me
= &desc
->thr
[tid
];
117 // While there is local work,
118 // bump low index and execute the iteration.
119 pos
= runtime_xadd64(mypos
, 1);
120 begin
= (uint32
)pos
-1;
121 end
= (uint32
)(pos
>>32);
129 // Out of work, need to steal something.
132 // If we don't see any work for long enough,
133 // increment the done counter...
134 if(try > desc
->nthr
*4 && !idle
) {
136 runtime_xadd(&desc
->done
, 1);
138 // ...if all threads have incremented the counter,
140 if(desc
->done
+ !idle
== desc
->nthr
) {
142 runtime_xadd(&desc
->done
, 1);
145 // Choose a random victim for stealing.
146 victim
= runtime_fastrand1() % (desc
->nthr
-1);
149 victimpos
= &desc
->thr
[victim
].pos
;
150 pos
= runtime_atomicload64(victimpos
);
152 // See if it has any work.
154 end
= (uint32
)(pos
>>32);
160 runtime_xadd(&desc
->done
, -1);
163 begin2
= begin
+ (end
-begin
)/2;
164 newpos
= (uint64
)begin
| (uint64
)begin2
<<32;
165 if(runtime_cas64(victimpos
, &pos
, newpos
)) {
171 // Has successfully stolen some work.
173 runtime_throw("parfor: should not be idle");
174 runtime_atomicstore64(mypos
, (uint64
)begin
| (uint64
)end
<<32);
176 me
->nstealcnt
+= end
-begin
;
180 if(try < desc
->nthr
) {
182 } else if (try < 4*desc
->nthr
) {
184 runtime_procyield(20);
185 // If a caller asked not to wait for the others, exit now
186 // (assume that most work is already done at this point).
187 } else if (!desc
->wait
) {
189 runtime_xadd(&desc
->done
, 1);
191 } else if (try < 6*desc
->nthr
) {
201 runtime_xadd64(&desc
->nsteal
, me
->nsteal
);
202 runtime_xadd64(&desc
->nstealcnt
, me
->nstealcnt
);
203 runtime_xadd64(&desc
->nprocyield
, me
->nprocyield
);
204 runtime_xadd64(&desc
->nosyield
, me
->nosyield
);
205 runtime_xadd64(&desc
->nsleep
, me
->nsleep
);
213 // For testing from Go
214 // func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
216 struct parforiters_ret
{
221 struct parforiters_ret
runtime_parforiters(ParFor
*, uintptr
)
222 __asm__ (GOSYM_PREFIX
"runtime.parforiters");
224 struct parforiters_ret
225 runtime_parforiters(ParFor
*desc
, uintptr tid
)
227 struct parforiters_ret ret
;
229 ret
.start
= (uint32
)desc
->thr
[tid
].pos
;
230 ret
.end
= (uint32
)(desc
->thr
[tid
].pos
>>32);