1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
13 // the thread's iteration space [32lsb, 32msb)
21 byte pad
[CacheLineSize
];
25 runtime_parforalloc(uint32 nthrmax
)
29 // The ParFor object is followed by CacheLineSize padding
30 // and then nthrmax ParForThread.
31 desc
= (ParFor
*)runtime_mallocgc(sizeof(ParFor
) + CacheLineSize
+ nthrmax
* sizeof(ParForThread
), 0, FlagNoInvokeGC
);
32 desc
->thr
= (ParForThread
*)((byte
*)(desc
+1) + CacheLineSize
);
33 desc
->nthrmax
= nthrmax
;
38 runtime_parforsetup(ParFor
*desc
, uint32 nthr
, uint32 n
, bool wait
, const FuncVal
*body
)
43 if(desc
== nil
|| nthr
== 0 || nthr
> desc
->nthrmax
|| body
== nil
) {
44 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc
, nthr
, n
, body
);
45 runtime_throw("parfor: invalid args");
59 for(i
=0; i
<nthr
; i
++) {
60 begin
= (uint64
)n
*i
/ nthr
;
61 end
= (uint64
)n
*(i
+1) / nthr
;
62 pos
= &desc
->thr
[i
].pos
;
63 if(((uintptr
)pos
& 7) != 0)
64 runtime_throw("parforsetup: pos is not aligned");
65 *pos
= (uint64
)begin
| (((uint64
)end
)<<32);
70 runtime_parfordo(ParFor
*desc
)
73 uint32 tid
, begin
, end
, begin2
, try, victim
, i
;
74 uint64
*mypos
, *victimpos
, pos
, newpos
;
76 void (*bodyfn
)(ParFor
*, uint32
);
79 // Obtain 0-based thread index.
80 tid
= runtime_xadd(&desc
->thrseq
, 1) - 1;
81 if(tid
>= desc
->nthr
) {
82 runtime_printf("tid=%d nthr=%d\n", tid
, desc
->nthr
);
83 runtime_throw("parfor: invalid tid");
87 bodyfn
= (void (*)(ParFor
*, uint32
))(void*)body
->fn
;
89 // If single-threaded, just execute the for serially.
91 for(i
=0; i
<desc
->cnt
; i
++)
92 __builtin_call_with_static_chain (bodyfn(desc
, i
), body
);
100 // While there is local work,
101 // bump low index and execute the iteration.
102 pos
= runtime_xadd64(mypos
, 1);
103 begin
= (uint32
)pos
-1;
104 end
= (uint32
)(pos
>>32);
106 __builtin_call_with_static_chain(bodyfn(desc
, begin
), body
);
112 // Out of work, need to steal something.
115 // If we don't see any work for long enough,
116 // increment the done counter...
117 if(try > desc
->nthr
*4 && !idle
) {
119 runtime_xadd(&desc
->done
, 1);
121 // ...if all threads have incremented the counter,
123 if(desc
->done
+ !idle
== desc
->nthr
) {
125 runtime_xadd(&desc
->done
, 1);
128 // Choose a random victim for stealing.
129 victim
= runtime_fastrand() % (desc
->nthr
-1);
132 victimpos
= &desc
->thr
[victim
].pos
;
134 // See if it has any work.
135 pos
= runtime_atomicload64(victimpos
);
137 end
= (uint32
)(pos
>>32);
143 runtime_xadd(&desc
->done
, -1);
146 begin2
= begin
+ (end
-begin
)/2;
147 newpos
= (uint64
)begin
| (uint64
)begin2
<<32;
148 if(runtime_cas64(victimpos
, pos
, newpos
)) {
154 // Has successfully stolen some work.
156 runtime_throw("parfor: should not be idle");
157 runtime_atomicstore64(mypos
, (uint64
)begin
| (uint64
)end
<<32);
159 me
->nstealcnt
+= end
-begin
;
163 if(try < desc
->nthr
) {
165 } else if (try < 4*desc
->nthr
) {
167 runtime_procyield(20);
168 // If a caller asked not to wait for the others, exit now
169 // (assume that most work is already done at this point).
170 } else if (!desc
->wait
) {
172 runtime_xadd(&desc
->done
, 1);
174 } else if (try < 6*desc
->nthr
) {
184 runtime_xadd64(&desc
->nsteal
, me
->nsteal
);
185 runtime_xadd64(&desc
->nstealcnt
, me
->nstealcnt
);
186 runtime_xadd64(&desc
->nprocyield
, me
->nprocyield
);
187 runtime_xadd64(&desc
->nosyield
, me
->nosyield
);
188 runtime_xadd64(&desc
->nsleep
, me
->nsleep
);
196 // For testing from Go.
198 runtime_parforiters(ParFor
*desc
, uintptr tid
, uintptr
*start
, uintptr
*end
)
200 *start
= (uint32
)desc
->thr
[tid
].pos
;
201 *end
= (uint32
)(desc
->thr
[tid
].pos
>>32);