2014-07-29 Ed Smith-Rowland <3dw4rd@verizon.net>
[official-gcc.git] / libgo / runtime / parfor.c
blob386faea512cfbec0c6189bc202d246be94b943fe
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
7 #include "runtime.h"
8 #include "arch.h"
10 struct ParForThread
12 // the thread's iteration space [32lsb, 32msb)
13 uint64 pos;
14 // stats
15 uint64 nsteal;
16 uint64 nstealcnt;
17 uint64 nprocyield;
18 uint64 nosyield;
19 uint64 nsleep;
20 byte pad[CacheLineSize];
23 ParFor*
24 runtime_parforalloc(uint32 nthrmax)
26 ParFor *desc;
28 // The ParFor object is followed by CacheLineSize padding
29 // and then nthrmax ParForThread.
30 desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
31 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
32 desc->nthrmax = nthrmax;
33 return desc;
36 void
37 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
39 uint32 i, begin, end;
40 uint64 *pos;
42 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
43 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
44 runtime_throw("parfor: invalid args");
47 desc->body = body;
48 desc->done = 0;
49 desc->nthr = nthr;
50 desc->thrseq = 0;
51 desc->cnt = n;
52 desc->ctx = ctx;
53 desc->wait = wait;
54 desc->nsteal = 0;
55 desc->nstealcnt = 0;
56 desc->nprocyield = 0;
57 desc->nosyield = 0;
58 desc->nsleep = 0;
59 for(i=0; i<nthr; i++) {
60 begin = (uint64)n*i / nthr;
61 end = (uint64)n*(i+1) / nthr;
62 pos = &desc->thr[i].pos;
63 if(((uintptr)pos & 7) != 0)
64 runtime_throw("parforsetup: pos is not aligned");
65 *pos = (uint64)begin | (((uint64)end)<<32);
69 void
70 runtime_parfordo(ParFor *desc)
72 ParForThread *me;
73 uint32 tid, begin, end, begin2, try, victim, i;
74 uint64 *mypos, *victimpos, pos, newpos;
75 void (*body)(ParFor*, uint32);
76 bool idle;
78 // Obtain 0-based thread index.
79 tid = runtime_xadd(&desc->thrseq, 1) - 1;
80 if(tid >= desc->nthr) {
81 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
82 runtime_throw("parfor: invalid tid");
85 // If single-threaded, just execute the for serially.
86 if(desc->nthr==1) {
87 for(i=0; i<desc->cnt; i++)
88 desc->body(desc, i);
89 return;
92 body = desc->body;
93 me = &desc->thr[tid];
94 mypos = &me->pos;
95 for(;;) {
96 for(;;) {
97 // While there is local work,
98 // bump low index and execute the iteration.
99 pos = runtime_xadd64(mypos, 1);
100 begin = (uint32)pos-1;
101 end = (uint32)(pos>>32);
102 if(begin < end) {
103 body(desc, begin);
104 continue;
106 break;
109 // Out of work, need to steal something.
110 idle = false;
111 for(try=0;; try++) {
112 // If we don't see any work for long enough,
113 // increment the done counter...
114 if(try > desc->nthr*4 && !idle) {
115 idle = true;
116 runtime_xadd(&desc->done, 1);
118 // ...if all threads have incremented the counter,
119 // we are done.
120 if(desc->done + !idle == desc->nthr) {
121 if(!idle)
122 runtime_xadd(&desc->done, 1);
123 goto exit;
125 // Choose a random victim for stealing.
126 victim = runtime_fastrand1() % (desc->nthr-1);
127 if(victim >= tid)
128 victim++;
129 victimpos = &desc->thr[victim].pos;
130 for(;;) {
131 // See if it has any work.
132 pos = runtime_atomicload64(victimpos);
133 begin = (uint32)pos;
134 end = (uint32)(pos>>32);
135 if(begin+1 >= end) {
136 begin = end = 0;
137 break;
139 if(idle) {
140 runtime_xadd(&desc->done, -1);
141 idle = false;
143 begin2 = begin + (end-begin)/2;
144 newpos = (uint64)begin | (uint64)begin2<<32;
145 if(runtime_cas64(victimpos, pos, newpos)) {
146 begin = begin2;
147 break;
150 if(begin < end) {
151 // Has successfully stolen some work.
152 if(idle)
153 runtime_throw("parfor: should not be idle");
154 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
155 me->nsteal++;
156 me->nstealcnt += end-begin;
157 break;
159 // Backoff.
160 if(try < desc->nthr) {
161 // nothing
162 } else if (try < 4*desc->nthr) {
163 me->nprocyield++;
164 runtime_procyield(20);
165 // If a caller asked not to wait for the others, exit now
166 // (assume that most work is already done at this point).
167 } else if (!desc->wait) {
168 if(!idle)
169 runtime_xadd(&desc->done, 1);
170 goto exit;
171 } else if (try < 6*desc->nthr) {
172 me->nosyield++;
173 runtime_osyield();
174 } else {
175 me->nsleep++;
176 runtime_usleep(1);
180 exit:
181 runtime_xadd64(&desc->nsteal, me->nsteal);
182 runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
183 runtime_xadd64(&desc->nprocyield, me->nprocyield);
184 runtime_xadd64(&desc->nosyield, me->nosyield);
185 runtime_xadd64(&desc->nsleep, me->nsleep);
186 me->nsteal = 0;
187 me->nstealcnt = 0;
188 me->nprocyield = 0;
189 me->nosyield = 0;
190 me->nsleep = 0;
193 // For testing from Go.
194 void
195 runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
197 *start = (uint32)desc->thr[tid].pos;
198 *end = (uint32)(desc->thr[tid].pos>>32);