* config/rs6000/rs6000.c (rs6000_deligitimze_address): Do not
[official-gcc.git] / libgo / runtime / parfor.c
blob98ebdebc559e98a6b8ea594ef40ef6c0694fc2f3
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
7 #include "runtime.h"
8 #include "arch.h"
10 struct ParForThread
12 // the thread's iteration space [32lsb, 32msb)
13 uint64 pos;
14 // stats
15 uint64 nsteal;
16 uint64 nstealcnt;
17 uint64 nprocyield;
18 uint64 nosyield;
19 uint64 nsleep;
20 byte pad[CacheLineSize];
23 ParFor*
24 runtime_parforalloc(uint32 nthrmax)
26 ParFor *desc;
28 // The ParFor object is followed by CacheLineSize padding
29 // and then nthrmax ParForThread.
30 desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
31 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
32 desc->nthrmax = nthrmax;
33 return desc;
36 // For testing from Go
37 // func parforalloc2(nthrmax uint32) *ParFor
39 ParFor *runtime_parforalloc2(uint32)
40 asm("runtime.parforalloc2");
42 ParFor *
43 runtime_parforalloc2(uint32 nthrmax)
45 return runtime_parforalloc(nthrmax);
48 void
49 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
51 uint32 i, begin, end;
53 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
54 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
55 runtime_throw("parfor: invalid args");
58 desc->body = body;
59 desc->done = 0;
60 desc->nthr = nthr;
61 desc->thrseq = 0;
62 desc->cnt = n;
63 desc->ctx = ctx;
64 desc->wait = wait;
65 desc->nsteal = 0;
66 desc->nstealcnt = 0;
67 desc->nprocyield = 0;
68 desc->nosyield = 0;
69 desc->nsleep = 0;
70 for(i=0; i<nthr; i++) {
71 begin = (uint64)n*i / nthr;
72 end = (uint64)n*(i+1) / nthr;
73 desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32);
77 // For testing from Go
78 // func parforsetup2(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32))
80 void runtime_parforsetup2(ParFor *, uint32, uint32, void *, bool, void *)
81 asm("runtime.parforsetup2");
83 void
84 runtime_parforsetup2(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void *body)
86 runtime_parforsetup(desc, nthr, n, ctx, wait, (void(*)(ParFor*, uint32))body);
89 void
90 runtime_parfordo(ParFor *desc)
92 ParForThread *me;
93 uint32 tid, begin, end, begin2, try, victim, i;
94 uint64 *mypos, *victimpos, pos, newpos;
95 void (*body)(ParFor*, uint32);
96 bool idle;
98 // Obtain 0-based thread index.
99 tid = runtime_xadd(&desc->thrseq, 1) - 1;
100 if(tid >= desc->nthr) {
101 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
102 runtime_throw("parfor: invalid tid");
105 // If single-threaded, just execute the for serially.
106 if(desc->nthr==1) {
107 for(i=0; i<desc->cnt; i++)
108 desc->body(desc, i);
109 return;
112 body = desc->body;
113 me = &desc->thr[tid];
114 mypos = &me->pos;
115 for(;;) {
116 for(;;) {
117 // While there is local work,
118 // bump low index and execute the iteration.
119 pos = runtime_xadd64(mypos, 1);
120 begin = (uint32)pos-1;
121 end = (uint32)(pos>>32);
122 if(begin < end) {
123 body(desc, begin);
124 continue;
126 break;
129 // Out of work, need to steal something.
130 idle = false;
131 for(try=0;; try++) {
132 // If we don't see any work for long enough,
133 // increment the done counter...
134 if(try > desc->nthr*4 && !idle) {
135 idle = true;
136 runtime_xadd(&desc->done, 1);
138 // ...if all threads have incremented the counter,
139 // we are done.
140 if(desc->done + !idle == desc->nthr) {
141 if(!idle)
142 runtime_xadd(&desc->done, 1);
143 goto exit;
145 // Choose a random victim for stealing.
146 victim = runtime_fastrand1() % (desc->nthr-1);
147 if(victim >= tid)
148 victim++;
149 victimpos = &desc->thr[victim].pos;
150 pos = runtime_atomicload64(victimpos);
151 for(;;) {
152 // See if it has any work.
153 begin = (uint32)pos;
154 end = (uint32)(pos>>32);
155 if(begin >= end-1) {
156 begin = end = 0;
157 break;
159 if(idle) {
160 runtime_xadd(&desc->done, -1);
161 idle = false;
163 begin2 = begin + (end-begin)/2;
164 newpos = (uint64)begin | (uint64)begin2<<32;
165 if(runtime_cas64(victimpos, &pos, newpos)) {
166 begin = begin2;
167 break;
170 if(begin < end) {
171 // Has successfully stolen some work.
172 if(idle)
173 runtime_throw("parfor: should not be idle");
174 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
175 me->nsteal++;
176 me->nstealcnt += end-begin;
177 break;
179 // Backoff.
180 if(try < desc->nthr) {
181 // nothing
182 } else if (try < 4*desc->nthr) {
183 me->nprocyield++;
184 runtime_procyield(20);
185 // If a caller asked not to wait for the others, exit now
186 // (assume that most work is already done at this point).
187 } else if (!desc->wait) {
188 if(!idle)
189 runtime_xadd(&desc->done, 1);
190 goto exit;
191 } else if (try < 6*desc->nthr) {
192 me->nosyield++;
193 runtime_osyield();
194 } else {
195 me->nsleep++;
196 runtime_usleep(1);
200 exit:
201 runtime_xadd64(&desc->nsteal, me->nsteal);
202 runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
203 runtime_xadd64(&desc->nprocyield, me->nprocyield);
204 runtime_xadd64(&desc->nosyield, me->nosyield);
205 runtime_xadd64(&desc->nsleep, me->nsleep);
206 me->nsteal = 0;
207 me->nstealcnt = 0;
208 me->nprocyield = 0;
209 me->nosyield = 0;
210 me->nsleep = 0;
213 // For testing from Go
214 // func parforiters(desc *ParFor, tid uintptr) (uintptr, uintptr)
216 struct parforiters_ret {
217 uintptr start;
218 uintptr end;
221 struct parforiters_ret runtime_parforiters(ParFor *, uintptr)
222 asm("runtime.parforiters");
224 struct parforiters_ret
225 runtime_parforiters(ParFor *desc, uintptr tid)
227 struct parforiters_ret ret;
229 ret.start = (uint32)desc->thr[tid].pos;
230 ret.end = (uint32)(desc->thr[tid].pos>>32);
231 return ret;