2016-07-13 Thomas Preud'homme <thomas.preudhomme@arm.com>
[official-gcc.git] / libgo / runtime / parfor.c
blobede921b9aaa4046b19b3ce14729257cf8c285076
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Parallel for algorithm.
7 #include "runtime.h"
8 #include "arch.h"
10 struct ParForThread
12 // the thread's iteration space [32lsb, 32msb)
13 uint64 pos;
14 // stats
15 uint64 nsteal;
16 uint64 nstealcnt;
17 uint64 nprocyield;
18 uint64 nosyield;
19 uint64 nsleep;
20 byte pad[CacheLineSize];
23 ParFor*
24 runtime_parforalloc(uint32 nthrmax)
26 ParFor *desc;
28 // The ParFor object is followed by CacheLineSize padding
29 // and then nthrmax ParForThread.
30 desc = (ParFor*)runtime_malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
31 desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
32 desc->nthrmax = nthrmax;
33 return desc;
36 void
37 runtime_parforsetup(ParFor *desc, uint32 nthr, uint32 n, bool wait, const FuncVal *body)
39 uint32 i, begin, end;
40 uint64 *pos;
42 if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
43 runtime_printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
44 runtime_throw("parfor: invalid args");
47 desc->body = body;
48 desc->done = 0;
49 desc->nthr = nthr;
50 desc->thrseq = 0;
51 desc->cnt = n;
52 desc->wait = wait;
53 desc->nsteal = 0;
54 desc->nstealcnt = 0;
55 desc->nprocyield = 0;
56 desc->nosyield = 0;
57 desc->nsleep = 0;
58 for(i=0; i<nthr; i++) {
59 begin = (uint64)n*i / nthr;
60 end = (uint64)n*(i+1) / nthr;
61 pos = &desc->thr[i].pos;
62 if(((uintptr)pos & 7) != 0)
63 runtime_throw("parforsetup: pos is not aligned");
64 *pos = (uint64)begin | (((uint64)end)<<32);
68 void
69 runtime_parfordo(ParFor *desc)
71 ParForThread *me;
72 uint32 tid, begin, end, begin2, try, victim, i;
73 uint64 *mypos, *victimpos, pos, newpos;
74 const FuncVal *body;
75 void (*bodyfn)(ParFor*, uint32);
76 bool idle;
78 // Obtain 0-based thread index.
79 tid = runtime_xadd(&desc->thrseq, 1) - 1;
80 if(tid >= desc->nthr) {
81 runtime_printf("tid=%d nthr=%d\n", tid, desc->nthr);
82 runtime_throw("parfor: invalid tid");
85 body = desc->body;
86 bodyfn = (void (*)(ParFor*, uint32))(void*)body->fn;
88 // If single-threaded, just execute the for serially.
89 if(desc->nthr==1) {
90 for(i=0; i<desc->cnt; i++)
91 __builtin_call_with_static_chain (bodyfn(desc, i), body);
92 return;
95 me = &desc->thr[tid];
96 mypos = &me->pos;
97 for(;;) {
98 for(;;) {
99 // While there is local work,
100 // bump low index and execute the iteration.
101 pos = runtime_xadd64(mypos, 1);
102 begin = (uint32)pos-1;
103 end = (uint32)(pos>>32);
104 if(begin < end) {
105 __builtin_call_with_static_chain(bodyfn(desc, begin), body);
106 continue;
108 break;
111 // Out of work, need to steal something.
112 idle = false;
113 for(try=0;; try++) {
114 // If we don't see any work for long enough,
115 // increment the done counter...
116 if(try > desc->nthr*4 && !idle) {
117 idle = true;
118 runtime_xadd(&desc->done, 1);
120 // ...if all threads have incremented the counter,
121 // we are done.
122 if(desc->done + !idle == desc->nthr) {
123 if(!idle)
124 runtime_xadd(&desc->done, 1);
125 goto exit;
127 // Choose a random victim for stealing.
128 victim = runtime_fastrand1() % (desc->nthr-1);
129 if(victim >= tid)
130 victim++;
131 victimpos = &desc->thr[victim].pos;
132 for(;;) {
133 // See if it has any work.
134 pos = runtime_atomicload64(victimpos);
135 begin = (uint32)pos;
136 end = (uint32)(pos>>32);
137 if(begin+1 >= end) {
138 begin = end = 0;
139 break;
141 if(idle) {
142 runtime_xadd(&desc->done, -1);
143 idle = false;
145 begin2 = begin + (end-begin)/2;
146 newpos = (uint64)begin | (uint64)begin2<<32;
147 if(runtime_cas64(victimpos, pos, newpos)) {
148 begin = begin2;
149 break;
152 if(begin < end) {
153 // Has successfully stolen some work.
154 if(idle)
155 runtime_throw("parfor: should not be idle");
156 runtime_atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
157 me->nsteal++;
158 me->nstealcnt += end-begin;
159 break;
161 // Backoff.
162 if(try < desc->nthr) {
163 // nothing
164 } else if (try < 4*desc->nthr) {
165 me->nprocyield++;
166 runtime_procyield(20);
167 // If a caller asked not to wait for the others, exit now
168 // (assume that most work is already done at this point).
169 } else if (!desc->wait) {
170 if(!idle)
171 runtime_xadd(&desc->done, 1);
172 goto exit;
173 } else if (try < 6*desc->nthr) {
174 me->nosyield++;
175 runtime_osyield();
176 } else {
177 me->nsleep++;
178 runtime_usleep(1);
182 exit:
183 runtime_xadd64(&desc->nsteal, me->nsteal);
184 runtime_xadd64(&desc->nstealcnt, me->nstealcnt);
185 runtime_xadd64(&desc->nprocyield, me->nprocyield);
186 runtime_xadd64(&desc->nosyield, me->nosyield);
187 runtime_xadd64(&desc->nsleep, me->nsleep);
188 me->nsteal = 0;
189 me->nstealcnt = 0;
190 me->nprocyield = 0;
191 me->nosyield = 0;
192 me->nsleep = 0;
195 // For testing from Go.
196 void
197 runtime_parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
199 *start = (uint32)desc->thr[tid].pos;
200 *end = (uint32)(desc->thr[tid].pos>>32);