i'm an idiot: let threads consume working queue!
[raymarch.git] / mtrender.d
blobcea8a5be69ac8688fb26b58bd8b07e78cb2571c8
1 module mtrender;
3 //version = render_debug;
6 private:
7 import gfxcore : texImage, vlWidth, vlHeight, scale, scanlines;
8 import raytracer;
9 import vecs;
11 import core.time : MonoTime, Duration;
13 import core.atomic;
14 import core.sync.condition;
15 import core.sync.rwmutex;
16 import core.sync.mutex;
17 import core.thread;
19 import std.concurrency;
// primitives used to wake the render thread; both are created in `mtrenderInit`
__gshared {
  Mutex mutexCondCanRender;
  Condition waitCondCanRender;
}

// cross-thread flags; always accessed via `atomicLoad`/`atomicStore`
shared {
  bool renderComplete = true; // cleared by `mtrenderStartSignal`, set again by the render thread after each frame
  int renderDie = 0; // 1: waiting for death; 2: dead
  uint frameDur; // duration of the last rendered frame, in milliseconds
}
// Returns `true` when the `renderComplete` flag is currently set.
public @property bool mtrenderComplete () {
  immutable bool done = atomicLoad!(MemoryOrder.seq)(renderComplete);
  return done;
}
// Returns the current value of `frameDur` (the render thread stores the last frame time there, in milliseconds).
public @property uint mtrenderFrameTime () {
  immutable uint lastFrameMs = atomicLoad!(MemoryOrder.seq)(frameDur);
  return lastFrameMs;
}
// Requests a new frame: clears the completion flag, then notifies the
// condition the render thread waits on.
public void mtrenderStartSignal () {
  atomicStore!(MemoryOrder.seq)(renderComplete, false);
  synchronized(mutexCondCanRender) {
    waitCondCanRender.notify();
  }
}
// One-time renderer setup: creates the wake-up primitives, caches the
// texture layout, renders the very first frame synchronously on the calling
// thread, and finally starts the background render thread.
public void mtrenderInit () {
  import core.stdc.stdio : printf;

  // wake-up machinery for the render thread
  mutexCondCanRender = new Mutex();
  waitCondCanRender = new Condition(mutexCondCanRender);

  // cache image layout parameters, so the tile renderer doesn't have to query them
  nextLineAdjustment = texImage.adjustmentForNextLine();
  offR = texImage.redByteOffset();
  offB = texImage.blueByteOffset();
  offG = texImage.greenByteOffset();
  bpp = texImage.bytesPerPixel();
  imgdata = texImage.getDataPointer();
  fpxoffset = texImage.offsetForTopLeftPixel();

  immutable uint tileCount = tilesPerTexture();
  printf("%u tiles per image\n", tileCount);

  initLights();

  // first frame is rendered right here, tile by tile
  animate();
  prepareRenderer();
  foreach (uint tileIdx; 0..tileCount) {
    renderToTexture(tileIdx);
  }

  // all subsequent frames are produced by the render thread
  auto thr = new Thread(&renderThreadFunc);
  renderThread = thr;
  thr.start();
}
// Asks the render thread to terminate and blocks until it confirms
// (i.e. until `renderDie` becomes 2).
public void mtrenderShutdown () {
  // raise the flag BEFORE waking the thread, so the thread sees the request
  // as soon as it leaves `wait()`
  atomicStore(renderDie, 1); // die
  // a notification is lost when the render thread is not inside `wait()` at
  // that very moment (it may be busy with a frame, or be just about to wait),
  // so keep notifying until the thread reports that it is dead
  while (atomicLoad(renderDie) != 2) {
    synchronized(mutexCondCanRender) waitCondCanRender.notify();
    Thread.yield(); // don't burn the CPU while the thread winds down
  }
}
77 // ////////////////////////////////////////////////////////////////////////// //
enum TileSize = 32; // tile is 32x32 pixels

__gshared {
  // various image parameters (cached from `texImage` by `mtrenderInit`)
  int nextLineAdjustment;
  int offR;
  int offB;
  int offG;
  int bpp;
  ubyte* imgdata;
  int fpxoffset;

  // normalized ray directions for the four screen corners (filled by `prepareRenderer`)
  Vec3[4] scp;
  // scene lights; slots are created by `initLights`
  LightInfo[] lights;

  // per-scanline direction steps: `grady1v` goes from corner 0 to corner 3,
  // `grady2v` goes from corner 1 to corner 2
  Vec3 grady1v;
  Vec3 grady2v;

  // background thread running `renderThreadFunc`
  Thread renderThread;
}
98 // ////////////////////////////////////////////////////////////////////////// //
// Computes per-frame camera data from `vrp`, `prp` and `vuv`: the normalized
// ray directions for the four screen corners (`scp`) and the per-scanline
// direction steps (`grady1v`, `grady2v`).
void prepareRenderer () @nogc {
  import std.math : sin, cos;

  // camera basis (the "vertex shader common part");
  // `%` on Vec3 is presumably the cross product -- TODO confirm
  auto viewDir = (vrp-prp).normalize;
  auto axisU = (vuv%viewDir).normalize;
  auto axisV = viewDir%axisU;
  auto planeCenter = prp+viewDir;

  // screen corners:
  //   0--1
  //   |  |
  //   3--2
  immutable RmFloat[2][4] corners = [
    [-1,  1],
    [ 1,  1],
    [ 1, -1],
    [-1, -1],
  ];

  // horizontal extent is scaled by width/height
  immutable RmFloat cxy = cast(RmFloat)vlWidth/cast(RmFloat)vlHeight;
  Vec3 cornerPos = void; // every field is assigned below before use
  foreach (immutable idx; 0..corners.length) {
    immutable RmFloat cu = corners.ptr[idx].ptr[0];
    immutable RmFloat cv = corners.ptr[idx].ptr[1];
    cornerPos.x = planeCenter.x+cu*axisU.x*cxy+cv*axisV.x;
    cornerPos.y = planeCenter.y+cu*axisU.y*cxy+cv*axisV.y;
    cornerPos.z = planeCenter.z+cu*axisU.z*cxy+cv*axisV.z;
    scp.ptr[idx] = (cornerPos-prp).normalize;
  }

  // one scanline is 1/vlHeight of the way down each vertical edge
  RmFloat rowStep = rmFloat!1.0/vlHeight;

  grady1v = (scp.ptr[3]-scp.ptr[0])*rowStep;
  grady2v = (scp.ptr[2]-scp.ptr[1])*rowStep;
}
// Number of TileSize x TileSize tiles needed to cover the whole
// vlWidth x vlHeight image (partial edge tiles count as full tiles).
uint tilesPerTexture () @nogc {
  immutable tilesAcross = (vlWidth+TileSize-1)/TileSize;
  immutable tilesDown = (vlHeight+TileSize-1)/TileSize;
  return tilesAcross*tilesDown;
}
// Converts a tile index into the pixel coordinates of the tile's top-left
// corner. Tiles are numbered row by row, left to right.
//   tile = tile index
//   x, y = receive the pixel offset of the tile
void tileOfs (uint tile, out int x, out int y) @nogc {
  immutable tilesPerRow = (vlWidth+TileSize-1)/TileSize;
  immutable column = tile%tilesPerRow;
  immutable row = tile/tilesPerRow;
  x = column*TileSize;
  y = row*TileSize;
}
// Adam doesn't care about @nogc, but the image functions actually are @nogc

// Renders one tile of the image: calls `raymarch` for every pixel of the
// tile and writes the resulting color directly into the texture memory.
// Tiles on the right/bottom border are clipped to the image size.
//   tile = tile index (see `tileOfs` for the numbering)
// Reads the camera data prepared by `prepareRenderer` and the image layout
// cached by `mtrenderInit`.
void renderToTexture (uint tile) @nogc {
  // horizontal interpolation step: one pixel is 1/vlWidth of a scanline
  RmFloat x_inc = rmFloat!1.0/vlWidth;

  int tofsx, tofsy;
  tileOfs(tile, tofsx, tofsy);
  //{ import core.stdc.stdio; printf("tile=%u; x=%u; y=%u\n", tile, tofsx, tofsy); }
  // clip the tile to the image
  int ex = tofsx+TileSize;
  int ey = tofsy+TileSize;
  if (ex > vlWidth) ex = vlWidth;
  if (ey > vlHeight) ey = vlHeight;

  // directions at the left (`accy1v`) and right (`accy2v`) screen edges for the first tile row
  auto accy1v = scp.ptr[0]+grady1v*tofsy;
  auto accy2v = scp.ptr[1]+grady2v*tofsy;

  Vec3 colorout = void;
  Vec3 gradxv = void, gradxvdown = void, accxvdown = void, accxv = void, rdx = void;

  auto offset = fpxoffset+tofsy*scale*nextLineAdjustment+tofsx*scale*bpp;
  auto startOfLine = imgdata+offset; // get our pointer lined up on the first pixel

  foreach (int iy; tofsy..ey) {
    auto vbptr = startOfLine; // we keep the start of line separately so moving to the next line is simple and portable
    startOfLine += nextLineAdjustment*scale;
    // per-pixel direction step and starting direction for the current row
    gradxv = (accy2v-accy1v)*x_inc;
    accxv = accy1v+gradxv*tofsx;

    // advance the edge directions to the next row
    accy1v += grady1v;
    accy2v += grady2v;

    // the same for the row below
    // NOTE(review): the start of the lower row uses `gradxv` (the current
    // row's step), not `gradxvdown` -- confirm this is intended
    accxvdown = accy1v+gradxv*tofsx;
    gradxvdown = (accy2v-accy1v)*x_inc;

    foreach (int ix; tofsx..ex) {
      rdx = accxv+gradxv; // direction for the next pixel to the right
      raymarch(accxv, lights[], colorout, rdx, accxvdown);
      ubyte r = void, g = void, b = void;
      if (colorout.x < 0 || colorout.y < 0 || colorout.z < 0) {
        // any negative component makes the whole pixel black
        r = g = b = 0;
      } else {
        r = clampToByte(cast(int)(colorout.x*rmFloat!255.0));
        g = clampToByte(cast(int)(colorout.y*rmFloat!255.0));
        b = clampToByte(cast(int)(colorout.z*rmFloat!255.0));
      }
      static if (scale > 1) {
        // upscaling: replicate the pixel `scale` times horizontally...
        foreach (immutable _; 0..scale) {
          vbptr[offR] = r;
          vbptr[offG] = g;
          vbptr[offB] = b;
          static if (!scanlines) {
            // ...and into the `scale-1` lines below (skipped in "scanlines" mode)
            auto vbptrv = vbptr;
            foreach (immutable _1; 1..scale) {
              vbptrv += nextLineAdjustment;
              vbptrv[offR] = r;
              vbptrv[offG] = g;
              vbptrv[offB] = b;
            }
          }
          vbptr += bpp;
        }
      } else {
        vbptr[offR] = r;
        vbptr[offG] = g;
        vbptr[offB] = b;
        vbptr += bpp;
      }
      //accxv += gradxv;
      accxv = rdx;
      accxvdown += gradxvdown;
    }
  }
}
226 // ////////////////////////////////////////////////////////////////////////// //
// animation clock bookkeeping (see `animate`)
__gshared {
  MonoTime sttime, lasttime; // both are initialized on the first `animate` call; `lasttime` is refreshed on every call
  bool wasPaused = false; // pause state seen by the previous `animate` call
}

// pause request flag; accessed via `atomicLoad`/`atomicStore`
shared bool paused = false;
// Flips the animation pause flag.
// The load and the store are two separate atomic operations, not one
// read-modify-write.
public void mtrenderTogglePause () {
  immutable bool ps = atomicLoad(paused);
  // negate the value that was loaded atomically; the old code ignored `ps`
  // and read the shared flag directly
  atomicStore(paused, !ps);
}
// Sets the animation pause flag.
//   v = `true` to pause the animation, `false` to resume it
public void mtrenderPaused (bool v) {
  atomicStore!(MemoryOrder.seq)(paused, v);
}
// Creates the three light slots, activates all of them and assigns their
// initial colors.
void initLights () {
  lights.length = 3;
  foreach (ref slot; lights) slot.active = true;
  // slot 0: fog
  lights[0].color = Vec3(rmFloat!0.8, rmFloat!0.9, rmFloat!1.0);
  // slot 1: light
  lights[1].color = Vec3(rmFloat!1.00, rmFloat!0.85, rmFloat!0.55);
  // slot 2: light
  lights[2].color = Vec3(rmFloat!0.00, rmFloat!1.0, rmFloat!0.0);
}
// Advances the scene by one frame: updates `worldtime` (unless paused),
// sets the camera vectors and moves the lights.
void animate () @nogc {
  import std.math : abs, sin, cos;

  // the clock is started on the very first call
  __gshared bool firstTime = true;
  if (firstTime) {
    sttime = lasttime = MonoTime.currTime;
    firstTime = false;
  }

  // world time only runs while we are not paused; `lasttime` is refreshed in
  // both states, so the time spent paused is never added to `worldtime`
  immutable bool pausedNow = atomicLoad(paused);
  auto now = MonoTime.currTime;
  if (!pausedNow) {
    //worldtime = cast(RmFloat)(MonoTime.currTime-sttime).total!"msecs"/rmFloat!1000.0;
    worldtime += cast(RmFloat)(now-lasttime).total!"msecs"/rmFloat!1000.0;
  }
  wasPaused = pausedNow;
  lasttime = now;

  Vec3 upVec = void, lookAt = void, eye = void;

  // view up vector
  upVec.x = 0; //sin(worldtime/*timemsecs*/);
  upVec.y = 1;
  upVec.z = 0;

  // view reference point
  lookAt.x = 0; //sin(time*rmFloat!0.7)*rmFloat!10.0;
  lookAt.y = 0;
  lookAt.z = 0; //cos(time*rmFloat!0.9)*rmFloat!10.0;

  // camera position
  eye.x = rmFloat!3.0; //sin(time*rmFloat!0.7)*rmFloat!20.0+auto_vrp.x+rmFloat!20.0;
  eye.y = rmFloat!3.0; //sin(time)*rmFloat!4.0+rmFloat!4.0+auto_vrp.y+rmFloat!3.0;
  eye.z = rmFloat!3.0; //cos(time*rmFloat!0.6)*rmFloat!20.0+auto_vrp.z+rmFloat!14.0;

  vuv = upVec;
  vrp = lookAt;
  prp = eye;

  // first light: circles around the Y axis at height 20 and changes color with time
  //L = Vec3(sin(rmFloat!0.0)*rmFloat!20.0, rmFloat!10.0+sin(rmFloat!0.0)*rmFloat!20.0, cos(rmFloat!0.0)*rmFloat!20.0);
  auto orbiter = &lights[1];
  orbiter.active = true;
  orbiter.origin = Vec3(sin(worldtime)*rmFloat!20.0, rmFloat!20.0/*+sin(worldtime)*rmFloat!20.0*/, cos(worldtime)*rmFloat!20.0);
  //lights[1].origin = Vec3(rmFloat!00.0, rmFloat!(-10.0), rmFloat!00.0);
  orbiter.color.x = abs(sin(worldtime));
  orbiter.color.y = abs(cos(worldtime/3));
  orbiter.color.z = rmFloat!0;

  // second light: switched off
  auto fixedLight = &lights[2];
  fixedLight.active = false;
  fixedLight.origin = Vec3(rmFloat!00.0, rmFloat!(100.0), rmFloat!00.0);
}
314 // ////////////////////////////////////////////////////////////////////////// //
// index of the next tile to hand out; workers claim tiles with an atomic increment
shared int curTile;
// total number of tiles in a frame; set once by `renderThreadFunc`
__gshared int maxTiles;
// Worker thread body. Waits for an `int` command from the owner:
//   0   = terminate
//   any other value = render tiles until the shared queue is empty
// After each rendering pass `(wkid, 2)` is sent to the owner; `(wkid, 0)` is
// sent right before the worker terminates.
//   ownerTid = thread that receives the reports
//   wkid = worker number, echoed back in every report
void tileRenderFunc (Tid ownerTid, uint wkid) {
  for (;;) {
    version(render_debug) { import core.stdc.stdio; printf(" worker %u idle...\n", wkid); }
    bool quit = false;
    receive(
      (int action) {
        quit = (action == 0);
      }
    );
    if (quit) break;
    // keep claiming tiles until the shared counter runs past the last one
    for (;;) {
      immutable claimed = atomicOp!"+="(curTile, 1)-1;
      if (claimed >= maxTiles) break; // no more tiles
      renderToTexture(claimed);
    }
    ownerTid.send(wkid, 2); // we are done
  }
  ownerTid.send(wkid, 0);
}
341 // ////////////////////////////////////////////////////////////////////////// //
// number of worker threads spawned by `renderThreadFunc`
enum int ThreadCount = 4;
// Render thread body. Spawns `ThreadCount` tile workers, then loops: waits
// for a frame request, updates the scene, lets the workers render all tiles,
// and publishes the frame time and the completion flag. On a shutdown
// request (`renderDie == 1`) it stops the workers and sets `renderDie` to 2.
void renderThreadFunc () {
  import core.time : msecs;

  Tid[ThreadCount] wkTids;
  // spawn worker threads
  foreach (uint idx; 0..ThreadCount) wkTids[idx] = spawn(&tileRenderFunc, thisTid, idx);
  // main loop
  maxTiles = tilesPerTexture; // this is constant
  for (;;) {
    if (atomicLoad(renderDie) == 1) break;
    // Wait for an actual request, not merely for a notification: a frame is
    // requested when `renderComplete` is cleared. Checking the flag under the
    // mutex means a request issued while we were still busy is not lost, and
    // a spurious wakeup does not render an unrequested frame. The timeout
    // guarantees that a shutdown request is noticed even when its
    // notification arrived before we started to wait.
    synchronized(mutexCondCanRender) {
      while (atomicLoad(renderComplete) && atomicLoad(renderDie) != 1) {
        waitCondCanRender.wait(50.msecs);
      }
    }
    if (atomicLoad(renderDie) == 1) break;
    auto cft = MonoTime.currTime;
    animate();
    prepareRenderer();
    // start from tile 0 ;-)
    atomicStore(curTile, 0);
    // wake up rendering threads with "do your job" signal
    foreach (immutable idx; 0..ThreadCount) wkTids[idx].send(1);
    // wait for all rendering threads to complete
    foreach (immutable _; 0..ThreadCount) receive((uint wkid, int act) {});
    atomicStore(frameDur, cast(uint)(MonoTime.currTime-cft).total!"msecs");
    atomicStore(renderComplete, true);
  }
  // wake up rendering threads with "die" signal
  foreach (immutable idx; 0..ThreadCount) wkTids[idx].send(0);
  // wait for all rendering threads to complete
  foreach (immutable _; 0..ThreadCount) receive((uint wkid, int act) {});
  atomicStore(renderDie, 2);
}