mtrender.d

   1 module mtrender;
   2
   3 //version = render_debug;
   4
   5
   6 private:
   7 import gfxcore : texImage, vlWidth, vlHeight, scale, scanlines;
   8 import raytracer;
   9 import vecs;
  10
  11 import core.time : MonoTime, Duration;
  12
  13 import core.atomic;
  14 import core.sync.condition;
  15 import core.sync.rwmutex;
  16 import core.sync.mutex;
  17 import core.thread;
  18
  19 import std.concurrency;
  20
  21
// Hand-off state between the caller's thread and the render thread.
__gshared Mutex mutexCondCanRender; // mutex owned by `waitCondCanRender`; created in `mtrenderInit()`
__gshared Condition waitCondCanRender; // notified by `mtrenderStartSignal()` (and by `mtrenderShutdown()`)
shared bool renderComplete = true; // cleared by `mtrenderStartSignal()`, set again by the render thread after a frame
shared int renderDie = 0; // 1: waiting for death; 2: dead
shared uint frameDur; // duration of the last rendered frame in milliseconds (written by the render thread)
  27
  28
  29 public @property bool mtrenderComplete () {
  30   return atomicLoad(renderComplete);
  31 }
  32
  33
  34 public @property uint mtrenderFrameTime () {
  35   return atomicLoad(frameDur);
  36 }
  37
  38
  39 public void mtrenderStartSignal () {
  40   atomicStore(renderComplete, false);
  41   synchronized(mutexCondCanRender) waitCondCanRender.notify();
  42 }
  43
  44
  45 public void mtrenderInit () {
  46   mutexCondCanRender = new Mutex();
  47   waitCondCanRender = new Condition(mutexCondCanRender);
  48
  49   nextLineAdjustment = texImage.adjustmentForNextLine();
  50   offR = texImage.redByteOffset();
  51   offB = texImage.blueByteOffset();
  52   offG = texImage.greenByteOffset();
  53   bpp = texImage.bytesPerPixel();
  54   imgdata = texImage.getDataPointer();
  55   fpxoffset = texImage.offsetForTopLeftPixel();
  56
  57   { import core.stdc.stdio; printf("%u tiles per image\n", tilesPerTexture()); }
  58
  59   animate();
  60   prepareRenderer();
  61   foreach (uint tile; 0..tilesPerTexture) renderToTexture(tile);
  62
  63   renderThread = new Thread(&renderThreadFunc);
  64   renderThread.start();
  65 }
  66
  67
  68 public void mtrenderShutdown () {
  69   synchronized(mutexCondCanRender) waitCondCanRender.notify(); // just in case
  70   atomicStore(renderDie, 1); // die
  71   while (atomicLoad(renderDie) != 2) {}
  72 }
  73
  74
  75 // ////////////////////////////////////////////////////////////////////////// //
enum TileSize = 32; // tile is 32x32 pixels

// various image parameters (cached from `texImage` by `mtrenderInit()`)
__gshared int nextLineAdjustment; // pointer step from one image row to the next
__gshared int offR; // index of the red component relative to a pixel pointer
__gshared int offB; // index of the blue component relative to a pixel pointer
__gshared int offG; // index of the green component relative to a pixel pointer
__gshared int bpp; // pointer step from one pixel to the next in a row
__gshared ubyte* imgdata; // raw pixel data of `texImage`
__gshared int fpxoffset; // offset of the top-left pixel from `imgdata`

// normalized ray directions through the four screen corners, filled by `prepareRenderer()`
// (corner order: 0 and 1 on one horizontal edge, 3 and 2 on the opposite one)
__gshared Vec3[4] scp;
// updated by `animate()` and passed to `raymarch()`; presumably the light position -- TODO confirm against raytracer
__gshared Vec3 L;

// per-row increments of the ray direction, filled by `prepareRenderer()`
__gshared Vec3 grady1v; // along the edge from corner 0 to corner 3
__gshared Vec3 grady2v; // along the edge from corner 1 to corner 2

__gshared Thread renderThread; // thread running `renderThreadFunc()`, created in `mtrenderInit()`
  94
  95
  96 // ////////////////////////////////////////////////////////////////////////// //
  97 void prepareRenderer () @nogc {
  98   import std.math : sin, cos;
  99
 100   // vertex shader common part
 101   auto vpn = (vrp-prp).normalize;
 102   auto u = (vuv%vpn).normalize;
 103   auto v = vpn%u;
 104   auto vcv = prp+vpn;
 105
 106   //vertex shader for each vertex
 107   immutable float[2][4] vPos = [
 108     [-1,  1], //0--1
 109     [ 1,  1], //|  |
 110     [ 1, -1], //3--2
 111     [-1, -1],
 112   ];
 113
 114   Vec3 scrCoord = void; // temp
 115   immutable float cxy = cast(float)vlWidth/cast(float)vlHeight;
 116   foreach (immutable i; 0..vPos.length) {
 117     scrCoord.x = vcv.x+vPos.ptr[i].ptr[0]*u.x*cxy+vPos.ptr[i].ptr[1]*v.x;
 118     scrCoord.y = vcv.y+vPos.ptr[i].ptr[0]*u.y*cxy+vPos.ptr[i].ptr[1]*v.y;
 119     scrCoord.z = vcv.z+vPos.ptr[i].ptr[0]*u.z*cxy+vPos.ptr[i].ptr[1]*v.z;
 120     scp.ptr[i] = (scrCoord-prp).normalize;
 121   }
 122
 123   float y_inc = 1.0f/vlHeight;
 124
 125   grady1v = (scp.ptr[3]-scp.ptr[0])*y_inc;
 126   grady2v = (scp.ptr[2]-scp.ptr[1])*y_inc;
 127 }
 128
 129
 130 uint tilesPerTexture () @nogc {
 131   return ((vlWidth+TileSize-1)/TileSize)*((vlHeight+TileSize-1)/TileSize);
 132 }
 133
 134
 135 void tileOfs (uint tile, out int x, out int y) @nogc {
 136   immutable tpx = (vlWidth+TileSize-1)/TileSize;
 137   x = (tile%tpx)*TileSize;
 138   y = (tile/tpx)*TileSize;
 139 }
 140
 141
// Adam doesn't care about @nogc, but the image functions actually are @nogc
 143 void renderToTexture (uint tile) @nogc {
 144   float x = -0.5f, y = -0.5f, x_inc = 1.0f/vlWidth, y_inc = 1.0f/vlHeight;
 145
 146   int tofsx, tofsy;
 147   tileOfs(tile, tofsx, tofsy);
 148   //{ import core.stdc.stdio; printf("tile=%u; x=%u; y=%u\n", tile, tofsx, tofsy); }
 149   int ex = tofsx+TileSize;
 150   int ey = tofsy+TileSize;
 151   if (ex > vlWidth) ex = vlWidth;
 152   if (ey > vlHeight) ey = vlHeight;
 153
 154   x += x_inc*tofsx;
 155   y += y_inc*tofsy;
 156
 157   auto accy1v = scp.ptr[0]+grady1v*tofsy;
 158   auto accy2v = scp.ptr[1]+grady2v*tofsy;
 159
 160   Vec3 colorout = void;
 161   Vec3 gradxv = void, gradxvdown = void, accxvdown = void, accxv = void, rdx = void;
 162
 163   auto offset = fpxoffset+tofsy*scale*nextLineAdjustment+tofsx*scale*bpp;
 164   auto startOfLine = imgdata+offset; // get our pointer lined up on the first pixel
 165
 166   float oldx = x;
 167   foreach (int iy; tofsy..ey) {
 168     auto vbptr = startOfLine; // we keep the start of line separately so moving to the next line is simple and portable
 169     startOfLine += nextLineAdjustment*scale;
 170     gradxv = (accy2v-accy1v)*x_inc;
 171     accxv = accy1v+gradxv*tofsx;
 172
 173     accy1v += grady1v;
 174     accy2v += grady2v;
 175
 176     accxvdown = accy1v+gradxv*tofsx;
 177     gradxvdown = (accy2v-accy1v)*x_inc;
 178
 179     foreach (int ix; tofsx..ex) {
 180       rdx = accxv+gradxv;
 181       raymarch(/*x, y,*/ accxv, L, colorout, rdx, accxvdown);
 182       ubyte r = void, g = void, b = void;
 183       if (colorout.x < 0 || colorout.y < 0 || colorout.z < 0) {
 184         r = g = b = 0;
 185       } else {
 186         r = clampToByte(cast(int)(colorout.x*255.0f));
 187         g = clampToByte(cast(int)(colorout.y*255.0f));
 188         b = clampToByte(cast(int)(colorout.z*255.0f));
 189       }
 190       static if (scale > 1) {
 191         foreach (immutable _; 0..scale) {
 192           vbptr[offR] = r;
 193           vbptr[offG] = g;
 194           vbptr[offB] = b;
 195           static if (!scanlines) {
 196             auto vbptrv = vbptr;
 197             foreach (immutable _1; 1..scale) {
 198               vbptrv += nextLineAdjustment;
 199               vbptrv[offR] = r;
 200               vbptrv[offG] = g;
 201               vbptrv[offB] = b;
 202             }
 203           }
 204           vbptr += bpp;
 205         }
 206       } else {
 207         vbptr[offR] = r;
 208         vbptr[offG] = g;
 209         vbptr[offB] = b;
 210         vbptr += bpp;
 211       }
 212       x += x_inc;
 213       //accxv += gradxv;
 214       accxv = rdx;
 215       accxvdown += gradxvdown;
 216     }
 217     y += y_inc;
 218     //x = -0.5f;
 219     x = oldx;
 220   }
 221 }
 222
 223
 224 // ////////////////////////////////////////////////////////////////////////// //
// `sttime`: time of the first `animate()` call; `lasttime`: time of the most recent `animate()` call
__gshared MonoTime sttime, lasttime;
shared bool paused = false; // while set, `animate()` does not advance `worldtime`
__gshared wasPaused = false; // whether the previous `animate()` call saw `paused` set
 228
 229
 230 public void mtrenderTogglePause () {
 231   auto ps = atomicLoad(paused);
 232   atomicStore(paused, !paused);
 233 }
 234
 235
 236 public void mtrenderPaused (bool v) {
 237   atomicStore(paused, v);
 238 }
 239
 240
 241 void animate () @nogc {
 242   __gshared bool firstTime = true;
 243   if (firstTime) {
 244     sttime = lasttime = MonoTime.currTime;
 245     firstTime = false;
 246   }
 247
 248   import std.math : sin, cos;
 249
 250   if (atomicLoad(paused)) {
 251     if (!wasPaused) {
 252       wasPaused = true;
 253     }
 254     lasttime = MonoTime.currTime;
 255   } else {
 256     wasPaused = false;
 257     //worldtime = cast(float)(MonoTime.currTime-sttime).total!"msecs"/1000.0f;
 258     auto time = MonoTime.currTime;
 259     worldtime += cast(float)(time-lasttime).total!"msecs"/1000.0f;
 260     lasttime = time;
 261   }
 262
 263   Vec3 auto_vuv = void, auto_vrp = void, auto_prp = void;
 264
 265   // view up vector
 266   auto_vuv.x = 0; //sin(worldtime/*timemsecs*/);
 267   auto_vuv.y = 1;
 268   auto_vuv.z = 0;
 269
 270   // view reference point
 271   auto_vrp.x = 0; //sin(time*0.7f)*10.0f;
 272   auto_vrp.y = 0;
 273   auto_vrp.z = 0; //cos(time*0.9f)*10.0f;
 274
 275   // camera position
 276   auto_prp.x = 3.0f; //sin(time*0.7f)*20.0f+auto_vrp.x+20.0f;
 277   auto_prp.y = 3.0f; //sin(time)*4.0f+4.0f+auto_vrp.y+3.0f;
 278   auto_prp.z = 3.0f; //cos(time*0.6f)*20.0f+auto_vrp.z+14.0f;
 279
 280   vuv = auto_vuv;
 281   vrp = auto_vrp;
 282   prp = auto_prp;
 283
 284   L = Vec3(sin(worldtime)*20.0f, 20.0f/*+sin(worldtime)*20.0f*/, cos(worldtime)*20.0f);
 285   //L = Vec3(sin(0.0f)*20.0f, 10.0f+sin(0.0f)*20.0f, cos(0.0f)*20.0f);
 286 }
 287
 288
 289 // ////////////////////////////////////////////////////////////////////////// //
 290 void tileRenderFunc (Tid ownerTid, uint wkid) {
 291   bool exit = false;
 292   uint tnum;
 293   while (!exit) {
 294     version(render_debug) { import core.stdc.stdio; printf("  worker %u idle...\n", wkid); }
 295     receive(
 296       (uint tile) {
 297         tnum = tile;
 298         if (tile == uint.max) exit = true;
 299       },
 300     );
 301     if (exit) break;
 302     version(render_debug) { import core.stdc.stdio; printf("  worker %u got tile %u\n", wkid, tnum); }
 303     renderToTexture(tnum);
 304     version(render_debug) { import core.stdc.stdio; printf("  worker %u completed tile %u\n", wkid, tnum); }
 305     ownerTid.send(wkid, 1);
 306   }
 307   ownerTid.send(wkid, 666);
 308 }
 309
 310
 311 // ////////////////////////////////////////////////////////////////////////// //
enum ThreadCount = 4; // number of tile worker threads
__gshared Tid[ThreadCount] wkTids; // worker thread ids, indexed by worker index
__gshared bool[ThreadCount] wkFree; // `true` while the corresponding worker has no tile assigned
__gshared uint wkFreeCount = 0; // number of `true` entries in `wkFree`
 316
 317
 318 void spawnWorkers () {
 319   foreach (uint idx; 0..ThreadCount) {
 320     wkTids[idx] = spawn(&tileRenderFunc, thisTid, idx);
 321     wkFree[idx] = true;
 322     ++wkFreeCount;
 323   }
 324 }
 325
 326
 327 void renderThreadFunc () {
 328   static void waitForFreeWorker () {
 329     receive(
 330       (uint wkid, int id) {
 331         wkFree[wkid] = true;
 332         ++wkFreeCount;
 333         version(render_debug) { import core.stdc.stdio; printf("  worker %u added to free pool (%u free workers now)\n", wkid, wkFreeCount); }
 334       },
 335       (Variant v) {
 336         { import core.stdc.stdio; printf("  FUUUUUUUUUUUUUUU\n"); }
 337       }
 338     );
 339   }
 340
 341   spawnWorkers();
 342   for (;;) {
 343     if (atomicLoad(renderDie) == 1) break;
 344     synchronized(mutexCondCanRender) waitCondCanRender.wait();
 345     version(render_debug) { import core.stdc.stdio; printf("render job started (%u free workers, %u tiles)...\n", wkFreeCount, tilesPerTexture); }
 346     auto cft = MonoTime.currTime;
 347     animate();
 348     prepareRenderer();
 349     // pass tiles to threads
 350     uint tile = 0, tileDone = 0, tcount = tilesPerTexture;
 351     while (tileDone < tcount) {
 352       // collect free threads
 353       if (tile >= tcount || wkFreeCount == 0) {
 354         // nothing to do, just wait
 355         version(render_debug) { import core.stdc.stdio; printf("  %u of %u tiles done; %u workers still busy\n", tileDone, tcount, ThreadCount-wkFreeCount); }
 356         waitForFreeWorker();
 357         ++tileDone;
 358       }
 359       if (tile < tcount) {
 360         // dispatch next tile
 361         foreach (uint idx; 0..ThreadCount) {
 362           if (wkFree[idx]) {
 363             version(render_debug) { import core.stdc.stdio; printf("  dispatching tile %u to worker %u\n", tile, idx); }
 364             wkFree[idx] = false;
 365             --wkFreeCount;
 366             wkTids[idx].send(tile);
 367             ++tile;
 368             break;
 369           }
 370         }
 371       }
 372     }
 373     version(render_debug) { import core.stdc.stdio; printf("render job complete\n"); }
 374     atomicStore(frameDur, cast(uint)(MonoTime.currTime-cft).total!"msecs");
 375     atomicStore(renderComplete, true);
 376   }
 377   // kill all threads
 378   foreach (uint idx; 0..ThreadCount) wkTids[idx].send(uint.max);
 379   foreach (uint idx; 0..ThreadCount) {
 380     receive(
 381       (uint wkid, int id) {}
 382     );
 383   }
 384   atomicStore(renderDie, 2);
 385 }