_obsolete_dont_use/jpeg.d

   1 // Baseline JPEG decoder
   2 // adapted from https://github.com/lgvz/imageformats
   3 // Boost License, i suppose
   4 module iv.jpeg;
   5
   6 private:
   7 import arsd.color;
   8 import iv.vfs;
   9
  10
  11 // ////////////////////////////////////////////////////////////////////////// //
  12 public class ImageIOException : Exception {
  13   this (string msg, string file=__FILE__, size_t line=__LINE__, Throwable next=null) const pure nothrow @safe @nogc {
  14     super(msg, file, line, next);
  15   }
  16 }
  17
  18
  19 // ////////////////////////////////////////////////////////////////////////// //
  20 // public declarations
  21 public bool detectJpeg (VFile stream) {
  22   try {
  23     int w, h, c;
  24     readJpegInfo(stream, w, h, c);
  25     return true;
  26   } catch (Exception) {
  27     return false;
  28   } finally {
  29     stream.seek(0, Seek.Set);
  30   }
  31 }
  32
  33
  34 public void readJpegInfo (VFile stream, out int w, out int h, out int chans) {
  35   import std.bitmanip : bigEndianToNative;
  36
  37   ubyte[2] marker = void;
  38   stream.rawReadExact(marker[]);
  39
  40   // SOI
  41   if (marker[0..2] != [0xff, 0xd8]) throw new ImageIOException("not JPEG");
  42
  43   for (;;) {
  44     stream.rawReadExact(marker[]);
  45     if (marker[0] != 0xff) throw new ImageIOException("no frame header");
  46     while (marker[1] == 0xff) stream.rawReadExact(marker[1..$]);
  47     enum SKIP = 0xff;
  48     switch (marker[1]) with (Marker) {
  49       case SOF0: .. case SOF3: goto case;
  50       case SOF9: .. case SOF11:
  51         ubyte[8] tmp;
  52         stream.rawReadExact(tmp[0..8]);
  53         //int len = bigEndianToNative!ushort(tmp[0..2]);
  54         w = bigEndianToNative!ushort(tmp[5..7]);
  55         h = bigEndianToNative!ushort(tmp[3..5]);
  56         chans = tmp[7];
  57         return;
  58       case SOS, EOI: throw new ImageIOException("no frame header");
  59       case DRI, DHT, DQT, COM: goto case SKIP;
  60       case APP0: .. case APPf: goto case SKIP;
  61       case SKIP:
  62         ubyte[2] lenbuf = void;
  63         stream.rawReadExact(lenbuf[]);
  64         int skiplen = bigEndianToNative!ushort(lenbuf)-2;
  65         stream.seek(skiplen, Seek.Cur);
  66         break;
  67       default: throw new ImageIOException("unsupported marker");
  68     }
  69   }
  70   assert(0);
  71 }
  72
  73
  74 public TrueColorImage readJpeg (VFile stream) {
  75   enum req_chans = 4;
  76
  77   // SOI
  78   ubyte[2] tmp = void;
  79   stream.rawReadExact(tmp[]);
  80   if (tmp[0..2] != [0xff, 0xd8]) throw new ImageIOException("not JPEG");
  81
  82   JPEG_Decoder dc;// = { stream: stream }; // DMD bug: no postblit is called
  83   dc.stream = stream;
  84
  85   read_markers(dc); // reads until first scan header or eoi
  86   if (dc.eoi_reached) throw new ImageIOException("no image data");
  87
  88   dc.tgt_chans = (req_chans == 0 ? dc.num_comps : cast(int)req_chans);
  89
  90   auto pixels = decode_jpeg(dc);
  91   //assert(pixels.length == dc.width*dc.height*4);
  92   return new TrueColorImage(dc.width, dc.height, pixels);
  93 }
  94
  95
  96 public TrueColorImage readJpeg (const(char)[] fname) { return readJpeg(VFile(fname)); }
  97
  98
  99 // ////////////////////////////////////////////////////////////////////////// //
 100 private:
 101 struct JPEG_Decoder {
 102   @disable this (this); // just in case
 103
 104   VFile stream;
 105
 106   bool has_frame_header = false;
 107   bool eoi_reached = false;
 108
 109   ubyte[64][4] qtables;
 110   HuffTab[2] ac_tables;
 111   HuffTab[2] dc_tables;
 112
 113   ubyte cb;      // current byte (next bit always at MSB)
 114   int bits_left; // num of unused bits in cb
 115
 116   bool correct_comp_ids;
 117   Component[3] comps;
 118   ubyte num_comps;
 119   int tgt_chans;
 120
 121   int width, height;
 122
 123   int hmax, vmax;
 124
 125   ushort restart_interval; // number of MCUs in restart interval
 126
 127   // image component
 128   static struct Component {
 129     ubyte sfx, sfy; // sampling factors, aka. h and v
 130     size_t x, y;    // total num of samples, without fill samples
 131     ubyte qtable;
 132     ubyte ac_table;
 133     ubyte dc_table;
 134     int pred;       // dc prediction
 135     ubyte[] data;   // reconstructed samples
 136   }
 137
 138   int num_mcu_x;
 139   int num_mcu_y;
 140 }
 141
 142
 143 struct HuffTab {
 144   ubyte[256] values;
 145   ubyte[257] sizes;
 146   short[16] mincode, maxcode;
 147   short[16] valptr;
 148 }
 149
 150
 151 enum Marker : ubyte {
 152   SOI = 0xd8,    // start of image
 153   SOF0 = 0xc0,   // start of frame / baseline DCT
 154   //SOF1 = 0xc1,   // start of frame / extended seq.
 155   //SOF2 = 0xc2,   // start of frame / progressive DCT
 156   SOF3 = 0xc3,   // start of frame / lossless
 157   SOF9 = 0xc9,   // start of frame / extended seq., arithmetic
 158   SOF11 = 0xcb,   // start of frame / lossless, arithmetic
 159   DHT = 0xc4,    // define huffman tables
 160   DQT = 0xdb,    // define quantization tables
 161   DRI = 0xdd,    // define restart interval
 162   SOS = 0xda,    // start of scan
 163   DNL = 0xdc,    // define number of lines
 164   RST0 = 0xd0,   // restart entropy coded data
 165   // ...
 166   RST7 = 0xd7,   // restart entropy coded data
 167   APP0 = 0xe0,   // application 0 segment
 168   // ...
 169   APPf = 0xef,   // application f segment
 170   //DAC = 0xcc,    // define arithmetic conditioning table
 171   COM = 0xfe,    // comment
 172   EOI = 0xd9,    // end of image
 173 }
 174
 175
 176 void read_markers (ref JPEG_Decoder dc) {
 177   import std.bitmanip : bigEndianToNative;
 178   bool has_next_scan_header = false;
 179   while (!has_next_scan_header && !dc.eoi_reached) {
 180     ubyte[2] marker = void;
 181     dc.stream.rawReadExact(marker[]);
 182     if (marker[0] != 0xff) throw new ImageIOException("no marker");
 183     while (marker[1] == 0xff) dc.stream.rawReadExact(marker[1..$]);
 184     debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker)marker[1]);
 185     switch (marker[1]) with (Marker) {
 186       case DHT: dc.read_huffman_tables(); break;
 187       case DQT: dc.read_quantization_tables(); break;
 188       case SOF0:
 189         if (dc.has_frame_header) throw new ImageIOException("extra frame header");
 190         debug(DebugJPEG) writeln();
 191         dc.read_frame_header();
 192         dc.has_frame_header = true;
 193         break;
 194       case SOS:
 195         if (!dc.has_frame_header) throw new ImageIOException("no frame header");
 196         dc.read_scan_header();
 197         has_next_scan_header = true;
 198         break;
 199       case DRI: dc.read_restart_interval(); break;
 200       case EOI: dc.eoi_reached = true; break;
 201       case APP0: .. case APPf: goto case;
 202       case COM:
 203         debug(DebugJPEG) writefln("-> skipping segment");
 204         ubyte[2] lenbuf = void;
 205         dc.stream.rawReadExact(lenbuf[]);
 206         int len = bigEndianToNative!ushort(lenbuf)-2;
 207         dc.stream.seek(len, Seek.Cur);
 208         break;
 209       default: throw new ImageIOException("invalid / unsupported marker");
 210     }
 211   }
 212 }
 213
 214
 215 // DHT -- define huffman tables
 216 void read_huffman_tables (ref JPEG_Decoder dc) {
 217   import std.bitmanip : bigEndianToNative;
 218   ubyte[19] tmp = void;
 219   dc.stream.rawReadExact(tmp[0..2]);
 220   int len = bigEndianToNative!ushort(tmp[0..2]);
 221   len -= 2;
 222   while (0 < len) {
 223     dc.stream.rawReadExact(tmp[0..17]);   // info byte & the BITS
 224     ubyte table_slot = tmp[0]&0xf; // must be 0 or 1 for baseline
 225     ubyte table_class = tmp[0]>>4;  // 0 = dc table, 1 = ac table
 226     if (1 < table_slot || 1 < table_class) throw new ImageIOException("invalid / not supported");
 227     // compute total number of huffman codes
 228     int mt = 0;
 229     foreach (immutable i; 1..17) mt += tmp[i];
 230     if (256 < mt) throw new ImageIOException("invalid / not supported"); // TODO where in the spec?
 231     if (table_class == 0) {
 232       dc.stream.rawReadExact(dc.dc_tables[table_slot].values[0..mt]);
 233       derive_table(dc.dc_tables[table_slot], tmp[1..17]);
 234     } else {
 235       dc.stream.rawReadExact(dc.ac_tables[table_slot].values[0..mt]);
 236       derive_table(dc.ac_tables[table_slot], tmp[1..17]);
 237     }
 238     len -= 17+mt;
 239   }
 240 }
 241
 242
 243 // num_values is the BITS
 244 void derive_table (ref HuffTab table, in ref ubyte[16] num_values) {
 245   short[256] codes;
 246   int k = 0;
 247   foreach (immutable i; 0..16) {
 248     foreach (immutable j; 0..num_values[i]) {
 249       table.sizes[k] = cast(ubyte)(i+1);
 250       ++k;
 251     }
 252   }
 253   table.sizes[k] = 0;
 254   k = 0;
 255   short code = 0;
 256   ubyte si = table.sizes[k];
 257   for (;;) {
 258     do {
 259       codes[k] = code;
 260       ++code;
 261       ++k;
 262     } while (si == table.sizes[k]);
 263     if (table.sizes[k] == 0) break;
 264     debug(DebugJPEG) assert(si < table.sizes[k]);
 265     do {
 266       code <<= 1;
 267       ++si;
 268     } while (si != table.sizes[k]);
 269   }
 270   derive_mincode_maxcode_valptr(table.mincode, table.maxcode, table.valptr, codes, num_values);
 271 }
 272
 273
 274 // F.15
 275 void derive_mincode_maxcode_valptr (ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) pure {
 276   mincode[] = -1;
 277   maxcode[] = -1;
 278   valptr[] = -1;
 279   int j = 0;
 280   foreach (immutable i; 0..16) {
 281     if (num_values[i] != 0) {
 282       valptr[i] = cast(short)j;
 283       mincode[i] = codes[j];
 284       j += num_values[i]-1;
 285       maxcode[i] = codes[j];
 286       j += 1;
 287     }
 288   }
 289 }
 290
 291
 292 // DQT -- define quantization tables
 293 void read_quantization_tables (ref JPEG_Decoder dc) {
 294   import std.bitmanip : bigEndianToNative;
 295   ubyte[2] tmp = void;
 296   dc.stream.rawReadExact(tmp[0..2]);
 297   int len = bigEndianToNative!ushort(tmp[0..2]);
 298   if (len%65 != 2) throw new ImageIOException("invalid / not supported");
 299   len -= 2;
 300   while (0 < len) {
 301     dc.stream.rawReadExact(tmp[0..1]);
 302     ubyte table_info = tmp[0];
 303     ubyte table_slot = table_info&0xf;
 304     ubyte precision = table_info>>4;  // 0 = 8 bit, 1 = 16 bit
 305     if (3 < table_slot || precision != 0) throw new ImageIOException("invalid / not supported"); // only 8 bit for baseline
 306     dc.stream.rawReadExact(dc.qtables[table_slot][0..64]);
 307     len -= 1+64;
 308   }
 309 }
 310
 311
 312 // SOF0 -- start of frame
 313 void read_frame_header (ref JPEG_Decoder dc) {
 314   import std.bitmanip : bigEndianToNative;
 315   ubyte[9] tmp = void;
 316   dc.stream.rawReadExact(tmp[0..8]);
 317   int len = bigEndianToNative!ushort(tmp[0..2]);  // 8+num_comps*3
 318   ubyte precision = tmp[2];
 319   dc.height = bigEndianToNative!ushort(tmp[3..5]);
 320   dc.width = bigEndianToNative!ushort(tmp[5..7]);
 321   dc.num_comps = tmp[7];
 322   if (precision != 8 || (dc.num_comps != 1 && dc.num_comps != 3) || len != 8+dc.num_comps*3) throw new ImageIOException("invalid / not supported");
 323   dc.hmax = 0;
 324   dc.vmax = 0;
 325   int mcu_du = 0; // data units in one mcu
 326   dc.stream.rawReadExact(tmp[0..dc.num_comps*3]);
 327   foreach (immutable i; 0..dc.num_comps) {
 328     ubyte ci = tmp[i*3];
 329     // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
 330     // so that ci == i, always. So much for standards...
 331     if (i == 0) { dc.correct_comp_ids = ci == i+1; }
 332     if ((dc.correct_comp_ids && ci != i+1) || (!dc.correct_comp_ids && ci != i)) throw new ImageIOException("invalid component id");
 333     auto comp = &dc.comps[i];
 334     ubyte sampling_factors = tmp[i*3+1];
 335     comp.sfx = sampling_factors>>4;
 336     comp.sfy = sampling_factors&0xf;
 337     comp.qtable = tmp[i*3+2];
 338     if (comp.sfy < 1 || 4 < comp.sfy || comp.sfx < 1 || 4 < comp.sfx || 3 < comp.qtable) throw new ImageIOException("invalid / not supported");
 339     if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
 340     if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
 341     mcu_du += comp.sfx*comp.sfy;
 342   }
 343   if (10 < mcu_du) throw new ImageIOException("invalid / not supported");
 344   foreach (immutable i; 0..dc.num_comps) {
 345     import std.math : ceil;
 346     dc.comps[i].x = cast(size_t)ceil(dc.width*(cast(double)dc.comps[i].sfx/dc.hmax));
 347     dc.comps[i].y = cast(size_t)ceil(dc.height*(cast(double)dc.comps[i].sfy/dc.vmax));
 348     debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id, dc.comps[i].sfx, dc.comps[i].sfy);
 349   }
 350   size_t mcu_w = dc.hmax*8;
 351   size_t mcu_h = dc.vmax*8;
 352   dc.num_mcu_x = cast(int)((dc.width+mcu_w-1)/mcu_w);
 353   dc.num_mcu_y = cast(int)((dc.height+mcu_h-1)/mcu_h);
 354   debug(DebugJPEG) {
 355     writefln("\tlen: %s", len);
 356     writefln("\tprecision: %s", precision);
 357     writefln("\tdimensions: %s x %s", dc.width, dc.height);
 358     writefln("\tnum_comps: %s", dc.num_comps);
 359     writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
 360     writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
 361   }
 362 }
 363
 364
 365 // SOS -- start of scan
 366 void read_scan_header (ref JPEG_Decoder dc) {
 367   import std.bitmanip : bigEndianToNative;
 368   import core.stdc.stdlib : alloca;
 369   ubyte[3] tmp = void;
 370   dc.stream.rawReadExact(tmp[]);
 371   ushort len = bigEndianToNative!ushort(tmp[0..2]);
 372   ubyte num_scan_comps = tmp[2];
 373   if (num_scan_comps != dc.num_comps || len != 6+num_scan_comps*2) throw new ImageIOException("invalid / not supported");
 374   auto buf = (cast(ubyte*)alloca((len-3)*ubyte.sizeof))[0..len-3];
 375   dc.stream.rawReadExact(buf[]);
 376   foreach (immutable i; 0..num_scan_comps) {
 377     uint ci = buf[i*2]-((dc.correct_comp_ids) ? 1 : 0);
 378     if (ci >= dc.num_comps) throw new ImageIOException("invalid component id");
 379     ubyte tables = buf[i*2+1];
 380     dc.comps[ci].dc_table = tables>>4;
 381     dc.comps[ci].ac_table = tables&0xf;
 382     if (1 < dc.comps[ci].dc_table || 1 < dc.comps[ci].ac_table) throw new ImageIOException("invalid / not supported");
 383   }
 384   // ignore these
 385   //ubyte spectral_start = buf[$-3];
 386   //ubyte spectral_end = buf[$-2];
 387   //ubyte approx = buf[$-1];
 388 }
 389
 390
 391 void read_restart_interval (ref JPEG_Decoder dc) {
 392   import std.bitmanip : bigEndianToNative;
 393   ubyte[4] tmp = void;
 394   dc.stream.rawReadExact(tmp[]);
 395   ushort len = bigEndianToNative!ushort(tmp[0..2]);
 396   if (len != 4) throw new ImageIOException("invalid / not supported");
 397   dc.restart_interval = bigEndianToNative!ushort(tmp[2..4]);
 398   debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
 399 }
 400
 401
 402 // reads data after the SOS segment
 403 ubyte[] decode_jpeg (ref JPEG_Decoder dc) {
 404   foreach (ref comp; dc.comps[0..dc.num_comps]) comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
 405   // E.7 -- Multiple scans are for progressive images which are not supported
 406   //while (!dc.eoi_reached) {
 407     decode_scan(dc);    // E.2.3
 408     //read_markers(dc);   // reads until next scan header or eoi
 409   //}
 410   // throw away fill samples and convert to target format
 411   return dc.reconstruct();
 412 }
 413
 414
 415 // E.2.3 and E.8 and E.9
 416 void decode_scan (ref JPEG_Decoder dc) {
 417   debug(DebugJPEG) writeln("decode scan...");
 418   int intervals, mcus;
 419   if (0 < dc.restart_interval) {
 420     int total_mcus = dc.num_mcu_x*dc.num_mcu_y;
 421     intervals = (total_mcus+dc.restart_interval-1)/dc.restart_interval;
 422     mcus = dc.restart_interval;
 423   } else {
 424     intervals = 1;
 425     mcus = dc.num_mcu_x*dc.num_mcu_y;
 426   }
 427   debug(DebugJPEG) writeln("intervals: ", intervals);
 428   foreach (immutable mcu_j; 0..dc.num_mcu_y) {
 429     foreach (immutable mcu_i; 0..dc.num_mcu_x) {
 430       // decode mcu
 431       foreach (immutable c; 0..dc.num_comps) {
 432         auto comp = &dc.comps[c];
 433         foreach (immutable du_j; 0..comp.sfy) {
 434           foreach (immutable du_i; 0..comp.sfx) {
 435             // decode entropy, dequantize & dezigzag
 436             short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
 437             // idct & level-shift
 438             int outx = (mcu_i*comp.sfx+du_i)*8;
 439             int outy = (mcu_j*comp.sfy+du_j)*8;
 440             int dst_stride = dc.num_mcu_x*comp.sfx*8;
 441             ubyte* dst = comp.data.ptr+outy*dst_stride+outx;
 442             stbi__idct_block(dst, dst_stride, data);
 443           }
 444         }
 445       }
 446       --mcus;
 447       if (!mcus) {
 448         --intervals;
 449         if (!intervals) return;
 450         read_restart(dc.stream); // RSTx marker
 451         if (intervals == 1) {
 452           // last interval, may have fewer MCUs than defined by DRI
 453           mcus = (dc.num_mcu_y-mcu_j-1)*dc.num_mcu_x+dc.num_mcu_x-mcu_i-1;
 454         } else {
 455           mcus = dc.restart_interval;
 456         }
 457         // reset decoder
 458         dc.cb = 0;
 459         dc.bits_left = 0;
 460         foreach (immutable k; 0..dc.num_comps) dc.comps[k].pred = 0;
 461       }
 462     }
 463   }
 464 }
 465
 466
 467 // RST0-RST7
 468 void read_restart (VFile stream) {
 469   ubyte[2] tmp = void;
 470   stream.rawReadExact(tmp[]);
 471   if (tmp[0] != 0xff || tmp[1] < Marker.RST0 || Marker.RST7 < tmp[1]) throw new ImageIOException("reset marker missing");
 472   // the markers should cycle 0 through 7, could check that here...
 473 }
 474
 475
 476 immutable ubyte[64] dezigzag = [
     // dezigzag[k] is the position inside the 64-entry block array that
     // receives the k-th coefficient of the zigzag-ordered stream;
     // `decode_block` uses it when storing dequantized coefficients.
 477      0,  1,  8, 16,  9,  2,  3, 10,
 478     17, 24, 32, 25, 18, 11,  4,  5,
 479     12, 19, 26, 33, 40, 48, 41, 34,
 480     27, 20, 13,  6,  7, 14, 21, 28,
 481     35, 42, 49, 56, 57, 50, 43, 36,
 482     29, 22, 15, 23, 30, 37, 44, 51,
 483     58, 59, 52, 45, 38, 31, 39, 46,
 484     53, 60, 61, 54, 47, 55, 62, 63,
 485 ];
 486
 487
 488 // decode entropy, dequantize & dezigzag (see section F.2)
 489 short[64] decode_block (ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp, in ref ubyte[64] qtable) {
 490   short[64] res = 0;
 491   ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
 492   int diff = t ? dc.receive_and_extend(t) : 0;
 493   comp.pred = comp.pred+diff;
 494   res[0] = cast(short)(comp.pred*qtable[0]);
 495   int k = 1;
 496   do {
 497     ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
 498     ubyte rrrr = rs>>4;
 499     ubyte ssss = rs&0xf;
 500     if (ssss == 0) {
 501       if (rrrr != 0xf) break; // end of block
 502       k += 16; // run length is 16
 503       continue;
 504     }
 505     k += rrrr;
 506     if (63 < k) throw new ImageIOException("corrupt block");
 507     res[dezigzag[k]] = cast(short)(dc.receive_and_extend(ssss)*qtable[k]);
 508     k += 1;
 509   } while (k < 64);
 510   return res;
 511 }
 512
 513
 514 int receive_and_extend (ref JPEG_Decoder dc, ubyte s) {
 515   // receive
 516   int symbol = 0;
 517   foreach (immutable _; 0..s) symbol = (symbol<<1)+nextbit(dc);
 518   // extend
 519   int vt = 1<<(s-1);
 520   if (symbol < vt) return symbol+(-1<<s)+1;
 521   return symbol;
 522 }
 523
 524
 525 // F.16 -- the DECODE
 526 ubyte decode_huff (ref JPEG_Decoder dc, in ref HuffTab tab) {
 527   short code = nextbit(dc);
 528   int i = 0;
 529   while (tab.maxcode[i] < code) {
 530     code = cast(short)((code<<1)+nextbit(dc));
 531     i += 1;
 532     if (tab.maxcode.length <= i) throw new ImageIOException("corrupt huffman coding");
 533   }
 534   int j = tab.valptr[i]+code-tab.mincode[i];
 535   if (tab.values.length <= cast(uint)j) throw new ImageIOException("corrupt huffman coding");
 536   return tab.values[j];
 537 }
 538
 539
 540 // F.2.2.5 and F.18
 541 ubyte nextbit (ref JPEG_Decoder dc) {
 542   if (!dc.bits_left) {
 543     ubyte[1] bytebuf = void;
 544     dc.stream.rawReadExact(bytebuf[]);
 545     dc.cb = bytebuf[0];
 546     dc.bits_left = 8;
 547     if (dc.cb == 0xff) {
 548       dc.stream.rawReadExact(bytebuf[]);
 549       if (bytebuf[0] != 0x0) throw new ImageIOException("unexpected marker");
 550     }
 551   }
 552   ubyte r = dc.cb>>7;
 553   dc.cb <<= 1;
 554   dc.bits_left -= 1;
 555   return r;
 556 }
 557
 558
 559 ubyte[] reconstruct (in ref JPEG_Decoder dc) {
 560   auto result = new ubyte[dc.width*dc.height*dc.tgt_chans];
 561   switch (dc.num_comps*10+dc.tgt_chans) {
 562     case 34, 33:
 563       // Use specialized bilinear filtering functions for the frequent cases where
 564       // Cb & Cr channels have half resolution.
 565       if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2) &&
 566          (dc.comps[0].sfx+dc.comps[0].sfy >= 3) &&
 567          dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 &&
 568          dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
 569           void function (in ubyte[], in ubyte[], ubyte[]) resample;
 570           switch (dc.comps[0].sfx*10+dc.comps[0].sfy) {
 571             case 22: resample = &upsample_h2_v2; break;
 572             case 21: resample = &upsample_h2_v1; break;
 573             case 12: resample = &upsample_h1_v2; break;
 574             default: throw new ImageIOException("bug");
 575           }
 576           auto comp1 = new ubyte[](dc.width);
 577           auto comp2 = new ubyte[](dc.width);
 578           size_t s = 0;
 579           size_t di = 0;
 580           foreach (immutable j; 0..dc.height) {
 581             size_t mi = j/dc.comps[0].sfy;
 582             size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy ? mi : mi-1+s*2);
 583             s ^= 1;
 584             size_t cs = dc.num_mcu_x*dc.comps[1].sfx*8;
 585             size_t cl0 = mi*cs;
 586             size_t cl1 = si*cs;
 587             resample(dc.comps[1].data[cl0..cl0+dc.comps[1].x], dc.comps[1].data[cl1..cl1+dc.comps[1].x], comp1[]);
 588             resample(dc.comps[2].data[cl0..cl0+dc.comps[2].x], dc.comps[2].data[cl1..cl1+dc.comps[2].x], comp2[]);
 589             foreach (immutable i; 0..dc.width) {
 590               result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[j*dc.num_mcu_x*dc.comps[0].sfx*8+i], comp1[i], comp2[i]);
 591               if (dc.tgt_chans == 4) result[di+3] = 255;
 592               di += dc.tgt_chans;
 593             }
 594           }
 595           return result;
 596       }
 597       foreach (const ref comp; dc.comps[0..dc.num_comps]) {
 598         if (comp.sfx != dc.hmax || comp.sfy != dc.vmax) return dc.upsample(result);
 599       }
 600       size_t si, di;
 601       foreach (immutable j; 0..dc.height) {
 602         foreach (immutable i; 0..dc.width) {
 603           result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[si+i], dc.comps[1].data[si+i], dc.comps[2].data[si+i]);
 604           if (dc.tgt_chans == 4) result[di+3] = 255;
 605           di += dc.tgt_chans;
 606         }
 607         si += dc.num_mcu_x*dc.comps[0].sfx*8;
 608       }
 609       return result;
 610     case 32, 12, 31, 11:
 611       const comp = &dc.comps[0];
 612       if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
 613         size_t si, di;
 614         if (dc.tgt_chans == 2) {
 615           foreach (immutable j; 0..dc.height) {
 616             foreach (immutable i; 0..dc.width) {
 617               result[di++] = comp.data[si+i];
 618               result[di++] = 255;
 619             }
 620             si += dc.num_mcu_x*comp.sfx*8;
 621           }
 622         } else {
 623           foreach (immutable j; 0..dc.height) {
 624             result[di..di+dc.width] = comp.data[si..si+dc.width];
 625             si += dc.num_mcu_x*comp.sfx*8;
 626             di += dc.width;
 627           }
 628         }
 629         return result;
 630       }
 631       // need to resample (haven't tested this...)
 632       return dc.upsample_luma(result);
 633     case 14, 13:
 634       const comp = &dc.comps[0];
 635       size_t si, di;
 636       foreach (immutable j; 0..dc.height) {
 637         foreach (immutable i; 0..dc.width) {
 638           result[di..di+3] = comp.data[si+i];
 639           if (dc.tgt_chans == 4) result[di+3] = 255;
 640           di += dc.tgt_chans;
 641         }
 642         si += dc.num_mcu_x*comp.sfx*8;
 643       }
 644       return result;
 645     default: assert(0);
 646   }
 647 }
 648
 649
 650 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
 651   ubyte mix() (ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
 652     pragma(inline, true);
 653     return cast(ubyte)((cast(uint)mm*3*3+cast(uint)ms*3*1+cast(uint)sm*1*3+cast(uint)ss*1*1+8)/16);
 654   }
 655
 656   result[0] = cast(ubyte)((cast(uint)line0[0]*3+cast(uint)line1[0]*1+2)/4);
 657   if (line0.length == 1) return;
 658   result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
 659
 660   size_t di = 2;
 661   foreach (immutable i; 1..line0.length) {
 662     result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
 663     di += 1;
 664     if (i == line0.length-1) {
 665       if (di < result.length) result[di] = cast(ubyte)((cast(uint)line0[i]*3+cast(uint)line1[i]*1+2)/4);
 666       return;
 667     }
 668     result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
 669     di += 1;
 670   }
 671 }
 672
 673
 674 void upsample_h2_v1 (in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
 675   result[0] = line0[0];
 676   if (line0.length == 1) return;
 677   result[1] = cast(ubyte)((cast(uint)line0[0]*3+cast(uint)line0[1]*1+2)/4);
 678   size_t di = 2;
 679   foreach (immutable i; 1..line0.length) {
 680     result[di] = cast(ubyte)((cast(uint)line0[i-1]*1+cast(uint)line0[i+0]*3+2)/4);
 681     di += 1;
 682     if (i == line0.length-1) {
 683       if (di < result.length) result[di] = line0[i];
 684       return;
 685     }
 686     result[di] = cast(ubyte)((cast(uint)line0[i+0]*3+cast(uint)line0[i+1]*1+2)/4);
 687     di += 1;
 688   }
 689 }
 690
 691
 692 void upsample_h1_v2 (in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
 693   foreach (immutable i; 0..result.length) result[i] = cast(ubyte)((cast(uint)line0[i]*3+cast(uint)line1[i]*1+2)/4);
 694 }
 695
 696
 697 // Nearest neighbor
 698 ubyte[] upsample_luma (in ref JPEG_Decoder dc, ubyte[] result) {
 699   const size_t stride0 = dc.num_mcu_x*dc.comps[0].sfx*8;
 700   const y_step0 = cast(float)dc.comps[0].sfy/cast(float)dc.vmax;
 701   const x_step0 = cast(float)dc.comps[0].sfx/cast(float)dc.hmax;
 702   float y0 = y_step0*0.5;
 703   size_t y0i = 0;
 704   size_t di;
 705   foreach (immutable j; 0..dc.height) {
 706     float x0 = x_step0*0.5;
 707     size_t x0i = 0;
 708     foreach (immutable i; 0..dc.width) {
 709       result[di] = dc.comps[0].data[y0i+x0i];
 710       if (dc.tgt_chans == 2) result[di+1] = 255;
 711       di += dc.tgt_chans;
 712       x0 += x_step0;
 713       if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
 714     }
 715     y0 += y_step0;
 716     if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
 717   }
 718   return result;
 719 }
 720
 721
 722 // Nearest neighbor
 723 ubyte[] upsample (in ref JPEG_Decoder dc, ubyte[] result) {
 724   const size_t stride0 = dc.num_mcu_x*dc.comps[0].sfx*8;
 725   const size_t stride1 = dc.num_mcu_x*dc.comps[1].sfx*8;
 726   const size_t stride2 = dc.num_mcu_x*dc.comps[2].sfx*8;
 727   const y_step0 = cast(float)dc.comps[0].sfy/cast(float)dc.vmax;
 728   const y_step1 = cast(float)dc.comps[1].sfy/cast(float)dc.vmax;
 729   const y_step2 = cast(float)dc.comps[2].sfy/cast(float)dc.vmax;
 730   const x_step0 = cast(float)dc.comps[0].sfx/cast(float)dc.hmax;
 731   const x_step1 = cast(float)dc.comps[1].sfx/cast(float)dc.hmax;
 732   const x_step2 = cast(float)dc.comps[2].sfx/cast(float)dc.hmax;
 733   float y0 = y_step0*0.5;
 734   float y1 = y_step1*0.5;
 735   float y2 = y_step2*0.5;
 736   size_t y0i = 0;
 737   size_t y1i = 0;
 738   size_t y2i = 0;
 739   size_t di;
 740   foreach (immutable _j; 0..dc.height) {
 741     float x0 = x_step0*0.5;
 742     float x1 = x_step1*0.5;
 743     float x2 = x_step2*0.5;
 744     size_t x0i = 0;
 745     size_t x1i = 0;
 746     size_t x2i = 0;
 747     foreach (immutable i; 0..dc.width) {
 748       result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[y0i+x0i], dc.comps[1].data[y1i+x1i], dc.comps[2].data[y2i+x2i]);
 749       if (dc.tgt_chans == 4) result[di+3] = 255;
 750       di += dc.tgt_chans;
 751       x0 += x_step0;
 752       x1 += x_step1;
 753       x2 += x_step2;
 754       if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
 755       if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; }
 756       if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; }
 757     }
 758     y0 += y_step0;
 759     y1 += y_step1;
 760     y2 += y_step2;
 761     if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
 762     if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; }
 763     if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; }
 764   }
 765   return result;
 766 }
 767
 768
 769 ubyte[3] ycbcr_to_rgb (ubyte y, ubyte cb, ubyte cr) pure {
 770   ubyte[3] rgb = void;
 771   rgb[0] = clamp(y+1.402*(cr-128));
 772   rgb[1] = clamp(y-0.34414*(cb-128)-0.71414*(cr-128));
 773   rgb[2] = clamp(y+1.772*(cb-128));
 774   return rgb;
 775 }
 776
 777
 778 ubyte clamp() (float x) pure {
 779   if (x < 0) return 0;
 780   if (255 < x) return 255;
 781   return cast(ubyte)x;
 782 }
 783
 784
 785 // ------------------------------------------------------------
 786 // The IDCT stuff here (to the next dashed line) is copied and adapted from
 787 // stb_image which is released under public domain.  Many thanks to stb_image
 788 // author, Sean Barrett.
 789 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
 790 int f2f() (float x) pure { pragma(inline, true); return cast(int)(x*4096+0.5); }
 791 int fsh() (int x) pure { pragma(inline, true); return x<<12; }
 792
 793 // from stb_image, derived from jidctint -- DCT_ISLOW
     // One 1-D pass of the 8-point inverse DCT in integer arithmetic.
     //   s0..s7 : the eight input values of the row/column
     //   x0..x3 : receive the part computed from the even inputs (s0, s2, s4, s6)
     //   t0..t3 : receive the part computed from the odd inputs (s1, s3, s5, s7)
     // The caller forms the eight outputs as x0+-t3, x1+-t2, x2+-t1, x3+-t0.
     // All results carry the 1<<12 scale introduced by f2f()/fsh().
     // Note that t0..t3 are also used as scratch space for the even part
     // before they receive their final values.
 794 void STBI__IDCT_1D() (ref int t0, ref int t1, ref int t2, ref int t3,
 795                       ref int x0, ref int x1, ref int x2, ref int x3,
 796                       int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7) pure
 797 {
 798   int p1, p2, p3, p4, p5;
 799   //int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3;
       // even part: inputs s2 and s6 ...
 800   p2 = s2;
 801   p3 = s6;
 802   p1 = (p2+p3)*f2f(0.5411961f);
 803   t2 = p1+p3*f2f(-1.847759065f);
 804   t3 = p1+p2*f2f(0.765366865f);
       // ... and inputs s0 and s4
 805   p2 = s0;
 806   p3 = s4;
 807   t0 = fsh(p2+p3);
 808   t1 = fsh(p2-p3);
 809   x0 = t0+t3;
 810   x3 = t0-t3;
 811   x1 = t1+t2;
 812   x2 = t1-t2;
       // odd part: t0..t3 are reloaded with s7, s5, s3, s1
 813   t0 = s7;
 814   t1 = s5;
 815   t2 = s3;
 816   t3 = s1;
 817   p3 = t0+t2;
 818   p4 = t1+t3;
 819   p1 = t0+t3;
 820   p2 = t1+t2;
 821   p5 = (p3+p4)*f2f(1.175875602f);
 822   t0 = t0*f2f(0.298631336f);
 823   t1 = t1*f2f(2.053119869f);
 824   t2 = t2*f2f(3.072711026f);
 825   t3 = t3*f2f(1.501321110f);
 826   p1 = p5+p1*f2f(-0.899976223f);
 827   p2 = p5+p2*f2f(-2.562915447f);
 828   p3 = p3*f2f(-1.961570560f);
 829   p4 = p4*f2f(-0.390180644f);
 830   t3 += p1+p4;
 831   t2 += p2+p3;
 832   t1 += p2+p4;
 833   t0 += p1+p3;
 834 }
 835
 836 // idct and level-shift
     // Runs the 2-D inverse DCT on one 8x8 block and writes the resulting
     // samples as bytes.
     //   dst        : receives 8 rows of 8 bytes each
     //   dst_stride : distance in bytes between two consecutive output rows
     //   data       : the 64 dequantized coefficients; entry r*8+c is read as
     //                row r, column c
     // The first loop transforms the columns into `val`, the second one the
     // rows of `val`, adds the +128 level shift and clamps to 0..255.
 837 void stbi__idct_block (ubyte* dst, int dst_stride, in ref short[64] data) pure {
 838   int i;
 839   int[64] val;
 840   int* v = val.ptr;
 841   const(short)* d = data.ptr;
 842   // columns
 843   for (i = 0; i < 8; ++i, ++d, ++v) {
 844     // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
 845     if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0) {
 846       //    no shortcut                 0     seconds
 847       //    (1|2|3|4|5|6|7)==0          0     seconds
 848       //    all separate               -0.047 seconds
 849       //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
           // only the first entry of the column is non-zero: every output of
           // the column equals it, scaled by 4 like the general path below
 850       int dcterm = d[0]<<2;
 851       v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
 852     } else {
 853       int t0, t1, t2, t3, x0, x1, x2, x3;
 854       STBI__IDCT_1D(t0, t1, t2, t3, x0, x1, x2, x3, d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56]);
 855       // constants scaled things up by 1<<12; let's bring them back
 856       // down, but keep 2 extra bits of precision
           // 512 is half of 1<<10, i.e. rounding for the >>10 below
 857       x0 += 512; x1 += 512; x2 += 512; x3 += 512;
 858       v[ 0] = (x0+t3)>>10;
 859       v[56] = (x0-t3)>>10;
 860       v[ 8] = (x1+t2)>>10;
 861       v[48] = (x1-t2)>>10;
 862       v[16] = (x2+t1)>>10;
 863       v[40] = (x2-t1)>>10;
 864       v[24] = (x3+t0)>>10;
 865       v[32] = (x3-t0)>>10;
 866     }
 867   }
 868
       // rows: `o` walks over the output rows, `v` over the rows of `val`
 869   ubyte* o = dst;
 870   for (i = 0, v = val.ptr; i < 8; ++i, v += 8, o += dst_stride) {
 871     // no fast case since the first 1D IDCT spread components out
 872     int t0, t1, t2, t3, x0, x1, x2, x3;
 873     STBI__IDCT_1D(t0, t1, t2, t3, x0, x1, x2, x3, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
 874     // constants scaled things up by 1<<12, plus we had 1<<2 from first
 875     // loop, plus horizontal and vertical each scale by sqrt(8) so together
 876     // we've got an extra 1<<3, so 1<<17 total we need to remove.
 877     // so we want to round that, which means adding 0.5*1<<17,
 878     // aka 65536. Also, we'll end up with -128 to 127 that we want
 879     // to encode as 0-255 by adding 128, so we'll add that before the shift
 880     x0 += 65536+(128<<17);
 881     x1 += 65536+(128<<17);
 882     x2 += 65536+(128<<17);
 883     x3 += 65536+(128<<17);
 884     // tried computing the shifts into temps, or'ing the temps to see
 885     // if any were out of range, but that was slower
 886     o[0] = stbi__clamp((x0+t3)>>17);
 887     o[7] = stbi__clamp((x0-t3)>>17);
 888     o[1] = stbi__clamp((x1+t2)>>17);
 889     o[6] = stbi__clamp((x1-t2)>>17);
 890     o[2] = stbi__clamp((x2+t1)>>17);
 891     o[5] = stbi__clamp((x2-t1)>>17);
 892     o[3] = stbi__clamp((x3+t0)>>17);
 893     o[4] = stbi__clamp((x3-t0)>>17);
 894   }
 895 }
 896
 897 // clamp to 0-255
 898 ubyte stbi__clamp() (int x) pure {
 899   if (cast(uint)x > 255) {
 900     if (x < 0) return 0;
 901     if (x > 255) return 255;
 902   }
 903   return cast(ubyte)x;
 904 }
 905 // the above is adapted from stb_image
 906 // ------------------------------------------------------------