jpeg.d

   1 // Baseline JPEG decoder
   2 // adapted from https://github.com/lgvz/imageformats
   3 // Boost License, i suppose
   4 module jpeg;
   5
   6 private:
   7 import arsd.color;
   8 import iv.vfs;
   9 import iv.vfs.streams;
  10
  11
  12 // ////////////////////////////////////////////////////////////////////////// //
  13 public class ImageIOException : Exception {
  14   this (string msg, string file=__FILE__, size_t line=__LINE__, Throwable next=null) const pure nothrow @safe @nogc {
  15     super(msg, file, line, next);
  16   }
  17 }
  18
  19
  20 // ////////////////////////////////////////////////////////////////////////// //
  21 // public declarations
  22 public bool detectJpeg (VFile stream) {
  23   try {
  24     int w, h, c;
  25     readJpegInfo(stream, w, h, c);
  26     return true;
  27   } catch (Exception) {
  28     return false;
  29   } finally {
  30     stream.seek(0, Seek.Set);
  31   }
  32 }
  33
  34
  35 public void readJpegInfo (VFile stream, out int w, out int h, out int chans) {
  36   import std.bitmanip : bigEndianToNative;
  37
  38   ubyte[2] marker = void;
  39   stream.rawReadExact(marker[]);
  40
  41   // SOI
  42   if (marker[0..2] != [0xff, 0xd8]) throw new ImageIOException("not JPEG");
  43
  44   for (;;) {
  45     stream.rawReadExact(marker[]);
  46     if (marker[0] != 0xff) throw new ImageIOException("no frame header");
  47     while (marker[1] == 0xff) stream.rawReadExact(marker[1..$]);
  48     enum SKIP = 0xff;
  49     switch (marker[1]) with (Marker) {
  50       case SOF0: .. case SOF3: goto case;
  51       case SOF9: .. case SOF11:
  52         ubyte[8] tmp;
  53         stream.rawReadExact(tmp[0..8]);
  54         //int len = bigEndianToNative!ushort(tmp[0..2]);
  55         w = bigEndianToNative!ushort(tmp[5..7]);
  56         h = bigEndianToNative!ushort(tmp[3..5]);
  57         chans = tmp[7];
  58         return;
  59       case SOS, EOI: throw new ImageIOException("no frame header");
  60       case DRI, DHT, DQT, COM: goto case SKIP;
  61       case APP0: .. case APPf: goto case SKIP;
  62       case SKIP:
  63         ubyte[2] lenbuf = void;
  64         stream.rawReadExact(lenbuf[]);
  65         int skiplen = bigEndianToNative!ushort(lenbuf)-2;
  66         stream.seek(skiplen, Seek.Cur);
  67         break;
  68       default: throw new ImageIOException("unsupported marker");
  69     }
  70   }
  71   assert(0);
  72 }
  73
  74
  75 public TrueColorImage readJpeg (VFile stream) {
  76   enum req_chans = 4;
  77
  78   // SOI
  79   ubyte[2] tmp = void;
  80   stream.rawReadExact(tmp[]);
  81   if (tmp[0..2] != [0xff, 0xd8]) throw new ImageIOException("not JPEG");
  82
  83   JPEG_Decoder dc = { stream: stream };
  84
  85   read_markers(dc); // reads until first scan header or eoi
  86   if (dc.eoi_reached) throw new ImageIOException("no image data");
  87
  88   dc.tgt_chans = (req_chans == 0 ? dc.num_comps : cast(int)req_chans);
  89
  90   auto pixels = decode_jpeg(dc);
  91   //assert(pixels.length == dc.width*dc.height*4);
  92   return new TrueColorImage(dc.width, dc.height, pixels);
  93 }
  94
  95
  96 // ////////////////////////////////////////////////////////////////////////// //
  97 private:
// Complete decoder state for one image: the input stream, the tables read
// from the header segments, the bit reader state and the per-component
// sample planes.
struct JPEG_Decoder {
  @disable this (this); // just in case

  VFile stream; // input; every read in this module goes through it

  bool has_frame_header = false; // set by read_markers once SOF0 was parsed
  bool eoi_reached = false;      // set by read_markers when EOI is met

  ubyte[64][4] qtables;  // quantization tables, stored in the order they appear in DQT
  HuffTab[2] ac_tables;  // AC huffman tables, indexed by table slot
  HuffTab[2] dc_tables;  // DC huffman tables, indexed by table slot

  ubyte cb;      // current byte (next bit always at MSB)
  int bits_left; // num of unused bits in cb

  bool correct_comp_ids; // true when component ids in the frame header start at 1
  Component[3] comps;    // only the first num_comps entries are used
  ubyte num_comps;       // component count from the frame header (1 or 3)
  int tgt_chans;         // number of channels in the decoded output

  int width, height; // image dimensions from the frame header

  int hmax, vmax; // largest horizontal / vertical sampling factor of all components

  ushort restart_interval; // number of MCUs in restart interval

  // image component
  static struct Component {
    ubyte sfx, sfy; // sampling factors, aka. h and v
    size_t x, y;    // total num of samples, without fill samples
    ubyte qtable;   // index into qtables
    ubyte ac_table; // index into ac_tables
    ubyte dc_table; // index into dc_tables
    int pred;       // dc prediction
    ubyte[] data;   // reconstructed samples
  }

  int num_mcu_x; // number of MCUs per row
  int num_mcu_y; // number of MCU rows
}
 138
 139
// One huffman table as filled in by read_huffman_tables / derive_table.
struct HuffTab {
  ubyte[256] values;          // symbol values, read verbatim from the DHT segment
  ubyte[257] sizes;           // code length of every symbol, terminated by a 0 entry
  short[16] mincode, maxcode; // smallest / largest code per code length, -1 when unused
  short[16] valptr;           // index into values of the first symbol per code length, -1 when unused
}
 146
 147
// Second byte of the JPEG markers used by this module (the first byte is
// always 0xff). The first and last members of the SOF, RST and APP groups
// are declared so the code can form `case A: .. case B:` ranges; members in
// between are intentionally left out.
enum Marker : ubyte {
  SOI = 0xd8,    // start of image
  SOF0 = 0xc0,   // start of frame / baseline DCT
  //SOF1 = 0xc1,   // start of frame / extended seq.
  //SOF2 = 0xc2,   // start of frame / progressive DCT
  SOF3 = 0xc3,   // start of frame / lossless
  SOF9 = 0xc9,   // start of frame / extended seq., arithmetic
  SOF11 = 0xcb,   // start of frame / lossless, arithmetic
  DHT = 0xc4,    // define huffman tables
  DQT = 0xdb,    // define quantization tables
  DRI = 0xdd,    // define restart interval
  SOS = 0xda,    // start of scan
  DNL = 0xdc,    // define number of lines
  RST0 = 0xd0,   // restart entropy coded data
  // ...
  RST7 = 0xd7,   // restart entropy coded data
  APP0 = 0xe0,   // application 0 segment
  // ...
  APPf = 0xef,   // application f segment
  //DAC = 0xcc,    // define arithmetic conditioning table
  COM = 0xfe,    // comment
  EOI = 0xd9,    // end of image
}
 171
 172
 173 void read_markers (ref JPEG_Decoder dc) {
 174   import std.bitmanip : bigEndianToNative;
 175   bool has_next_scan_header = false;
 176   while (!has_next_scan_header && !dc.eoi_reached) {
 177     ubyte[2] marker = void;
 178     dc.stream.rawReadExact(marker[]);
 179     if (marker[0] != 0xff) throw new ImageIOException("no marker");
 180     while (marker[1] == 0xff) dc.stream.rawReadExact(marker[1..$]);
 181     debug(DebugJPEG) writefln("marker: %s (%1$x)\t", cast(Marker)marker[1]);
 182     switch (marker[1]) with (Marker) {
 183       case DHT: dc.read_huffman_tables(); break;
 184       case DQT: dc.read_quantization_tables(); break;
 185       case SOF0:
 186         if (dc.has_frame_header) throw new ImageIOException("extra frame header");
 187         debug(DebugJPEG) writeln();
 188         dc.read_frame_header();
 189         dc.has_frame_header = true;
 190         break;
 191       case SOS:
 192         if (!dc.has_frame_header) throw new ImageIOException("no frame header");
 193         dc.read_scan_header();
 194         has_next_scan_header = true;
 195         break;
 196       case DRI: dc.read_restart_interval(); break;
 197       case EOI: dc.eoi_reached = true; break;
 198       case APP0: .. case APPf: goto case;
 199       case COM:
 200         debug(DebugJPEG) writefln("-> skipping segment");
 201         ubyte[2] lenbuf = void;
 202         dc.stream.rawReadExact(lenbuf[]);
 203         int len = bigEndianToNative!ushort(lenbuf)-2;
 204         dc.stream.seek(len, Seek.Cur);
 205         break;
 206       default: throw new ImageIOException("invalid / unsupported marker");
 207     }
 208   }
 209 }
 210
 211
 212 // DHT -- define huffman tables
 213 void read_huffman_tables (ref JPEG_Decoder dc) {
 214   import std.bitmanip : bigEndianToNative;
 215   ubyte[19] tmp = void;
 216   dc.stream.rawReadExact(tmp[0..2]);
 217   int len = bigEndianToNative!ushort(tmp[0..2]);
 218   len -= 2;
 219   while (0 < len) {
 220     dc.stream.rawReadExact(tmp[0..17]);   // info byte & the BITS
 221     ubyte table_slot = tmp[0]&0xf; // must be 0 or 1 for baseline
 222     ubyte table_class = tmp[0]>>4;  // 0 = dc table, 1 = ac table
 223     if (1 < table_slot || 1 < table_class) throw new ImageIOException("invalid / not supported");
 224     // compute total number of huffman codes
 225     int mt = 0;
 226     foreach (immutable i; 1..17) mt += tmp[i];
 227     if (256 < mt) throw new ImageIOException("invalid / not supported"); // TODO where in the spec?
 228     if (table_class == 0) {
 229       dc.stream.rawReadExact(dc.dc_tables[table_slot].values[0..mt]);
 230       derive_table(dc.dc_tables[table_slot], tmp[1..17]);
 231     } else {
 232       dc.stream.rawReadExact(dc.ac_tables[table_slot].values[0..mt]);
 233       derive_table(dc.ac_tables[table_slot], tmp[1..17]);
 234     }
 235     len -= 17+mt;
 236   }
 237 }
 238
 239
 240 // num_values is the BITS
 241 void derive_table (ref HuffTab table, in ref ubyte[16] num_values) {
 242   short[256] codes;
 243   int k = 0;
 244   foreach (immutable i; 0..16) {
 245     foreach (immutable j; 0..num_values[i]) {
 246       table.sizes[k] = cast(ubyte)(i+1);
 247       ++k;
 248     }
 249   }
 250   table.sizes[k] = 0;
 251   k = 0;
 252   short code = 0;
 253   ubyte si = table.sizes[k];
 254   for (;;) {
 255     do {
 256       codes[k] = code;
 257       ++code;
 258       ++k;
 259     } while (si == table.sizes[k]);
 260     if (table.sizes[k] == 0) break;
 261     debug(DebugJPEG) assert(si < table.sizes[k]);
 262     do {
 263       code <<= 1;
 264       ++si;
 265     } while (si != table.sizes[k]);
 266   }
 267   derive_mincode_maxcode_valptr(table.mincode, table.maxcode, table.valptr, codes, num_values);
 268 }
 269
 270
 271 // F.15
 272 void derive_mincode_maxcode_valptr (ref short[16] mincode, ref short[16] maxcode, ref short[16] valptr, in ref short[256] codes, in ref ubyte[16] num_values) pure {
 273   mincode[] = -1;
 274   maxcode[] = -1;
 275   valptr[] = -1;
 276   int j = 0;
 277   foreach (immutable i; 0..16) {
 278     if (num_values[i] != 0) {
 279       valptr[i] = cast(short)j;
 280       mincode[i] = codes[j];
 281       j += num_values[i]-1;
 282       maxcode[i] = codes[j];
 283       j += 1;
 284     }
 285   }
 286 }
 287
 288
 289 // DQT -- define quantization tables
 290 void read_quantization_tables (ref JPEG_Decoder dc) {
 291   import std.bitmanip : bigEndianToNative;
 292   ubyte[2] tmp = void;
 293   dc.stream.rawReadExact(tmp[0..2]);
 294   int len = bigEndianToNative!ushort(tmp[0..2]);
 295   if (len%65 != 2) throw new ImageIOException("invalid / not supported");
 296   len -= 2;
 297   while (0 < len) {
 298     dc.stream.rawReadExact(tmp[0..1]);
 299     ubyte table_info = tmp[0];
 300     ubyte table_slot = table_info&0xf;
 301     ubyte precision = table_info>>4;  // 0 = 8 bit, 1 = 16 bit
 302     if (3 < table_slot || precision != 0) throw new ImageIOException("invalid / not supported"); // only 8 bit for baseline
 303     dc.stream.rawReadExact(dc.qtables[table_slot][0..64]);
 304     len -= 1+64;
 305   }
 306 }
 307
 308
 309 // SOF0 -- start of frame
 310 void read_frame_header (ref JPEG_Decoder dc) {
 311   import std.bitmanip : bigEndianToNative;
 312   ubyte[9] tmp = void;
 313   dc.stream.rawReadExact(tmp[0..8]);
 314   int len = bigEndianToNative!ushort(tmp[0..2]);  // 8+num_comps*3
 315   ubyte precision = tmp[2];
 316   dc.height = bigEndianToNative!ushort(tmp[3..5]);
 317   dc.width = bigEndianToNative!ushort(tmp[5..7]);
 318   dc.num_comps = tmp[7];
 319   if (precision != 8 || (dc.num_comps != 1 && dc.num_comps != 3) || len != 8+dc.num_comps*3) throw new ImageIOException("invalid / not supported");
 320   dc.hmax = 0;
 321   dc.vmax = 0;
 322   int mcu_du = 0; // data units in one mcu
 323   dc.stream.rawReadExact(tmp[0..dc.num_comps*3]);
 324   foreach (immutable i; 0..dc.num_comps) {
 325     ubyte ci = tmp[i*3];
 326     // JFIF says ci should be i+1, but there are images where ci is i. Normalize ids
 327     // so that ci == i, always. So much for standards...
 328     if (i == 0) { dc.correct_comp_ids = ci == i+1; }
 329     if ((dc.correct_comp_ids && ci != i+1) || (!dc.correct_comp_ids && ci != i)) throw new ImageIOException("invalid component id");
 330     auto comp = &dc.comps[i];
 331     ubyte sampling_factors = tmp[i*3+1];
 332     comp.sfx = sampling_factors>>4;
 333     comp.sfy = sampling_factors&0xf;
 334     comp.qtable = tmp[i*3+2];
 335     if (comp.sfy < 1 || 4 < comp.sfy || comp.sfx < 1 || 4 < comp.sfx || 3 < comp.qtable) throw new ImageIOException("invalid / not supported");
 336     if (dc.hmax < comp.sfx) dc.hmax = comp.sfx;
 337     if (dc.vmax < comp.sfy) dc.vmax = comp.sfy;
 338     mcu_du += comp.sfx*comp.sfy;
 339   }
 340   if (10 < mcu_du) throw new ImageIOException("invalid / not supported");
 341   foreach (immutable i; 0..dc.num_comps) {
 342     import std.math : ceil;
 343     dc.comps[i].x = cast(size_t)ceil(dc.width*(cast(double)dc.comps[i].sfx/dc.hmax));
 344     dc.comps[i].y = cast(size_t)ceil(dc.height*(cast(double)dc.comps[i].sfy/dc.vmax));
 345     debug(DebugJPEG) writefln("%d comp %d sfx/sfy: %d/%d", i, dc.comps[i].id, dc.comps[i].sfx, dc.comps[i].sfy);
 346   }
 347   size_t mcu_w = dc.hmax*8;
 348   size_t mcu_h = dc.vmax*8;
 349   dc.num_mcu_x = cast(int)((dc.width+mcu_w-1)/mcu_w);
 350   dc.num_mcu_y = cast(int)((dc.height+mcu_h-1)/mcu_h);
 351   debug(DebugJPEG) {
 352     writefln("\tlen: %s", len);
 353     writefln("\tprecision: %s", precision);
 354     writefln("\tdimensions: %s x %s", dc.width, dc.height);
 355     writefln("\tnum_comps: %s", dc.num_comps);
 356     writefln("\tnum_mcu_x: %s", dc.num_mcu_x);
 357     writefln("\tnum_mcu_y: %s", dc.num_mcu_y);
 358   }
 359 }
 360
 361
 362 // SOS -- start of scan
 363 void read_scan_header (ref JPEG_Decoder dc) {
 364   import std.bitmanip : bigEndianToNative;
 365   import core.stdc.stdlib : alloca;
 366   ubyte[3] tmp = void;
 367   dc.stream.rawReadExact(tmp[]);
 368   ushort len = bigEndianToNative!ushort(tmp[0..2]);
 369   ubyte num_scan_comps = tmp[2];
 370   if (num_scan_comps != dc.num_comps || len != 6+num_scan_comps*2) throw new ImageIOException("invalid / not supported");
 371   auto buf = (cast(ubyte*)alloca((len-3)*ubyte.sizeof))[0..len-3];
 372   dc.stream.rawReadExact(buf[]);
 373   foreach (immutable i; 0..num_scan_comps) {
 374     uint ci = buf[i*2]-((dc.correct_comp_ids) ? 1 : 0);
 375     if (ci >= dc.num_comps) throw new ImageIOException("invalid component id");
 376     ubyte tables = buf[i*2+1];
 377     dc.comps[ci].dc_table = tables>>4;
 378     dc.comps[ci].ac_table = tables&0xf;
 379     if (1 < dc.comps[ci].dc_table || 1 < dc.comps[ci].ac_table) throw new ImageIOException("invalid / not supported");
 380   }
 381   // ignore these
 382   //ubyte spectral_start = buf[$-3];
 383   //ubyte spectral_end = buf[$-2];
 384   //ubyte approx = buf[$-1];
 385 }
 386
 387
 388 void read_restart_interval (ref JPEG_Decoder dc) {
 389   import std.bitmanip : bigEndianToNative;
 390   ubyte[4] tmp = void;
 391   dc.stream.rawReadExact(tmp[]);
 392   ushort len = bigEndianToNative!ushort(tmp[0..2]);
 393   if (len != 4) throw new ImageIOException("invalid / not supported");
 394   dc.restart_interval = bigEndianToNative!ushort(tmp[2..4]);
 395   debug(DebugJPEG) writeln("restart interval set to: ", dc.restart_interval);
 396 }
 397
 398
 399 // reads data after the SOS segment
 400 ubyte[] decode_jpeg (ref JPEG_Decoder dc) {
 401   foreach (ref comp; dc.comps[0..dc.num_comps]) comp.data = new ubyte[dc.num_mcu_x*comp.sfx*8*dc.num_mcu_y*comp.sfy*8];
 402   // E.7 -- Multiple scans are for progressive images which are not supported
 403   //while (!dc.eoi_reached) {
 404     decode_scan(dc);    // E.2.3
 405     //read_markers(dc);   // reads until next scan header or eoi
 406   //}
 407   // throw away fill samples and convert to target format
 408   return dc.reconstruct();
 409 }
 410
 411
 412 // E.2.3 and E.8 and E.9
 413 void decode_scan (ref JPEG_Decoder dc) {
 414   debug(DebugJPEG) writeln("decode scan...");
 415   int intervals, mcus;
 416   if (0 < dc.restart_interval) {
 417     int total_mcus = dc.num_mcu_x*dc.num_mcu_y;
 418     intervals = (total_mcus+dc.restart_interval-1)/dc.restart_interval;
 419     mcus = dc.restart_interval;
 420   } else {
 421     intervals = 1;
 422     mcus = dc.num_mcu_x*dc.num_mcu_y;
 423   }
 424   debug(DebugJPEG) writeln("intervals: ", intervals);
 425   foreach (immutable mcu_j; 0..dc.num_mcu_y) {
 426     foreach (immutable mcu_i; 0..dc.num_mcu_x) {
 427       // decode mcu
 428       foreach (immutable c; 0..dc.num_comps) {
 429         auto comp = &dc.comps[c];
 430         foreach (immutable du_j; 0..comp.sfy) {
 431           foreach (immutable du_i; 0..comp.sfx) {
 432             // decode entropy, dequantize & dezigzag
 433             short[64] data = decode_block(dc, *comp, dc.qtables[comp.qtable]);
 434             // idct & level-shift
 435             int outx = (mcu_i*comp.sfx+du_i)*8;
 436             int outy = (mcu_j*comp.sfy+du_j)*8;
 437             int dst_stride = dc.num_mcu_x*comp.sfx*8;
 438             ubyte* dst = comp.data.ptr+outy*dst_stride+outx;
 439             stbi__idct_block(dst, dst_stride, data);
 440           }
 441         }
 442       }
 443       --mcus;
 444       if (!mcus) {
 445         --intervals;
 446         if (!intervals) return;
 447         read_restart(dc.stream); // RSTx marker
 448         if (intervals == 1) {
 449           // last interval, may have fewer MCUs than defined by DRI
 450           mcus = (dc.num_mcu_y-mcu_j-1)*dc.num_mcu_x+dc.num_mcu_x-mcu_i-1;
 451         } else {
 452           mcus = dc.restart_interval;
 453         }
 454         // reset decoder
 455         dc.cb = 0;
 456         dc.bits_left = 0;
 457         foreach (immutable k; 0..dc.num_comps) dc.comps[k].pred = 0;
 458       }
 459     }
 460   }
 461 }
 462
 463
 464 // RST0-RST7
 465 void read_restart (VFile stream) {
 466   ubyte[2] tmp = void;
 467   stream.rawReadExact(tmp[]);
 468   if (tmp[0] != 0xff || tmp[1] < Marker.RST0 || Marker.RST7 < tmp[1]) throw new ImageIOException("reset marker missing");
 469   // the markers should cycle 0 through 7, could check that here...
 470 }
 471
 472
// Maps a coefficient's position in zigzag order (the order in which
// decode_block receives it) to its index in the row-major 8x8 block.
immutable ubyte[64] dezigzag = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
];
 483
 484
 485 // decode entropy, dequantize & dezigzag (see section F.2)
 486 short[64] decode_block (ref JPEG_Decoder dc, ref JPEG_Decoder.Component comp, in ref ubyte[64] qtable) {
 487   short[64] res = 0;
 488   ubyte t = decode_huff(dc, dc.dc_tables[comp.dc_table]);
 489   int diff = t ? dc.receive_and_extend(t) : 0;
 490   comp.pred = comp.pred+diff;
 491   res[0] = cast(short)(comp.pred*qtable[0]);
 492   int k = 1;
 493   do {
 494     ubyte rs = decode_huff(dc, dc.ac_tables[comp.ac_table]);
 495     ubyte rrrr = rs>>4;
 496     ubyte ssss = rs&0xf;
 497     if (ssss == 0) {
 498       if (rrrr != 0xf) break; // end of block
 499       k += 16; // run length is 16
 500       continue;
 501     }
 502     k += rrrr;
 503     if (63 < k) throw new ImageIOException("corrupt block");
 504     res[dezigzag[k]] = cast(short)(dc.receive_and_extend(ssss)*qtable[k]);
 505     k += 1;
 506   } while (k < 64);
 507   return res;
 508 }
 509
 510
 511 int receive_and_extend (ref JPEG_Decoder dc, ubyte s) {
 512   // receive
 513   int symbol = 0;
 514   foreach (immutable _; 0..s) symbol = (symbol<<1)+nextbit(dc);
 515   // extend
 516   int vt = 1<<(s-1);
 517   if (symbol < vt) return symbol+(-1<<s)+1;
 518   return symbol;
 519 }
 520
 521
 522 // F.16 -- the DECODE
 523 ubyte decode_huff (ref JPEG_Decoder dc, in ref HuffTab tab) {
 524   short code = nextbit(dc);
 525   int i = 0;
 526   while (tab.maxcode[i] < code) {
 527     code = cast(short)((code<<1)+nextbit(dc));
 528     i += 1;
 529     if (tab.maxcode.length <= i) throw new ImageIOException("corrupt huffman coding");
 530   }
 531   int j = tab.valptr[i]+code-tab.mincode[i];
 532   if (tab.values.length <= cast(uint)j) throw new ImageIOException("corrupt huffman coding");
 533   return tab.values[j];
 534 }
 535
 536
 537 // F.2.2.5 and F.18
 538 ubyte nextbit (ref JPEG_Decoder dc) {
 539   if (!dc.bits_left) {
 540     ubyte[1] bytebuf = void;
 541     dc.stream.rawReadExact(bytebuf[]);
 542     dc.cb = bytebuf[0];
 543     dc.bits_left = 8;
 544     if (dc.cb == 0xff) {
 545       dc.stream.rawReadExact(bytebuf[]);
 546       if (bytebuf[0] != 0x0) throw new ImageIOException("unexpected marker");
 547     }
 548   }
 549   ubyte r = dc.cb>>7;
 550   dc.cb <<= 1;
 551   dc.bits_left -= 1;
 552   return r;
 553 }
 554
 555
 556 ubyte[] reconstruct (in ref JPEG_Decoder dc) {
 557   auto result = new ubyte[dc.width*dc.height*dc.tgt_chans];
 558   switch (dc.num_comps*10+dc.tgt_chans) {
 559     case 34, 33:
 560       // Use specialized bilinear filtering functions for the frequent cases where
 561       // Cb & Cr channels have half resolution.
 562       if ((dc.comps[0].sfx <= 2 && dc.comps[0].sfy <= 2) &&
 563          (dc.comps[0].sfx+dc.comps[0].sfy >= 3) &&
 564          dc.comps[1].sfx == 1 && dc.comps[1].sfy == 1 &&
 565          dc.comps[2].sfx == 1 && dc.comps[2].sfy == 1) {
 566           void function (in ubyte[], in ubyte[], ubyte[]) resample;
 567           switch (dc.comps[0].sfx*10+dc.comps[0].sfy) {
 568             case 22: resample = &upsample_h2_v2; break;
 569             case 21: resample = &upsample_h2_v1; break;
 570             case 12: resample = &upsample_h1_v2; break;
 571             default: throw new ImageIOException("bug");
 572           }
 573           auto comp1 = new ubyte[](dc.width);
 574           auto comp2 = new ubyte[](dc.width);
 575           size_t s = 0;
 576           size_t di = 0;
 577           foreach (immutable j; 0..dc.height) {
 578             size_t mi = j/dc.comps[0].sfy;
 579             size_t si = (mi == 0 || mi >= (dc.height-1)/dc.comps[0].sfy ? mi : mi-1+s*2);
 580             s ^= 1;
 581             size_t cs = dc.num_mcu_x*dc.comps[1].sfx*8;
 582             size_t cl0 = mi*cs;
 583             size_t cl1 = si*cs;
 584             resample(dc.comps[1].data[cl0..cl0+dc.comps[1].x], dc.comps[1].data[cl1..cl1+dc.comps[1].x], comp1[]);
 585             resample(dc.comps[2].data[cl0..cl0+dc.comps[2].x], dc.comps[2].data[cl1..cl1+dc.comps[2].x], comp2[]);
 586             foreach (immutable i; 0..dc.width) {
 587               result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[j*dc.num_mcu_x*dc.comps[0].sfx*8+i], comp1[i], comp2[i]);
 588               if (dc.tgt_chans == 4) result[di+3] = 255;
 589               di += dc.tgt_chans;
 590             }
 591           }
 592           return result;
 593       }
 594       foreach (const ref comp; dc.comps[0..dc.num_comps]) {
 595         if (comp.sfx != dc.hmax || comp.sfy != dc.vmax) return dc.upsample(result);
 596       }
 597       size_t si, di;
 598       foreach (immutable j; 0..dc.height) {
 599         foreach (immutable i; 0..dc.width) {
 600           result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[si+i], dc.comps[1].data[si+i], dc.comps[2].data[si+i]);
 601           if (dc.tgt_chans == 4) result[di+3] = 255;
 602           di += dc.tgt_chans;
 603         }
 604         si += dc.num_mcu_x*dc.comps[0].sfx*8;
 605       }
 606       return result;
 607     case 32, 12, 31, 11:
 608       const comp = &dc.comps[0];
 609       if (comp.sfx == dc.hmax && comp.sfy == dc.vmax) {
 610         size_t si, di;
 611         if (dc.tgt_chans == 2) {
 612           foreach (immutable j; 0..dc.height) {
 613             foreach (immutable i; 0..dc.width) {
 614               result[di++] = comp.data[si+i];
 615               result[di++] = 255;
 616             }
 617             si += dc.num_mcu_x*comp.sfx*8;
 618           }
 619         } else {
 620           foreach (immutable j; 0..dc.height) {
 621             result[di..di+dc.width] = comp.data[si..si+dc.width];
 622             si += dc.num_mcu_x*comp.sfx*8;
 623             di += dc.width;
 624           }
 625         }
 626         return result;
 627       }
 628       // need to resample (haven't tested this...)
 629       return dc.upsample_luma(result);
 630     case 14, 13:
 631       const comp = &dc.comps[0];
 632       size_t si, di;
 633       foreach (immutable j; 0..dc.height) {
 634         foreach (immutable i; 0..dc.width) {
 635           result[di..di+3] = comp.data[si+i];
 636           if (dc.tgt_chans == 4) result[di+3] = 255;
 637           di += dc.tgt_chans;
 638         }
 639         si += dc.num_mcu_x*comp.sfx*8;
 640       }
 641       return result;
 642     default: assert(0);
 643   }
 644 }
 645
 646
 647 void upsample_h2_v2(in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
 648   ubyte mix() (ubyte mm, ubyte ms, ubyte sm, ubyte ss) {
 649     pragma(inline, true);
 650     return cast(ubyte)((cast(uint)mm*3*3+cast(uint)ms*3*1+cast(uint)sm*1*3+cast(uint)ss*1*1+8)/16);
 651   }
 652
 653   result[0] = cast(ubyte)((cast(uint)line0[0]*3+cast(uint)line1[0]*1+2)/4);
 654   if (line0.length == 1) return;
 655   result[1] = mix(line0[0], line0[1], line1[0], line1[1]);
 656
 657   size_t di = 2;
 658   foreach (immutable i; 1..line0.length) {
 659     result[di] = mix(line0[i], line0[i-1], line1[i], line1[i-1]);
 660     di += 1;
 661     if (i == line0.length-1) {
 662       if (di < result.length) result[di] = cast(ubyte)((cast(uint)line0[i]*3+cast(uint)line1[i]*1+2)/4);
 663       return;
 664     }
 665     result[di] = mix(line0[i], line0[i+1], line1[i], line1[i+1]);
 666     di += 1;
 667   }
 668 }
 669
 670
 671 void upsample_h2_v1 (in ubyte[] line0, in ubyte[] _line1, ubyte[] result) {
 672   result[0] = line0[0];
 673   if (line0.length == 1) return;
 674   result[1] = cast(ubyte)((cast(uint)line0[0]*3+cast(uint)line0[1]*1+2)/4);
 675   size_t di = 2;
 676   foreach (immutable i; 1..line0.length) {
 677     result[di] = cast(ubyte)((cast(uint)line0[i-1]*1+cast(uint)line0[i+0]*3+2)/4);
 678     di += 1;
 679     if (i == line0.length-1) {
 680       if (di < result.length) result[di] = line0[i];
 681       return;
 682     }
 683     result[di] = cast(ubyte)((cast(uint)line0[i+0]*3+cast(uint)line0[i+1]*1+2)/4);
 684     di += 1;
 685   }
 686 }
 687
 688
 689 void upsample_h1_v2 (in ubyte[] line0, in ubyte[] line1, ubyte[] result) {
 690   foreach (immutable i; 0..result.length) result[i] = cast(ubyte)((cast(uint)line0[i]*3+cast(uint)line1[i]*1+2)/4);
 691 }
 692
 693
 694 // Nearest neighbor
 695 ubyte[] upsample_luma (in ref JPEG_Decoder dc, ubyte[] result) {
 696   const size_t stride0 = dc.num_mcu_x*dc.comps[0].sfx*8;
 697   const y_step0 = cast(float)dc.comps[0].sfy/cast(float)dc.vmax;
 698   const x_step0 = cast(float)dc.comps[0].sfx/cast(float)dc.hmax;
 699   float y0 = y_step0*0.5;
 700   size_t y0i = 0;
 701   size_t di;
 702   foreach (immutable j; 0..dc.height) {
 703     float x0 = x_step0*0.5;
 704     size_t x0i = 0;
 705     foreach (immutable i; 0..dc.width) {
 706       result[di] = dc.comps[0].data[y0i+x0i];
 707       if (dc.tgt_chans == 2) result[di+1] = 255;
 708       di += dc.tgt_chans;
 709       x0 += x_step0;
 710       if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
 711     }
 712     y0 += y_step0;
 713     if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
 714   }
 715   return result;
 716 }
 717
 718
// Nearest neighbor
// Generic YCbCr -> RGB(A) conversion for arbitrary sampling factors: each
// component plane is walked with its own fractional step so that all three
// cover the full image, and every output pixel takes the sample currently
// under each walker. With 4 target channels the alpha byte is set to 255.
// Returns `result`.
ubyte[] upsample (in ref JPEG_Decoder dc, ubyte[] result) {
  // row length of each component plane, in samples
  const size_t stride0 = dc.num_mcu_x*dc.comps[0].sfx*8;
  const size_t stride1 = dc.num_mcu_x*dc.comps[1].sfx*8;
  const size_t stride2 = dc.num_mcu_x*dc.comps[2].sfx*8;
  // fraction of a source row / column advanced per output row / pixel
  const y_step0 = cast(float)dc.comps[0].sfy/cast(float)dc.vmax;
  const y_step1 = cast(float)dc.comps[1].sfy/cast(float)dc.vmax;
  const y_step2 = cast(float)dc.comps[2].sfy/cast(float)dc.vmax;
  const x_step0 = cast(float)dc.comps[0].sfx/cast(float)dc.hmax;
  const x_step1 = cast(float)dc.comps[1].sfx/cast(float)dc.hmax;
  const x_step2 = cast(float)dc.comps[2].sfx/cast(float)dc.hmax;
  // fractional positions start half a step in
  float y0 = y_step0*0.5;
  float y1 = y_step1*0.5;
  float y2 = y_step2*0.5;
  // offsets of the current source row of each plane
  size_t y0i = 0;
  size_t y1i = 0;
  size_t y2i = 0;
  size_t di; // write position in result
  foreach (immutable _j; 0..dc.height) {
    float x0 = x_step0*0.5;
    float x1 = x_step1*0.5;
    float x2 = x_step2*0.5;
    // current source column of each plane
    size_t x0i = 0;
    size_t x1i = 0;
    size_t x2i = 0;
    foreach (immutable i; 0..dc.width) {
      result[di..di+3] = ycbcr_to_rgb(dc.comps[0].data[y0i+x0i], dc.comps[1].data[y1i+x1i], dc.comps[2].data[y2i+x2i]);
      if (dc.tgt_chans == 4) result[di+3] = 255;
      di += dc.tgt_chans;
      x0 += x_step0;
      x1 += x_step1;
      x2 += x_step2;
      // move to the next source column once a whole sample has been crossed
      if (x0 >= 1.0) { x0 -= 1.0; x0i += 1; }
      if (x1 >= 1.0) { x1 -= 1.0; x1i += 1; }
      if (x2 >= 1.0) { x2 -= 1.0; x2i += 1; }
    }
    y0 += y_step0;
    y1 += y_step1;
    y2 += y_step2;
    // move to the next source row once a whole sample has been crossed
    if (y0 >= 1.0) { y0 -= 1.0; y0i += stride0; }
    if (y1 >= 1.0) { y1 -= 1.0; y1i += stride1; }
    if (y2 >= 1.0) { y2 -= 1.0; y2i += stride2; }
  }
  return result;
}
 764
 765
 766 ubyte[3] ycbcr_to_rgb (ubyte y, ubyte cb, ubyte cr) pure {
 767   ubyte[3] rgb = void;
 768   rgb[0] = clamp(y+1.402*(cr-128));
 769   rgb[1] = clamp(y-0.34414*(cb-128)-0.71414*(cr-128));
 770   rgb[2] = clamp(y+1.772*(cb-128));
 771   return rgb;
 772 }
 773
 774
 775 ubyte clamp() (float x) pure {
 776   if (x < 0) return 0;
 777   if (255 < x) return 255;
 778   return cast(ubyte)x;
 779 }
 780
 781
 782 // ------------------------------------------------------------
 783 // The IDCT stuff here (to the next dashed line) is copied and adapted from
 784 // stb_image which is released under public domain.  Many thanks to stb_image
 785 // author, Sean Barrett.
 786 // Link: https://github.com/nothings/stb/blob/master/stb_image.h
 787 int f2f() (float x) pure { pragma(inline, true); return cast(int)(x*4096+0.5); }
 788 int fsh() (int x) pure { pragma(inline, true); return x<<12; }
 789
// from stb_image, derived from jidctint -- DCT_ISLOW
// One 8-point 1D IDCT pass in fixed point (constants scaled by 1<<12).
// Inputs s0..s7 are the eight coefficients of one row or column. The even
// part is left in x0..x3 and the odd part in t0..t3; the caller combines
// them as x+t / x-t pairs and removes the scaling.
void STBI__IDCT_1D() (ref int t0, ref int t1, ref int t2, ref int t3,
                      ref int x0, ref int x1, ref int x2, ref int x3,
                      int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7) pure
{
  int p1, p2, p3, p4, p5;
  //int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3;
  // even part: built from s0, s2, s4, s6
  p2 = s2;
  p3 = s6;
  p1 = (p2+p3)*f2f(0.5411961f);
  t2 = p1+p3*f2f(-1.847759065f);
  t3 = p1+p2*f2f(0.765366865f);
  p2 = s0;
  p3 = s4;
  t0 = fsh(p2+p3);
  t1 = fsh(p2-p3);
  x0 = t0+t3;
  x3 = t0-t3;
  x1 = t1+t2;
  x2 = t1-t2;
  // odd part: built from s1, s3, s5, s7 (t0..t3 are reused from here on)
  t0 = s7;
  t1 = s5;
  t2 = s3;
  t3 = s1;
  p3 = t0+t2;
  p4 = t1+t3;
  p1 = t0+t3;
  p2 = t1+t2;
  p5 = (p3+p4)*f2f(1.175875602f);
  t0 = t0*f2f(0.298631336f);
  t1 = t1*f2f(2.053119869f);
  t2 = t2*f2f(3.072711026f);
  t3 = t3*f2f(1.501321110f);
  p1 = p5+p1*f2f(-0.899976223f);
  p2 = p5+p2*f2f(-2.562915447f);
  p3 = p3*f2f(-1.961570560f);
  p4 = p4*f2f(-0.390180644f);
  t3 += p1+p4;
  t2 += p2+p3;
  t1 += p2+p4;
  t0 += p1+p3;
}
 832
// idct and level-shift
// Runs the 2D IDCT on one dequantized 8x8 block (`data`, row-major) and
// writes the clamped 8-bit samples to `dst`, advancing `dst_stride` bytes
// per output row. `dst` must have room for 8 rows of 8 samples at that stride.
void stbi__idct_block (ubyte* dst, int dst_stride, in ref short[64] data) pure {
  int i;
  int[64] val; // intermediate results after the column pass
  int* v = val.ptr;
  const(short)* d = data.ptr;
  // columns
  for (i = 0; i < 8; ++i, ++d, ++v) {
    // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
    if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0) {
      //    no shortcut                 0     seconds
      //    (1|2|3|4|5|6|7)==0          0     seconds
      //    all separate               -0.047 seconds
      //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
      // only the DC term is present: the whole column is that constant,
      // carrying the same 2 extra bits of precision as the general path
      int dcterm = d[0]<<2;
      v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
    } else {
      int t0, t1, t2, t3, x0, x1, x2, x3;
      STBI__IDCT_1D(t0, t1, t2, t3, x0, x1, x2, x3, d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56]);
      // constants scaled things up by 1<<12; let's bring them back
      // down, but keep 2 extra bits of precision
      x0 += 512; x1 += 512; x2 += 512; x3 += 512;
      v[ 0] = (x0+t3)>>10;
      v[56] = (x0-t3)>>10;
      v[ 8] = (x1+t2)>>10;
      v[48] = (x1-t2)>>10;
      v[16] = (x2+t1)>>10;
      v[40] = (x2-t1)>>10;
      v[24] = (x3+t0)>>10;
      v[32] = (x3-t0)>>10;
    }
  }

  // rows: second pass over the intermediate values, output goes to dst
  ubyte* o = dst;
  for (i = 0, v = val.ptr; i < 8; ++i, v += 8, o += dst_stride) {
    // no fast case since the first 1D IDCT spread components out
    int t0, t1, t2, t3, x0, x1, x2, x3;
    STBI__IDCT_1D(t0, t1, t2, t3, x0, x1, x2, x3, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
    // constants scaled things up by 1<<12, plus we had 1<<2 from first
    // loop, plus horizontal and vertical each scale by sqrt(8) so together
    // we've got an extra 1<<3, so 1<<17 total we need to remove.
    // so we want to round that, which means adding 0.5*1<<17,
    // aka 65536. Also, we'll end up with -128 to 127 that we want
    // to encode as 0-255 by adding 128, so we'll add that before the shift
    x0 += 65536+(128<<17);
    x1 += 65536+(128<<17);
    x2 += 65536+(128<<17);
    x3 += 65536+(128<<17);
    // tried computing the shifts into temps, or'ing the temps to see
    // if any were out of range, but that was slower
    o[0] = stbi__clamp((x0+t3)>>17);
    o[7] = stbi__clamp((x0-t3)>>17);
    o[1] = stbi__clamp((x1+t2)>>17);
    o[6] = stbi__clamp((x1-t2)>>17);
    o[2] = stbi__clamp((x2+t1)>>17);
    o[5] = stbi__clamp((x2-t1)>>17);
    o[3] = stbi__clamp((x3+t0)>>17);
    o[4] = stbi__clamp((x3-t0)>>17);
  }
}
 893
 894 // clamp to 0-255
 895 ubyte stbi__clamp() (int x) pure {
 896   if (cast(uint)x > 255) {
 897     if (x < 0) return 0;
 898     if (x > 255) return 255;
 899   }
 900   return cast(ubyte)x;
 901 }
 902 // the above is adapted from stb_image
 903 // ------------------------------------------------------------