obsolete/jpegd.d

   1 // jpgd.h - C++ class for JPEG decompression.
   2 // Rich Geldreich <richgel99@gmail.com>
   3 // Alex Evans: Linear memory allocator (taken from jpge.h).
   4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
   5 // D translation by Ketmar // Invisible Vector
   6 //
   7 // This is free and unencumbered software released into the public domain.
   8 //
   9 // Anyone is free to copy, modify, publish, use, compile, sell, or
  10 // distribute this software, either in source code form or as a compiled
  11 // binary, for any purpose, commercial or non-commercial, and by any
  12 // means.
  13 //
  14 // In jurisdictions that recognize copyright laws, the author or authors
  15 // of this software dedicate any and all copyright interest in the
  16 // software to the public domain. We make this dedication for the benefit
  17 // of the public at large and to the detriment of our heirs and
  18 // successors. We intend this dedication to be an overt act of
  19 // relinquishment in perpetuity of all present and future rights to this
  20 // software under copyright law.
  21 //
  22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28 // OTHER DEALINGS IN THE SOFTWARE.
  29 //
  30 // For more information, please refer to <http://unlicense.org/>
  31 //
  32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
  33 //
  34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
  35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
  36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
  37 /**
  38  * Loads a JPEG image from a memory buffer or a file.
  39  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
  40  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
  41  * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
  42  */
  43 module iv.jpegd /*is aliced*/;
  44
  45 import iv.alice;
  46
  47 // Define this version identifier to enable freq. domain chroma upsampling on images using H2V2 subsampling (remove the `version = ...;` line below for faster nearest neighbor sampling).
  48 // This is slower, but results in higher quality on images with highly saturated colors.
  49 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
  50
  51 /// Input stream interface.
  52 /// This delegate is called when the internal input buffer is empty.
  53 /// Parameters:
  54 ///   pBuf - input buffer (the delegate writes the bytes it has read here)
  55 ///   max_bytes_to_read - maximum number of bytes that can be written to pBuf
  56 ///   pEOF_flag - set *pEOF_flag to true if at end of stream (no more bytes remaining)
  57 ///   Returns: -1 on error, otherwise the number of bytes actually written to the buffer (which may be 0).
  58 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
  59 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
  60
  61
  62 // ////////////////////////////////////////////////////////////////////////// //
  63 private:
  64 void *jpgd_malloc (usize nSize) { import core.stdc.stdlib : malloc; return malloc(nSize); }
  65 void jpgd_free (void *p) { import core.stdc.stdlib : free; if (p !is null) free(p); }
  66
  67 // Success/failure error codes.
  68 alias jpgd_status = int;
  69 enum /*jpgd_status*/ {
  70   JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
  71   JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
  72   JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
  73   JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
  74   JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
  75   JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
  76   JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
  77   JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
  78   JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
  79 }
  80
  81 enum {
  82   JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
  83   JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
  84 }
  85
  86 // DCT coefficients are stored in this sequence.
  87 static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
  88
  89 alias JPEG_MARKER = int;
  90 enum /*JPEG_MARKER*/ {
  91   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
  92   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
  93   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
  94   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
  95   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
  96 }
  97
  98 alias JPEG_SUBSAMPLING = int;
  99 enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
 100
 101 enum CONST_BITS = 13;
 102 enum PASS1_BITS = 2;
 103 enum SCALEDONE = cast(int)1;
 104
 105 enum FIX_0_298631336 = cast(int)2446;  /* FIX(0.298631336) */
 106 enum FIX_0_390180644 = cast(int)3196;  /* FIX(0.390180644) */
 107 enum FIX_0_541196100 = cast(int)4433;  /* FIX(0.541196100) */
 108 enum FIX_0_765366865 = cast(int)6270;  /* FIX(0.765366865) */
 109 enum FIX_0_899976223 = cast(int)7373;  /* FIX(0.899976223) */
 110 enum FIX_1_175875602 = cast(int)9633;  /* FIX(1.175875602) */
 111 enum FIX_1_501321110 = cast(int)12299; /* FIX(1.501321110) */
 112 enum FIX_1_847759065 = cast(int)15137; /* FIX(1.847759065) */
 113 enum FIX_1_961570560 = cast(int)16069; /* FIX(1.961570560) */
 114 enum FIX_2_053119869 = cast(int)16819; /* FIX(2.053119869) */
 115 enum FIX_2_562915447 = cast(int)20995; /* FIX(2.562915447) */
 116 enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
 117
 118 int DESCALE() (int x, int n) { pragma(inline, true); return (((x) + (SCALEDONE << ((n)-1))) >> (n)); }
 119 int DESCALE_ZEROSHIFT() (int x, int n) { pragma(inline, true); return (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)); }
 120 ubyte CLAMP() (int i) { pragma(inline, true); return cast(ubyte)(cast(uint)i > 255 ? (((~i) >> 31) & 0xFF) : i); }
 121
 122
 123 // Compiler creates a fast path 1D IDCT for X non-zero columns
 124 struct Row(int NONZERO_COLS) {
 125 pure nothrow @trusted @nogc:
 126   static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
 127     static if (NONZERO_COLS == 0) {
 128       // nothing
 129     } else static if (NONZERO_COLS == 1) {
 130       immutable int dcval = (pSrc[0] << PASS1_BITS);
 131       pTemp[0] = dcval;
 132       pTemp[1] = dcval;
 133       pTemp[2] = dcval;
 134       pTemp[3] = dcval;
 135       pTemp[4] = dcval;
 136       pTemp[5] = dcval;
 137       pTemp[6] = dcval;
 138       pTemp[7] = dcval;
 139     } else {
 140       // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
 141       //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
 142       template ACCESS_COL(int x) {
 143         static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
 144       }
 145
 146       immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);
 147
 148       immutable int z1 = (z2 + z3)*FIX_0_541196100;
 149       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
 150       immutable int tmp3 = z1 + z2*FIX_0_765366865;
 151
 152       immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
 153       immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;
 154
 155       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
 156
 157       immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);
 158
 159       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
 160       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
 161
 162       immutable int az1 = bz1*(-FIX_0_899976223);
 163       immutable int az2 = bz2*(-FIX_2_562915447);
 164       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
 165       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
 166
 167       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
 168       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
 169       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
 170       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
 171
 172       pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
 173       pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
 174       pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
 175       pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
 176       pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
 177       pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
 178       pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
 179       pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
 180     }
 181   }
 182 }
 183
 184
 185 // Compiler creates a fast path 1D IDCT for X non-zero rows
 186 struct Col (int NONZERO_ROWS) {
 187 pure nothrow @trusted @nogc:
 188   static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
 189     static assert(NONZERO_ROWS > 0);
 190     static if (NONZERO_ROWS == 1) {
 191       int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
 192       immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
 193       pDst_ptr[0*8] = dcval_clamped;
 194       pDst_ptr[1*8] = dcval_clamped;
 195       pDst_ptr[2*8] = dcval_clamped;
 196       pDst_ptr[3*8] = dcval_clamped;
 197       pDst_ptr[4*8] = dcval_clamped;
 198       pDst_ptr[5*8] = dcval_clamped;
 199       pDst_ptr[6*8] = dcval_clamped;
 200       pDst_ptr[7*8] = dcval_clamped;
 201     } else {
 202       // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
 203       //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
 204       template ACCESS_ROW(int x) {
 205         static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
 206       }
 207
 208       immutable int z2 = mixin(ACCESS_ROW!2);
 209       immutable int z3 = mixin(ACCESS_ROW!6);
 210
 211       immutable int z1 = (z2 + z3)*FIX_0_541196100;
 212       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
 213       immutable int tmp3 = z1 + z2*FIX_0_765366865;
 214
 215       immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
 216       immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;
 217
 218       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
 219
 220       immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);
 221
 222       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
 223       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
 224
 225       immutable int az1 = bz1*(-FIX_0_899976223);
 226       immutable int az2 = bz2*(-FIX_2_562915447);
 227       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
 228       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
 229
 230       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
 231       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
 232       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
 233       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
 234
 235       int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
 236       pDst_ptr[8*0] = cast(ubyte)CLAMP(i);
 237
 238       i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
 239       pDst_ptr[8*7] = cast(ubyte)CLAMP(i);
 240
 241       i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
 242       pDst_ptr[8*1] = cast(ubyte)CLAMP(i);
 243
 244       i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
 245       pDst_ptr[8*6] = cast(ubyte)CLAMP(i);
 246
 247       i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
 248       pDst_ptr[8*2] = cast(ubyte)CLAMP(i);
 249
 250       i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
 251       pDst_ptr[8*5] = cast(ubyte)CLAMP(i);
 252
 253       i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
 254       pDst_ptr[8*3] = cast(ubyte)CLAMP(i);
 255
 256       i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
 257       pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
 258     }
 259   }
 260 }
 261
 262
 263 static immutable ubyte[512] s_idct_row_table = [
 264   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
 265   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
 266   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
 267   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
 268   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
 269   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
 270   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
 271   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
 272 ];
 273
 274 static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
 275
 276 void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
 277   assert(block_max_zag >= 1);
 278   assert(block_max_zag <= 64);
 279
 280   if (block_max_zag <= 1)
 281   {
 282     int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
 283     k = CLAMP(k);
 284     k = k | (k<<8);
 285     k = k | (k<<16);
 286
 287     for (int i = 8; i > 0; i--)
 288     {
 289       *cast(int*)&pDst_ptr[0] = k;
 290       *cast(int*)&pDst_ptr[4] = k;
 291       pDst_ptr += 8;
 292     }
 293     return;
 294   }
 295
 296   int[64] temp;
 297
 298   const(jpeg_decoder.jpgd_block_t)* pSrc = pSrc_ptr;
 299   int* pTemp = temp.ptr;
 300
 301   const(ubyte)* pRow_tab = &s_idct_row_table.ptr[(block_max_zag - 1) * 8];
 302   int i;
 303   for (i = 8; i > 0; i--, pRow_tab++)
 304   {
 305     switch (*pRow_tab)
 306     {
 307       case 0: Row!(0).idct(pTemp, pSrc); break;
 308       case 1: Row!(1).idct(pTemp, pSrc); break;
 309       case 2: Row!(2).idct(pTemp, pSrc); break;
 310       case 3: Row!(3).idct(pTemp, pSrc); break;
 311       case 4: Row!(4).idct(pTemp, pSrc); break;
 312       case 5: Row!(5).idct(pTemp, pSrc); break;
 313       case 6: Row!(6).idct(pTemp, pSrc); break;
 314       case 7: Row!(7).idct(pTemp, pSrc); break;
 315       case 8: Row!(8).idct(pTemp, pSrc); break;
 316       default: assert(0);
 317     }
 318
 319     pSrc += 8;
 320     pTemp += 8;
 321   }
 322
 323   pTemp = temp.ptr;
 324
 325   immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
 326   for (i = 8; i > 0; i--)
 327   {
 328     switch (nonzero_rows)
 329     {
 330       case 1: Col!(1).idct(pDst_ptr, pTemp); break;
 331       case 2: Col!(2).idct(pDst_ptr, pTemp); break;
 332       case 3: Col!(3).idct(pDst_ptr, pTemp); break;
 333       case 4: Col!(4).idct(pDst_ptr, pTemp); break;
 334       case 5: Col!(5).idct(pDst_ptr, pTemp); break;
 335       case 6: Col!(6).idct(pDst_ptr, pTemp); break;
 336       case 7: Col!(7).idct(pDst_ptr, pTemp); break;
 337       case 8: Col!(8).idct(pDst_ptr, pTemp); break;
 338       default: assert(0);
 339     }
 340
 341     pTemp++;
 342     pDst_ptr++;
 343   }
 344 }
 345
 346 void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
 347   int[64] temp;
 348   int* pTemp = temp.ptr;
 349   const(jpeg_decoder.jpgd_block_t)* pSrc = pSrc_ptr;
 350
 351   for (int i = 4; i > 0; i--)
 352   {
 353     Row!(4).idct(pTemp, pSrc);
 354     pSrc += 8;
 355     pTemp += 8;
 356   }
 357
 358   pTemp = temp.ptr;
 359   for (int i = 8; i > 0; i--)
 360   {
 361     Col!(4).idct(pDst_ptr, pTemp);
 362     pTemp++;
 363     pDst_ptr++;
 364   }
 365 }
 366
 367
 368 // ////////////////////////////////////////////////////////////////////////// //
 369 struct jpeg_decoder {
 370 private import core.stdc.string : memcpy, memset;
 371 private:
 372   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
 373   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
 374
 375   alias jpgd_quant_t = short; // element type of the quantization tables (see m_quant)
 376   alias jpgd_block_t = short; // element type of a coefficient block (see m_pMCU_coefficients and idct)
 377   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int); // block-decoding callback type; NOTE(review): the meaning of the three ints is not visible in this part of the file
 378
 379   static struct huff_tables {
 380     bool ac_table;        // presumably true when this is an AC table -- TODO confirm where the tables are built
 381     uint[256] look_up;    // indexed by the top 8 bits of the bit buffer: the symbol, or a value that is negative when read as int, in which case huff_decode walks `tree`
 382     uint[256] look_up2;   // same indexing; a non-negative entry packs the symbol (bits 0..7, low 4 bits = number of extra bits), a bit count (bits 8..12) and, when bit 15 is set, the extra bits themselves (bits 16 and up)
 383     ubyte[256] code_size; // code length in bits, indexed by symbol
 384     uint[512] tree;       // walked one bit at a time by huff_decode (index = negated running value) until a non-negative symbol is read
 385   }
 386
 387   static struct coeff_buf {
       // Descriptor of a block-organised coefficient buffer (pointed to by m_dc_coeffs / m_ac_coeffs).
       // NOTE(review): the field meanings below are inferred from the names only; the code that fills them is outside this part of the file.
 388     ubyte* pData;                 // start of the storage
 389     int block_num_x, block_num_y; // presumably the number of blocks horizontally / vertically -- TODO confirm
 390     int block_len_x, block_len_y; // presumably the dimensions of one block -- TODO confirm
 391     int block_size;               // presumably the size of one block in bytes -- TODO confirm
 392   }
 393
 394   static struct mem_block {
       // Header of one allocator block; blocks form a singly linked list (see m_pMem_blocks).
       // NOTE(review): the allocator itself is outside this part of the file, the field notes are inferred from the names.
 395     mem_block* m_pNext;  // next block in the list
 396     usize m_used_count;  // presumably the number of bytes of m_data already handed out -- TODO confirm
 397     usize m_size;        // presumably the capacity of m_data in bytes -- TODO confirm
 398     char[1] m_data;      // first byte of the payload; presumably the real payload extends past the struct -- TODO confirm
 399   }
 400
 401   mem_block* m_pMem_blocks;                     // list of allocator blocks (linked through mem_block.m_pNext)
 402   int m_image_x_size;                           // reported by width()
 403   int m_image_y_size;                           // reported by height()
 404   JpegStreamReadFunc readfn;                    // input callback, see JpegStreamReadFunc
 405   int m_progressive_flag;                       // non-zero: decode() refills with load_next_row() instead of decode_next_row()
 406   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;        // NOTE(review): presumably marks which Huffman tables are AC tables -- confirm
 407   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
 408   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
 409   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
 410   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
 411   int m_comps_in_frame;                         // # of components in frame
 412   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
 413   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
 414   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
 415   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
 416   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
 417   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
 418   int m_comps_in_scan;                          // # of components in scan
 419   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
 420   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
 421   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
 422   int m_spectral_start;                         // spectral selection start
 423   int m_spectral_end;                           // spectral selection end
 424   int m_successive_low;                         // successive approximation low
 425   int m_successive_high;                        // successive approximation high
 426   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
 427   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
 428   int m_blocks_per_mcu;
 429   int m_max_blocks_per_row;
 430   int m_mcus_per_row, m_mcus_per_col;
 431   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
 432   int m_total_lines_left;                       // total # lines left in image
 433   int m_mcu_lines_left;                         // total # lines left in this MCU
 434   int m_real_dest_bytes_per_scan_line;          // length reported by decode() through *pScan_line_len
 435   int m_dest_bytes_per_scan_line;               // rounded up
 436   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
 437   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
 438   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
 439   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
 440   int m_eob_run;
 441   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
 442   ubyte* m_pIn_buf_ofs;                         // next unread input byte (advanced by get_char, stepped back by stuff_char)
 443   int m_in_buf_left;                            // number of unread bytes at m_pIn_buf_ofs
 444   int m_tem_flag;                               // toggled by get_char to alternate the 0xFF / 0xD9 padding once the input is exhausted
 445   bool m_eof_flag;
 446   ubyte[128] m_in_buf_pad_start;                // NOTE(review): presumably slack in front of m_in_buf so stuff_char can step back -- confirm
 447   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
 448   ubyte[128] m_in_buf_pad_end;
 449   int m_bits_left;                              // bit accounting for m_bit_buf; get_bits refills 16 bits when it drops to 0 or below
 450   uint m_bit_buf;                               // bit buffer; bits are taken from its most significant end
 451   int m_restart_interval;
 452   int m_restarts_left;
 453   int m_next_restart_num;
 454   int m_max_mcus_per_row;
 455   int m_max_blocks_per_mcu;
 456   int m_expanded_blocks_per_mcu;
 457   int m_expanded_blocks_per_row;
 458   int m_expanded_blocks_per_component;
 459   bool m_freq_domain_chroma_upsample;           // when set, decode() converts every line with expanded_convert()
 460   int m_max_mcus_per_col;
 461   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
 462   jpgd_block_t* m_pMCU_coefficients;
 463   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
 464   ubyte* m_pSample_buf;
 465   int[256] m_crr;
 466   int[256] m_cbb;
 467   int[256] m_crg;
 468   int[256] m_cbg;
 469   ubyte* m_pScan_line_0;                        // scan line buffer handed out by decode()
 470   ubyte* m_pScan_line_1;                        // second scan line buffer handed out by decode() on odd lines of H2V2 / H1V2 scans
 471   jpgd_status m_error_code;                     // returned by error_code(); non-zero makes begin_decoding() and decode() fail
 472   bool m_ready_flag;                            // set once begin_decoding() has succeeded
 473   int m_total_bytes_read;                       // returned by total_bytes_read()
 474
 475 public:
 476   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
 477   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
 478   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
 479
 480   ~this () { free_all_blocks(); }
 481
 482   @disable this (this); // no copies
 483
 484   // Call this method after constructing the object to begin decompression.
 485   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
 486   int begin_decoding () {
 487     if (m_ready_flag) return JPGD_SUCCESS;
 488     if (m_error_code) return JPGD_FAILED;
 489     try {
 490       decode_start();
 491       m_ready_flag = true;
 492       return JPGD_SUCCESS;
 493     } catch (Exception) {}
 494     return JPGD_FAILED;
 495   }
 496
 497   // Returns the next scan line.
 498   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
 499   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
 500   // Returns JPGD_SUCCESS if a scan line has been returned.
 501   // Returns JPGD_DONE if all scan lines have been returned.
 502   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
 503   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
 504     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
 505     if (m_total_lines_left == 0) return JPGD_DONE;
 506     try {
 507       if (m_mcu_lines_left == 0) {
 508         if (m_progressive_flag) load_next_row(); else decode_next_row();
 509         // Find the EOI marker if that was the last row.
 510         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
 511         m_mcu_lines_left = m_max_mcu_y_size;
 512       }
 513       if (m_freq_domain_chroma_upsample) {
 514         expanded_convert();
 515         *pScan_line = m_pScan_line_0;
 516       } else {
 517         switch (m_scan_type) {
 518           case JPGD_YH2V2:
 519             if ((m_mcu_lines_left & 1) == 0) {
 520               H2V2Convert();
 521               *pScan_line = m_pScan_line_0;
 522             } else {
 523               *pScan_line = m_pScan_line_1;
 524             }
 525             break;
 526           case JPGD_YH2V1:
 527             H2V1Convert();
 528             *pScan_line = m_pScan_line_0;
 529             break;
 530           case JPGD_YH1V2:
 531             if ((m_mcu_lines_left & 1) == 0) {
 532               H1V2Convert();
 533               *pScan_line = m_pScan_line_0;
 534             } else {
 535               *pScan_line = m_pScan_line_1;
 536             }
 537             break;
 538           case JPGD_YH1V1:
 539             H1V1Convert();
 540             *pScan_line = m_pScan_line_0;
 541             break;
 542           case JPGD_GRAYSCALE:
 543             gray_convert();
 544             *pScan_line = m_pScan_line_0;
 545             break;
 546           default:
 547         }
 548       }
 549       *pScan_line_len = m_real_dest_bytes_per_scan_line;
 550       --m_mcu_lines_left;
 551       --m_total_lines_left;
 552       return JPGD_SUCCESS;
 553     } catch (Exception) {}
 554     return JPGD_FAILED;
 555   }
 556
 557   @property const pure nothrow @safe @nogc {
 558     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
 559
 560     int width () { pragma(inline, true); return m_image_x_size; }
 561     int height () { pragma(inline, true); return m_image_y_size; }
 562
 563     int num_components () { pragma(inline, true); return m_comps_in_frame; }
 564
 565     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
 566     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
 567
 568     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
 569     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
 570   }
 571
 572 private:
 573   // Retrieve one character from the input stream.
 574   uint get_char () {
 575     // Any bytes remaining in buffer?
 576     if (!m_in_buf_left) {
 577       // Try to get more bytes.
 578       prep_in_buffer();
 579       // Still nothing to get?
 580       if (!m_in_buf_left) {
 581         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
 582         int t = m_tem_flag;
 583         m_tem_flag ^= 1;
 584         return (t ? 0xD9 : 0xFF);
 585       }
 586     }
 587     uint c = *m_pIn_buf_ofs++;
 588     --m_in_buf_left;
 589     return c;
 590   }
 591
 592   // Same as previous method, except can indicate if the character is a pad character or not.
 593   uint get_char (bool* pPadding_flag) {
 594     if (!m_in_buf_left) {
 595       prep_in_buffer();
 596       if (!m_in_buf_left) {
 597         *pPadding_flag = true;
 598         int t = m_tem_flag;
 599         m_tem_flag ^= 1;
 600         return (t ? 0xD9 : 0xFF);
 601       }
 602     }
 603     *pPadding_flag = false;
 604     uint c = *m_pIn_buf_ofs++;
 605     --m_in_buf_left;
 606     return c;
 607   }
 608
 609   // Inserts a previously retrieved character back into the input buffer.
 610   void stuff_char (ubyte q) {
 611     *(--m_pIn_buf_ofs) = q;
 612     m_in_buf_left++;
 613   }
 614
 615   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
 616   ubyte get_octet () {
 617     bool padding_flag;
 618     int c = get_char(&padding_flag);
 619     if (c == 0xFF) {
 620       if (padding_flag) return 0xFF;
 621       c = get_char(&padding_flag);
 622       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
 623       if (c == 0x00) return 0xFF;
 624       stuff_char(cast(ubyte)(c));
 625       stuff_char(0xFF);
 626       return 0xFF;
 627     }
 628     return cast(ubyte)(c);
 629   }
 630
 631   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
 632   uint get_bits (int num_bits) {
 633     if (!num_bits) return 0;
 634     uint i = m_bit_buf >> (32 - num_bits);
 635     if ((m_bits_left -= num_bits) <= 0) {
 636       m_bit_buf <<= (num_bits += m_bits_left);
 637       uint c1 = get_char();
 638       uint c2 = get_char();
 639       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
 640       m_bit_buf <<= -m_bits_left;
 641       m_bits_left += 16;
 642       assert(m_bits_left >= 0);
 643     } else {
 644       m_bit_buf <<= num_bits;
 645     }
 646     return i;
 647   }
 648
 649   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
 650   uint get_bits_no_markers (int num_bits) {
 651     if (!num_bits) return 0;
 652     uint i = m_bit_buf >> (32 - num_bits);
 653     if ((m_bits_left -= num_bits) <= 0) {
 654       m_bit_buf <<= (num_bits += m_bits_left);
 655       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
 656         uint c1 = get_octet();
 657         uint c2 = get_octet();
 658         m_bit_buf |= (c1 << 8) | c2;
 659       } else {
 660         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
 661         m_in_buf_left -= 2;
 662         m_pIn_buf_ofs += 2;
 663       }
 664       m_bit_buf <<= -m_bits_left;
 665       m_bits_left += 16;
 666       assert(m_bits_left >= 0);
 667     } else {
 668       m_bit_buf <<= num_bits;
 669     }
 670     return i;
 671   }
 672
 673   // Decodes a Huffman encoded symbol.
 674   int huff_decode (huff_tables *pH) {
 675     int symbol;
 676     // Check first 8-bits: do we have a complete symbol?
 677     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
 678       // Decode more bits, use a tree traversal to find symbol.
 679       int ofs = 23;
 680       do {
 681         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
 682         --ofs;
 683       } while (symbol < 0);
 684       get_bits_no_markers(8 + (23 - ofs));
 685     } else {
 686       get_bits_no_markers(pH.code_size.ptr[symbol]);
 687     }
 688     return symbol;
 689   }
 690
 691   // Decodes a Huffman encoded symbol.
       // This overload also fetches the extra bits that follow the code (their count is the low nibble of the
       // symbol) and stores them in extra_bits. A non-negative pH.look_up2 entry is unpacked below as:
       //   bits 0..7   the symbol,
       //   bits 8..12  number of bits to consume,
       //   bit 15      (0x8000) set when the extra bits are already stored in bits 16 and up.
 692   int huff_decode (huff_tables *pH, ref int extra_bits) {
 693     int symbol;
 694     // Check first 8-bits: do we have a complete symbol?
 695     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
 696       // Use a tree traversal to find symbol.
 697       int ofs = 23;
 698       do {
 699         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
 700         --ofs;
 701       } while (symbol < 0);
           // Consume the 8 lookup bits plus one bit per tree step, then read the extra bits separately.
 702       get_bits_no_markers(8 + (23 - ofs));
 703       extra_bits = get_bits_no_markers(symbol & 0xF);
 704     } else {
           // Sanity check of the packed entry: the stored bit count is the code size, plus the extra bit count
           // when the 0x8000 flag is set.
 705       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
 706       if (symbol & 0x8000) {
             // Extra bits come from the table entry itself; only the stream position has to be advanced.
 707         get_bits_no_markers((symbol >> 8) & 31);
 708         extra_bits = symbol >> 16;
 709       } else {
 710         int code_size = (symbol >> 8) & 31;
 711         int num_extra_bits = symbol & 0xF;
 712         int bits = code_size + num_extra_bits;
             // Code and extra bits are fetched with a single call when their combined length is within
             // m_bits_left + 16, otherwise with two separate calls.
 713         if (bits <= (m_bits_left + 16)) {
 714           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
 715         } else {
 716           get_bits_no_markers(code_size);
 717           extra_bits = get_bits_no_markers(num_extra_bits);
 718         }
 719       }
           // Strip the packed fields so that only the symbol itself is returned.
 720       symbol &= 0xFF;
 721     }
 722     return symbol;
 723   }
 724
 725   // Lookup tables and helper used to turn decoded magnitude bits into signed DPCM differences.
 726   static immutable int[16] s_extend_test = [ 0, 1<<0, 1<<1, 1<<2, 1<<3, 1<<4, 1<<5, 1<<6, 1<<7, 1<<8, 1<<9, 1<<10, 1<<11, 1<<12, 1<<13, 1<<14 ];
 727   static immutable int[16] s_extend_offset = [ 0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4), 1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8), 1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12), 1 - (1<<13), 1 - (1<<14), 1 - (1<<15) ];
 728   static immutable int[18] s_extend_mask = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 ];
 729   // JPGD_HUFF_EXTEND(x, s): when x is below s_extend_test[s] (2^(s-1)) the result is x + s_extend_offset[s] (x + 1 - 2^s); otherwise x is returned unchanged.
 730   // s is masked with 15 so that the unchecked .ptr reads always stay inside the 16-entry tables.
 731   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); immutable k = s & 15; return (x < s_extend_test.ptr[k] ? x + s_extend_offset.ptr[k] : x); }
 732
 733   // Clamps a value between 0-255. `clamp` is just another name for CLAMP, which is defined outside this section.
 734   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
       // (the line above is the former inline implementation, left commented out)
 735   alias clamp = CLAMP;
 736
 737   static struct DCT_Upsample {
 738   static:
 739     // Small fixed-size 4x4 matrix of ints, the working type of the DCT upsampling helpers.
 740     // All operations are element-wise; at() indexes through raw pointers without bounds checks.
 741     static struct Matrix44 {
 742     pure nothrow @trusted @nogc:
 743       alias Element_Type = int;
 744       enum { NUM_ROWS = 4, NUM_COLS = 4 }
 745
 746       // Storage, addressed as v[row][col].
 747       Element_Type[NUM_COLS][NUM_ROWS] v;
 748
 749       // Builds a copy of m.
 750       this() (in auto ref Matrix44 m) {
 751         foreach (immutable row; 0..NUM_ROWS) {
 752           foreach (immutable col; 0..NUM_COLS) v[row][col] = m.v[row][col];
 753         }
 754       }
 755
 756       // Reference to element (r, c). No range check is made: callers must pass 0 <= r, c < 4.
 757       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
 758
 759       // this += a, element by element.
 760       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
 761         foreach (int r; 0..NUM_ROWS) {
 762           foreach (int c; 0..NUM_COLS) at(r, c) += a.at(r, c);
 763         }
 764         return this;
 765       }
 766
 767       // this -= a, element by element.
 768       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
 769         foreach (int r; 0..NUM_ROWS) {
 770           foreach (int c; 0..NUM_COLS) at(r, c) -= a.at(r, c);
 771         }
 772         return this;
 773       }
 774
 775       // Returns the element-wise sum this + b as a new matrix.
 776       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
 777         Matrix44 sum;
 778         foreach (int r; 0..NUM_ROWS) {
 779           foreach (int c; 0..NUM_COLS) sum.at(r, c) = at(r, c) + b.at(r, c);
 780         }
 781         return sum;
 782       }
 783
 784       // Returns the element-wise difference this - b as a new matrix.
 785       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
 786         Matrix44 diff;
 787         foreach (int r; 0..NUM_ROWS) {
 788           foreach (int c; 0..NUM_COLS) diff.at(r, c) = at(r, c) - b.at(r, c);
 789         }
 790         return diff;
 791       }
 792
 793       // Stores a + b into a buffer with a row stride of 8, transposed: element (r, c) is written
 794       // to pDst[c*8 + r]. Every sum is narrowed to jpgd_block_t by a cast.
 795       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
 796         foreach (int r; 0..4) {
 797           foreach (int c; 0..4) pDst[c*8 + r] = cast(jpgd_block_t)(a.at(r, c) + b.at(r, c));
 798         }
 799       }
 800
 801       // Stores a - b with the same layout: element (r, c) is written to pDst[c*8 + r].
 802       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
 803         foreach (int r; 0..4) {
 804           foreach (int c; 0..4) pDst[c*8 + r] = cast(jpgd_block_t)(a.at(r, c) - b.at(r, c));
 805         }
 806       }
 807     }
 817
 818     // Fixed-point format shared by the upsampling matrices: FRACT_BITS fractional bits, SCALE represents 1.0.
 819     enum FRACT_BITS = 10;
 820     enum SCALE = 1 << FRACT_BITS;
 821
 822     alias Temp_Type = int;
 823
 824     // D(i): scales a fixed-point value back down by FRACT_BITS, adding SCALE/2 first so that the
 825     // right shift rounds instead of truncating.
 826     static int D(T) (T i) { pragma(inline, true); return (i + (SCALE >> 1)) >> FRACT_BITS; }
 827
 828     // F!(x): compile-time conversion of the float constant x to fixed point (x * SCALE + 0.5, then cast to int).
 829     enum F(float i) = cast(int)(i * SCALE + 0.5f);
 830
 831     // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
         // P_Q.calc fills the 4x4 matrices P and Q from the coefficients at pSrc (row stride 8) in two
         // stages: first the X0.. temporaries, then P and Q from those. Products use the F!() fixed-point
         // constants and are scaled back with D().
 832     static struct P_Q(int NUM_ROWS, int NUM_COLS) {
 833       static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
 834         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
             // AT!(c, r) is a compile-time string for mixin(): the text "0" when (c, r) lies outside the
             // NUM_COLS x NUM_ROWS non-zero region, otherwise the text of the load pSrc[c + r*8].
 835         template AT(int c, int r) {
 836           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
 837         }
 838         // 4x8 = 4x8 times 8x8, matrix 0 is constant
 839         immutable Temp_Type X000 = mixin(AT!(0, 0));
 840         immutable Temp_Type X001 = mixin(AT!(0, 1));
 841         immutable Temp_Type X002 = mixin(AT!(0, 2));
 842         immutable Temp_Type X003 = mixin(AT!(0, 3));
 843         immutable Temp_Type X004 = mixin(AT!(0, 4));
 844         immutable Temp_Type X005 = mixin(AT!(0, 5));
 845         immutable Temp_Type X006 = mixin(AT!(0, 6));
 846         immutable Temp_Type X007 = mixin(AT!(0, 7));
 847         immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
 848         immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
 849         immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
 850         immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
 851         immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
 852         immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
 853         immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
 854         immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
 855         immutable Temp_Type X020 = mixin(AT!(4, 0));
 856         immutable Temp_Type X021 = mixin(AT!(4, 1));
 857         immutable Temp_Type X022 = mixin(AT!(4, 2));
 858         immutable Temp_Type X023 = mixin(AT!(4, 3));
 859         immutable Temp_Type X024 = mixin(AT!(4, 4));
 860         immutable Temp_Type X025 = mixin(AT!(4, 5));
 861         immutable Temp_Type X026 = mixin(AT!(4, 6));
 862         immutable Temp_Type X027 = mixin(AT!(4, 7));
 863         immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
 864         immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
 865         immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
 866         immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
 867         immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
 868         immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
 869         immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
 870         immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));
 871
 872         // 4x4 = 4x8 times 8x4, matrix 1 is constant
 873         P.at(0, 0) = X000;
 874         P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
 875         P.at(0, 2) = X004;
 876         P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
 877         P.at(1, 0) = X010;
 878         P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
 879         P.at(1, 2) = X014;
 880         P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
 881         P.at(2, 0) = X020;
 882         P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
 883         P.at(2, 2) = X024;
 884         P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
 885         P.at(3, 0) = X030;
 886         P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
 887         P.at(3, 2) = X034;
 888         P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
 889         // 40 muls 24 adds
 890
 891         // 4x4 = 4x8 times 8x4, matrix 1 is constant
 892         Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
 893         Q.at(0, 1) = X002;
 894         Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
 895         Q.at(0, 3) = X006;
 896         Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
 897         Q.at(1, 1) = X012;
 898         Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
 899         Q.at(1, 3) = X016;
 900         Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
 901         Q.at(2, 1) = X022;
 902         Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
 903         Q.at(2, 3) = X026;
 904         Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
 905         Q.at(3, 1) = X032;
 906         Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
 907         Q.at(3, 3) = X036;
 908         // 40 muls 24 adds
 909       }
 910     }
 911
         // R_S.calc fills the 4x4 matrices R and S from the coefficients at pSrc (row stride 8).
         // NUM_ROWS/NUM_COLS bound the region of pSrc that is actually read: AT!() yields a literal 0 outside it.
 912     static struct R_S(int NUM_ROWS, int NUM_COLS) {
 913       static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
 914         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
             // AT!(c, r) is a compile-time string for mixin(): the text "0" when c >= NUM_COLS or r >= NUM_ROWS,
             // otherwise the text of the load pSrc[c + r*8].
 915         template AT(int c, int r) {
 916           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
 917         }
 918         // 4x8 = 4x8 times 8x8, matrix 0 is constant
 919         immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
 920         immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
 921         immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
 922         immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
 923         immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
 924         immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
 925         immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
 926         immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
 927         immutable Temp_Type X110 = mixin(AT!(2, 0));
 928         immutable Temp_Type X111 = mixin(AT!(2, 1));
 929         immutable Temp_Type X112 = mixin(AT!(2, 2));
 930         immutable Temp_Type X113 = mixin(AT!(2, 3));
 931         immutable Temp_Type X114 = mixin(AT!(2, 4));
 932         immutable Temp_Type X115 = mixin(AT!(2, 5));
 933         immutable Temp_Type X116 = mixin(AT!(2, 6));
 934         immutable Temp_Type X117 = mixin(AT!(2, 7));
 935         immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
 936         immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
 937         immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
 938         immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
 939         immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
 940         immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
 941         immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
 942         immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
 943         immutable Temp_Type X130 = mixin(AT!(6, 0));
 944         immutable Temp_Type X131 = mixin(AT!(6, 1));
 945         immutable Temp_Type X132 = mixin(AT!(6, 2));
 946         immutable Temp_Type X133 = mixin(AT!(6, 3));
 947         immutable Temp_Type X134 = mixin(AT!(6, 4));
 948         immutable Temp_Type X135 = mixin(AT!(6, 5));
 949         immutable Temp_Type X136 = mixin(AT!(6, 6));
 950         immutable Temp_Type X137 = mixin(AT!(6, 7));
 951         // 80 muls 48 adds
 952
 953         // 4x4 = 4x8 times 8x4, matrix 1 is constant
 954         R.at(0, 0) = X100;
 955         R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
 956         R.at(0, 2) = X104;
 957         R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
 958         R.at(1, 0) = X110;
 959         R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
 960         R.at(1, 2) = X114;
 961         R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
 962         R.at(2, 0) = X120;
 963         R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
 964         R.at(2, 2) = X124;
 965         R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
 966         R.at(3, 0) = X130;
 967         R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
 968         R.at(3, 2) = X134;
 969         R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
 970         // 40 muls 24 adds
 971         // 4x4 = 4x8 times 8x4, matrix 1 is constant
 972         S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
 973         S.at(0, 1) = X102;
 974         S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
 975         S.at(0, 3) = X106;
 976         S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
 977         S.at(1, 1) = X112;
 978         S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
 979         S.at(1, 3) = X116;
 980         S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
 981         S.at(2, 1) = X122;
 982         S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
 983         S.at(2, 3) = X126;
 984         S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
 985         S.at(3, 1) = X132;
 986         S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
 987         S.at(3, 3) = X136;
 988         // 40 muls 24 adds
 989       }
 990     }
 991   } // end namespace DCT_Upsample
 992
 993   // Releases every block on the m_pMem_blocks list and resets readfn to null.
 994   void free_all_blocks () {
 995     readfn = null;
 996     mem_block* cur = m_pMem_blocks;
 997     while (cur !is null) {
 998       mem_block* next = cur.m_pNext;  // read the link before the node is freed
 999       jpgd_free(cur);
1000       cur = next;
1001     }
1002     m_pMem_blocks = null;
1003   }
1004
1005   // Central error exit: records `status` in m_error_code, releases all allocator blocks
1006   // (free_all_blocks also resets readfn to null) and throws, so it never returns to the caller.
1007   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status) {
1008     m_error_code = status;
1009     free_all_blocks();
1010     // A longjmp(m_jmp_state, status) used to sit here; this port throws an Exception instead.
1011     throw new Exception("jpeg decoding error");
1012   }
1013
1014   // Hands out nSize bytes (rounded up to a multiple of 4, at least 4) from the block list; a new block is malloc'ed when none has room. Cleared only if `zero` is set.
1015   void* alloc (usize nSize, bool zero=false) {
1016     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1017     char* mem = null;
1018     mem_block* blk = m_pMem_blocks;
1019     while (blk) {
1020       if ((blk.m_used_count + nSize) <= blk.m_size) {
1021         mem = blk.m_data.ptr + blk.m_used_count;
1022         blk.m_used_count += nSize;
1023         break;
1024       }
1025       blk = blk.m_pNext;
1026     }
1027     if (mem is null) {
1028       int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1029       mem_block* fresh = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1030       if (fresh is null) stop_decoding(JPGD_NOTENOUGHMEM);
1031       fresh.m_pNext = m_pMem_blocks; m_pMem_blocks = fresh;  // push onto the head of the list
1032       fresh.m_used_count = nSize;
1033       fresh.m_size = capacity;
1034       mem = fresh.m_data.ptr;
1035     }
1036     if (zero) memset(mem, 0, nSize);
1037     return mem;
1038   }
1039
1040   // Stores the 16-bit value c into n consecutive words starting at p, low byte first.
1041   void word_clear (void *p, ushort c, uint n) {
1042     immutable ubyte lo = c & 0xFF, hi = (c >> 8) & 0xFF;
1043     ubyte* dst = cast(ubyte*)p;
1044     for (; n != 0; --n) {
1045       *dst++ = lo;
1046       *dst++ = hi;
1047     }
1048   }
1049
1050   // Refills the input buffer from readfn.
1051   // Reading continues until the buffer holds JPGD_IN_BUF_SIZE bytes or the callback sets the
1052   // end-of-stream flag; a return value of -1 from the callback stops decoding.
1053   void prep_in_buffer () {
1054     m_pIn_buf_ofs = m_in_buf.ptr;
1055     m_in_buf_left = 0;
1056     if (m_eof_flag) return;  // end of stream already seen: the buffer is left empty
1057
1058     for (;;) {
1059       int got = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1060       if (got == -1) stop_decoding(JPGD_STREAM_READ);
1061       m_in_buf_left += got;
1062       if (m_eof_flag || m_in_buf_left >= JPGD_IN_BUF_SIZE) break;
1063     }
1064
1065     m_total_bytes_read += m_in_buf_left;
1066
1067     // Write 64 copies of the byte pair FF D9 (M_EOI) right after the data, so that an invalid
1068     // or truncated stream runs into end-of-image markers instead of leftover bytes.
1069     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1070   }
1075
1076   // Reads a define Huffman table (DHT) segment, which may contain several tables.
1077   // Per table the stream holds one class/id byte (class in the high nibble: 0 = DC, 1 = AC;
1078   // table id in the low nibble), 16 count bytes and then as many symbol values as the counts
1079   // add up to. DC tables are stored in slots 0 .. JPGD_MAX_HUFF_TABLES/2 - 1 of
1080   // m_huff_num/m_huff_val, AC tables in the slots above them. Malformed data stops decoding.
1081   void read_dht_marker () {
1082     ubyte[17] huff_num;
1083     ubyte[256] huff_val;
1084
1085     uint num_left = get_bits(16);
1086     if (num_left < 2) stop_decoding(JPGD_BAD_DHT_MARKER);
1087     num_left -= 2;
1088
1089     while (num_left) {
1090       int index = get_bits(8);
1091
1092       // huff_num[1 .. 16] receive the count bytes; entry 0 is unused and kept at zero.
1093       huff_num.ptr[0] = 0;
1094       int count = 0;
1095       foreach (immutable len; 1..17) {
1096         huff_num.ptr[len] = cast(ubyte)(get_bits(8));
1097         count += huff_num.ptr[len];
1098       }
1099       if (count > 255) stop_decoding(JPGD_BAD_DHT_COUNTS);
1100
1101       foreach (immutable k; 0..count) huff_val.ptr[k] = cast(ubyte)(get_bits(8));
1102
1103       // Bytes used by this table: class/id byte + 16 counts + symbol values.
1104       immutable uint table_bytes = 1 + 16 + count;
1105       if (num_left < table_bytes) stop_decoding(JPGD_BAD_DHT_MARKER);
1106       num_left -= table_bytes;
1107
1108       // Validate the raw class/id byte before it is turned into a slot number:
1109       //  - the class nibble must be 0 or 1 (testing only bit 0x10 can never detect a bad class);
1110       //  - the id must fit into one half of the slots, otherwise a DC table would land in a slot
1111       //    that belongs to an AC table.
1112       if ((index & 0xF0) > 0x10) stop_decoding(JPGD_BAD_DHT_INDEX);
1113       if ((index & 0x0F) >= (JPGD_MAX_HUFF_TABLES >> 1)) stop_decoding(JPGD_BAD_DHT_INDEX);
1114
1115       // Remember the class now: after the remapping below bit 0x10 of index no longer means "AC".
1116       immutable bool is_ac = (index & 0x10) != 0;
1117       index = (index & 0x0F) + (is_ac ? (JPGD_MAX_HUFF_TABLES >> 1) : 0);
1118       if (index >= JPGD_MAX_HUFF_TABLES) stop_decoding(JPGD_BAD_DHT_INDEX);
1119
1120       // Slot storage is allocated on first use and overwritten when a table is redefined.
1121       if (!m_huff_num.ptr[index]) m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1122       if (!m_huff_val.ptr[index]) m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1123
1124       m_huff_ac.ptr[index] = is_ac;
1125       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1126       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1127     }
1128   }
1135
1136   // Reads a define quantization table (DQT) segment; one segment may hold several tables.
1137   // Each table starts with a byte whose high nibble selects the entry width (0: one byte per
1138   // entry, non-zero: two bytes, high byte first) and whose low nibble is the destination slot.
1139   // The 64 entries are stored in the order they appear in the stream (zig-zag order).
1140   void read_dqt_marker () {
1141     uint num_left = get_bits(16);
1142     if (num_left < 2) stop_decoding(JPGD_BAD_DQT_MARKER);
1143     num_left -= 2;
1144
1145     while (num_left) {
1146       int n = get_bits(8);
1147       immutable int prec = n >> 4;
1148       n &= 0x0F;
1149       if (n >= JPGD_MAX_QUANT_TABLES) stop_decoding(JPGD_BAD_DQT_TABLE);
1150
1151       // Table storage is allocated on first use and reused when the slot is redefined.
1152       if (!m_quant.ptr[n]) m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1153
1154       foreach (immutable k; 0..64) {
1155         uint entry = get_bits(8);
1156         if (prec) entry = (entry << 8) + get_bits(8);
1157         m_quant.ptr[n][k] = cast(jpgd_quant_t)(entry);
1158       }
1159
1160       // Account for what was just consumed: the header byte plus 64 or 128 entry bytes.
1161       immutable uint used = (prec ? 64 + 64 + 1 : 64 + 1);
1162       if (num_left < used) stop_decoding(JPGD_BAD_DQT_LENGTH);
1163       num_left -= used;
1164     }
1165   }
1183
1184   // Reads a start of frame (SOF) segment: sample precision, image size and, per component,
1185   // its identifier, the two sampling factors and the quantization table selector.
1186   void read_sof_marker () {
1187     immutable uint num_left = get_bits(16);
1188
1189     // Only 8-bit sample precision is supported.
1190     if (get_bits(8) != 8) stop_decoding(JPGD_BAD_PRECISION);
1191
1192     m_image_y_size = get_bits(16);
1193     if (m_image_y_size < 1 || m_image_y_size > JPGD_MAX_HEIGHT) stop_decoding(JPGD_BAD_HEIGHT);
1194
1195     m_image_x_size = get_bits(16);
1196     if (m_image_x_size < 1 || m_image_x_size > JPGD_MAX_WIDTH) stop_decoding(JPGD_BAD_WIDTH);
1197
1198     m_comps_in_frame = get_bits(8);
1199     if (m_comps_in_frame > JPGD_MAX_COMPONENTS) stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1200
1201     // The length field must be exactly 8 header bytes plus 3 bytes per component.
1202     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8)) stop_decoding(JPGD_BAD_SOF_LENGTH);
1203
1204     foreach (immutable ci; 0..m_comps_in_frame) {
1205       m_comp_ident.ptr[ci]  = get_bits(8);
1206       m_comp_h_samp.ptr[ci] = get_bits(4);  // first 4 bits of the sampling byte
1207       m_comp_v_samp.ptr[ci] = get_bits(4);  // remaining 4 bits
1208       m_comp_quant.ptr[ci]  = get_bits(8);
1209     }
1210   }
1220
1221   // Skips a marker segment this decoder does not interpret: reads the 16-bit length
1222   // (which counts its own two bytes) and discards the remaining payload bytes.
1223   void skip_variable_marker () {
1224     uint num_left = get_bits(16);
1225     if (num_left < 2) stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1226
1227     for (num_left -= 2; num_left != 0; --num_left) get_bits(8);
1228   }
1238
1239   // Reads a define restart interval (DRI) segment; its length field must be exactly 4.
1240   void read_dri_marker () {
1241     immutable uint seg_len = get_bits(16);
1242     if (seg_len != 4) stop_decoding(JPGD_BAD_DRI_LENGTH);
1243
1244     m_restart_interval = get_bits(16);
1245   }
1246
1247   // Reads a start of scan (SOS) segment: the components taking part in the scan with their
1248   // DC/AC Huffman table selectors, followed by the spectral selection and successive
1249   // approximation parameters. Malformed data stops decoding.
1250   void read_sos_marker () {
1251     uint num_left = get_bits(16);
1252     immutable int n = get_bits(8);
1253     m_comps_in_scan = n;
1254
1255     // After the length field and the count byte the segment must hold exactly
1256     // 2 bytes per component plus 3 parameter bytes.
1257     num_left -= 3;
1258     if ((num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN)) stop_decoding(JPGD_BAD_SOS_LENGTH);
1259
1260     foreach (immutable i; 0..n) {
1261       immutable int cc = get_bits(8);  // component identifier
1262       immutable int c = get_bits(8);   // DC selector in the high nibble, AC selector in the low nibble
1263       num_left -= 2;
1264
1265       // Map the identifier to its position among the frame's components.
1266       int ci = 0;
1267       while (ci < m_comps_in_frame && cc != m_comp_ident.ptr[ci]) ++ci;
1268       if (ci >= m_comps_in_frame) stop_decoding(JPGD_BAD_SOS_COMP_ID);
1269
1270       // Each table class only has JPGD_MAX_HUFF_TABLES/2 slots. Larger selectors are rejected:
1271       // otherwise the AC index computed below could reach past JPGD_MAX_HUFF_TABLES and a DC
1272       // selector could point into the AC half of the table arrays. JPGD_BAD_DHT_INDEX is the
1273       // status read_dht_marker uses for out-of-range table ids.
1274       immutable int dc_sel = (c >> 4) & 15;
1275       immutable int ac_sel = c & 15;
1276       if (dc_sel >= (JPGD_MAX_HUFF_TABLES >> 1) || ac_sel >= (JPGD_MAX_HUFF_TABLES >> 1)) stop_decoding(JPGD_BAD_DHT_INDEX);
1277
1278       m_comp_list.ptr[i]    = ci;
1279       m_comp_dc_tab.ptr[ci] = dc_sel;
1280       m_comp_ac_tab.ptr[ci] = ac_sel + (JPGD_MAX_HUFF_TABLES >> 1);
1281     }
1282
1283     m_spectral_start  = get_bits(8);
1284     m_spectral_end    = get_bits(8);
1285     m_successive_high = get_bits(4);
1286     m_successive_low  = get_bits(4);
1287
1288     // Non-progressive scans always cover the full coefficient range 0..63.
1289     if (!m_progressive_flag) {
1290       m_spectral_start = 0;
1291       m_spectral_end = 63;
1292     }
1293
1294     // Skip whatever the length field still claims beyond the parsed fields.
1295     for (num_left -= 3; num_left != 0; --num_left) get_bits(8);
1296   }
1301
1302   // Scans forward to the next marker and returns its code byte.
1303   // Bytes in front of the 0xFF prefix are discarded, repeated 0xFF bytes are skipped, and a
1304   // 0xFF followed by 0x00 is not treated as a marker (the search simply continues).
1305   int next_marker () {
1306     uint c;
1307     do {
1308       // Consume input up to and including the next 0xFF byte.
1309       while (get_bits(8) != 0xFF) {}
1310
1311       // c becomes the first byte after the run of 0xFF bytes.
1312       do {
1313         c = get_bits(8);
1314       } while (c == 0xFF);
1315     } while (c == 0);
1316
1317     return c;
1318   }
1327
1328   // Consumes markers until one that the caller has to handle shows up.
1329   // Returns on any SOFn, SOI, EOI or SOS marker. DHT, DQT and DRI segments are parsed on the
1330   // way; DAC, JPG, RSTn and TEM stop decoding; every other segment is skipped.
1331   int process_markers () {
1332     while (true) {
1333       immutable int c = next_marker();
1334       switch (c) {
1335         // Frame, image and scan delimiters are handed back to the caller.
1336         case M_SOF0, M_SOF1, M_SOF2, M_SOF3:
1337         case M_SOF5, M_SOF6, M_SOF7:
1338         case M_SOF9, M_SOF10, M_SOF11:
1339         case M_SOF13, M_SOF14, M_SOF15:
1340         case M_SOI, M_EOI, M_SOS:
1341           return c;
1342
1343         case M_DHT: read_dht_marker(); break;
1344         case M_DQT: read_dqt_marker(); break;
1345         case M_DRI: read_dri_marker(); break;
1346
1347         // Arithmetic coding is not supported.
1348         case M_DAC:
1349           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1350           break;
1351
1352         // Markers that are not expected at this point.
1353         case M_JPG, M_TEM:
1354         case M_RST0, M_RST1, M_RST2, M_RST3, M_RST4, M_RST5, M_RST6, M_RST7:
1355           stop_decoding(JPGD_UNEXPECTED_MARKER);
1356           break;
1357
1358         // DNL, DHP, EXP, APPn (including APP0), JPGn, COM or RESn: skipped via the length field.
1359         default:
1360           skip_variable_marker();
1361           break;
1362       }
1363     }
1364   }
1389
1390   // Finds the start of image (SOI) marker.
1391   // A stream that begins with 0xFF followed by M_SOI is accepted at once. Otherwise up to 4095
1392   // further bytes are searched for that pair, and the byte after it must then be 0xFF (the
1393   // start of the next marker). Every failure stops decoding with JPGD_NOT_JPEG.
1394   void locate_soi_marker () {
1395     uint lastchar = get_bits(8);
1396     uint thischar = get_bits(8);
1397
1398     // Plain JPEG file without a leading header: nothing to search for.
1399     if (lastchar == 0xFF && thischar == M_SOI) return;
1400
1401     uint bytesleft = 4096;  // search budget (the original limit was 512)
1402     while (true) {
1403       if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG);
1404
1405       lastchar = thischar;
1406       thischar = get_bits(8);
1407       if (lastchar != 0xFF) continue;
1408
1409       if (thischar == M_SOI) break;
1410       // get_bits keeps returning M_EOI once the input is exhausted, so this also catches end of data.
1411       if (thischar == M_EOI) stop_decoding(JPGD_NOT_JPEG);
1412     }
1413
1414     // Peek at the top byte of the bit buffer without consuming it.
1415     if (((m_bit_buf >> 24) & 0xFF) != 0xFF) stop_decoding(JPGD_NOT_JPEG);
1416   }
1432
1433   // Find a start of frame (SOF) marker.
1434   void locate_sof_marker () {
1435     locate_soi_marker();
1436
1437     int c = process_markers();
1438
1439     switch (c)
1440     {
1441       case M_SOF2:
1442         m_progressive_flag = true;
1443         goto case;
1444       case M_SOF0:  /* baseline DCT */
1445       case M_SOF1:  /* extended sequential DCT */
1446         read_sof_marker();
1447         break;
1448       case M_SOF9:  /* Arithmitic coding */
1449         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1450         break;
1451       default:
1452         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1453         break;
1454     }
1455   }
1456
1457   // Find a start of scan (SOS) marker.
1458   int locate_sos_marker () {
1459     int c;
1460
1461     c = process_markers();
1462
1463     if (c == M_EOI)
1464       return false;
1465     else if (c != M_SOS)
1466       stop_decoding(JPGD_UNEXPECTED_MARKER);
1467
1468     read_sos_marker();
1469
1470     return true;
1471   }
1472
1473   // Reset everything to default/uninitialized state.
1474   void initit (JpegStreamReadFunc rfn) {
1475     m_pMem_blocks = null;
1476     m_error_code = JPGD_SUCCESS;
1477     m_ready_flag = false;
1478     m_image_x_size = m_image_y_size = 0;
1479     readfn = rfn;
1480     m_progressive_flag = false;
1481
1482     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1483     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1484     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1485     memset(m_quant.ptr, 0, m_quant.sizeof);
1486
1487     m_scan_type = 0;
1488     m_comps_in_frame = 0;
1489
1490     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1491     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1492     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1493     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1494     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1495     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1496
1497     m_comps_in_scan = 0;
1498     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1499     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1500     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1501
1502     m_spectral_start = 0;
1503     m_spectral_end = 0;
1504     m_successive_low = 0;
1505     m_successive_high = 0;
1506     m_max_mcu_x_size = 0;
1507     m_max_mcu_y_size = 0;
1508     m_blocks_per_mcu = 0;
1509     m_max_blocks_per_row = 0;
1510     m_mcus_per_row = 0;
1511     m_mcus_per_col = 0;
1512     m_expanded_blocks_per_component = 0;
1513     m_expanded_blocks_per_mcu = 0;
1514     m_expanded_blocks_per_row = 0;
1515     m_freq_domain_chroma_upsample = false;
1516
1517     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1518
1519     m_total_lines_left = 0;
1520     m_mcu_lines_left = 0;
1521     m_real_dest_bytes_per_scan_line = 0;
1522     m_dest_bytes_per_scan_line = 0;
1523     m_dest_bytes_per_pixel = 0;
1524
1525     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1526
1527     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1528     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1529     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1530
1531     m_eob_run = 0;
1532
1533     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1534
1535     m_pIn_buf_ofs = m_in_buf.ptr;
1536     m_in_buf_left = 0;
1537     m_eof_flag = false;
1538     m_tem_flag = 0;
1539
1540     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1541     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1542     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1543
1544     m_restart_interval = 0;
1545     m_restarts_left    = 0;
1546     m_next_restart_num = 0;
1547
1548     m_max_mcus_per_row = 0;
1549     m_max_blocks_per_mcu = 0;
1550     m_max_mcus_per_col = 0;
1551
1552     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1553     m_pMCU_coefficients = null;
1554     m_pSample_buf = null;
1555
1556     m_total_bytes_read = 0;
1557
1558     m_pScan_line_0 = null;
1559     m_pScan_line_1 = null;
1560
1561     // Ready the input buffer.
1562     prep_in_buffer();
1563
1564     // Prime the bit buffer.
1565     m_bits_left = 16;
1566     m_bit_buf = 0;
1567
1568     get_bits(16);
1569     get_bits(16);
1570
1571     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1572       m_mcu_block_max_zag.ptr[i] = 64;
1573   }
1574
1575   enum SCALEBITS = 16;
1576   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1577   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1578
1579   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1580   void create_look_ups () {
1581     for (int i = 0; i <= 255; i++)
1582     {
1583       int k = i - 128;
1584       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1585       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1586       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1587       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1588     }
1589   }
1590
1591   // This method throws back into the stream any bytes that where read
1592   // into the bit buffer during initial marker scanning.
1593   void fix_in_buffer () {
1594     // In case any 0xFF's where pulled into the buffer during marker scanning.
1595     assert((m_bits_left & 7) == 0);
1596
1597     if (m_bits_left == 16)
1598       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1599
1600     if (m_bits_left >= 8)
1601       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1602
1603     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1604     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1605
1606     m_bits_left = 16;
1607     get_bits_no_markers(16);
1608     get_bits_no_markers(16);
1609   }
1610
1611   void transform_mcu (int mcu_row) {
1612     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1613     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1614
1615     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1616     {
1617       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1618       pSrc_ptr += 64;
1619       pDst_ptr += 64;
1620     }
1621   }
1622
1623   static immutable ubyte[64] s_max_rc = [
1624     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1625     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1626     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1627     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1628   ];
1629
1630   void transform_mcu_expand (int mcu_row) {
1631     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1632     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1633
1634     // Y IDCT
1635     int mcu_block;
1636     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1637     {
1638       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1639       pSrc_ptr += 64;
1640       pDst_ptr += 64;
1641     }
1642
1643     // Chroma IDCT, with upsampling
1644     jpgd_block_t[64] temp_block;
1645
1646     for (int i = 0; i < 2; i++)
1647     {
1648       DCT_Upsample.Matrix44 P, Q, R, S;
1649
1650       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1651       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1652
1653       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1654       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1655       switch (s_max_rc.ptr[max_zag])
1656       {
1657       case 1*16+1:
1658         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1659         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1660         break;
1661       case 1*16+2:
1662         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1663         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1664         break;
1665       case 2*16+2:
1666         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1667         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1668         break;
1669       case 3*16+2:
1670         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1671         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1672         break;
1673       case 3*16+3:
1674         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1675         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1676         break;
1677       case 3*16+4:
1678         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1679         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1680         break;
1681       case 4*16+4:
1682         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1683         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1684         break;
1685       case 5*16+4:
1686         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1687         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1688         break;
1689       case 5*16+5:
1690         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1691         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1692         break;
1693       case 5*16+6:
1694         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1695         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1696         break;
1697       case 6*16+6:
1698         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1699         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1700         break;
1701       case 7*16+6:
1702         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1703         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1704         break;
1705       case 7*16+7:
1706         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1707         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1708         break;
1709       case 7*16+8:
1710         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1711         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1712         break;
1713       case 8*16+8:
1714         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1715         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1716         break;
1717       default:
1718         assert(false);
1719       }
1720
1721       auto a = DCT_Upsample.Matrix44(P + Q);
1722       P -= Q;
1723       DCT_Upsample.Matrix44* b = &P;
1724       auto c = DCT_Upsample.Matrix44(R + S);
1725       R -= S;
1726       DCT_Upsample.Matrix44* d = &R;
1727
1728       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1729       idct_4x4(temp_block.ptr, pDst_ptr);
1730       pDst_ptr += 64;
1731
1732       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1733       idct_4x4(temp_block.ptr, pDst_ptr);
1734       pDst_ptr += 64;
1735
1736       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1737       idct_4x4(temp_block.ptr, pDst_ptr);
1738       pDst_ptr += 64;
1739
1740       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1741       idct_4x4(temp_block.ptr, pDst_ptr);
1742       pDst_ptr += 64;
1743
1744       pSrc_ptr += 64;
1745     }
1746   }
1747
1748   // Loads and dequantizes the next row of (already decoded) coefficients.
1749   // Progressive images only.
1750   void load_next_row () {
1751     int i;
1752     jpgd_block_t *p;
1753     jpgd_quant_t *q;
1754     int mcu_row, mcu_block, row_block = 0;
1755     int component_num, component_id;
1756     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1757
1758     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1759
1760     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1761     {
1762       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1763
1764       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1765       {
1766         component_id = m_mcu_org.ptr[mcu_block];
1767         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1768
1769         p = m_pMCU_coefficients + 64 * mcu_block;
1770
1771         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1772         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1773         p[0] = pDC[0];
1774         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1775
1776         for (i = 63; i > 0; i--)
1777           if (p[g_ZAG[i]])
1778             break;
1779
1780         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1781
1782         for ( ; i >= 0; i--)
1783           if (p[g_ZAG[i]])
1784             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1785
1786         row_block++;
1787
1788         if (m_comps_in_scan == 1)
1789           block_x_mcu.ptr[component_id]++;
1790         else
1791         {
1792           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1793           {
1794             block_x_mcu_ofs = 0;
1795
1796             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1797             {
1798               block_y_mcu_ofs = 0;
1799
1800               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1801             }
1802           }
1803         }
1804       }
1805
1806       if (m_freq_domain_chroma_upsample)
1807         transform_mcu_expand(mcu_row);
1808       else
1809         transform_mcu(mcu_row);
1810     }
1811
1812     if (m_comps_in_scan == 1)
1813       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1814     else
1815     {
1816       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1817       {
1818         component_id = m_comp_list.ptr[component_num];
1819
1820         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1821       }
1822     }
1823   }
1824
1825   // Restart interval processing.
1826   void process_restart () {
1827     int i;
1828     int c = 0;
1829
1830     // Align to a byte boundry
1831     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1832     //get_bits_no_markers(m_bits_left & 7);
1833
1834     // Let's scan a little bit to find the marker, but not _too_ far.
1835     // 1536 is a "fudge factor" that determines how much to scan.
1836     for (i = 1536; i > 0; i--)
1837       if (get_char() == 0xFF)
1838         break;
1839
1840     if (i == 0)
1841       stop_decoding(JPGD_BAD_RESTART_MARKER);
1842
1843     for ( ; i > 0; i--)
1844       if ((c = get_char()) != 0xFF)
1845         break;
1846
1847     if (i == 0)
1848       stop_decoding(JPGD_BAD_RESTART_MARKER);
1849
1850     // Is it the expected marker? If not, something bad happened.
1851     if (c != (m_next_restart_num + M_RST0))
1852       stop_decoding(JPGD_BAD_RESTART_MARKER);
1853
1854     // Reset each component's DC prediction values.
1855     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1856
1857     m_eob_run = 0;
1858
1859     m_restarts_left = m_restart_interval;
1860
1861     m_next_restart_num = (m_next_restart_num + 1) & 7;
1862
1863     // Get the bit buffer going again...
1864
1865     m_bits_left = 16;
1866     get_bits_no_markers(16);
1867     get_bits_no_markers(16);
1868   }
1869
1870   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1871
1872   // Decodes and dequantizes the next row of coefficients.
1873   void decode_next_row () {
1874     int row_block = 0;
1875
1876     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1877     {
1878       if ((m_restart_interval) && (m_restarts_left == 0))
1879         process_restart();
1880
1881       jpgd_block_t* p = m_pMCU_coefficients;
1882       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1883       {
1884         int component_id = m_mcu_org.ptr[mcu_block];
1885         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1886
1887         int r, s;
1888         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1889         s = JPGD_HUFF_EXTEND(r, s);
1890
1891         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1892
1893         p[0] = cast(jpgd_block_t)(s * q[0]);
1894
1895         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1896
1897         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1898
1899         int k;
1900         for (k = 1; k < 64; k++)
1901         {
1902           int extra_bits;
1903           s = huff_decode(pH, extra_bits);
1904
1905           r = s >> 4;
1906           s &= 15;
1907
1908           if (s)
1909           {
1910             if (r)
1911             {
1912               if ((k + r) > 63)
1913                 stop_decoding(JPGD_DECODE_ERROR);
1914
1915               if (k < prev_num_set)
1916               {
1917                 int n = JPGD_MIN(r, prev_num_set - k);
1918                 int kt = k;
1919                 while (n--)
1920                   p[g_ZAG[kt++]] = 0;
1921               }
1922
1923               k += r;
1924             }
1925
1926             s = JPGD_HUFF_EXTEND(extra_bits, s);
1927
1928             assert(k < 64);
1929
1930             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1931           }
1932           else
1933           {
1934             if (r == 15)
1935             {
1936               if ((k + 16) > 64)
1937                 stop_decoding(JPGD_DECODE_ERROR);
1938
1939               if (k < prev_num_set)
1940               {
1941                 int n = JPGD_MIN(16, prev_num_set - k);
1942                 int kt = k;
1943                 while (n--)
1944                 {
1945                   assert(kt <= 63);
1946                   p[g_ZAG[kt++]] = 0;
1947                 }
1948               }
1949
1950               k += 16 - 1; // - 1 because the loop counter is k
1951               assert(p[g_ZAG[k]] == 0);
1952             }
1953             else
1954               break;
1955           }
1956         }
1957
1958         if (k < prev_num_set)
1959         {
1960           int kt = k;
1961           while (kt < prev_num_set)
1962             p[g_ZAG[kt++]] = 0;
1963         }
1964
1965         m_mcu_block_max_zag.ptr[mcu_block] = k;
1966
1967         row_block++;
1968       }
1969
1970       if (m_freq_domain_chroma_upsample)
1971         transform_mcu_expand(mcu_row);
1972       else
1973         transform_mcu(mcu_row);
1974
1975       m_restarts_left--;
1976     }
1977   }
1978
1979   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1980   void H1V1Convert () {
1981     int row = m_max_mcu_y_size - m_mcu_lines_left;
1982     ubyte *d = m_pScan_line_0;
1983     ubyte *s = m_pSample_buf + row * 8;
1984
1985     for (int i = m_max_mcus_per_row; i > 0; i--)
1986     {
1987       for (int j = 0; j < 8; j++)
1988       {
1989         int y = s[j];
1990         int cb = s[64+j];
1991         int cr = s[128+j];
1992
1993         d[0] = clamp(y + m_crr.ptr[cr]);
1994         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
1995         d[2] = clamp(y + m_cbb.ptr[cb]);
1996         d[3] = 255;
1997
1998         d += 4;
1999       }
2000
2001       s += 64*3;
2002     }
2003   }
2004
2005   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2006   void H2V1Convert () {
2007     int row = m_max_mcu_y_size - m_mcu_lines_left;
2008     ubyte *d0 = m_pScan_line_0;
2009     ubyte *y = m_pSample_buf + row * 8;
2010     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2011
2012     for (int i = m_max_mcus_per_row; i > 0; i--)
2013     {
2014       for (int l = 0; l < 2; l++)
2015       {
2016         for (int j = 0; j < 4; j++)
2017         {
2018           int cb = c[0];
2019           int cr = c[64];
2020
2021           int rc = m_crr.ptr[cr];
2022           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2023           int bc = m_cbb.ptr[cb];
2024
2025           int yy = y[j<<1];
2026           d0[0] = clamp(yy+rc);
2027           d0[1] = clamp(yy+gc);
2028           d0[2] = clamp(yy+bc);
2029           d0[3] = 255;
2030
2031           yy = y[(j<<1)+1];
2032           d0[4] = clamp(yy+rc);
2033           d0[5] = clamp(yy+gc);
2034           d0[6] = clamp(yy+bc);
2035           d0[7] = 255;
2036
2037           d0 += 8;
2038
2039           c++;
2040         }
2041         y += 64;
2042       }
2043
2044       y += 64*4 - 64*2;
2045       c += 64*4 - 8;
2046     }
2047   }
2048
2049   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2050   void H1V2Convert () {
2051     int row = m_max_mcu_y_size - m_mcu_lines_left;
2052     ubyte *d0 = m_pScan_line_0;
2053     ubyte *d1 = m_pScan_line_1;
2054     ubyte *y;
2055     ubyte *c;
2056
2057     if (row < 8)
2058       y = m_pSample_buf + row * 8;
2059     else
2060       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2061
2062     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2063
2064     for (int i = m_max_mcus_per_row; i > 0; i--)
2065     {
2066       for (int j = 0; j < 8; j++)
2067       {
2068         int cb = c[0+j];
2069         int cr = c[64+j];
2070
2071         int rc = m_crr.ptr[cr];
2072         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2073         int bc = m_cbb.ptr[cb];
2074
2075         int yy = y[j];
2076         d0[0] = clamp(yy+rc);
2077         d0[1] = clamp(yy+gc);
2078         d0[2] = clamp(yy+bc);
2079         d0[3] = 255;
2080
2081         yy = y[8+j];
2082         d1[0] = clamp(yy+rc);
2083         d1[1] = clamp(yy+gc);
2084         d1[2] = clamp(yy+bc);
2085         d1[3] = 255;
2086
2087         d0 += 4;
2088         d1 += 4;
2089       }
2090
2091       y += 64*4;
2092       c += 64*4;
2093     }
2094   }
2095
2096   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2097   void H2V2Convert () {
2098     int row = m_max_mcu_y_size - m_mcu_lines_left;
2099     ubyte *d0 = m_pScan_line_0;
2100     ubyte *d1 = m_pScan_line_1;
2101     ubyte *y;
2102     ubyte *c;
2103
2104     if (row < 8)
2105       y = m_pSample_buf + row * 8;
2106     else
2107       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2108
2109     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2110
2111     for (int i = m_max_mcus_per_row; i > 0; i--)
2112     {
2113       for (int l = 0; l < 2; l++)
2114       {
2115         for (int j = 0; j < 8; j += 2)
2116         {
2117           int cb = c[0];
2118           int cr = c[64];
2119
2120           int rc = m_crr.ptr[cr];
2121           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2122           int bc = m_cbb.ptr[cb];
2123
2124           int yy = y[j];
2125           d0[0] = clamp(yy+rc);
2126           d0[1] = clamp(yy+gc);
2127           d0[2] = clamp(yy+bc);
2128           d0[3] = 255;
2129
2130           yy = y[j+1];
2131           d0[4] = clamp(yy+rc);
2132           d0[5] = clamp(yy+gc);
2133           d0[6] = clamp(yy+bc);
2134           d0[7] = 255;
2135
2136           yy = y[j+8];
2137           d1[0] = clamp(yy+rc);
2138           d1[1] = clamp(yy+gc);
2139           d1[2] = clamp(yy+bc);
2140           d1[3] = 255;
2141
2142           yy = y[j+8+1];
2143           d1[4] = clamp(yy+rc);
2144           d1[5] = clamp(yy+gc);
2145           d1[6] = clamp(yy+bc);
2146           d1[7] = 255;
2147
2148           d0 += 8;
2149           d1 += 8;
2150
2151           c++;
2152         }
2153         y += 64;
2154       }
2155
2156       y += 64*6 - 64*2;
2157       c += 64*6 - 8;
2158     }
2159   }
2160
2161   // Y (1 block per MCU) to 8-bit grayscale
2162   void gray_convert () {
2163     int row = m_max_mcu_y_size - m_mcu_lines_left;
2164     ubyte *d = m_pScan_line_0;
2165     ubyte *s = m_pSample_buf + row * 8;
2166
2167     for (int i = m_max_mcus_per_row; i > 0; i--)
2168     {
2169       *cast(uint*)d = *cast(uint*)s;
2170       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2171
2172       s += 64;
2173       d += 8;
2174     }
2175   }
2176
2177   void expanded_convert () {
2178     int row = m_max_mcu_y_size - m_mcu_lines_left;
2179
2180     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2181
2182     ubyte* d = m_pScan_line_0;
2183
2184     for (int i = m_max_mcus_per_row; i > 0; i--)
2185     {
2186       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2187       {
2188         immutable int Y_ofs = k * 8;
2189         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2190         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2191         for (int j = 0; j < 8; j++)
2192         {
2193           int y = Py[Y_ofs + j];
2194           int cb = Py[Cb_ofs + j];
2195           int cr = Py[Cr_ofs + j];
2196
2197           d[0] = clamp(y + m_crr.ptr[cr]);
2198           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2199           d[2] = clamp(y + m_cbb.ptr[cb]);
2200           d[3] = 255;
2201
2202           d += 4;
2203         }
2204       }
2205
2206       Py += 64 * m_expanded_blocks_per_mcu;
2207     }
2208   }
2209
2210   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2211   void find_eoi () {
2212     if (!m_progressive_flag)
2213     {
2214       // Attempt to read the EOI marker.
2215       //get_bits_no_markers(m_bits_left & 7);
2216
2217       // Prime the bit buffer
2218       m_bits_left = 16;
2219       get_bits(16);
2220       get_bits(16);
2221
2222       // The next marker _should_ be EOI
2223       process_markers();
2224     }
2225
2226     m_total_bytes_read -= m_in_buf_left;
2227   }
2228
2229   // Creates the tables needed for efficient Huffman decoding.
2230   void make_huff_table (int index, huff_tables *pH) {
2231     int p, i, l, si;
2232     ubyte[257] huffsize;
2233     uint[257] huffcode;
2234     uint code;
2235     uint subtree;
2236     int code_size;
2237     int lastp;
2238     int nextfreeentry;
2239     int currententry;
2240
2241     pH.ac_table = m_huff_ac.ptr[index] != 0;
2242
2243     p = 0;
2244
2245     for (l = 1; l <= 16; l++)
2246     {
2247       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2248         huffsize.ptr[p++] = cast(ubyte)(l);
2249     }
2250
2251     huffsize.ptr[p] = 0;
2252
2253     lastp = p;
2254
2255     code = 0;
2256     si = huffsize.ptr[0];
2257     p = 0;
2258
2259     while (huffsize.ptr[p])
2260     {
2261       while (huffsize.ptr[p] == si)
2262       {
2263         huffcode.ptr[p++] = code;
2264         code++;
2265       }
2266
2267       code <<= 1;
2268       si++;
2269     }
2270
2271     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2272     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2273     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2274     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2275
2276     nextfreeentry = -1;
2277
2278     p = 0;
2279
2280     while (p < lastp)
2281     {
2282       i = m_huff_val.ptr[index][p];
2283       code = huffcode.ptr[p];
2284       code_size = huffsize.ptr[p];
2285
2286       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2287
2288       if (code_size <= 8)
2289       {
2290         code <<= (8 - code_size);
2291
2292         for (l = 1 << (8 - code_size); l > 0; l--)
2293         {
2294           assert(i < 256);
2295
2296           pH.look_up.ptr[code] = i;
2297
2298           bool has_extrabits = false;
2299           int extra_bits = 0;
2300           int num_extra_bits = i & 15;
2301
2302           int bits_to_fetch = code_size;
2303           if (num_extra_bits)
2304           {
2305             int total_codesize = code_size + num_extra_bits;
2306             if (total_codesize <= 8)
2307             {
2308               has_extrabits = true;
2309               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2310               assert(extra_bits <= 0x7FFF);
2311               bits_to_fetch += num_extra_bits;
2312             }
2313           }
2314
2315           if (!has_extrabits)
2316             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2317           else
2318             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2319
2320           code++;
2321         }
2322       }
2323       else
2324       {
2325         subtree = (code >> (code_size - 8)) & 0xFF;
2326
2327         currententry = pH.look_up.ptr[subtree];
2328
2329         if (currententry == 0)
2330         {
2331           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2332           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2333
2334           nextfreeentry -= 2;
2335         }
2336
2337         code <<= (16 - (code_size - 8));
2338
2339         for (l = code_size; l > 9; l--)
2340         {
2341           if ((code & 0x8000) == 0)
2342             currententry--;
2343
2344           if (pH.tree.ptr[-currententry - 1] == 0)
2345           {
2346             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2347
2348             currententry = nextfreeentry;
2349
2350             nextfreeentry -= 2;
2351           }
2352           else
2353             currententry = pH.tree.ptr[-currententry - 1];
2354
2355           code <<= 1;
2356         }
2357
2358         if ((code & 0x8000) == 0)
2359           currententry--;
2360
2361         pH.tree.ptr[-currententry - 1] = i;
2362       }
2363
2364       p++;
2365     }
2366   }
2367
2368   // Verifies the quantization tables needed for this scan are available.
2369   void check_quant_tables () {
2370     for (int i = 0; i < m_comps_in_scan; i++)
2371       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2372         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2373   }
2374
2375   // Verifies that all the Huffman tables needed for this scan are available.
2376   void check_huff_tables () {
2377     for (int i = 0; i < m_comps_in_scan; i++)
2378     {
2379       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2380         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2381
2382       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2383         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2384     }
2385
2386     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2387       if (m_huff_num.ptr[i])
2388       {
2389         if (!m_pHuff_tabs.ptr[i])
2390           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2391
2392         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2393       }
2394   }
2395
2396   // Determines the component order inside each MCU.
2397   // Also calcs how many MCU's are on each row, etc.
2398   void calc_mcu_block_order () {
2399     int component_num, component_id;
2400     int max_h_samp = 0, max_v_samp = 0;
2401
2402     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2403     {
2404       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2405         max_h_samp = m_comp_h_samp.ptr[component_id];
2406
2407       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2408         max_v_samp = m_comp_v_samp.ptr[component_id];
2409     }
2410
2411     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2412     {
2413       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2414       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2415     }
2416
2417     if (m_comps_in_scan == 1)
2418     {
2419       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2420       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2421     }
2422     else
2423     {
2424       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2425       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2426     }
2427
2428     if (m_comps_in_scan == 1)
2429     {
2430       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2431
2432       m_blocks_per_mcu = 1;
2433     }
2434     else
2435     {
2436       m_blocks_per_mcu = 0;
2437
2438       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2439       {
2440         int num_blocks;
2441
2442         component_id = m_comp_list.ptr[component_num];
2443
2444         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2445
2446         while (num_blocks--)
2447           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2448       }
2449     }
2450   }
2451
2452   // Starts a new scan.
2453   int init_scan () {
2454     if (!locate_sos_marker())
2455       return false;
2456
2457     calc_mcu_block_order();
2458
2459     check_huff_tables();
2460
2461     check_quant_tables();
2462
2463     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2464
2465     m_eob_run = 0;
2466
2467     if (m_restart_interval)
2468     {
2469       m_restarts_left = m_restart_interval;
2470       m_next_restart_num = 0;
2471     }
2472
2473     fix_in_buffer();
2474
2475     return true;
2476   }
2477
  // Starts a frame. Determines if the number of components or sampling factors
  // are supported.
  //
  // Only 1-component (grayscale) and 3-component frames are accepted; anything
  // else stops decoding with JPGD_UNSUPPORTED_COLORSPACE. Unsupported sampling
  // factors stop decoding with JPGD_UNSUPPORTED_SAMP_FACTORS.
  // On success the scan type, MCU geometry, output row sizes and the working
  // buffers (scan lines, MCU coefficients, sample buffer) are set up.
  void init_frame () {
    int i;

    if (m_comps_in_frame == 1)
    {
      // grayscale must be sampled 1x1
      if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

      m_scan_type = JPGD_GRAYSCALE;
      m_max_blocks_per_mcu = 1;
      m_max_mcu_x_size = 8;
      m_max_mcu_y_size = 8;
    }
    else if (m_comps_in_frame == 3)
    {
      // components 1 and 2 must be sampled 1x1; only component 0 may be 2x
      if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
           ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);

      if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH1V1;

        m_max_blocks_per_mcu = 3;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
      {
        m_scan_type = JPGD_YH2V1;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 8;
      }
      else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH1V2;
        m_max_blocks_per_mcu = 4;
        m_max_mcu_x_size = 8;
        m_max_mcu_y_size = 16;
      }
      else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
      {
        m_scan_type = JPGD_YH2V2;
        m_max_blocks_per_mcu = 6;
        m_max_mcu_x_size = 16;
        m_max_mcu_y_size = 16;
      }
      else
        stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
    }
    else
      stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);

    // number of MCUs needed to cover the image, rounding up
    m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
    m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;

    // These values are for the *destination* pixels: after conversion.
    if (m_scan_type == JPGD_GRAYSCALE)
      m_dest_bytes_per_pixel = 1;
    else
      m_dest_bytes_per_pixel = 4;

    // Row width rounded up to a multiple of 16 pixels.
    // NOTE(review): the 0xFFF0 mask also drops every bit above bit 15, so this
    // is only correct while m_image_x_size + 15 < 65536 -- confirm the width
    // limit enforced when the frame header is parsed.
    m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;

    // unpadded row size
    m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);

    // Initialize two scan line buffers.
    // (the second one is only allocated for the vertically subsampled types;
    // the `true` argument presumably requests zeroed memory -- confirm in alloc())
    m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
    if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
      m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);

    m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;

    // Should never happen
    if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
      stop_decoding(JPGD_ASSERTION_ERROR);

    // Allocate the coefficient buffer, enough for one MCU
    m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);

    // every block starts out as "all 64 coefficients may be used"
    for (i = 0; i < m_max_blocks_per_mcu; i++)
      m_mcu_block_max_zag.ptr[i] = 64;

    // "expanded" counts assume every component has as many blocks as component 0
    m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
    m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
    m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
    // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
    m_freq_domain_chroma_upsample = false;
    version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
      // 4 blocks per component * 3 components, i.e. the H2V2 case
      m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
    }

    // the sample buffer holds 64 bytes per block for one row of MCUs
    if (m_freq_domain_chroma_upsample)
      m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
    else
      m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);

    m_total_lines_left = m_image_y_size;

    m_mcu_lines_left = 0;

    create_look_ups();
  }
2584
2585   // The coeff_buf series of methods originally stored the coefficients
2586   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2587   // was used to make this process more efficient. Now, we can store the entire
2588   // thing in RAM.
2589   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2590     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2591
2592     cb.block_num_x = block_num_x;
2593     cb.block_num_y = block_num_y;
2594     cb.block_len_x = block_len_x;
2595     cb.block_len_y = block_len_y;
2596     cb.block_size = (block_len_x * block_len_y) * jpgd_block_t.sizeof;
2597     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2598     return cb;
2599   }
2600
2601   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2602     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2603     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2604   }
2605
2606   // The following methods decode the various types of m_blocks encountered
2607   // in progressively encoded images.
2608   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2609     int s, r;
2610     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2611
2612     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2613     {
2614       r = pD.get_bits_no_markers(s);
2615       s = JPGD_HUFF_EXTEND(r, s);
2616     }
2617
2618     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2619
2620     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2621   }
2622
2623   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2624     if (pD.get_bits_no_markers(1))
2625     {
2626       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2627
2628       p[0] |= (1 << pD.m_successive_low);
2629     }
2630   }
2631
  // First AC scan for one block: decodes the coefficients in the band
  // m_spectral_start..m_spectral_end and stores them (shifted left by
  // m_successive_low) into the component's AC coefficient buffer.
  // Stops decoding with JPGD_DECODE_ERROR when a run moves past coefficient 63.
  static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    int k, s, r;

    // inside an end-of-band run: this block has no coefficients in this scan
    if (pD.m_eob_run)
    {
      pD.m_eob_run--;
      return;
    }

    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
    {
      s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

      // high nibble: run length, low nibble: size of the coefficient in bits
      r = s >> 4;
      s &= 15;

      if (s)
      {
        // skip `r` zero coefficients, then read a non-zero one
        if ((k += r) > 63)
          pD.stop_decoding(JPGD_DECODE_ERROR);

        r = pD.get_bits_no_markers(s);
        s = JPGD_HUFF_EXTEND(r, s);

        // k is a zigzag position; g_ZAG maps it to the index inside the block
        p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
      }
      else
      {
        if (r == 15)
        {
          // run of zeros: 15 here plus the loop increment
          if ((k += 15) > 63)
            pD.stop_decoding(JPGD_DECODE_ERROR);
        }
        else
        {
          // end-of-band run of (1 << r) blocks plus `r` extra bits
          pD.m_eob_run = 1 << r;

          if (r)
            pD.m_eob_run += pD.get_bits_no_markers(r);

          // the current block is the first block of the run
          pD.m_eob_run--;

          break;
        }
      }
    }
  }
2681
  // AC refinement scan for one block: adds one more bit of precision (bit
  // position m_successive_low) to coefficients that are already non-zero and
  // places newly non-zero coefficients, for the band
  // m_spectral_start..m_spectral_end.
  // Stops decoding with JPGD_DECODE_ERROR on a symbol whose size is not 0 or 1.
  static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
    int s, k, r;
    // value of a newly non-zero coefficient: +1 or -1 at the current bit position
    int p1 = 1 << pD.m_successive_low;
    int m1 = (-1) << pD.m_successive_low;
    jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);

    assert(pD.m_spectral_end <= 63);

    k = pD.m_spectral_start;

    // not inside an end-of-band run: decode symbols for this block
    if (pD.m_eob_run == 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);

        // high nibble: number of zero-valued coefficients to skip, low nibble: size
        r = s >> 4;
        s &= 15;

        if (s)
        {
          // a new coefficient always has size 1 in a refinement scan
          if (s != 1)
            pD.stop_decoding(JPGD_DECODE_ERROR);

          // one bit selects the sign of the new coefficient
          if (pD.get_bits_no_markers(1))
            s = p1;
          else
            s = m1;
        }
        else
        {
          if (r != 15)
          {
            // start of an end-of-band run; the rest of this block is handled
            // by the m_eob_run branch below
            pD.m_eob_run = 1 << r;

            if (r)
              pD.m_eob_run += pD.get_bits_no_markers(r);

            break;
          }
        }

        // Advance over coefficients: non-zero ones may receive a correction
        // bit, zero ones are counted against the run length `r`. The loop
        // stops at the zero coefficient that follows `r` skipped zeros, or at
        // the end of the band.
        do
        {
          jpgd_block_t *this_coef = p + g_ZAG[k & 63];

          if (*this_coef != 0)
          {
            if (pD.get_bits_no_markers(1))
            {
              // apply the correction only if this bit is not set yet
              if ((*this_coef & p1) == 0)
              {
                if (*this_coef >= 0)
                  *this_coef = cast(jpgd_block_t)(*this_coef + p1);
                else
                  *this_coef = cast(jpgd_block_t)(*this_coef + m1);
              }
            }
          }
          else
          {
            if (--r < 0)
              break;
          }

          k++;

        } while (k <= pD.m_spectral_end);

        // store the newly non-zero coefficient at the position we stopped on
        if ((s) && (k < 64))
        {
          p[g_ZAG[k]] = cast(jpgd_block_t)(s);
        }
      }
    }

    // inside an end-of-band run (possibly started just above): the remaining
    // coefficients of the band only receive correction bits
    if (pD.m_eob_run > 0)
    {
      for ( ; k <= pD.m_spectral_end; k++)
      {
        jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

        if (*this_coef != 0)
        {
          if (pD.get_bits_no_markers(1))
          {
            if ((*this_coef & p1) == 0)
            {
              if (*this_coef >= 0)
                *this_coef = cast(jpgd_block_t)(*this_coef + p1);
              else
                *this_coef = cast(jpgd_block_t)(*this_coef + m1);
            }
          }
        }
      }

      // this block consumed one entry of the run
      pD.m_eob_run--;
    }
  }
2782
  // Decode a scan in a progressively encoded image.
  // Visits every MCU of the scan (m_mcus_per_col rows of m_mcus_per_row MCUs)
  // and calls `decode_block_func` once per block with the block's coordinates
  // inside its component.
  void decode_scan (pDecode_block_func decode_block_func) {
    int mcu_row, mcu_col, mcu_block;
    // per-component block coordinates of the current MCU
    int[JPGD_MAX_COMPONENTS] block_x_mcu;
    int[JPGD_MAX_COMPONENTS] m_block_y_mcu;

    memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);

    // note: `mcu_col` counts rows of MCUs, `mcu_row` counts MCUs within a row
    for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
    {
      int component_num, component_id;

      // every row of MCUs starts at block column 0
      memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);

      for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
      {
        // position of the current block inside the MCU
        int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;

        if ((m_restart_interval) && (m_restarts_left == 0))
          process_restart();

        for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
        {
          component_id = m_mcu_org.ptr[mcu_block];

          decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);

          if (m_comps_in_scan == 1)
            block_x_mcu.ptr[component_id]++;
          else
          {
            // walk the component's h_samp x v_samp blocks left to right, top
            // to bottom; after the last one move on to the next MCU's column
            if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
            {
              block_x_mcu_ofs = 0;

              if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
              {
                block_y_mcu_ofs = 0;
                block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
              }
            }
          }
        }

        // decremented for every MCU, whether or not a restart interval is set
        m_restarts_left--;
      }

      // advance the block row of every component in the scan
      if (m_comps_in_scan == 1)
        m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
      else
      {
        for (component_num = 0; component_num < m_comps_in_scan; component_num++)
        {
          component_id = m_comp_list.ptr[component_num];
          m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
        }
      }
    }
  }
2842
2843   // Decode a progressively encoded image.
2844   void init_progressive () {
2845     int i;
2846
2847     if (m_comps_in_frame == 4)
2848       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2849
2850     // Allocate the coefficient buffers.
2851     for (i = 0; i < m_comps_in_frame; i++)
2852     {
2853       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2854       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2855     }
2856
2857     for ( ; ; )
2858     {
2859       int dc_only_scan, refinement_scan;
2860       pDecode_block_func decode_block_func;
2861
2862       if (!init_scan())
2863         break;
2864
2865       dc_only_scan = (m_spectral_start == 0);
2866       refinement_scan = (m_successive_high != 0);
2867
2868       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2869         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2870
2871       if (dc_only_scan)
2872       {
2873         if (m_spectral_end)
2874           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2875       }
2876       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2877         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2878
2879       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2880         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2881
2882       if (dc_only_scan)
2883       {
2884         if (refinement_scan)
2885           decode_block_func = &decode_block_dc_refine;
2886         else
2887           decode_block_func = &decode_block_dc_first;
2888       }
2889       else
2890       {
2891         if (refinement_scan)
2892           decode_block_func = &decode_block_ac_refine;
2893         else
2894           decode_block_func = &decode_block_ac_first;
2895       }
2896
2897       decode_scan(decode_block_func);
2898
2899       m_bits_left = 16;
2900       get_bits(16);
2901       get_bits(16);
2902     }
2903
2904     m_comps_in_scan = m_comps_in_frame;
2905
2906     for (i = 0; i < m_comps_in_frame; i++)
2907       m_comp_list.ptr[i] = i;
2908
2909     calc_mcu_block_order();
2910   }
2911
2912   void init_sequential () {
2913     if (!init_scan())
2914       stop_decoding(JPGD_UNEXPECTED_MARKER);
2915   }
2916
2917   void decode_start () {
2918     init_frame();
2919
2920     if (m_progressive_flag)
2921       init_progressive();
2922     else
2923       init_sequential();
2924   }
2925
2926   void decode_init (JpegStreamReadFunc rfn) {
2927     initit(rfn);
2928     locate_sof_marker();
2929   }
2930 }
2931
2932
2933 // ////////////////////////////////////////////////////////////////////////// //
/// read JPEG image header from a stream, determine dimensions and number of components.
/// return `false` if `rfn` is null or the decoder reported an error (i.e. the data is
/// probably not a JPEG image); the output values are filled only on success.
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  if (rfn is null) return false;

  auto dec = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)dec.total_bytes_read); }

  immutable bool parsed = (dec.error_code == JPGD_SUCCESS);
  if (parsed) {
    width = dec.width;
    height = dec.height;
    actual_comps = dec.num_components;
  }
  return parsed;
}
2946
2947
2948 // ////////////////////////////////////////////////////////////////////////// //
/// read JPEG image header from disk file, determine dimensions and number of components.
/// return `false` if image is not JPEG (i hope).
/// throws `Exception` if `filename` is empty or the file cannot be opened.
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  // `fopen` needs a zero-terminated name: short names are copied to the stack,
  // long ones to a temporary heap buffer
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // check the raw pointer: a slice with non-zero length is never `null`,
    // even when `malloc` failed
    auto tfnp = cast(char*)malloc(filename.length+1);
    if (tfnp !is null) {
      scope(exit) free(tfnp);
      // the name has to be copied and terminated here too
      tfnp[0..filename.length] = filename[];
      tfnp[filename.length] = 0;
      m_pFile = fopen(tfnp, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    // reader callback: returns the number of bytes read, -1 on error;
    // sets `*pEOF_flag` once the end of the file has been reached
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // short read: either an i/o error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
2996
2997
2998 // ////////////////////////////////////////////////////////////////////////// //
/// read JPEG image header from memory buffer, determine dimensions and number of components.
/// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  usize bufpos; // number of bytes already handed to the decoder
  auto b = cast(const(ubyte)*)buf.ptr;

  return detect_jpeg_image_from_stream(
    // reader callback: copies the next chunk of `buf`, reports EOF when nothing is left
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never read past the end of the buffer
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, b+bufpos, max_bytes_to_read);
      // the position must advance, or the end of the buffer is never detected
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps);
}
3020
3021
3022 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image, what else?
/// you can specify required color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as is to use image value.
/// returns the pixels as `width*height*req_comps` bytes, or `null` on any error (null reader, invalid `req_comps`, decoding failure).
/// with `useMalloc` the result is allocated with `jpgd_malloc` instead of the GC.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  //actual_comps = 0;
  if (rfn is null) return null;
  if (req_comps != -1 && req_comps != 1 && req_comps != 3 && req_comps != 4) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // bytes per output row
  immutable int dst_bpl = image_width*req_comps;

  static if (useMalloc) {
    ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_bpl*image_height);
    if (pImage_data is null) return null;
    auto idata = pImage_data[0..dst_bpl*image_height];
  } else {
    auto idata = new ubyte[](dst_bpl*image_height);
    auto pImage_data = idata.ptr;
  }

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      // only memory that came from `jpgd_malloc` may be released by hand;
      // the GC-allocated array must be left to the collector
      static if (useMalloc) jpgd_free(pImage_data);
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;

    // decoder rows are 1 byte per pixel for grayscale images and 4 bytes per pixel otherwise
    if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
      // requested layout equals decoder layout
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale source: replicate luma into the color channels
      if (req_comps == 3) {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst += 3;
        }
      } else {
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst[3] = 255;
          pDst += 4;
        }
      }
    } else if (decoder.num_components == 3) {
      if (req_comps == 1) {
        // color to grayscale using 16.16 fixed-point weights
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          int r = pScan_line[x*4+0];
          int g = pScan_line[x*4+1];
          int b = pScan_line[x*4+2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // drop the fourth byte of every source pixel
        for (int x = 0; x < image_width; ++x) {
          pDst[0] = pScan_line[x*4+0];
          pDst[1] = pScan_line[x*4+1];
          pDst[2] = pScan_line[x*4+2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3109
3110
3111 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from disk file.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
/// throws `Exception` if `filename` is empty or the file cannot be opened.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  // `fopen` needs a zero-terminated name: short names are copied to the stack,
  // long ones to a temporary heap buffer
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // check the raw pointer: a slice with non-zero length is never `null`,
    // even when `malloc` failed
    auto tfnp = cast(char*)malloc(filename.length+1);
    if (tfnp !is null) {
      scope(exit) free(tfnp);
      // the name has to be copied and terminated here too
      tfnp[0..filename.length] = filename[];
      tfnp[filename.length] = 0;
      m_pFile = fopen(tfnp, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    // reader callback: returns the number of bytes read, -1 on error;
    // sets `*pEOF_flag` once the end of the file has been reached
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // short read: either an i/o error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3159
3160
3161 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  usize bufpos; // number of bytes already handed to the decoder
  auto b = cast(const(ubyte)*)buf.ptr;

  return decompress_jpeg_image_from_stream!useMalloc(
    // reader callback: copies the next chunk of `buf`, reports EOF when nothing is left
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never read past the end of the buffer
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, b+bufpos, max_bytes_to_read);
      // the position must advance, or the end of the buffer is never detected
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps, req_comps);
}
3183
3184
3185 // ////////////////////////////////////////////////////////////////////////// //
// when the "iv.vfs" module can be imported, offer a VFile-based API as well
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });

static if (JpegHasIVVFS) {
import iv.vfs;

// ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from an already opened `VFile`.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  return decompress_jpeg_image_from_stream!useMalloc(
    // reader callback: -1 for a closed file, 0 plus EOF flag at the end of the
    // file, otherwise the number of bytes actually read
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto rd = fl.rawRead(pBuf[0..max_bytes_to_read]);
        if (fl.eof) *pEOF_flag = true;
        return cast(int)rd.length;
      }
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps, req_comps);
}
// vfs API
}
3211
3212
3213 // ////////////////////////////////////////////////////////////////////////// //
// when the "arsd.color" module can be imported, offer a MemoryImage-based API as well
enum JpegHasArsd = __traits(compiles, { import arsd.color; });
3216
3217 static if (JpegHasArsd) {
3218 import arsd.color;
3219
3220 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image, what else?
/// the result is always a 4 bytes per pixel `TrueColorImage`; grayscale sources are
/// expanded with alpha set to 255.
/// returns `null` if `rfn` is null, if the image is empty, or if decoding fails.
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // output is always 4 bytes per pixel

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  if (decoder.begin_decoding() != JPGD_SUCCESS || image_width < 1 || image_height < 1) return null;

  // bytes per output row
  immutable int dst_bpl = image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  ubyte* pImage_data = img.imageData.bytes.ptr;

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      // the pixel storage belongs to `img` (a GC object): it must not be
      // released with `jpgd_free`, dropping the reference is enough
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;

    if (decoder.num_components == 3) {
      // decoder rows for color images are already 4 bytes per pixel
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale source: replicate luma, opaque alpha
      for (int x = 0; x < image_width; ++x) {
        ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
  }

  return img;
}
3297
3298
3299 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from disk file.
///
/// Params:
///   filename = path of the file to read; it does not have to be zero-terminated
///
/// Returns: whatever `readJpegFromStream` returns for the contents of the file.
///
/// Throws: Exception if `filename` is empty or the file cannot be opened.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  // fopen() needs a zero-terminated string, so build a terminated copy of the name:
  // short names are copied to the stack, long ones to the C heap
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // test the raw pointer: a slice with non-zero length is never `is null`,
    // even when malloc() has failed
    auto tfnptr = cast(char*)malloc(filename.length+1);
    if (tfnptr !is null) {
      scope(exit) free(tfnptr);
      tfnptr[0..filename.length] = filename[];
      tfnptr[filename.length] = 0;
      m_pFile = fopen(tfnptr, "rb");
    }
    // on allocation failure m_pFile stays null and the check below throws
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    // stream callback: returns the number of bytes stored in pBuf, or -1 on error
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      // end of file was already seen by an earlier call
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      // a read error is sticky: keep failing after the first one
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3346
3347
3348 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
///
/// Params:
///   buf = the bytes of the JPEG file; the buffer is only read
///
/// Returns: whatever `readJpegFromStream` returns for the contents of `buf`.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  usize bufpos; // number of bytes of `buf` already handed out to the decoder

  return readJpegFromStream(
    // stream callback: copies the next chunk of `buf` into pBuf and returns its size
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // everything was consumed: report end of stream
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never copy more than what is left in the buffer
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, cast(const(ubyte)*)buf.ptr+bufpos, max_bytes_to_read);
      // advance the read position, so the bounds checks above see the real remainder
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    }
  );
}
3369 // done with arsd API
3370 }
3371
3372
3373 static if (JpegHasIVVFS) {
/// decompress JPEG image from VFS file.
///
/// Params:
///   fl = file to pull the JPEG data from
///
/// Returns: whatever `readJpegFromStream` returns for the data read from `fl`.
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    // stream callback: fills `dest` from `fl` and returns the number of bytes read
    delegate int (void* dest, int wanted, bool* eofReached) {
      // a closed file cannot supply data; -1 is returned instead of a byte count
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto got = fl.rawRead(dest[0..wanted]);
        // tell the caller right away when this read consumed the rest of the file
        if (fl.eof) *eofReached = true;
        return cast(int)got.length;
      }
      // nothing left to read
      *eofReached = true;
      return 0;
    }
  );
}
3388
/// detect JPEG image in VFS file.
///
/// Params:
///   fl = file to pull the data from
///   width = passed through to `detect_jpeg_image_from_stream`
///   height = passed through to `detect_jpeg_image_from_stream`
///   actual_comps = passed through to `detect_jpeg_image_from_stream`
///
/// Returns: the result of `detect_jpeg_image_from_stream`.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    // stream callback: fills `dest` from `fl` and returns the number of bytes read
    delegate int (void* dest, int wanted, bool* eofReached) {
      // a closed file cannot supply data; -1 is returned instead of a byte count
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto got = fl.rawRead(dest[0..wanted]);
        // tell the caller right away when this read consumed the rest of the file
        if (fl.eof) *eofReached = true;
        return cast(int)got.length;
      }
      // nothing left to read
      *eofReached = true;
      return 0;
    },
    width, height, actual_comps);
}
3403 // vfs API
3404 }
3405
3406
3407 // ////////////////////////////////////////////////////////////////////////// //
3408 version(jpegd_test) {
3409 import arsd.color;
3410 import arsd.png;
3411
/// Test driver: decodes one JPEG file twice, first straight from disk and then
/// from a memory copy, printing the detected and decoded sizes and writing the
/// results to "z00.png" and "z01.png".
///
/// Params:
///   args = command line; args[1], when present, is the JPEG file name
///          ("image.jpg" is used otherwise)
void main (string[] args) {
  import std.exception : enforce;
  import std.stdio;
  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // detection is done outside of assert(), so it still runs (and is still
    // checked) when asserts are compiled out
    enforce(detect_jpeg_image_from_file(fname, width, height, comps), "cannot detect JPEG image in file '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      // File.size is ulong; size_t keeps the full size where the platform allows it
      file.length = cast(size_t)fl.size;
      fl.rawRead(file[]);
    }
    enforce(detect_jpeg_image_from_memory(file[], width, height, comps), "cannot detect JPEG image in memory copy of '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3436 }