apps/recorder/jpeg_load.c

/***************************************************************************
*             __________               __   ___.
*   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
*   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
*   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
*   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
*                     \/            \/     \/    \/            \/
* $Id$
*
* JPEG image viewer
* (This is a real mess if it has to be coded in one single C file)
*
* Copyright (C) 2009 Andrew Mahone fractional decode, split IDCT - 16-point
*   IDCT based on IJG jpeg-7 pre-release
* File scrolling addition (C) 2005 Alexander Spyridakis
* Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
* Heavily borrowed from the IJG implementation (C) Thomas G. Lane
* Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/

  30 #include "plugin.h"
  31 #include "debug.h"
  32 #include "jpeg_load.h"
  33 /*#define JPEG_BS_DEBUG*/
  34 /* for portability of below JPEG code */
  35 #define MEMSET(p,v,c) memset(p,v,c)
  36 #define MEMCPY(d,s,c) memcpy(d,s,c)
  37 #define INLINE static inline
  38 #define ENDIAN_SWAP16(n) n /* only for poor little endian machines */
  39
  40 /**************** begin JPEG code ********************/
  41
  42 #ifdef HAVE_LCD_COLOR
  43 typedef struct uint8_rgb jpeg_pix_t;
  44 #else
  45 typedef uint8_t jpeg_pix_t;
  46 #endif
  47 #define JPEG_PIX_SZ (sizeof(jpeg_pix_t))
  48
  49 /* This can't be in jpeg_load.h because plugin.h includes it, and it conflicts
  50  * with the definition in jpeg_decoder.h
  51  */
  52 struct jpeg
  53 {
  54     int fd;
  55     int buf_left;
  56     unsigned char *buf_index;
  57     unsigned long int bitbuf;
  58     int bitbuf_bits;
  59     int marker_ind;
  60     int marker_val;
  61     unsigned char marker;
  62     int x_size, y_size; /* size of image (can be less than block boundary) */
  63     int x_phys, y_phys; /* physical size, block aligned */
  64     int x_mbl; /* x dimension of MBL */
  65     int y_mbl; /* y dimension of MBL */
  66     int blocks; /* blocks per MB */
  67     int restart_interval; /* number of MCUs between RSTm markers */
  68     int restart; /* blocks until next restart marker */
  69     int mcu_row; /* current row relative to first row of this row of MCUs */
  70     unsigned char *out_ptr; /* pointer to current row to output */
  71     int cur_row; /* current row relative to top of image */
  72     int set_rows;
  73     int store_pos[4]; /* for Y block ordering */
  74 #ifdef HAVE_LCD_COLOR
  75     int last_dc_val[3];
  76 #else
  77     int last_dc_val;
  78 #endif
  79     int h_scale[2]; /* horizontal scalefactor = (2**N) / 8 */
  80     int v_scale[2]; /* same as above, for vertical direction */
  81     int k_need[3]; /* per component zig-zag index of last needed coefficient */
  82     int zero_need[3]; /* per compenent number of coefficients to zero */
  83     jpeg_pix_t *img_buf;
  84
  85     int quanttable[4][QUANT_TABLE_LENGTH]; /* raw quantization tables 0-3 */
  86
  87     struct huffman_table hufftable[2]; /* Huffman tables  */
  88     struct derived_tbl dc_derived_tbls[2]; /* Huffman-LUTs */
  89     struct derived_tbl ac_derived_tbls[2];
  90
  91     struct frame_component frameheader[3]; /* Component descriptor */
  92     struct scan_component scanheader[3]; /* currently not used */
  93
  94     int mcu_membership[6]; /* info per block */
  95     int tab_membership[6];
  96     int subsample_x[3]; /* info per component */
  97     int subsample_y[3];
  98     unsigned char buf[JPEG_READ_BUF_SIZE];
  99     struct img_part part;
 100 };
 101
 102 INLINE unsigned range_limit(int value)
 103 {
 104 #if CONFIG_CPU == SH7034
 105     unsigned tmp;
 106     asm (  /* Note: Uses knowledge that only low byte of result is used */
 107         "mov     #-128,%[t]  \n"
 108         "sub     %[t],%[v]   \n"  /* value -= -128; equals value += 128; */
 109         "extu.b  %[v],%[t]   \n"
 110         "cmp/eq  %[v],%[t]   \n"  /* low byte == whole number ? */
 111         "bt      1f          \n"  /* yes: no overflow */
 112         "cmp/pz  %[v]        \n"  /* overflow: positive? */
 113         "subc    %[v],%[v]   \n"  /* %[r] now either 0 or 0xffffffff */
 114     "1:                      \n"
 115         : /* outputs */
 116         [v]"+r"(value),
 117         [t]"=&r"(tmp)
 118     );
 119     return value;
 120 #elif defined(CPU_COLDFIRE)
 121     /* Note: Uses knowledge that only the low byte of the result is used */
 122     asm (
 123         "add.l   #128,%[v]   \n"  /* value += 128; */
 124         "cmp.l   #255,%[v]   \n"  /* overflow? */
 125         "bls.b   1f          \n"  /* no: return value */
 126         /* yes: set low byte to appropriate boundary */
 127         "spl.b   %[v]        \n"
 128     "1:                      \n"
 129         : /* outputs */
 130         [v]"+d"(value)
 131     );
 132     return value;
 133 #elif defined(CPU_ARM)
 134     /* Note: Uses knowledge that only the low byte of the result is used */
 135     asm (
 136         "add     %[v], %[v], #128    \n"  /* value += 128 */
 137         "cmp     %[v], #255          \n"  /* out of range 0..255? */
 138         "mvnhi   %[v], %[v], asr #31 \n"  /* yes: set all bits to ~(sign_bit) */
 139         : /* outputs */
 140         [v]"+r"(value)
 141     );
 142     return value;
 143 #else
 144     value += 128;
 145
 146     if ((unsigned)value <= 255)
 147         return value;
 148
 149     if (value < 0)
 150         return 0;
 151
 152     return 255;
 153 #endif
 154 }
 155
 156 static inline int clamp_component(int x)
 157 {
 158     if ((unsigned)x > 255)
 159         x = x < 0 ? 0 : 255;
 160     return x;
 161 }
 162
 163 /* IDCT implementation */
 164
 165
 166 #define CONST_BITS 13
 167 #define PASS1_BITS 2
 168
 169
 170 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 171 * causing a lot of useless floating-point operations at run time.
 172 * To get around this we use the following pre-calculated constants.
 173 * If you change CONST_BITS you may want to add appropriate values.
 174 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 175 */
 176 #define FIX_0_298631336  2446 /* FIX(0.298631336) */
 177 #define FIX_0_390180644  3196 /* FIX(0.390180644) */
 178 #define FIX_0_541196100  4433 /* FIX(0.541196100) */
 179 #define FIX_0_765366865  6270 /* FIX(0.765366865) */
 180 #define FIX_0_899976223  7373 /* FIX(0.899976223) */
 181 #define FIX_1_175875602  9633 /* FIX(1.175875602) */
 182 #define FIX_1_501321110 12299 /* FIX(1.501321110) */
 183 #define FIX_1_847759065 15137 /* FIX(1.847759065) */
 184 #define FIX_1_961570560 16069 /* FIX(1.961570560) */
 185 #define FIX_2_053119869 16819 /* FIX(2.053119869) */
 186 #define FIX_2_562915447 20995 /* FIX(2.562915447) */
 187 #define FIX_3_072711026 25172 /* FIX(3.072711026) */
 188
 189
 190
 191 /* Multiply an long variable by an long constant to yield an long result.
 192 * For 8-bit samples with the recommended scaling, all the variable
 193 * and constant values involved are no more than 16 bits wide, so a
 194 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 195 * For 12-bit samples, a full 32-bit multiplication will be needed.
 196 */
 197 #define MULTIPLY16(var,const)  (((short) (var)) * ((short) (const)))
 198
 199 #define MULTIPLY(var1, var2) ((var1) * (var2))
 200
 201 /*
 202  * Macros for handling fixed-point arithmetic; these are used by many
 203  * but not all of the DCT/IDCT modules.
 204  *
 205  * All values are expected to be of type INT32.
 206  * Fractional constants are scaled left by CONST_BITS bits.
 207  * CONST_BITS is defined within each module using these macros,
 208  * and may differ from one module to the next.
 209  */
 210 #define ONE ((long)1)
 211 #define CONST_SCALE (ONE << CONST_BITS)
 212
 213 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
 214  * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
 215  * thus causing a lot of useless floating-point operations at run time.
 216  */
 217 #define FIX(x) ((long) ((x) * CONST_SCALE + 0.5))
 218 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 219
 220 /* Descale and correctly round an int value that's scaled by N bits.
 221 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 222 * the fudge factor is correct for either sign of X.
 223 */
 224 #define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
 225
 226 #define DS_OUT ((CONST_BITS)+(PASS1_BITS)+3)
 227
 228 /*
 229  * Conversion of full 0-255 range YCrCb to RGB:
 230  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
 231  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
 232  *   |B|   |1.000000  1.772000  0.000000| |Pr|
 233  * Scaled (yields s15-bit output):
 234  *   |R|   |128    0  179| |Y       |
 235  *   |G| = |128  -43  -91| |Cb - 128|
 236  *   |B|   |128  227    0| |Cr - 128|
 237  */
 238 #define YFAC            128
 239 #define RVFAC           179
 240 #define GUFAC           (-43)
 241 #define GVFAC           (-91)
 242 #define BUFAC           227
 243 #define COMPONENT_SHIFT  15
 244
 245 /* horizontal-pass 1-point IDCT */
 246 static void idct1h(int *ws, unsigned char *out, int rows, int rowstep)
 247 {
 248     int row;
 249     for (row = 0; row < rows; row++)
 250     {
 251         *out = range_limit((int) DESCALE(*ws, DS_OUT));
 252         out += rowstep;
 253         ws += 8;
 254     }
 255 }
 256
 257 /* vertical-pass 2-point IDCT */
 258 static void idct2v(int *ws, int cols)
 259 {
 260     int col;
 261     for (col = 0; col < cols; col++)
 262     {
 263         int tmp1 = ws[0];
 264         int tmp2 = ws[8];
 265         ws[0] = tmp1 + tmp2;
 266         ws[8] = tmp1 - tmp2;
 267         ws++;
 268     }
 269 }
 270
 271 /* horizontal-pass 2-point IDCT */
 272 static void idct2h(int *ws, unsigned char *out, int rows, int rowstep)
 273 {
 274     int row;
 275     for (row = 0; row < rows; row++)
 276     {
 277         int tmp1 = ws[0] + (ONE << (DS_OUT - 1));
 278         int tmp2 = ws[1];
 279         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp1 + tmp2,
 280             DS_OUT));
 281         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp1 - tmp2,
 282             DS_OUT));
 283         out += rowstep;
 284         ws += 8;
 285     }
 286 }
 287
 288 /* vertical-pass 4-point IDCT */
 289 static void idct4v(int *ws, int cols)
 290 {
 291     int tmp0, tmp2, tmp10, tmp12;
 292     int z1, z2, z3;
 293     int col;
 294     for (col = 0; col < cols; col++, ws++)
 295     {
 296         /* Even part */
 297
 298         tmp0 = ws[8*0];
 299         tmp2 = ws[8*2];
 300
 301         tmp10 = (tmp0 + tmp2) << PASS1_BITS;
 302         tmp12 = (tmp0 - tmp2) << PASS1_BITS;
 303
 304         /* Odd part */
 305         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 306
 307         z2 = ws[8*1];
 308         z3 = ws[8*3];
 309
 310         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100) +
 311             (ONE << (CONST_BITS - PASS1_BITS - 1));
 312         tmp0 = RIGHT_SHIFT(z1 + MULTIPLY16(z3, - FIX_1_847759065),
 313             CONST_BITS-PASS1_BITS);
 314         tmp2 = RIGHT_SHIFT(z1 + MULTIPLY16(z2, FIX_0_765366865),
 315             CONST_BITS-PASS1_BITS);
 316
 317         /* Final output stage */
 318
 319         ws[8*0] = (int) (tmp10 + tmp2);
 320         ws[8*3] = (int) (tmp10 - tmp2);
 321         ws[8*1] = (int) (tmp12 + tmp0);
 322         ws[8*2] = (int) (tmp12 - tmp0);
 323     }
 324 }
 325
 326 /* horizontal-pass 4-point IDCT */
 327 static void idct4h(int *ws, unsigned char *out, int rows, int rowstep)
 328 {
 329     int tmp0, tmp2, tmp10, tmp12;
 330     int z1, z2, z3;
 331     int row;
 332     for (row = 0; row < rows; row++, out += rowstep, ws += 8)
 333     {
 334         /* Even part */
 335
 336         tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2));
 337         tmp2 = (int) ws[2];
 338
 339         tmp10 = (tmp0 + tmp2) << CONST_BITS;
 340         tmp12 = (tmp0 - tmp2) << CONST_BITS;
 341
 342         /* Odd part */
 343         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 344
 345         z2 = (int) ws[1];
 346         z3 = (int) ws[3];
 347
 348         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 349         tmp0 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 350         tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 351
 352         /* Final output stage */
 353
 354         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp2,
 355             DS_OUT));
 356         out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp2,
 357             DS_OUT));
 358         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp0,
 359             DS_OUT));
 360         out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp0,
 361             DS_OUT));
 362     }
 363 }
 364
 365 /* vertical-pass 8-point IDCT */
 366 static void idct8v(int *ws, int cols)
 367 {
 368     long tmp0, tmp1, tmp2, tmp3;
 369     long tmp10, tmp11, tmp12, tmp13;
 370     long z1, z2, z3, z4, z5;
 371     int col;
 372     for (col = 0; col < cols; col++, ws++)
 373     {
 374     /* Due to quantization, we will usually find that many of the input
 375     * coefficients are zero, especially the AC terms.  We can exploit this
 376     * by short-circuiting the IDCT calculation for any column in which all
 377     * the AC terms are zero.  In that case each output is equal to the
 378     * DC coefficient (with scale factor as needed).
 379     * With typical images and quantization tables, half or more of the
 380     * column DCT calculations can be simplified this way.
 381     */
 382         if ((ws[8*1] | ws[8*2] | ws[8*3]
 383            | ws[8*4] | ws[8*5] | ws[8*6] | ws[8*7]) == 0)
 384         {
 385             /* AC terms all zero */
 386             int dcval = ws[8*0] << PASS1_BITS;
 387
 388             ws[8*0] = ws[8*1] = ws[8*2] = ws[8*3] = ws[8*4]
 389                        = ws[8*5] = ws[8*6] = ws[8*7] = dcval;
 390             continue;
 391         }
 392
 393         /* Even part: reverse the even part of the forward DCT. */
 394         /* The rotator is sqrt(2)*c(-6). */
 395
 396         z2 = ws[8*2];
 397         z3 = ws[8*6];
 398
 399         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 400         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 401         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 402
 403         z2 = ws[8*0] << CONST_BITS;
 404         z2 += ONE << (CONST_BITS - PASS1_BITS - 1);
 405         z3 = ws[8*4] << CONST_BITS;
 406
 407         tmp0 = (z2 + z3);
 408         tmp1 = (z2 - z3);
 409
 410         tmp10 = tmp0 + tmp3;
 411         tmp13 = tmp0 - tmp3;
 412         tmp11 = tmp1 + tmp2;
 413         tmp12 = tmp1 - tmp2;
 414
 415         /* Odd part per figure 8; the matrix is unitary and hence its
 416            transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. */
 417
 418         tmp0 = ws[8*7];
 419         tmp1 = ws[8*5];
 420         tmp2 = ws[8*3];
 421         tmp3 = ws[8*1];
 422
 423         z1 = tmp0 + tmp3;
 424         z2 = tmp1 + tmp2;
 425         z3 = tmp0 + tmp2;
 426         z4 = tmp1 + tmp3;
 427         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 428
 429         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 430         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 431         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 432         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 433         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 434         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 435         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 436         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 437
 438         z3 += z5;
 439         z4 += z5;
 440
 441         tmp0 += z1 + z3;
 442         tmp1 += z2 + z4;
 443         tmp2 += z2 + z3;
 444         tmp3 += z1 + z4;
 445
 446         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 447
 448         ws[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 449         ws[8*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 450         ws[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 451         ws[8*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 452         ws[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 453         ws[8*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 454         ws[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 455         ws[8*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 456     }
 457 }
 458
 459 /* horizontal-pass 8-point IDCT */
 460 static void idct8h(int *ws, unsigned char *out, int rows, int rowstep)
 461 {
 462     long tmp0, tmp1, tmp2, tmp3;
 463     long tmp10, tmp11, tmp12, tmp13;
 464     long z1, z2, z3, z4, z5;
 465     int row;
 466     for (row = 0; row < rows; row++, out += rowstep, ws += 8)
 467     {
 468         /* Rows of zeroes can be exploited in the same way as we did with
 469          * columns. However, the column calculation has created many nonzero AC
 470          * terms, so the simplification applies less often (typically 5% to 10%
 471          * of the time). On machines with very fast multiplication, it's
 472          * possible that the test takes more time than it's worth.  In that
 473          * case this section may be commented out.
 474         */
 475
 476 #ifndef NO_ZERO_ROW_TEST
 477         if ((ws[1] | ws[2] | ws[3]
 478            | ws[4] | ws[5] | ws[6] | ws[7]) == 0)
 479         {
 480             /* AC terms all zero */
 481             unsigned char dcval = range_limit((int) DESCALE((long) ws[0],
 482                 PASS1_BITS+3));
 483
 484             out[JPEG_PIX_SZ*0] = dcval;
 485             out[JPEG_PIX_SZ*1] = dcval;
 486             out[JPEG_PIX_SZ*2] = dcval;
 487             out[JPEG_PIX_SZ*3] = dcval;
 488             out[JPEG_PIX_SZ*4] = dcval;
 489             out[JPEG_PIX_SZ*5] = dcval;
 490             out[JPEG_PIX_SZ*6] = dcval;
 491             out[JPEG_PIX_SZ*7] = dcval;
 492             continue;
 493         }
 494 #endif
 495
 496         /* Even part: reverse the even part of the forward DCT. */
 497         /* The rotator is sqrt(2)*c(-6). */
 498
 499         z2 = (long) ws[2];
 500         z3 = (long) ws[6];
 501
 502         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 503         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 504         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 505
 506         z4 = (long) ws[0] + (ONE << (PASS1_BITS + 2));
 507         z4 <<= CONST_BITS;
 508         z5 = (long) ws[4] << CONST_BITS;
 509         tmp0 = z4 + z5;
 510         tmp1 = z4 - z5;
 511
 512         tmp10 = tmp0 + tmp3;
 513         tmp13 = tmp0 - tmp3;
 514         tmp11 = tmp1 + tmp2;
 515         tmp12 = tmp1 - tmp2;
 516
 517         /* Odd part per figure 8; the matrix is unitary and hence its
 518         * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */
 519
 520         tmp0 = (long) ws[7];
 521         tmp1 = (long) ws[5];
 522         tmp2 = (long) ws[3];
 523         tmp3 = (long) ws[1];
 524
 525         z1 = tmp0 + tmp3;
 526         z2 = tmp1 + tmp2;
 527         z3 = tmp0 + tmp2;
 528         z4 = tmp1 + tmp3;
 529         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 530
 531         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 532         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 533         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 534         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 535         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 536         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 537         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 538         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 539
 540         z3 += z5;
 541         z4 += z5;
 542
 543         tmp0 += z1 + z3;
 544         tmp1 += z2 + z4;
 545         tmp2 += z2 + z3;
 546         tmp3 += z1 + z4;
 547
 548         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 549
 550         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp3,
 551             DS_OUT));
 552         out[JPEG_PIX_SZ*7] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp3,
 553             DS_OUT));
 554         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp11 + tmp2,
 555             DS_OUT));
 556         out[JPEG_PIX_SZ*6] = range_limit((int) RIGHT_SHIFT(tmp11 - tmp2,
 557             DS_OUT));
 558         out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp1,
 559             DS_OUT));
 560         out[JPEG_PIX_SZ*5] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp1,
 561             DS_OUT));
 562         out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp13 + tmp0,
 563             DS_OUT));
 564         out[JPEG_PIX_SZ*4] = range_limit((int) RIGHT_SHIFT(tmp13 - tmp0,
 565             DS_OUT));
 566     }
 567 }
 568
 569 /* vertical-pass 16-point IDCT */
 570 static void idct16v(int *ws, int cols)
 571 {
 572     long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
 573     long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
 574     long z1, z2, z3, z4;
 575     int col;
 576     for (col = 0; col < cols; col++, ws++)
 577     {
 578         /* Even part */
 579
 580         tmp0 = ws[8*0] << CONST_BITS;
 581         /* Add fudge factor here for final descale. */
 582         tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
 583
 584         z1 = ws[8*4];
 585         tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
 586         tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
 587
 588         tmp10 = tmp0 + tmp1;
 589         tmp11 = tmp0 - tmp1;
 590         tmp12 = tmp0 + tmp2;
 591         tmp13 = tmp0 - tmp2;
 592
 593         z1 = ws[8*2];
 594         z2 = ws[8*6];
 595         z3 = z1 - z2;
 596         z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
 597         z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
 598
 599         /* (c6+c2)[16] = (c3+c1)[8] */
 600         tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
 601         /* (c6-c14)[16] = (c3-c7)[8] */
 602         tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
 603         /* (c2-c10)[16] = (c1-c5)[8] */
 604         tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
 605         /* (c10-c14)[16] = (c5-c7)[8] */
 606         tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
 607
 608         tmp20 = tmp10 + tmp0;
 609         tmp27 = tmp10 - tmp0;
 610         tmp21 = tmp12 + tmp1;
 611         tmp26 = tmp12 - tmp1;
 612         tmp22 = tmp13 + tmp2;
 613         tmp25 = tmp13 - tmp2;
 614         tmp23 = tmp11 + tmp3;
 615         tmp24 = tmp11 - tmp3;
 616
 617         /* Odd part */
 618
 619         z1 = ws[8*1];
 620         z2 = ws[8*3];
 621         z3 = ws[8*5];
 622         z4 = ws[8*7];
 623
 624         tmp11 = z1 + z3;
 625
 626         tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
 627         tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
 628         tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
 629         tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
 630         tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
 631         tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
 632         tmp0  = tmp1 + tmp2 + tmp3 -
 633             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
 634         tmp13 = tmp10 + tmp11 + tmp12 -
 635             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
 636         z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
 637         tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
 638         tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
 639         z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
 640         tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
 641         tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
 642         z2    += z4;
 643         z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
 644         tmp1  += z1;
 645         tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
 646         z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
 647         tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
 648         tmp12 += z2;
 649         z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
 650         tmp2  += z2;
 651         tmp3  += z2;
 652         z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
 653         tmp10 += z2;
 654         tmp11 += z2;
 655
 656         /* Final output stage */
 657         ws[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
 658         ws[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
 659         ws[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
 660         ws[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
 661         ws[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
 662         ws[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
 663         ws[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
 664         ws[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
 665         ws[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
 666         ws[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
 667         ws[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
 668         ws[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
 669         ws[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
 670         ws[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
 671         ws[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
 672         ws[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
 673     }
 674 }
 675
 676 /* horizontal-pass 16-point IDCT */
 677 static void idct16h(int *ws, unsigned char *out, int rows, int rowstep)
 678 {
 679     long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
 680     long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
 681     long z1, z2, z3, z4;
 682     int row;
 683     for (row = 0; row < rows; row++, out += rowstep, ws += 8)
 684     {
 685         /* Even part */
 686
 687         /* Add fudge factor here for final descale. */
 688         tmp0 = (long) ws[0] + (ONE << (PASS1_BITS+2));
 689         tmp0 <<= CONST_BITS;
 690
 691         z1 = (long) ws[4];
 692         tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
 693         tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
 694
 695         tmp10 = tmp0 + tmp1;
 696         tmp11 = tmp0 - tmp1;
 697         tmp12 = tmp0 + tmp2;
 698         tmp13 = tmp0 - tmp2;
 699
 700         z1 = (long) ws[2];
 701         z2 = (long) ws[6];
 702         z3 = z1 - z2;
 703         z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
 704         z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
 705
 706         /* (c6+c2)[16] = (c3+c1)[8] */
 707         tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
 708         /* (c6-c14)[16] = (c3-c7)[8] */
 709         tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
 710         /* (c2-c10)[16] = (c1-c5)[8] */
 711         tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
 712         /* (c10-c14)[16] = (c5-c7)[8] */
 713         tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
 714
 715         tmp20 = tmp10 + tmp0;
 716         tmp27 = tmp10 - tmp0;
 717         tmp21 = tmp12 + tmp1;
 718         tmp26 = tmp12 - tmp1;
 719         tmp22 = tmp13 + tmp2;
 720         tmp25 = tmp13 - tmp2;
 721         tmp23 = tmp11 + tmp3;
 722         tmp24 = tmp11 - tmp3;
 723
 724         /* Odd part */
 725
 726         z1 = (long) ws[1];
 727         z2 = (long) ws[3];
 728         z3 = (long) ws[5];
 729         z4 = (long) ws[7];
 730
 731         tmp11 = z1 + z3;
 732
 733         tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
 734         tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
 735         tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
 736         tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
 737         tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
 738         tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
 739         tmp0  = tmp1 + tmp2 + tmp3 -
 740             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
 741         tmp13 = tmp10 + tmp11 + tmp12 -
 742             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
 743         z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
 744         tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
 745         tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
 746         z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
 747         tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
 748         tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
 749         z2    += z4;
 750         z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
 751         tmp1  += z1;
 752         tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
 753         z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
 754         tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
 755         tmp12 += z2;
 756         z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
 757         tmp2  += z2;
 758         tmp3  += z2;
 759         z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
 760         tmp10 += z2;
 761         tmp11 += z2;
 762
 763         /* Final output stage */
 764
 765         out[JPEG_PIX_SZ*0]  = range_limit((int) RIGHT_SHIFT(tmp20 + tmp0,
 766             DS_OUT));
 767         out[JPEG_PIX_SZ*15] = range_limit((int) RIGHT_SHIFT(tmp20 - tmp0,
 768             DS_OUT));
 769         out[JPEG_PIX_SZ*1]  = range_limit((int) RIGHT_SHIFT(tmp21 + tmp1,
 770             DS_OUT));
 771         out[JPEG_PIX_SZ*14] = range_limit((int) RIGHT_SHIFT(tmp21 - tmp1,
 772             DS_OUT));
 773         out[JPEG_PIX_SZ*2]  = range_limit((int) RIGHT_SHIFT(tmp22 + tmp2,
 774             DS_OUT));
 775         out[JPEG_PIX_SZ*13] = range_limit((int) RIGHT_SHIFT(tmp22 - tmp2,
 776             DS_OUT));
 777         out[JPEG_PIX_SZ*3]  = range_limit((int) RIGHT_SHIFT(tmp23 + tmp3,
 778             DS_OUT));
 779         out[JPEG_PIX_SZ*12] = range_limit((int) RIGHT_SHIFT(tmp23 - tmp3,
 780             DS_OUT));
 781         out[JPEG_PIX_SZ*4]  = range_limit((int) RIGHT_SHIFT(tmp24 + tmp10,
 782             DS_OUT));
 783         out[JPEG_PIX_SZ*11] = range_limit((int) RIGHT_SHIFT(tmp24 - tmp10,
 784             DS_OUT));
 785         out[JPEG_PIX_SZ*5]  = range_limit((int) RIGHT_SHIFT(tmp25 + tmp11,
 786             DS_OUT));
 787         out[JPEG_PIX_SZ*10] = range_limit((int) RIGHT_SHIFT(tmp25 - tmp11,
 788             DS_OUT));
 789         out[JPEG_PIX_SZ*6]  = range_limit((int) RIGHT_SHIFT(tmp26 + tmp12,
 790             DS_OUT));
 791         out[JPEG_PIX_SZ*9]  = range_limit((int) RIGHT_SHIFT(tmp26 - tmp12,
 792             DS_OUT));
 793         out[JPEG_PIX_SZ*7]  = range_limit((int) RIGHT_SHIFT(tmp27 + tmp13,
 794             DS_OUT));
 795         out[JPEG_PIX_SZ*8]  = range_limit((int) RIGHT_SHIFT(tmp27 - tmp13,
 796             DS_OUT));
 797     }
 798 }
 799
/* Describes the pair of IDCT routines available for one output size, plus
 * the extra left shifts that fix_quant_tables() folds into the quantization
 * table when an entry is selected for the vertical or horizontal pass.
 */
struct idct_entry {
    /* shift added to the quant table when this entry is the vertical pass */
    int v_scale;
    /* shift added to the quant table when this entry is the horizontal pass */
    int h_scale;
    /* vertical-pass routine (going by the name); works in place on ws */
    void (*v_idct)(int *ws, int cols);
    /* horizontal-pass routine (going by the name); writes pixels to out */
    void (*h_idct)(int *ws, unsigned char *out, int rows, int rowstep);
};
 806
/* IDCT routines, looked up by fix_quant_tables() with the per-component
 * p_jpeg->v_scale[] / p_jpeg->h_scale[] values. Going by the routine names,
 * entry k holds the 2^k-point transform. Only the first two entries carry the
 * PASS1_BITS / CONST_BITS shifts; the remaining ones use 0. The first entry
 * has no vertical routine (NULL).
 */
struct idct_entry idct_tbl[] = {
    { PASS1_BITS, CONST_BITS, NULL, idct1h },
    { PASS1_BITS, CONST_BITS, idct2v, idct2h },
    { 0, 0, idct4v, idct4h },
    { 0, 0, idct8v, idct8h },
    { 0, 0, idct16v, idct16h },
};
 814
 815 /* JPEG decoder implementation */
 816
 817 INLINE void fill_buf(struct jpeg* p_jpeg)
 818 {
 819         p_jpeg->buf_left = read(p_jpeg->fd, p_jpeg->buf, JPEG_READ_BUF_SIZE);
 820         p_jpeg->buf_index = p_jpeg->buf;
 821 }
 822
 823 static unsigned char *getc(struct jpeg* p_jpeg)
 824 {
 825     if (p_jpeg->buf_left < 1)
 826         fill_buf(p_jpeg);
 827     if (p_jpeg->buf_left < 1)
 828         return NULL;
 829     p_jpeg->buf_left--;
 830     return p_jpeg->buf_index++;
 831 }
 832
 833 INLINE bool skip_bytes_seek(struct jpeg* p_jpeg)
 834 {
 835     if (lseek(p_jpeg->fd, -p_jpeg->buf_left, SEEK_CUR) < 0)
 836         return false;
 837     p_jpeg->buf_left = 0;
 838     return true;
 839 }
 840
 841 static bool skip_bytes(struct jpeg* p_jpeg, int count)
 842 {
 843     p_jpeg->buf_left -= count;
 844     p_jpeg->buf_index += count;
 845     return p_jpeg->buf_left >= 0 || skip_bytes_seek(p_jpeg);
 846 }
 847
 848 #define e_skip_bytes(jpeg, count) \
 849 do {\
 850     if (!skip_bytes((jpeg),(count))) \
 851         return -1; \
 852 } while (0)
 853
 854 #define e_getc(jpeg, code) \
 855 ({ \
 856     unsigned char *c; \
 857     if (!(c = getc(jpeg))) \
 858         return (code); \
 859     *c; \
 860 })
 861
 862 #define d_getc(jpeg, def) \
 863 ({ \
 864     unsigned char *cp = getc(jpeg); \
 865     unsigned char c = cp ? *cp : (def); \
 866     c; \
 867 })
 868
 869 static void putc(struct jpeg* p_jpeg)
 870 {
 871     p_jpeg->buf_left++;
 872     p_jpeg->buf_index--;
 873 }
 874
 875 /* Preprocess the JPEG JFIF file */
 876 static int process_markers(struct jpeg* p_jpeg)
 877 {
 878     unsigned char c;
 879     int marker_size; /* variable length of marker segment */
 880     int i, j, n;
 881     int ret = 0; /* returned flags */
 882
 883     while ((c = e_getc(p_jpeg, -1)))
 884     {
 885         if (c != 0xFF) /* no marker? */
 886         {
 887             putc(p_jpeg);
 888             break; /* exit marker processing */
 889         }
 890
 891         c = e_getc(p_jpeg, -1);
 892         switch (c)
 893         {
 894         case 0xFF: /* Fill byte */
 895             ret |= FILL_FF;
 896         case 0x00: /* Zero stuffed byte - entropy data */
 897             putc(p_jpeg);
 898             continue;
 899
 900         case 0xC0: /* SOF Huff  - Baseline DCT */
 901             {
 902                 ret |= SOF0;
 903                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 904                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 905                 n = e_getc(p_jpeg, -1); /* sample precision (= 8 or 12) */
 906                 if (n != 8)
 907                 {
 908                     return(-1); /* Unsupported sample precision */
 909                 }
 910                 p_jpeg->y_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 911                 p_jpeg->y_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 912                 p_jpeg->x_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 913                 p_jpeg->x_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 914
 915                 n = (marker_size-2-6)/3;
 916                 if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
 917                 {
 918                     return(-2); /* Unsupported SOF0 component specification */
 919                 }
 920                 for (i=0; i<n; i++)
 921                 {
 922                     /* Component info */
 923                     p_jpeg->frameheader[i].ID = e_getc(p_jpeg, -1);
 924                     p_jpeg->frameheader[i].horizontal_sampling =
 925                         (c = e_getc(p_jpeg, -1)) >> 4;
 926                     p_jpeg->frameheader[i].vertical_sampling = c & 0x0F;
 927                     p_jpeg->frameheader[i].quanttable_select =
 928                         e_getc(p_jpeg, -1);
 929                     if (p_jpeg->frameheader[i].horizontal_sampling > 2
 930                      || p_jpeg->frameheader[i].vertical_sampling > 2)
 931                     return -3; /* Unsupported SOF0 subsampling */
 932                 }
 933                 p_jpeg->blocks = n;
 934             }
 935             break;
 936
 937         case 0xC1: /* SOF Huff  - Extended sequential DCT*/
 938         case 0xC2: /* SOF Huff  - Progressive DCT*/
 939         case 0xC3: /* SOF Huff  - Spatial (sequential) lossless*/
 940         case 0xC5: /* SOF Huff  - Differential sequential DCT*/
 941         case 0xC6: /* SOF Huff  - Differential progressive DCT*/
 942         case 0xC7: /* SOF Huff  - Differential spatial*/
 943         case 0xC8: /* SOF Arith - Reserved for JPEG extensions*/
 944         case 0xC9: /* SOF Arith - Extended sequential DCT*/
 945         case 0xCA: /* SOF Arith - Progressive DCT*/
 946         case 0xCB: /* SOF Arith - Spatial (sequential) lossless*/
 947         case 0xCD: /* SOF Arith - Differential sequential DCT*/
 948         case 0xCE: /* SOF Arith - Differential progressive DCT*/
 949         case 0xCF: /* SOF Arith - Differential spatial*/
 950             {
 951                 return (-4); /* other DCT model than baseline not implemented */
 952             }
 953
 954         case 0xC4: /* Define Huffman Table(s) */
 955             {
 956                 ret |= DHT;
 957                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 958                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 959                 marker_size -= 2;
 960
 961                 while (marker_size > 17) /* another table */
 962                 {
 963                     c = e_getc(p_jpeg, -1);
 964                     marker_size--;
 965                     int sum = 0;
 966                     i = c & 0x0F; /* table index */
 967                     if (i > 1)
 968                     {
 969                         return (-5); /* Huffman table index out of range */
 970                     } else {
 971                         if (c & 0xF0) /* AC table */
 972                         {
 973                             for (j=0; j<16; j++)
 974                             {
 975                                 p_jpeg->hufftable[i].huffmancodes_ac[j] =
 976                                     (c = e_getc(p_jpeg, -1));
 977                                 sum += c;
 978                                 marker_size -= 1;
 979                             }
 980                             if(16 + sum > AC_LEN)
 981                                 return -10; /* longer than allowed */
 982
 983                             for (; j < 16 + sum; j++)
 984                             {
 985                                 p_jpeg->hufftable[i].huffmancodes_ac[j] =
 986                                     e_getc(p_jpeg, -1);
 987                                 marker_size--;
 988                             }
 989                         }
 990                         else /* DC table */
 991                         {
 992                             for (j=0; j<16; j++)
 993                             {
 994                                 p_jpeg->hufftable[i].huffmancodes_dc[j] =
 995                                     (c = e_getc(p_jpeg, -1));
 996                                 sum += c;
 997                                 marker_size--;
 998                             }
 999                             if(16 + sum > DC_LEN)
1000                                 return -11; /* longer than allowed */
1001
1002                             for (; j < 16 + sum; j++)
1003                             {
1004                                 p_jpeg->hufftable[i].huffmancodes_dc[j] =
1005                                     e_getc(p_jpeg, -1);
1006                                 marker_size--;
1007                             }
1008                         }
1009                     }
1010                 } /* while */
1011                 e_skip_bytes(p_jpeg, marker_size);
1012             }
1013             break;
1014
1015         case 0xCC: /* Define Arithmetic coding conditioning(s) */
1016             return(-6); /* Arithmetic coding not supported */
1017
1018         case 0xD8: /* Start of Image */
1019         case 0xD9: /* End of Image */
1020         case 0x01: /* for temp private use arith code */
1021             break; /* skip parameterless marker */
1022
1023
1024         case 0xDA: /* Start of Scan */
1025             {
1026                 ret |= SOS;
1027                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1028                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1029                 marker_size -= 2;
1030
1031                 n = (marker_size-1-3)/2;
1032                 if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
1033                 {
1034                     return (-7); /* Unsupported SOS component specification */
1035                 }
1036                 marker_size--;
1037                 for (i=0; i<n; i++)
1038                 {
1039                     p_jpeg->scanheader[i].ID = e_getc(p_jpeg, -1);
1040                     p_jpeg->scanheader[i].DC_select = (c = e_getc(p_jpeg, -1))
1041                         >> 4;
1042                     p_jpeg->scanheader[i].AC_select = c & 0x0F;
1043                     marker_size -= 2;
1044                 }
1045                 /* skip spectral information */
1046                 e_skip_bytes(p_jpeg, marker_size);
1047             }
1048             break;
1049
1050         case 0xDB: /* Define quantization Table(s) */
1051             {
1052                 ret |= DQT;
1053                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1054                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1055                 marker_size -= 2;
1056
1057                 n = (marker_size)/(QUANT_TABLE_LENGTH+1); /* # of tables */
1058                 for (i=0; i<n; i++)
1059                 {
1060                     int id = e_getc(p_jpeg, -1); /* ID */
1061                     marker_size--;
1062                     if (id >= 4)
1063                     {
1064                         return (-8); /* Unsupported quantization table */
1065                     }
1066                     /* Read Quantisation table: */
1067                     for (j=0; j<QUANT_TABLE_LENGTH; j++)
1068                     {
1069                         p_jpeg->quanttable[id][j] = e_getc(p_jpeg, -1);
1070                         marker_size--;
1071                     }
1072                 }
1073                 e_skip_bytes(p_jpeg, marker_size);
1074             }
1075             break;
1076
1077         case 0xDD: /* Define Restart Interval */
1078             {
1079                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1080                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1081                 marker_size -= 4;
1082                 /* Highbyte */
1083                 p_jpeg->restart_interval = e_getc(p_jpeg, -1) << 8;
1084                 p_jpeg->restart_interval |= e_getc(p_jpeg, -1); /* Lowbyte */
1085                 e_skip_bytes(p_jpeg, marker_size); /* skip segment */
1086             }
1087             break;
1088
1089         case 0xDC: /* Define Number of Lines */
1090         case 0xDE: /* Define Hierarchical progression */
1091         case 0xDF: /* Expand Reference Component(s) */
1092         case 0xE0: /* Application Field 0*/
1093         case 0xE1: /* Application Field 1*/
1094         case 0xE2: /* Application Field 2*/
1095         case 0xE3: /* Application Field 3*/
1096         case 0xE4: /* Application Field 4*/
1097         case 0xE5: /* Application Field 5*/
1098         case 0xE6: /* Application Field 6*/
1099         case 0xE7: /* Application Field 7*/
1100         case 0xE8: /* Application Field 8*/
1101         case 0xE9: /* Application Field 9*/
1102         case 0xEA: /* Application Field 10*/
1103         case 0xEB: /* Application Field 11*/
1104         case 0xEC: /* Application Field 12*/
1105         case 0xED: /* Application Field 13*/
1106         case 0xEE: /* Application Field 14*/
1107         case 0xEF: /* Application Field 15*/
1108         case 0xFE: /* Comment */
1109             {
1110                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1111                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1112                 marker_size -= 2;
1113                 e_skip_bytes(p_jpeg, marker_size); /* skip segment */
1114             }
1115             break;
1116
1117         case 0xF0: /* Reserved for JPEG extensions */
1118         case 0xF1: /* Reserved for JPEG extensions */
1119         case 0xF2: /* Reserved for JPEG extensions */
1120         case 0xF3: /* Reserved for JPEG extensions */
1121         case 0xF4: /* Reserved for JPEG extensions */
1122         case 0xF5: /* Reserved for JPEG extensions */
1123         case 0xF6: /* Reserved for JPEG extensions */
1124         case 0xF7: /* Reserved for JPEG extensions */
1125         case 0xF8: /* Reserved for JPEG extensions */
1126         case 0xF9: /* Reserved for JPEG extensions */
1127         case 0xFA: /* Reserved for JPEG extensions */
1128         case 0xFB: /* Reserved for JPEG extensions */
1129         case 0xFC: /* Reserved for JPEG extensions */
1130         case 0xFD: /* Reserved for JPEG extensions */
1131         case 0x02: /* Reserved */
1132         default:
1133             return (-9); /* Unknown marker */
1134         } /* switch */
1135     } /* while */
1136
1137     return (ret); /* return flags with seen markers */
1138 }
1139
/* Built-in Huffman tables that default_huff_tbl() copies into hufftable[0]
 * (luminance, going by the name). Each member uses the layout fix_huff_tbl()
 * reads: 16 counts (number of codes of length 1..16) followed by the symbol
 * values in code order.
 */
static const struct huffman_table luma_table =
{
    /* first member: 16 counts + 12 symbols
     * (presumably the DC table - confirm against struct huffman_table) */
    {
        0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
        0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
    },
    /* second member: 16 counts + 162 symbols
     * (presumably the AC table - confirm against struct huffman_table) */
    {
        0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,
        0x01,0x7D,0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,
        0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,
        0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,
        0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,0x29,0x2A,0x34,0x35,0x36,0x37,
        0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,
        0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,
        0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
        0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,
        0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,
        0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,
        0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,
        0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
    }
};
1162
/* Built-in Huffman tables that default_huff_tbl() copies into hufftable[1]
 * (chrominance, going by the name). Each member uses the layout
 * fix_huff_tbl() reads: 16 counts (number of codes of length 1..16) followed
 * by the symbol values in code order.
 */
static const struct huffman_table chroma_table =
{
    /* first member: 16 counts + 12 symbols
     * (presumably the DC table - confirm against struct huffman_table) */
    {
        0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,
        0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
    },
    /* second member: 16 counts + 162 symbols
     * (presumably the AC table - confirm against struct huffman_table) */
    {
        0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,
        0x02,0x77,0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,
        0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,
        0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,
        0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,0x27,0x28,0x29,0x2A,0x35,0x36,
        0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,
        0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,
        0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
        0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,
        0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,
        0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,
        0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,
        0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
    }
};
1185
1186 static void default_huff_tbl(struct jpeg* p_jpeg)
1187 {
1188
1189     MEMCPY(&p_jpeg->hufftable[0], &luma_table, sizeof(luma_table));
1190     MEMCPY(&p_jpeg->hufftable[1], &chroma_table, sizeof(chroma_table));
1191
1192     return;
1193 }
1194
1195 /* Compute the derived values for a Huffman table */
1196 static void fix_huff_tbl(int* htbl, struct derived_tbl* dtbl)
1197 {
1198     int p, i, l, si;
1199     int lookbits, ctr;
1200     char huffsize[257];
1201     unsigned int huffcode[257];
1202     unsigned int code;
1203
1204     dtbl->pub = htbl; /* fill in back link */
1205
1206     /* Figure C.1: make table of Huffman code length for each symbol */
1207     /* Note that this is in code-length order. */
1208
1209     p = 0;
1210     for (l = 1; l <= 16; l++)
1211     {    /* all possible code length */
1212         for (i = 1; i <= (int) htbl[l-1]; i++)  /* all codes per length */
1213             huffsize[p++] = (char) l;
1214     }
1215     huffsize[p] = 0;
1216
1217     /* Figure C.2: generate the codes themselves */
1218     /* Note that this is in code-length order. */
1219
1220     code = 0;
1221     si = huffsize[0];
1222     p = 0;
1223     while (huffsize[p])
1224     {
1225         while (((int) huffsize[p]) == si)
1226         {
1227             huffcode[p++] = code;
1228             code++;
1229         }
1230         code <<= 1;
1231         si++;
1232     }
1233
1234     /* Figure F.15: generate decoding tables for bit-sequential decoding */
1235
1236     p = 0;
1237     for (l = 1; l <= 16; l++)
1238     {
1239         if (htbl[l-1])
1240         {
1241             /* huffval[] index of 1st symbol of code length l */
1242             dtbl->valptr[l] = p;
1243             dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
1244             p += htbl[l-1];
1245             dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
1246         }
1247         else
1248         {
1249             dtbl->maxcode[l] = -1;  /* -1 if no codes of this length */
1250         }
1251     }
1252     dtbl->maxcode[17] = 0xFFFFFL; /* ensures huff_DECODE terminates */
1253
1254     /* Compute lookahead tables to speed up decoding.
1255     * First we set all the table entries to 0, indicating "too long";
1256     * then we iterate through the Huffman codes that are short enough and
1257     * fill in all the entries that correspond to bit sequences starting
1258     * with that code.
1259     */
1260
1261     MEMSET(dtbl->look_nbits, 0, sizeof(dtbl->look_nbits));
1262
1263     p = 0;
1264     for (l = 1; l <= HUFF_LOOKAHEAD; l++)
1265     {
1266         for (i = 1; i <= (int) htbl[l-1]; i++, p++)
1267         {
1268             /* l = current code's length, p = its index in huffcode[] &
1269              * huffval[]. Generate left-justified code followed by all possible
1270              * bit sequences
1271              */
1272             lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
1273             for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--)
1274             {
1275                 dtbl->look_nbits[lookbits] = l;
1276                 dtbl->look_sym[lookbits] = htbl[16+p];
1277                 lookbits++;
1278             }
1279         }
1280     }
1281 }
1282
1283
/* zag[i] is the natural-order position of the i'th element of zigzag order.
 * If the incoming data is corrupted, decode_mcu could attempt to
 * reference values beyond the end of the array.  To avoid a wild store,
 * the array is 16 entries longer than the 64 real ones; the extra entries
 * are left to implicit zero initialization (for the case k>63).
 */
static const unsigned char zag[64 + 16] =
{
     0,  1,  8, 16,  9,  2,  3, 10,     /* zigzag positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,     /*                   8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,     /*                  16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,     /*                  24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,     /*                  32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,     /*                  40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,     /*                  48..55 */
    53, 60, 61, 54, 47, 55, 62, 63      /*                  56..63 */
};
1302
/* zig[i] is the zig-zag order position of the i'th element of natural
 * order, reading left-to-right then top-to-bottom (the inverse of the first
 * 64 entries of zag[]). One line below is one row of the 8x8 block.
 */
static const unsigned char zig[64] =
{
     0,  1,  5,  6, 14, 15, 27, 28,     /* row 0 */
     2,  4,  7, 13, 16, 26, 29, 42,     /* row 1 */
     3,  8, 12, 17, 25, 30, 41, 43,     /* row 2 */
     9, 11, 18, 24, 31, 40, 44, 53,     /* row 3 */
    10, 19, 23, 32, 39, 45, 52, 54,     /* row 4 */
    20, 22, 33, 38, 46, 51, 55, 60,     /* row 5 */
    21, 34, 37, 47, 50, 56, 59, 61,     /* row 6 */
    35, 36, 48, 49, 57, 58, 62, 63      /* row 7 */
};
1317
1318 /* Reformat some image header data so that the decoder can use it properly. */
1319 INLINE void fix_headers(struct jpeg* p_jpeg)
1320 {
1321     int i;
1322
1323     for (i=0; i<4; i++)
1324         p_jpeg->store_pos[i] = i; /* default ordering */
1325
1326     /* assignments for the decoding of blocks */
1327     if (p_jpeg->frameheader[0].horizontal_sampling == 2
1328         && p_jpeg->frameheader[0].vertical_sampling == 1)
1329     {   /* 4:2:2 */
1330         p_jpeg->blocks = 4;
1331         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
1332         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
1333         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
1334         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
1335         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
1336         p_jpeg->mcu_membership[1] = 0;
1337         p_jpeg->mcu_membership[2] = 1;
1338         p_jpeg->mcu_membership[3] = 2;
1339         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
1340         p_jpeg->tab_membership[1] = 0;
1341         p_jpeg->tab_membership[2] = 1;
1342         p_jpeg->tab_membership[3] = 1;
1343         p_jpeg->subsample_x[0] = 1;
1344         p_jpeg->subsample_x[1] = 2;
1345         p_jpeg->subsample_x[2] = 2;
1346         p_jpeg->subsample_y[0] = 1;
1347         p_jpeg->subsample_y[1] = 1;
1348         p_jpeg->subsample_y[2] = 1;
1349     }
1350     if (p_jpeg->frameheader[0].horizontal_sampling == 1
1351         && p_jpeg->frameheader[0].vertical_sampling == 2)
1352     {   /* 4:2:2 vertically subsampled */
1353         p_jpeg->store_pos[1] = 2; /* block positions are mirrored */
1354         p_jpeg->store_pos[2] = 1;
1355         p_jpeg->blocks = 4;
1356         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
1357         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
1358         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
1359         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
1360         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
1361         p_jpeg->mcu_membership[1] = 0;
1362         p_jpeg->mcu_membership[2] = 1;
1363         p_jpeg->mcu_membership[3] = 2;
1364         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
1365         p_jpeg->tab_membership[1] = 0;
1366         p_jpeg->tab_membership[2] = 1;
1367         p_jpeg->tab_membership[3] = 1;
1368         p_jpeg->subsample_x[0] = 1;
1369         p_jpeg->subsample_x[1] = 1;
1370         p_jpeg->subsample_x[2] = 1;
1371         p_jpeg->subsample_y[0] = 1;
1372         p_jpeg->subsample_y[1] = 2;
1373         p_jpeg->subsample_y[2] = 2;
1374     }
1375     else if (p_jpeg->frameheader[0].horizontal_sampling == 2
1376         && p_jpeg->frameheader[0].vertical_sampling == 2)
1377     {   /* 4:2:0 */
1378         p_jpeg->blocks = 6;
1379         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
1380         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
1381         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
1382         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
1383         p_jpeg->mcu_membership[0] = 0;
1384         p_jpeg->mcu_membership[1] = 0;
1385         p_jpeg->mcu_membership[2] = 0;
1386         p_jpeg->mcu_membership[3] = 0;
1387         p_jpeg->mcu_membership[4] = 1;
1388         p_jpeg->mcu_membership[5] = 2;
1389         p_jpeg->tab_membership[0] = 0;
1390         p_jpeg->tab_membership[1] = 0;
1391         p_jpeg->tab_membership[2] = 0;
1392         p_jpeg->tab_membership[3] = 0;
1393         p_jpeg->tab_membership[4] = 1;
1394         p_jpeg->tab_membership[5] = 1;
1395         p_jpeg->subsample_x[0] = 1;
1396         p_jpeg->subsample_x[1] = 2;
1397         p_jpeg->subsample_x[2] = 2;
1398         p_jpeg->subsample_y[0] = 1;
1399         p_jpeg->subsample_y[1] = 2;
1400         p_jpeg->subsample_y[2] = 2;
1401     }
1402     else if (p_jpeg->frameheader[0].horizontal_sampling == 1
1403         && p_jpeg->frameheader[0].vertical_sampling == 1)
1404     {   /* 4:4:4 */
1405         /* don't overwrite p_jpeg->blocks */
1406         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
1407         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
1408         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
1409         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
1410         p_jpeg->mcu_membership[0] = 0;
1411         p_jpeg->mcu_membership[1] = 1;
1412         p_jpeg->mcu_membership[2] = 2;
1413         p_jpeg->tab_membership[0] = 0;
1414         p_jpeg->tab_membership[1] = 1;
1415         p_jpeg->tab_membership[2] = 1;
1416         p_jpeg->subsample_x[0] = 1;
1417         p_jpeg->subsample_x[1] = 1;
1418         p_jpeg->subsample_x[2] = 1;
1419         p_jpeg->subsample_y[0] = 1;
1420         p_jpeg->subsample_y[1] = 1;
1421         p_jpeg->subsample_y[2] = 1;
1422     }
1423     else
1424     {
1425         /* error */
1426     }
1427
1428 }
1429
1430 INLINE void fix_huff_tables(struct jpeg *p_jpeg)
1431 {
1432     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_dc,
1433         &p_jpeg->dc_derived_tbls[0]);
1434     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_ac,
1435         &p_jpeg->ac_derived_tbls[0]);
1436     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_dc,
1437         &p_jpeg->dc_derived_tbls[1]);
1438     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_ac,
1439         &p_jpeg->ac_derived_tbls[1]);
1440 }
1441
1442 /* Because some of the IDCT routines never multiply by any constants, and
1443  * therefore do not produce shifted output, we add the shift into the
1444  * quantization table when one of these IDCT routines is used, rather than
1445  * have the IDCT shift each value it processes.
1446  */
1447 INLINE void fix_quant_tables(struct jpeg *p_jpeg)
1448 {
1449     int shift, i, x, y, a;
1450     for (i = 0; i < 2; i++)
1451     {
1452         shift = idct_tbl[p_jpeg->v_scale[i]].v_scale +
1453             idct_tbl[p_jpeg->h_scale[i]].h_scale;
1454         if (shift)
1455         {
1456             a = 0;
1457             for (y = 0; y < 1 << p_jpeg->h_scale[i]; y++)
1458             {
1459                 for (x = 0; x < 1 << p_jpeg->v_scale[i]; x++)
1460                     p_jpeg->quanttable[i][zig[a+x]] <<= shift;
1461                 a += 8;
1462             }
1463         }
1464     }
1465 }
1466
/*
* These functions provide the in-line portion of bit fetching.
* Use check_bit_buffer to ensure there are N bits in the bit buffer
* before using get_bits, peek_bits, or drop_bits.
*  check_bit_buffer(p_jpeg, n);
*    Ensure there are N bits in the bit buffer; if there are fewer,
*    fill_bit_buffer is called once to append 16 more bits.
*  val = get_bits(p_jpeg, n);
*    Fetch next N bits.
*  val = peek_bits(p_jpeg, n);
*    Fetch next N bits without removing them from the buffer.
*  drop_bits(p_jpeg, n);
*    Discard next N bits.
* These are inline functions, so N may be any expression, but get_bits,
* peek_bits and drop_bits never refill the buffer themselves.
*/
1482
1483 static void fill_bit_buffer(struct jpeg* p_jpeg)
1484 {
1485     unsigned char byte, marker;
1486
1487     if (p_jpeg->marker_val)
1488         p_jpeg->marker_ind += 16;
1489     byte = d_getc(p_jpeg, 0);
1490     if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1491     {   /* simplification: just skip the (one-byte) marker code */
1492         marker = d_getc(p_jpeg, 0);
1493         if ((marker & ~7) == 0xD0)
1494         {
1495             p_jpeg->marker_val = marker;
1496             p_jpeg->marker_ind = 8;
1497         }
1498     }
1499     p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
1500
1501     byte = d_getc(p_jpeg, 0);
1502     if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1503     {   /* simplification: just skip the (one-byte) marker code */
1504         marker = d_getc(p_jpeg, 0);
1505         if ((marker & ~7) == 0xD0)
1506         {
1507             p_jpeg->marker_val = marker;
1508             p_jpeg->marker_ind = 0;
1509         }
1510     }
1511     p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
1512     p_jpeg->bitbuf_bits += 16;
1513 #ifdef JPEG_BS_DEBUG
1514     DEBUGF("read in: %X\n", p_jpeg->bitbuf & 0xFFFF);
1515 #endif
1516 }
1517
1518 INLINE void check_bit_buffer(struct jpeg *p_jpeg, int nbits)
1519 {
1520     if (nbits > p_jpeg->bitbuf_bits)
1521         fill_bit_buffer(p_jpeg);
1522 }
1523
1524 INLINE int get_bits(struct jpeg *p_jpeg, int nbits)
1525 {
1526 #ifdef JPEG_BS_DEBUG
1527     if (nbits > p_jpeg->bitbuf_bits)
1528         DEBUGF("bitbuffer underrun\n");
1529     int mask = 1 << (p_jpeg->bitbuf_bits - 1);
1530     int i;
1531     DEBUGF("get %d bits: ", nbits);
1532     for (i = 0; i < nbits; i++)
1533         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1534     DEBUGF("\n");
1535 #endif
1536     return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits -= nbits))) &
1537         ((1<<nbits)-1);
1538 }
1539
1540 INLINE int peek_bits(struct jpeg *p_jpeg, int nbits)
1541 {
1542 #ifdef JPEG_BS_DEBUG
1543     int mask = 1 << (p_jpeg->bitbuf_bits - 1);
1544     int i;
1545     DEBUGF("peek %d bits: ", nbits);
1546     for (i = 0; i < nbits; i++)
1547         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1548     DEBUGF("\n");
1549 #endif
1550     return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits - nbits))) &
1551         ((1<<nbits)-1);
1552 }
1553
1554 INLINE void drop_bits(struct jpeg *p_jpeg, int nbits)
1555 {
1556 #ifdef JPEG_BS_DEBUG
1557     int mask = 1 << (p_jpeg->bitbuf_bits - 1);
1558     int i;
1559     DEBUGF("drop %d bits: ", nbits);
1560     for (i = 0; i < nbits; i++)
1561         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1562     DEBUGF("\n");
1563 #endif
1564     p_jpeg->bitbuf_bits -= nbits;
1565 }
1566
1567 /* re-synchronize to entropy data (skip restart marker) */
1568 static void search_restart(struct jpeg *p_jpeg)
1569 {
1570     if (p_jpeg->marker_val)
1571     {
1572         p_jpeg->marker_val = 0;
1573         p_jpeg->bitbuf_bits = p_jpeg->marker_ind;
1574         p_jpeg->marker_ind = 0;
1575         return;
1576     }
1577     unsigned char byte;
1578     p_jpeg->bitbuf_bits = 0;
1579     while ((byte = d_getc(p_jpeg, 0xFF)))
1580     {
1581         if (byte == 0xff)
1582         {
1583             byte = d_getc(p_jpeg, 0xD0);
1584             if ((byte & ~7) == 0xD0)
1585             {
1586                 return;
1587             }
1588             else
1589                 putc(p_jpeg);
1590         }
1591     }
1592 }
1593
/* Figure F.12: extend sign bit.
 * x is the raw s-bit value read from the stream; values below 2**(s-1)
 * denote negative numbers and are mapped to x - (2**s - 1).
 * s must be in 0..15 (both tables have 16 entries); x is evaluated more
 * than once, so it must not have side effects. */
#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))

static const int extend_test[16] =   /* entry n is 2**(n-1) */
{
    0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
};

/* entry n is (-1 << n) + 1, written as 1 - (1 << n) because left-shifting a
 * negative value is undefined behaviour in C */
static const int extend_offset[16] =
{
    0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
    1 - (1<<13), 1 - (1<<14), 1 - (1<<15)
};
1610
1611 /* Decode a single value */
1612 INLINE int huff_decode_dc(struct jpeg *p_jpeg, struct derived_tbl* tbl)
1613 {
1614     int nb, look, s, r;
1615
1616     check_bit_buffer(p_jpeg, HUFF_LOOKAHEAD);
1617     look = peek_bits(p_jpeg, HUFF_LOOKAHEAD);
1618     if ((nb = tbl->look_nbits[look]) != 0)
1619     {
1620         drop_bits(p_jpeg, nb);
1621         s = tbl->look_sym[look];
1622         check_bit_buffer(p_jpeg, s);
1623         r = get_bits(p_jpeg, s);
1624         s = HUFF_EXTEND(r, s);
1625     }
1626     else
1627     {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1628         long code;
1629         nb=HUFF_LOOKAHEAD+1;
1630         check_bit_buffer(p_jpeg, nb);
1631         code = get_bits(p_jpeg, nb);
1632         while (code > tbl->maxcode[nb])
1633         {
1634             code <<= 1;
1635             check_bit_buffer(p_jpeg, 1);
1636             code |= get_bits(p_jpeg, 1);
1637             nb++;
1638         }
1639         if (nb > 16) /* error in Huffman */
1640         {
1641             s=0; /* fake a zero, this is most safe */
1642         }
1643         else
1644         {
1645             s = tbl->pub[16 + tbl->valptr[nb] +
1646                 ((int) (code - tbl->mincode[nb]))];
1647             check_bit_buffer(p_jpeg, s);
1648             r = get_bits(p_jpeg, s);
1649             s = HUFF_EXTEND(r, s);
1650         }
1651     } /* end slow decode */
1652     return s;
1653 }
1654
1655 INLINE int huff_decode_ac(struct jpeg *p_jpeg, struct derived_tbl* tbl)
1656 {
1657     int nb, look, s;
1658
1659     check_bit_buffer(p_jpeg, HUFF_LOOKAHEAD);
1660     look = peek_bits(p_jpeg, HUFF_LOOKAHEAD);
1661     if ((nb = tbl->look_nbits[look]) != 0)
1662     {
1663         drop_bits(p_jpeg, nb);
1664         s = tbl->look_sym[look];
1665     }
1666     else
1667     {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1668         long code;
1669         nb=HUFF_LOOKAHEAD+1;
1670         check_bit_buffer(p_jpeg, nb);
1671         code = get_bits(p_jpeg, nb);
1672         while (code > tbl->maxcode[nb])
1673         {
1674             code <<= 1;
1675             check_bit_buffer(p_jpeg, 1);
1676             code |= get_bits(p_jpeg, 1);
1677             nb++;
1678         }
1679         if (nb > 16) /* error in Huffman */
1680         {
1681             s=0; /* fake a zero, this is most safe */
1682         }
1683         else
1684         {
1685             s = tbl->pub[16 + tbl->valptr[nb] +
1686                 ((int) (code - tbl->mincode[nb]))];
1687         }
1688     } /* end slow decode */
1689     return s;
1690 }
1691
1692 static struct img_part *store_row_jpeg(void *jpeg_args)
1693 {
1694     struct jpeg *p_jpeg = (struct jpeg*) jpeg_args;
1695     unsigned int width = p_jpeg->x_mbl << p_jpeg->h_scale[1];
1696     unsigned int b_width = width * JPEG_PIX_SZ;
1697     int height = 1U << p_jpeg->v_scale[1];
1698     int x;
1699     if (!p_jpeg->mcu_row) /* Need to decode a new row of MCUs */
1700     {
1701         p_jpeg->out_ptr = (unsigned char *)p_jpeg->img_buf;
1702         int store_offs[4];
1703         int mcu_offset = JPEG_PIX_SZ << p_jpeg->h_scale[1];
1704         unsigned char *out = p_jpeg->out_ptr;
1705         store_offs[p_jpeg->store_pos[0]] = 0;
1706         store_offs[p_jpeg->store_pos[1]] = JPEG_PIX_SZ << p_jpeg->h_scale[0];
1707         store_offs[p_jpeg->store_pos[2]] = b_width << p_jpeg->v_scale[0];
1708         store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2];
1709
1710         int block[128]; /* decoded DCT coefficients */
1711         for (x = 0; x < p_jpeg->x_mbl; x++)
1712         {
1713             int blkn;
1714             for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
1715             {
1716                 int k = 1; /* coefficient index */
1717                 int s, r; /* huffman values */
1718                 int ci = p_jpeg->mcu_membership[blkn]; /* component index */
1719                 int ti = p_jpeg->tab_membership[blkn]; /* table index */
1720                 struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
1721                 struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];
1722
1723                 /* Section F.2.2.1: decode the DC coefficient difference */
1724                 s = huff_decode_dc(p_jpeg, dctbl);
1725
1726 #ifndef HAVE_LCD_COLOR
1727                 if (!ci)
1728 #endif
1729                 {
1730 #ifdef HAVE_LCD_COLOR
1731                     p_jpeg->last_dc_val[ci] += s;
1732                     /* output it (assumes zag[0] = 0) */
1733                     block[0] = p_jpeg->last_dc_val[ci] *
1734                         p_jpeg->quanttable[!!ci][0];
1735 #else
1736                     p_jpeg->last_dc_val += s;
1737                     /* output it (assumes zag[0] = 0) */
1738                     block[0] = p_jpeg->last_dc_val *
1739                         p_jpeg->quanttable[!!ci][0];
1740 #endif
1741                     /* coefficient buffer must be cleared */
1742                     MEMSET(block+1, 0, p_jpeg->zero_need[!!ci] * sizeof(int));
1743                     /* Section F.2.2.2: decode the AC coefficients */
1744                     for (; k < p_jpeg->k_need[!!ci]; k++)
1745                     {
1746                         s = huff_decode_ac(p_jpeg, actbl);
1747                         r = s >> 4;
1748                         s &= 15;
1749                         if (s)
1750                         {
1751                             k += r;
1752                             check_bit_buffer(p_jpeg, s);
1753                             r = get_bits(p_jpeg, s);
1754                             r = HUFF_EXTEND(r, s);
1755                             int a = zag[k];
1756                             if (a <= zag[p_jpeg->k_need[!!ci]] && (a & 7) <=
1757                                 (zag[p_jpeg->k_need[!!ci]] & 7))
1758                             {
1759                                 r *= p_jpeg->quanttable[!!ci][k];
1760                                 block[zag[k]] = r ;
1761                             }
1762                         }
1763                         else
1764                         {
1765                             if (r != 15)
1766                             {
1767                                 k = 64;
1768                                 break;
1769                             }
1770                             k += r;
1771                         }
1772                     }  /* for k */
1773                 }
1774                 for (; k < 64; k++)
1775                 {
1776                     s = huff_decode_ac(p_jpeg, actbl);
1777                     r = s >> 4;
1778                     s &= 15;
1779
1780                     if (s)
1781                     {
1782                         k += r;
1783                         check_bit_buffer(p_jpeg, s);
1784                         drop_bits(p_jpeg, s);
1785                     }
1786                     else
1787                     {
1788                         if (r != 15)
1789                             break;
1790                         k += r;
1791                     }
1792                 }  /* for k */
1793 #ifndef HAVE_LCD_COLOR
1794                 if (!ci)
1795 #endif
1796                 {
1797                     unsigned char si = !!ci;
1798                     int idct_cols = 1 << MIN(p_jpeg->h_scale[si], 3);
1799                     int idct_rows = 1 << p_jpeg->v_scale[si];
1800                     unsigned char *b_out = out + (ci ? ci : store_offs[blkn]);
1801                     if (idct_tbl[p_jpeg->v_scale[si]].v_idct)
1802                         idct_tbl[p_jpeg->v_scale[si]].v_idct(block, idct_cols);
1803                     idct_tbl[p_jpeg->h_scale[si]].h_idct(block, b_out,
1804                         idct_rows, b_width);
1805                 }
1806             } /* for blkn */
1807             /* don't starve other threads while an MCU row decodes */
1808             yield();
1809 #ifdef HAVE_LCD_COLOR
1810             unsigned int xp;
1811             int yp;
1812             unsigned char *row = out;
1813             if (p_jpeg->blocks > 1) {
1814                 for (yp = 0; yp < height; yp++, row += b_width)
1815                 {
1816                     unsigned char *px = row;
1817                     for (xp = 0; xp < 1U << p_jpeg->h_scale[1];
1818                         xp++, px += JPEG_PIX_SZ)
1819                     {
1820                         int y, u, v, rv, guv, bu;
1821                         y = px[0] * YFAC + (YFAC >> 1);
1822                         u = px[1] - 128;
1823                         v = px[2] - 128;
1824                         rv = RVFAC * v;
1825                         guv = GUFAC * u + GVFAC * v;
1826                         bu = BUFAC * u;
1827                         struct uint8_rgb *rgb = (struct uint8_rgb *)px;
1828                         rgb->red = clamp_component((y + rv) / YFAC);
1829                         rgb->green = clamp_component((y + guv) / YFAC);
1830                         rgb->blue = clamp_component((y + bu) / YFAC);
1831                     }
1832                 }
1833             } else {
1834                 for (yp = 0; yp < height; yp++, row += b_width)
1835                 {
1836                     unsigned char *px = row;
1837                     for (xp = 0; xp < 1U << p_jpeg->h_scale[1];
1838                         xp++, px += JPEG_PIX_SZ)
1839                     {
1840                         px[1] = px[2] = px[0];
1841                     }
1842                 }
1843             }
1844 #endif
1845             out += mcu_offset;
1846             if (p_jpeg->restart_interval && --p_jpeg->restart == 0)
1847             {   /* if a restart marker is due: */
1848                 p_jpeg->restart = p_jpeg->restart_interval; /* count again */
1849                 search_restart(p_jpeg); /* align the bitstream */
1850 #ifdef HAVE_LCD_COLOR
1851                 p_jpeg->last_dc_val[0] = p_jpeg->last_dc_val[1] =
1852                                  p_jpeg->last_dc_val[2] = 0; /* reset decoder */
1853 #else
1854                 p_jpeg->last_dc_val = 0;
1855 #endif
1856             }
1857         }
1858     } /* if !p_jpeg->mcu_row */
1859     p_jpeg->mcu_row = (p_jpeg->mcu_row + 1) & (height - 1);
1860     p_jpeg->part.len = width;
1861     p_jpeg->part.buf = (jpeg_pix_t *)p_jpeg->out_ptr;
1862     p_jpeg->out_ptr += b_width;
1863     return &(p_jpeg->part);
1864 }
1865
/******************************************************************************
 * read_jpeg_file()
 *
 * Opens filename read-only and decodes it into bm through read_jpeg_fd();
 * maxsize, format and cformat are passed on unchanged.
 *
 * Returns the result of read_jpeg_fd(), or (fd * 10 - 1) with fd being the
 * negative result of open() when the file cannot be opened.
 *
 *****************************************************************************/
int read_jpeg_file(const char* filename,
                   struct bitmap *bm,
                   int maxsize,
                   int format,
                   const struct custom_format *cformat)
{
    int result;
    int fd = open(filename, O_RDONLY);

    if (fd >= 0) {
        result = read_jpeg_fd(fd, bm, maxsize, format, cformat);
        close(fd);
        return result;
    }

    /* Exit if file opening failed */
    DEBUGF("read_jpeg_file: can't open '%s', rc: %d\n", filename, fd);
    return fd * 10 - 1;
}
1891
/* Pick the scale exponent for one image dimension.
 *
 * Returns the smallest scale >= 0 for which (in_size << scale) is at least
 * out_size * 8; the result never exceeds 5 - subsample (and is 0 when that
 * limit is not positive). */
static int calc_scale(int in_size, int out_size, int subsample)
{
    const int limit = 5 - subsample;
    const int target = out_size << 3;
    int scale = 0;

    while (scale < limit && target > in_size)
    {
        in_size <<= 1;
        scale++;
    }
    return scale;
}
1905
1906 int read_jpeg_fd(int fd,
1907                  struct bitmap *bm,
1908                  int maxsize,
1909                  int format,
1910                  const struct custom_format *cformat)
1911 {
1912     bool resize = false, dither = false;
1913     struct rowset rset;
1914     struct dim src_dim;
1915     struct jpeg *p_jpeg = (struct jpeg*)bm->data;
1916     int tmp_size = maxsize;
1917     int status;
1918     int bm_size;
1919     ALIGN_BUFFER(p_jpeg, tmp_size, sizeof(int));
1920     /* not enough memory for our struct jpeg */
1921     if ((size_t)tmp_size < sizeof(struct jpeg))
1922         return -1;
1923
1924     memset(p_jpeg, 0, sizeof(struct jpeg));
1925     p_jpeg->fd = fd;
1926     status = process_markers(p_jpeg);
1927     if (status < 0)
1928         return status;
1929     if ((status & (DQT | SOF0)) != (DQT | SOF0))
1930         return -(status * 16);
1931     if (!(status & DHT)) /* if no Huffman table present: */
1932         default_huff_tbl(p_jpeg); /* use default */
1933     fix_headers(p_jpeg); /* derive Huffman and other lookup-tables */
1934     src_dim.width = p_jpeg->x_size;
1935     src_dim.height = p_jpeg->y_size;
1936     if (format & FORMAT_RESIZE)
1937         resize = true;
1938     if (format & FORMAT_DITHER)
1939         dither = true;
1940     if (resize) {
1941         struct dim resize_dim = {
1942             .width = bm->width,
1943             .height = bm->height,
1944         };
1945         if (format & FORMAT_KEEP_ASPECT)
1946             recalc_dimension(&resize_dim, &src_dim);
1947         bm->width = resize_dim.width;
1948         bm->height = resize_dim.height;
1949         if (bm->width == src_dim.width && bm->height == src_dim.height)
1950             resize = false;
1951     } else {
1952         bm->width = p_jpeg->x_size;
1953         bm->height = p_jpeg->y_size;
1954     }
1955     p_jpeg->h_scale[0] = calc_scale(p_jpeg->x_size, bm->width,
1956         p_jpeg->frameheader[0].horizontal_sampling);
1957     p_jpeg->v_scale[0] = calc_scale(p_jpeg->y_size, bm->height,
1958         p_jpeg->frameheader[0].vertical_sampling);
1959     p_jpeg->h_scale[1] = p_jpeg->h_scale[0] +
1960         p_jpeg->frameheader[0].horizontal_sampling - 1;
1961     p_jpeg->v_scale[1] = p_jpeg->v_scale[0] +
1962         p_jpeg->frameheader[0].vertical_sampling - 1;
1963     fix_quant_tables(p_jpeg);
1964     int decode_w = (1 << MIN(p_jpeg->h_scale[0],3)) - 1;
1965     int decode_h = (1 << MIN(p_jpeg->v_scale[0],3)) - 1;
1966     src_dim.width = (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3;
1967     src_dim.height = (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3;
1968     p_jpeg->zero_need[0] = (decode_h << 3) + decode_w;
1969     p_jpeg->k_need[0] = zig[p_jpeg->zero_need[0]];
1970     decode_w = (1 << MIN(p_jpeg->h_scale[1],3)) - 1;
1971     decode_h = (1 << MIN(p_jpeg->v_scale[1],3)) - 1;
1972     p_jpeg->zero_need[1] = p_jpeg->zero_need[2] = (decode_h << 3) + decode_w;
1973     p_jpeg->k_need[1] = p_jpeg->k_need[2] = zig[p_jpeg->zero_need[1]];
1974     if (cformat)
1975         bm_size = cformat->get_size(bm);
1976     else
1977         bm_size = BM_SIZE(bm->width,bm->height,FORMAT_NATIVE,false);
1978     if (bm_size > maxsize)
1979         return -1;
1980     char *buf_start = (char *)bm->data + bm_size;
1981     char *buf_end = (char *)bm->data + maxsize;
1982     maxsize = buf_end - buf_start;
1983     ALIGN_BUFFER(buf_start, maxsize, sizeof(uint32_t));
1984     if (maxsize < (int)sizeof(struct jpeg))
1985         return -1;
1986     memmove(buf_start, p_jpeg, sizeof(struct jpeg));
1987     p_jpeg = (struct jpeg *)buf_start;
1988     fix_huff_tables(p_jpeg);
1989     buf_start += sizeof(struct jpeg);
1990     maxsize = buf_end - buf_start;
1991     int decode_buf_size = (p_jpeg->x_mbl << p_jpeg->h_scale[1])
1992         << p_jpeg->v_scale[1];
1993     decode_buf_size *= JPEG_PIX_SZ;
1994     p_jpeg->img_buf = (jpeg_pix_t *)buf_start;
1995     if (buf_end - buf_start < decode_buf_size)
1996         return -1;
1997     buf_start += decode_buf_size;
1998     maxsize = buf_end - buf_start;
1999     memset(p_jpeg->img_buf, 0, decode_buf_size);
2000     p_jpeg->mcu_row = 0;
2001     p_jpeg->restart = p_jpeg->restart_interval;
2002     rset.rowstart = 0;
2003     rset.rowstop = bm->height;
2004     rset.rowstep = 1;
2005     if (resize_on_load(bm, dither, &src_dim, &rset, buf_start, maxsize, cformat,
2006         store_row_jpeg, p_jpeg))
2007         return bm_size;
2008     else
2009         return 0;
2010 }
2011
2012 /**************** end JPEG code ********************/