apps/plugins/imageviewer/jpeg/jpeg_decoder.c

   1 /***************************************************************************
   2 *             __________               __   ___.
   3 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 *                     \/            \/     \/    \/            \/
   8 * $Id$
   9 *
  10 * JPEG image viewer
  11 * (This is a real mess if it has to be coded in one single C file)
  12 *
  13 * File scrolling addition (C) 2005 Alexander Spyridakis
  14 * Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
  15 * Heavily borrowed from the IJG implementation (C) Thomas G. Lane
  16 * Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
  17 *
  18 * This program is free software; you can redistribute it and/or
  19 * modify it under the terms of the GNU General Public License
  20 * as published by the Free Software Foundation; either version 2
  21 * of the License, or (at your option) any later version.
  22 *
  23 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  24 * KIND, either express or implied.
  25 *
  26 ****************************************************************************/
  27
  28 #include "plugin.h"
  29
  30 #include "jpeg_decoder.h"
  31
  32 /* for portability of below JPEG code */
  33 #define MEMSET(p,v,c) rb->memset(p,v,c)
  34 #define MEMCPY(d,s,c) rb->memcpy(d,s,c)
  35 #define INLINE static inline
  36 #define ENDIAN_SWAP16(n) n /* only for poor little endian machines */
  37
  38 /**************** begin JPEG code ********************/
  39
/*
 * Convert a signed IDCT result into an unsigned 8-bit sample: add the
 * level shift of 128 and saturate to 0..255.
 *
 * The portable branch returns the clamped value itself.  The assembler
 * branches only guarantee the LOW BYTE of the return value (on overflow they
 * produce 0 or an all-ones pattern), so callers must store the result into
 * an 8-bit location, as the IDCT routines in this file do.
 */
INLINE unsigned range_limit(int value)
{
#if CONFIG_CPU == SH7034
    unsigned tmp;
    asm (  /* Note: Uses knowledge that only low byte of result is used */
        "mov     #-128,%[t]  \n"
        "sub     %[t],%[v]   \n"  /* value -= -128; equals value += 128; */
        "extu.b  %[v],%[t]   \n"  /* tmp = value & 0xff */
        "cmp/eq  %[v],%[t]   \n"  /* low byte == whole number ? */
        "bt      1f          \n"  /* yes: no overflow */
        "cmp/pz  %[v]        \n"  /* overflow: positive? */
        "subc    %[v],%[v]   \n"  /* %[v] now either 0 or 0xffffffff */
    "1:                      \n"
        : /* outputs */
        [v]"+r"(value),
        [t]"=&r"(tmp)
    );
    return value;
#elif defined(CPU_COLDFIRE)
    asm (  /* Note: Uses knowledge that only the low byte of the result is used */
        "add.l   #128,%[v]   \n"  /* value += 128; */
        "cmp.l   #255,%[v]   \n"  /* overflow? */
        "bls.b   1f          \n"  /* no: return value */
        "spl.b   %[v]        \n"  /* yes: set low byte to appropriate boundary */
    "1:                      \n"
        : /* outputs */
        [v]"+d"(value)
    );
    return value;
#elif defined(CPU_ARM)
    asm (  /* Note: Uses knowledge that only the low byte of the result is used */
        "add     %[v], %[v], #128    \n"  /* value += 128 */
        "cmp     %[v], #255          \n"  /* out of range 0..255? */
        "mvnhi   %[v], %[v], asr #31 \n"  /* yes: set all bits to ~(sign_bit) */
        : /* outputs */
        [v]"+r"(value)
    );
    return value;
#else
    /* Portable fallback: same level shift, explicit clamping. */
    value += 128;

    /* The unsigned compare covers both "negative" and "> 255" in one test. */
    if ((unsigned)value <= 255)
        return value;

    if (value < 0)
        return 0;

    return 255;
#endif
}
  90
  91 /* IDCT implementation */
  92
  93
  94 #define CONST_BITS 13
  95 #define PASS1_BITS 2
  96
  97
  98 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
  99 * causing a lot of useless floating-point operations at run time.
 100 * To get around this we use the following pre-calculated constants.
 101 * If you change CONST_BITS you may want to add appropriate values.
 102 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 103 */
 104 #define FIX_0_298631336  2446 /* FIX(0.298631336) */
 105 #define FIX_0_390180644  3196 /* FIX(0.390180644) */
 106 #define FIX_0_541196100  4433 /* FIX(0.541196100) */
 107 #define FIX_0_765366865  6270 /* FIX(0.765366865) */
 108 #define FIX_0_899976223  7373 /* FIX(0.899976223) */
 109 #define FIX_1_175875602  9633 /* FIX(1.175875602) */
 110 #define FIX_1_501321110 12299 /* FIX(1.501321110) */
 111 #define FIX_1_847759065 15137 /* FIX(1.847759065) */
 112 #define FIX_1_961570560 16069 /* FIX(1.961570560) */
 113 #define FIX_2_053119869 16819 /* FIX(2.053119869) */
 114 #define FIX_2_562915447 20995 /* FIX(2.562915447) */
 115 #define FIX_3_072711026 25172 /* FIX(3.072711026) */
 116
 117
 118
/* Multiply a long variable by a long constant to yield a long result.
 120 * For 8-bit samples with the recommended scaling, all the variable
 121 * and constant values involved are no more than 16 bits wide, so a
 122 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 123 * For 12-bit samples, a full 32-bit multiplication will be needed.
 124 */
 125 #define MULTIPLY16(var,const)  (((short) (var)) * ((short) (const)))
 126
 127
 128 /* Dequantize a coefficient by multiplying it by the multiplier-table
 129 * entry; produce an int result.  In this module, both inputs and result
 130 * are 16 bits or less, so either int or short multiply will work.
 131 */
 132 /* #define DEQUANTIZE(coef,quantval)  (((int) (coef)) * (quantval)) */
 133 #define DEQUANTIZE MULTIPLY16
 134
 135 /* Descale and correctly round an int value that's scaled by N bits.
 136 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 137 * the fudge factor is correct for either sign of X.
 138 */
 139 #define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
 140
 141
 142
 143 /*
 144 * Perform dequantization and inverse DCT on one block of coefficients,
 145 * producing a reduced-size 1x1 output block.
 146 */
 147 void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 148 {
 149     (void)skip_line; /* unused */
 150     *p_byte = range_limit(inptr[0] * quantptr[0] >> 3);
 151 }
 152
 153
 154
 155 /*
 156 * Perform dequantization and inverse DCT on one block of coefficients,
 157 * producing a reduced-size 2x2 output block.
 158 */
 159 void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 160 {
 161     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
 162     unsigned char* outptr;
 163
 164     /* Pass 1: process columns from input, store into work array. */
 165
 166     /* Column 0 */
 167     tmp4 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 168     tmp5 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 169
 170     tmp0 = tmp4 + tmp5;
 171     tmp2 = tmp4 - tmp5;
 172
 173     /* Column 1 */
 174     tmp4 = DEQUANTIZE(inptr[8*0+1], quantptr[8*0+1]);
 175     tmp5 = DEQUANTIZE(inptr[8*1+1], quantptr[8*1+1]);
 176
 177     tmp1 = tmp4 + tmp5;
 178     tmp3 = tmp4 - tmp5;
 179
 180     /* Pass 2: process 2 rows, store into output array. */
 181
 182     /* Row 0 */
 183     outptr = p_byte;
 184
 185     outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
 186     outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
 187
 188     /* Row 1 */
 189     outptr = p_byte + skip_line;
 190
 191     outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
 192     outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
 193 }
 194
 195
 196
 197 /*
 198 * Perform dequantization and inverse DCT on one block of coefficients,
 199 * producing a reduced-size 4x4 output block.
 200 */
 201 void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 202 {
 203     int tmp0, tmp2, tmp10, tmp12;
 204     int z1, z2, z3;
 205     int * wsptr;
 206     unsigned char* outptr;
 207     int ctr;
 208     int workspace[4*4]; /* buffers data between passes */
 209
 210     /* Pass 1: process columns from input, store into work array. */
 211
 212     wsptr = workspace;
 213     for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++)
 214     {
 215         /* Even part */
 216
 217         tmp0 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 218         tmp2 = DEQUANTIZE(inptr[8*2], quantptr[8*2]);
 219
 220         tmp10 = (tmp0 + tmp2) << PASS1_BITS;
 221         tmp12 = (tmp0 - tmp2) << PASS1_BITS;
 222
 223         /* Odd part */
 224         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 225
 226         z2 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 227         z3 = DEQUANTIZE(inptr[8*3], quantptr[8*3]);
 228
 229         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 230         tmp0 = DESCALE(z1 + MULTIPLY16(z3, - FIX_1_847759065), CONST_BITS-PASS1_BITS);
 231         tmp2 = DESCALE(z1 + MULTIPLY16(z2, FIX_0_765366865), CONST_BITS-PASS1_BITS);
 232
 233         /* Final output stage */
 234
 235         wsptr[4*0] = (int) (tmp10 + tmp2);
 236         wsptr[4*3] = (int) (tmp10 - tmp2);
 237         wsptr[4*1] = (int) (tmp12 + tmp0);
 238         wsptr[4*2] = (int) (tmp12 - tmp0);
 239     }
 240
 241     /* Pass 2: process 4 rows from work array, store into output array. */
 242
 243     wsptr = workspace;
 244     for (ctr = 0; ctr < 4; ctr++)
 245     {
 246         outptr = p_byte + (ctr*skip_line);
 247         /* Even part */
 248
 249         tmp0 = (int) wsptr[0];
 250         tmp2 = (int) wsptr[2];
 251
 252         tmp10 = (tmp0 + tmp2) << CONST_BITS;
 253         tmp12 = (tmp0 - tmp2) << CONST_BITS;
 254
 255         /* Odd part */
 256         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 257
 258         z2 = (int) wsptr[1];
 259         z3 = (int) wsptr[3];
 260
 261         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 262         tmp0 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 263         tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 264
 265         /* Final output stage */
 266
 267         outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
 268             CONST_BITS+PASS1_BITS+3));
 269         outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
 270             CONST_BITS+PASS1_BITS+3));
 271         outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
 272             CONST_BITS+PASS1_BITS+3));
 273         outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
 274             CONST_BITS+PASS1_BITS+3));
 275
 276         wsptr += 4;     /* advance pointer to next row */
 277     }
 278 }
 279
 280
 281
 282 /*
 283 * Perform dequantization and inverse DCT on one block of coefficients.
 284 */
 285 void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
 286 {
 287     long tmp0, tmp1, tmp2, tmp3;
 288     long tmp10, tmp11, tmp12, tmp13;
 289     long z1, z2, z3, z4, z5;
 290     int * wsptr;
 291     unsigned char* outptr;
 292     int ctr;
 293     int workspace[64];  /* buffers data between passes */
 294
 295     /* Pass 1: process columns from input, store into work array. */
 296     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
 297     /* furthermore, we scale the results by 2**PASS1_BITS. */
 298
 299     wsptr = workspace;
 300     for (ctr = 8; ctr > 0; ctr--)
 301     {
 302     /* Due to quantization, we will usually find that many of the input
 303     * coefficients are zero, especially the AC terms.  We can exploit this
 304     * by short-circuiting the IDCT calculation for any column in which all
 305     * the AC terms are zero.  In that case each output is equal to the
 306     * DC coefficient (with scale factor as needed).
 307     * With typical images and quantization tables, half or more of the
 308     * column DCT calculations can be simplified this way.
 309     */
 310
 311         if ((inptr[8*1] | inptr[8*2] | inptr[8*3]
 312            | inptr[8*4] | inptr[8*5] | inptr[8*6] | inptr[8*7]) == 0)
 313         {
 314             /* AC terms all zero */
 315             int dcval = DEQUANTIZE(inptr[8*0], quantptr[8*0]) << PASS1_BITS;
 316
 317             wsptr[8*0] = wsptr[8*1] = wsptr[8*2] = wsptr[8*3] = wsptr[8*4]
 318                        = wsptr[8*5] = wsptr[8*6] = wsptr[8*7] = dcval;
 319             inptr++;      /* advance pointers to next column */
 320             quantptr++;
 321             wsptr++;
 322             continue;
 323         }
 324
 325         /* Even part: reverse the even part of the forward DCT. */
 326         /* The rotator is sqrt(2)*c(-6). */
 327
 328         z2 = DEQUANTIZE(inptr[8*2], quantptr[8*2]);
 329         z3 = DEQUANTIZE(inptr[8*6], quantptr[8*6]);
 330
 331         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 332         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 333         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 334
 335         z2 = DEQUANTIZE(inptr[8*0], quantptr[8*0]);
 336         z3 = DEQUANTIZE(inptr[8*4], quantptr[8*4]);
 337
 338         tmp0 = (z2 + z3) << CONST_BITS;
 339         tmp1 = (z2 - z3) << CONST_BITS;
 340
 341         tmp10 = tmp0 + tmp3;
 342         tmp13 = tmp0 - tmp3;
 343         tmp11 = tmp1 + tmp2;
 344         tmp12 = tmp1 - tmp2;
 345
 346         /* Odd part per figure 8; the matrix is unitary and hence its
 347            transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. */
 348
 349         tmp0 = DEQUANTIZE(inptr[8*7], quantptr[8*7]);
 350         tmp1 = DEQUANTIZE(inptr[8*5], quantptr[8*5]);
 351         tmp2 = DEQUANTIZE(inptr[8*3], quantptr[8*3]);
 352         tmp3 = DEQUANTIZE(inptr[8*1], quantptr[8*1]);
 353
 354         z1 = tmp0 + tmp3;
 355         z2 = tmp1 + tmp2;
 356         z3 = tmp0 + tmp2;
 357         z4 = tmp1 + tmp3;
 358         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 359
 360         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 361         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 362         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 363         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 364         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 365         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 366         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 367         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 368
 369         z3 += z5;
 370         z4 += z5;
 371
 372         tmp0 += z1 + z3;
 373         tmp1 += z2 + z4;
 374         tmp2 += z2 + z3;
 375         tmp3 += z1 + z4;
 376
 377         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 378
 379         wsptr[8*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
 380         wsptr[8*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
 381         wsptr[8*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
 382         wsptr[8*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
 383         wsptr[8*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
 384         wsptr[8*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
 385         wsptr[8*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
 386         wsptr[8*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 387
 388         inptr++; /* advance pointers to next column */
 389         quantptr++;
 390         wsptr++;
 391     }
 392
 393     /* Pass 2: process rows from work array, store into output array. */
 394     /* Note that we must descale the results by a factor of 8 == 2**3, */
 395     /* and also undo the PASS1_BITS scaling. */
 396
 397     wsptr = workspace;
 398     for (ctr = 0; ctr < 8; ctr++)
 399     {
 400         outptr = p_byte + (ctr*skip_line);
 401         /* Rows of zeroes can be exploited in the same way as we did with columns.
 402         * However, the column calculation has created many nonzero AC terms, so
 403         * the simplification applies less often (typically 5% to 10% of the time).
 404         * On machines with very fast multiplication, it's possible that the
 405         * test takes more time than it's worth.  In that case this section
 406         * may be commented out.
 407         */
 408
 409 #ifndef NO_ZERO_ROW_TEST
 410         if ((wsptr[1] | wsptr[2] | wsptr[3]
 411            | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
 412         {
 413             /* AC terms all zero */
 414             unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
 415                 PASS1_BITS+3));
 416
 417             outptr[0] = dcval;
 418             outptr[1] = dcval;
 419             outptr[2] = dcval;
 420             outptr[3] = dcval;
 421             outptr[4] = dcval;
 422             outptr[5] = dcval;
 423             outptr[6] = dcval;
 424             outptr[7] = dcval;
 425
 426             wsptr += 8; /* advance pointer to next row */
 427             continue;
 428         }
 429 #endif
 430
 431         /* Even part: reverse the even part of the forward DCT. */
 432         /* The rotator is sqrt(2)*c(-6). */
 433
 434         z2 = (long) wsptr[2];
 435         z3 = (long) wsptr[6];
 436
 437         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 438         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
 439         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 440
 441         tmp0 = ((long) wsptr[0] + (long) wsptr[4]) << CONST_BITS;
 442         tmp1 = ((long) wsptr[0] - (long) wsptr[4]) << CONST_BITS;
 443
 444         tmp10 = tmp0 + tmp3;
 445         tmp13 = tmp0 - tmp3;
 446         tmp11 = tmp1 + tmp2;
 447         tmp12 = tmp1 - tmp2;
 448
 449         /* Odd part per figure 8; the matrix is unitary and hence its
 450         * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */
 451
 452         tmp0 = (long) wsptr[7];
 453         tmp1 = (long) wsptr[5];
 454         tmp2 = (long) wsptr[3];
 455         tmp3 = (long) wsptr[1];
 456
 457         z1 = tmp0 + tmp3;
 458         z2 = tmp1 + tmp2;
 459         z3 = tmp0 + tmp2;
 460         z4 = tmp1 + tmp3;
 461         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
 462
 463         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
 464         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
 465         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
 466         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
 467         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
 468         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
 469         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
 470         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
 471
 472         z3 += z5;
 473         z4 += z5;
 474
 475         tmp0 += z1 + z3;
 476         tmp1 += z2 + z4;
 477         tmp2 += z2 + z3;
 478         tmp3 += z1 + z4;
 479
 480         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 481
 482         outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
 483             CONST_BITS+PASS1_BITS+3));
 484         outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
 485             CONST_BITS+PASS1_BITS+3));
 486         outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
 487             CONST_BITS+PASS1_BITS+3));
 488         outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
 489             CONST_BITS+PASS1_BITS+3));
 490         outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
 491             CONST_BITS+PASS1_BITS+3));
 492         outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
 493             CONST_BITS+PASS1_BITS+3));
 494         outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
 495             CONST_BITS+PASS1_BITS+3));
 496         outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
 497             CONST_BITS+PASS1_BITS+3));
 498
 499         wsptr += 8; /* advance pointer to next row */
 500     }
 501 }
 502
 503
 504
 505 /* JPEG decoder implementation */
 506
 507 /* Preprocess the JPEG JFIF file */
 508 int process_markers(unsigned char* p_src, long size, struct jpeg* p_jpeg)
 509 {
 510     unsigned char* p_bytes = p_src;
 511     int marker_size; /* variable length of marker segment */
 512     int i, j, n;
 513     int ret = 0; /* returned flags */
 514
 515     p_jpeg->p_entropy_end = p_src + size;
 516
 517     while (p_src < p_bytes + size)
 518     {
 519         if (*p_src++ != 0xFF) /* no marker? */
 520         {
 521             p_src--; /* it's image data, put it back */
 522             p_jpeg->p_entropy_data = p_src;
 523             break; /* exit marker processing */
 524         }
 525
 526         switch (*p_src++)
 527         {
 528         case 0xFF: /* Fill byte */
 529             ret |= FILL_FF;
 530         case 0x00: /* Zero stuffed byte - entropy data */
 531             p_src--; /* put it back */
 532             continue;
 533
 534         case 0xC0: /* SOF Huff  - Baseline DCT */
 535             {
 536                 ret |= SOF0;
 537                 marker_size = *p_src++ << 8; /* Highbyte */
 538                 marker_size |= *p_src++; /* Lowbyte */
 539                 n = *p_src++; /* sample precision (= 8 or 12) */
 540                 if (n != 8)
 541                 {
 542                     return(-1); /* Unsupported sample precision */
 543                 }
 544                 p_jpeg->y_size = *p_src++ << 8; /* Highbyte */
 545                 p_jpeg->y_size |= *p_src++; /* Lowbyte */
 546                 p_jpeg->x_size = *p_src++ << 8; /* Highbyte */
 547                 p_jpeg->x_size |= *p_src++; /* Lowbyte */
 548
 549                 n = (marker_size-2-6)/3;
 550                 if (*p_src++ != n || (n != 1 && n != 3))
 551                 {
 552                     return(-2); /* Unsupported SOF0 component specification */
 553                 }
 554                 for (i=0; i<n; i++)
 555                 {
 556                     p_jpeg->frameheader[i].ID = *p_src++; /* Component info */
 557                     p_jpeg->frameheader[i].horizontal_sampling = *p_src >> 4;
 558                     p_jpeg->frameheader[i].vertical_sampling = *p_src++ & 0x0F;
 559                     p_jpeg->frameheader[i].quanttable_select = *p_src++;
 560                     if (p_jpeg->frameheader[i].horizontal_sampling > 2
 561                      || p_jpeg->frameheader[i].vertical_sampling > 2)
 562                     return -3; /* Unsupported SOF0 subsampling */
 563                 }
 564                 p_jpeg->blocks = n;
 565             }
 566             break;
 567
 568         case 0xC1: /* SOF Huff  - Extended sequential DCT*/
 569         case 0xC2: /* SOF Huff  - Progressive DCT*/
 570         case 0xC3: /* SOF Huff  - Spatial (sequential) lossless*/
 571         case 0xC5: /* SOF Huff  - Differential sequential DCT*/
 572         case 0xC6: /* SOF Huff  - Differential progressive DCT*/
 573         case 0xC7: /* SOF Huff  - Differential spatial*/
 574         case 0xC8: /* SOF Arith - Reserved for JPEG extensions*/
 575         case 0xC9: /* SOF Arith - Extended sequential DCT*/
 576         case 0xCA: /* SOF Arith - Progressive DCT*/
 577         case 0xCB: /* SOF Arith - Spatial (sequential) lossless*/
 578         case 0xCD: /* SOF Arith - Differential sequential DCT*/
 579         case 0xCE: /* SOF Arith - Differential progressive DCT*/
 580         case 0xCF: /* SOF Arith - Differential spatial*/
 581             {
 582                 return (-4); /* other DCT model than baseline not implemented */
 583             }
 584
 585         case 0xC4: /* Define Huffman Table(s) */
 586             {
 587                 unsigned char* p_temp;
 588
 589                 ret |= DHT;
 590                 marker_size = *p_src++ << 8; /* Highbyte */
 591                 marker_size |= *p_src++; /* Lowbyte */
 592
 593                 p_temp = p_src;
 594                 while (p_src < p_temp+marker_size-2-17) /* another table */
 595                 {
 596                     int sum = 0;
 597                     i = *p_src & 0x0F; /* table index */
 598                     if (i > 1)
 599                     {
 600                         return (-5); /* Huffman table index out of range */
 601                     }
 602                     else if (*p_src++ & 0xF0) /* AC table */
 603                     {
 604                         for (j=0; j<16; j++)
 605                         {
 606                             sum += *p_src;
 607                             p_jpeg->hufftable[i].huffmancodes_ac[j] = *p_src++;
 608                         }
 609                         if(16 + sum > AC_LEN)
 610                             return -10; /* longer than allowed */
 611
 612                         for (; j < 16 + sum; j++)
 613                             p_jpeg->hufftable[i].huffmancodes_ac[j] = *p_src++;
 614                     }
 615                     else /* DC table */
 616                     {
 617                         for (j=0; j<16; j++)
 618                         {
 619                             sum += *p_src;
 620                             p_jpeg->hufftable[i].huffmancodes_dc[j] = *p_src++;
 621                         }
 622                         if(16 + sum > DC_LEN)
 623                             return -11; /* longer than allowed */
 624
 625                         for (; j < 16 + sum; j++)
 626                             p_jpeg->hufftable[i].huffmancodes_dc[j] = *p_src++;
 627                     }
 628                 } /* while */
 629                 p_src = p_temp+marker_size - 2; /* skip possible residue */
 630             }
 631             break;
 632
 633         case 0xCC: /* Define Arithmetic coding conditioning(s) */
 634             return(-6); /* Arithmetic coding not supported */
 635
 636         case 0xD8: /* Start of Image */
 637         case 0xD9: /* End of Image */
 638         case 0x01: /* for temp private use arith code */
 639             break; /* skip parameterless marker */
 640
 641
 642         case 0xDA: /* Start of Scan */
 643             {
 644                 ret |= SOS;
 645                 marker_size = *p_src++ << 8; /* Highbyte */
 646                 marker_size |= *p_src++; /* Lowbyte */
 647
 648                 n = (marker_size-2-1-3)/2;
 649                 if (*p_src++ != n || (n != 1 && n != 3))
 650                 {
 651                     return (-7); /* Unsupported SOS component specification */
 652                 }
 653                 for (i=0; i<n; i++)
 654                 {
 655                     p_jpeg->scanheader[i].ID = *p_src++;
 656                     p_jpeg->scanheader[i].DC_select = *p_src >> 4;
 657                     p_jpeg->scanheader[i].AC_select = *p_src++ & 0x0F;
 658                 }
 659                 p_src += 3; /* skip spectral information */
 660             }
 661             break;
 662
 663         case 0xDB: /* Define quantization Table(s) */
 664             {
 665                 ret |= DQT;
 666                 marker_size = *p_src++ << 8; /* Highbyte */
 667                 marker_size |= *p_src++; /* Lowbyte */
 668                 n = (marker_size-2)/(QUANT_TABLE_LENGTH+1); /* # of tables */
 669                 for (i=0; i<n; i++)
 670                 {
 671                     int id = *p_src++; /* ID */
 672                     if (id >= 4)
 673                     {
 674                         return (-8); /* Unsupported quantization table */
 675                     }
 676                     /* Read Quantisation table: */
 677                     for (j=0; j<QUANT_TABLE_LENGTH; j++)
 678                         p_jpeg->quanttable[id][j] = *p_src++;
 679                 }
 680             }
 681             break;
 682
 683         case 0xDD: /* Define Restart Interval */
 684             {
 685                 marker_size = *p_src++ << 8; /* Highbyte */
 686                 marker_size |= *p_src++; /* Lowbyte */
 687                 p_jpeg->restart_interval = *p_src++ << 8; /* Highbyte */
 688                 p_jpeg->restart_interval |= *p_src++; /* Lowbyte */
 689                 p_src += marker_size-4; /* skip segment */
 690             }
 691             break;
 692
 693         case 0xDC: /* Define Number of Lines */
 694         case 0xDE: /* Define Hierarchical progression */
 695         case 0xDF: /* Expand Reference Component(s) */
 696         case 0xE0: /* Application Field 0*/
 697         case 0xE1: /* Application Field 1*/
 698         case 0xE2: /* Application Field 2*/
 699         case 0xE3: /* Application Field 3*/
 700         case 0xE4: /* Application Field 4*/
 701         case 0xE5: /* Application Field 5*/
 702         case 0xE6: /* Application Field 6*/
 703         case 0xE7: /* Application Field 7*/
 704         case 0xE8: /* Application Field 8*/
 705         case 0xE9: /* Application Field 9*/
 706         case 0xEA: /* Application Field 10*/
 707         case 0xEB: /* Application Field 11*/
 708         case 0xEC: /* Application Field 12*/
 709         case 0xED: /* Application Field 13*/
 710         case 0xEE: /* Application Field 14*/
 711         case 0xEF: /* Application Field 15*/
 712         case 0xFE: /* Comment */
 713             {
 714                 marker_size = *p_src++ << 8; /* Highbyte */
 715                 marker_size |= *p_src++; /* Lowbyte */
 716                 p_src += marker_size-2; /* skip segment */
 717             }
 718             break;
 719
 720         case 0xF0: /* Reserved for JPEG extensions */
 721         case 0xF1: /* Reserved for JPEG extensions */
 722         case 0xF2: /* Reserved for JPEG extensions */
 723         case 0xF3: /* Reserved for JPEG extensions */
 724         case 0xF4: /* Reserved for JPEG extensions */
 725         case 0xF5: /* Reserved for JPEG extensions */
 726         case 0xF6: /* Reserved for JPEG extensions */
 727         case 0xF7: /* Reserved for JPEG extensions */
 728         case 0xF8: /* Reserved for JPEG extensions */
 729         case 0xF9: /* Reserved for JPEG extensions */
 730         case 0xFA: /* Reserved for JPEG extensions */
 731         case 0xFB: /* Reserved for JPEG extensions */
 732         case 0xFC: /* Reserved for JPEG extensions */
 733         case 0xFD: /* Reserved for JPEG extensions */
 734         case 0x02: /* Reserved */
 735         default:
 736             return (-9); /* Unknown marker */
 737         } /* switch */
 738     } /* while */
 739
 740     return (ret); /* return flags with seen markers */
 741 }
 742
 743
 744 void default_huff_tbl(struct jpeg* p_jpeg)
 745 {
 746     static const struct huffman_table luma_table =
 747     {
 748         {
 749             0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
 750             0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
 751         },
 752         {
 753             0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,0x01,0x7D,
 754             0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,
 755             0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,
 756             0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,
 757             0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
 758             0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,
 759             0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
 760             0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 761             0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,
 762             0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
 763             0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
 764             0xF9,0xFA
 765         }
 766     };
 767
 768     static const struct huffman_table chroma_table =
 769     {
 770         {
 771             0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,
 772             0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
 773         },
 774         {
 775             0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,0x02,0x77,
 776             0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,
 777             0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,
 778             0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,
 779             0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,
 780             0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,
 781             0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
 782             0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,
 783             0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,
 784             0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
 785             0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,
 786             0xF9,0xFA
 787         }
 788     };
 789
 790     MEMCPY(&p_jpeg->hufftable[0], &luma_table, sizeof(luma_table));
 791     MEMCPY(&p_jpeg->hufftable[1], &chroma_table, sizeof(chroma_table));
 792
 793     return;
 794 }
 795
 796 /* Compute the derived values for a Huffman table */
 797 void fix_huff_tbl(int* htbl, struct derived_tbl* dtbl)
 798 {
 799     int p, i, l, si;
 800     int lookbits, ctr;
 801     char huffsize[257];
 802     unsigned int huffcode[257];
 803     unsigned int code;
 804
 805     dtbl->pub = htbl; /* fill in back link */
 806
 807     /* Figure C.1: make table of Huffman code length for each symbol */
 808     /* Note that this is in code-length order. */
 809
 810     p = 0;
 811     for (l = 1; l <= 16; l++)
 812     {    /* all possible code length */
 813         for (i = 1; i <= (int) htbl[l-1]; i++)  /* all codes per length */
 814             huffsize[p++] = (char) l;
 815     }
 816     huffsize[p] = 0;
 817
 818     /* Figure C.2: generate the codes themselves */
 819     /* Note that this is in code-length order. */
 820
 821     code = 0;
 822     si = huffsize[0];
 823     p = 0;
 824     while (huffsize[p])
 825     {
 826         while (((int) huffsize[p]) == si)
 827         {
 828             huffcode[p++] = code;
 829             code++;
 830         }
 831         code <<= 1;
 832         si++;
 833     }
 834
 835     /* Figure F.15: generate decoding tables for bit-sequential decoding */
 836
 837     p = 0;
 838     for (l = 1; l <= 16; l++)
 839     {
 840         if (htbl[l-1])
 841         {
 842             dtbl->valptr[l] = p; /* huffval[] index of 1st symbol of code length l */
 843             dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
 844             p += htbl[l-1];
 845             dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
 846         }
 847         else
 848         {
 849             dtbl->maxcode[l] = -1;  /* -1 if no codes of this length */
 850         }
 851     }
 852     dtbl->maxcode[17] = 0xFFFFFL; /* ensures huff_DECODE terminates */
 853
 854     /* Compute lookahead tables to speed up decoding.
 855     * First we set all the table entries to 0, indicating "too long";
 856     * then we iterate through the Huffman codes that are short enough and
 857     * fill in all the entries that correspond to bit sequences starting
 858     * with that code.
 859     */
 860
 861     MEMSET(dtbl->look_nbits, 0, sizeof(dtbl->look_nbits));
 862
 863     p = 0;
 864     for (l = 1; l <= HUFF_LOOKAHEAD; l++)
 865     {
 866         for (i = 1; i <= (int) htbl[l-1]; i++, p++)
 867         {
 868             /* l = current code's length, p = its index in huffcode[] & huffval[]. */
 869             /* Generate left-justified code followed by all possible bit sequences */
 870             lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
 871             for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--)
 872             {
 873                 dtbl->look_nbits[lookbits] = l;
 874                 dtbl->look_sym[lookbits] = htbl[16+p];
 875                 lookbits++;
 876             }
 877         }
 878     }
 879 }
 880
 881
 882 /* zag[i] is the natural-order position of the i'th element of zigzag order.
 883  * If the incoming data is corrupted, decode_mcu could attempt to
 884  * reference values beyond the end of the array.  To avoid a wild store,
 885  * we put some extra zeroes after the real entries.
 886  */
 887 static const int zag[] =
 888 {
 889      0,  1,  8, 16,  9,  2,  3, 10,
 890     17, 24, 32, 25, 18, 11,  4,  5,
 891     12, 19, 26, 33, 40, 48, 41, 34,
 892     27, 20, 13,  6,  7, 14, 21, 28,
 893     35, 42, 49, 56, 57, 50, 43, 36,
 894     29, 22, 15, 23, 30, 37, 44, 51,
 895     58, 59, 52, 45, 38, 31, 39, 46,
 896     53, 60, 61, 54, 47, 55, 62, 63,
 897      0,  0,  0,  0,  0,  0,  0,  0, /* extra entries in case k>63 below */
 898      0,  0,  0,  0,  0,  0,  0,  0
 899 };
 900
 901 void build_lut(struct jpeg* p_jpeg)
 902 {
 903     int i;
 904     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_dc,
 905         &p_jpeg->dc_derived_tbls[0]);
 906     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_ac,
 907         &p_jpeg->ac_derived_tbls[0]);
 908     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_dc,
 909         &p_jpeg->dc_derived_tbls[1]);
 910     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_ac,
 911         &p_jpeg->ac_derived_tbls[1]);
 912
 913     /* build the dequantization tables for the IDCT (De-ZiZagged) */
 914     for (i=0; i<64; i++)
 915     {
 916         p_jpeg->qt_idct[0][zag[i]] = p_jpeg->quanttable[0][i];
 917         p_jpeg->qt_idct[1][zag[i]] = p_jpeg->quanttable[1][i];
 918     }
 919
 920     for (i=0; i<4; i++)
 921         p_jpeg->store_pos[i] = i; /* default ordering */
 922
 923     /* assignments for the decoding of blocks */
 924     if (p_jpeg->frameheader[0].horizontal_sampling == 2
 925         && p_jpeg->frameheader[0].vertical_sampling == 1)
 926     {   /* 4:2:2 */
 927         p_jpeg->blocks = 4;
 928         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
 929         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
 930         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
 931         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
 932         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
 933         p_jpeg->mcu_membership[1] = 0;
 934         p_jpeg->mcu_membership[2] = 1;
 935         p_jpeg->mcu_membership[3] = 2;
 936         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
 937         p_jpeg->tab_membership[1] = 0;
 938         p_jpeg->tab_membership[2] = 1;
 939         p_jpeg->tab_membership[3] = 1;
 940         p_jpeg->subsample_x[0] = 1;
 941         p_jpeg->subsample_x[1] = 2;
 942         p_jpeg->subsample_x[2] = 2;
 943         p_jpeg->subsample_y[0] = 1;
 944         p_jpeg->subsample_y[1] = 1;
 945         p_jpeg->subsample_y[2] = 1;
 946     }
 947     if (p_jpeg->frameheader[0].horizontal_sampling == 1
 948         && p_jpeg->frameheader[0].vertical_sampling == 2)
 949     {   /* 4:2:2 vertically subsampled */
 950         p_jpeg->store_pos[1] = 2; /* block positions are mirrored */
 951         p_jpeg->store_pos[2] = 1;
 952         p_jpeg->blocks = 4;
 953         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
 954         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
 955         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
 956         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
 957         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
 958         p_jpeg->mcu_membership[1] = 0;
 959         p_jpeg->mcu_membership[2] = 1;
 960         p_jpeg->mcu_membership[3] = 2;
 961         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
 962         p_jpeg->tab_membership[1] = 0;
 963         p_jpeg->tab_membership[2] = 1;
 964         p_jpeg->tab_membership[3] = 1;
 965         p_jpeg->subsample_x[0] = 1;
 966         p_jpeg->subsample_x[1] = 1;
 967         p_jpeg->subsample_x[2] = 1;
 968         p_jpeg->subsample_y[0] = 1;
 969         p_jpeg->subsample_y[1] = 2;
 970         p_jpeg->subsample_y[2] = 2;
 971     }
 972     else if (p_jpeg->frameheader[0].horizontal_sampling == 2
 973         && p_jpeg->frameheader[0].vertical_sampling == 2)
 974     {   /* 4:2:0 */
 975         p_jpeg->blocks = 6;
 976         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
 977         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
 978         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
 979         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
 980         p_jpeg->mcu_membership[0] = 0;
 981         p_jpeg->mcu_membership[1] = 0;
 982         p_jpeg->mcu_membership[2] = 0;
 983         p_jpeg->mcu_membership[3] = 0;
 984         p_jpeg->mcu_membership[4] = 1;
 985         p_jpeg->mcu_membership[5] = 2;
 986         p_jpeg->tab_membership[0] = 0;
 987         p_jpeg->tab_membership[1] = 0;
 988         p_jpeg->tab_membership[2] = 0;
 989         p_jpeg->tab_membership[3] = 0;
 990         p_jpeg->tab_membership[4] = 1;
 991         p_jpeg->tab_membership[5] = 1;
 992         p_jpeg->subsample_x[0] = 1;
 993         p_jpeg->subsample_x[1] = 2;
 994         p_jpeg->subsample_x[2] = 2;
 995         p_jpeg->subsample_y[0] = 1;
 996         p_jpeg->subsample_y[1] = 2;
 997         p_jpeg->subsample_y[2] = 2;
 998     }
 999     else if (p_jpeg->frameheader[0].horizontal_sampling == 1
1000         && p_jpeg->frameheader[0].vertical_sampling == 1)
1001     {   /* 4:4:4 */
1002         /* don't overwrite p_jpeg->blocks */
1003         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
1004         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
1005         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
1006         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
1007         p_jpeg->mcu_membership[0] = 0;
1008         p_jpeg->mcu_membership[1] = 1;
1009         p_jpeg->mcu_membership[2] = 2;
1010         p_jpeg->tab_membership[0] = 0;
1011         p_jpeg->tab_membership[1] = 1;
1012         p_jpeg->tab_membership[2] = 1;
1013         p_jpeg->subsample_x[0] = 1;
1014         p_jpeg->subsample_x[1] = 1;
1015         p_jpeg->subsample_x[2] = 1;
1016         p_jpeg->subsample_y[0] = 1;
1017         p_jpeg->subsample_y[1] = 1;
1018         p_jpeg->subsample_y[2] = 1;
1019     }
1020     else
1021     {
1022         /* error */
1023     }
1024
1025 }
1026
1027
/*
* These inline functions provide the in-line portion of bit fetching.
* Use check_bit_buffer to ensure there are N bits in get_buffer
* before using get_bits, peek_bits, or drop_bits.
*  check_bit_buffer(pb, n);
*    Ensure there are N bits in get_buffer, refilling it if necessary.
*  val = get_bits(pb, n);
*    Fetch next N bits.
*  val = peek_bits(pb, n);
*    Fetch next N bits without removing them from the buffer.
*  drop_bits(pb, n);
*    Discard next N bits.
* The value N must not exceed 16, because a refill adds only 16 bits.
* (As these are functions rather than macros, N is evaluated only once,
* so it may be an expression.)
*/
1043
1044 INLINE void check_bit_buffer(struct bitstream* pb, int nbits)
1045 {
1046     if (pb->bits_left < nbits)
1047     {   /* nbits is <= 16, so I can always refill 2 bytes in this case */
1048         unsigned char byte;
1049
1050         byte = *pb->next_input_byte++;
1051         if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1052         {   /* simplification: just skip the (one-byte) marker code */
1053             pb->next_input_byte++;
1054         }
1055         pb->get_buffer = (pb->get_buffer << 8) | byte;
1056
1057         byte = *pb->next_input_byte++;
1058         if (byte == 0xFF) /* legal marker can be byte stuffing or RSTm */
1059         {   /* simplification: just skip the (one-byte) marker code */
1060             pb->next_input_byte++;
1061         }
1062         pb->get_buffer = (pb->get_buffer << 8) | byte;
1063
1064         pb->bits_left += 16;
1065     }
1066 }
1067
1068 INLINE int get_bits(struct bitstream* pb, int nbits)
1069 {
1070     return ((int) (pb->get_buffer >> (pb->bits_left -= nbits))) & (BIT_N(nbits)-1);
1071 }
1072
1073 INLINE int peek_bits(struct bitstream* pb, int nbits)
1074 {
1075     return ((int) (pb->get_buffer >> (pb->bits_left - nbits))) & (BIT_N(nbits)-1);
1076 }
1077
1078 INLINE void drop_bits(struct bitstream* pb, int nbits)
1079 {
1080     pb->bits_left -= nbits;
1081 }
1082
1083 /* re-synchronize to entropy data (skip restart marker) */
1084 void search_restart(struct bitstream* pb)
1085 {
1086     pb->next_input_byte--; /* we may have overread it, taking 2 bytes */
1087     /* search for a non-byte-padding marker, has to be RSTm or EOS */
1088     while (pb->next_input_byte < pb->input_end &&
1089         (pb->next_input_byte[-2] != 0xFF || pb->next_input_byte[-1] == 0x00))
1090     {
1091         pb->next_input_byte++;
1092     }
1093     pb->bits_left = 0;
1094 }
1095
/* Figure F.12: extend sign bit.
 * x is the raw s-bit value read from the stream, s its bit count (0..15).
 * Values below 2**(s-1) stand for negative numbers and are shifted down by
 * 2**s - 1; all other values are returned unchanged.
 * Both arguments are evaluated more than once, so pass simple variables.
 */
#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))

static const int extend_test[16] =   /* entry n is 2**(n-1) */
{
    0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
};

/* Entry n is 1 - 2**n. Written as 1 - (1 << n) rather than (-1 << n) + 1,
 * because left-shifting a negative value is undefined behavior in C. */
static const int extend_offset[16] =
{
    0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
    1 - (1<<13), 1 - (1<<14), 1 - (1<<15)
};
1112
1113 /* Decode a single value */
1114 INLINE int huff_decode_dc(struct bitstream* bs, struct derived_tbl* tbl)
1115 {
1116     int nb, look, s, r;
1117
1118     check_bit_buffer(bs, HUFF_LOOKAHEAD);
1119     look = peek_bits(bs, HUFF_LOOKAHEAD);
1120     if ((nb = tbl->look_nbits[look]) != 0)
1121     {
1122         drop_bits(bs, nb);
1123         s = tbl->look_sym[look];
1124         check_bit_buffer(bs, s);
1125         r = get_bits(bs, s);
1126         s = HUFF_EXTEND(r, s);
1127     }
1128     else
1129     {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1130         long code;
1131         nb=HUFF_LOOKAHEAD+1;
1132         check_bit_buffer(bs, nb);
1133         code = get_bits(bs, nb);
1134         while (code > tbl->maxcode[nb])
1135         {
1136             code <<= 1;
1137             check_bit_buffer(bs, 1);
1138             code |= get_bits(bs, 1);
1139             nb++;
1140         }
1141         if (nb > 16) /* error in Huffman */
1142         {
1143             s=0; /* fake a zero, this is most safe */
1144         }
1145         else
1146         {
1147             s = tbl->pub[16 + tbl->valptr[nb] + ((int) (code - tbl->mincode[nb])) ];
1148             check_bit_buffer(bs, s);
1149             r = get_bits(bs, s);
1150             s = HUFF_EXTEND(r, s);
1151         }
1152     } /* end slow decode */
1153     return s;
1154 }
1155
1156 INLINE int huff_decode_ac(struct bitstream* bs, struct derived_tbl* tbl)
1157 {
1158     int nb, look, s;
1159
1160     check_bit_buffer(bs, HUFF_LOOKAHEAD);
1161     look = peek_bits(bs, HUFF_LOOKAHEAD);
1162     if ((nb = tbl->look_nbits[look]) != 0)
1163     {
1164         drop_bits(bs, nb);
1165         s = tbl->look_sym[look];
1166     }
1167     else
1168     {   /*  slow_DECODE(s, HUFF_LOOKAHEAD+1)) < 0); */
1169         long code;
1170         nb=HUFF_LOOKAHEAD+1;
1171         check_bit_buffer(bs, nb);
1172         code = get_bits(bs, nb);
1173         while (code > tbl->maxcode[nb])
1174         {
1175             code <<= 1;
1176             check_bit_buffer(bs, 1);
1177             code |= get_bits(bs, 1);
1178             nb++;
1179         }
1180         if (nb > 16) /* error in Huffman */
1181         {
1182             s=0; /* fake a zero, this is most safe */
1183         }
1184         else
1185         {
1186             s = tbl->pub[16 + tbl->valptr[nb] + ((int) (code - tbl->mincode[nb])) ];
1187         }
1188     } /* end slow decode */
1189     return s;
1190 }
1191
1192
1193 #ifdef HAVE_LCD_COLOR
1194
1195 /* JPEG decoder variant for YUV decoding, into 3 different planes */
1196 /*  Note: it keeps the original color subsampling, even if resized. */
1197 int jpeg_decode(struct jpeg* p_jpeg, unsigned char* p_pixel[3],
1198                 int downscale, void (*pf_progress)(int current, int total))
1199 {
1200     struct bitstream bs; /* bitstream "object" */
1201     int block[64]; /* decoded DCT coefficients */
1202
1203     int width, height;
1204     int skip_line[3]; /* bytes from one line to the next (skip_line) */
1205     int skip_strip[3], skip_mcu[3]; /* bytes to next DCT row / column */
1206
1207     int i, x, y; /* loop counter */
1208
1209     unsigned char* p_line[3] = {p_pixel[0], p_pixel[1], p_pixel[2]};
1210     unsigned char* p_byte[3]; /* bitmap pointer */
1211
1212     void (*pf_idct)(unsigned char*, int*, int*, int); /* selected IDCT */
1213     int k_need; /* AC coefficients needed up to here */
1214     int zero_need; /* init the block with this many zeros */
1215
1216     int last_dc_val[3] = {0, 0, 0}; /* or 128 for chroma? */
1217     int store_offs[4]; /* memory offsets: order of Y11 Y12 Y21 Y22 U V */
1218     int restart = p_jpeg->restart_interval; /* MCUs until restart marker */
1219
1220     /* pick the IDCT we want, determine how to work with coefs */
1221     if (downscale == 1)
1222     {
1223         pf_idct = idct8x8;
1224         k_need = 64; /* all */
1225         zero_need = 63; /* all */
1226     }
1227     else if (downscale == 2)
1228     {
1229         pf_idct = idct4x4;
1230         k_need = 25; /* this far in zig-zag to cover 4*4 */
1231         zero_need = 27; /* clear this far in linear order */
1232     }
1233     else if (downscale == 4)
1234     {
1235         pf_idct = idct2x2;
1236         k_need = 5; /* this far in zig-zag to cover 2*2 */
1237         zero_need = 9; /* clear this far in linear order */
1238     }
1239     else if (downscale == 8)
1240     {
1241         pf_idct = idct1x1;
1242         k_need = 0; /* no AC, not needed */
1243         zero_need = 0; /* no AC, not needed */
1244     }
1245     else return -1; /* not supported */
1246
1247     /* init bitstream, fake a restart to make it start */
1248     bs.next_input_byte = p_jpeg->p_entropy_data;
1249     bs.bits_left = 0;
1250     bs.input_end = p_jpeg->p_entropy_end;
1251
1252     width  = p_jpeg->x_phys / downscale;
1253     height = p_jpeg->y_phys / downscale;
1254     for (i=0; i<3; i++) /* calculate some strides */
1255     {
1256         skip_line[i] = width / p_jpeg->subsample_x[i];
1257         skip_strip[i] = skip_line[i]
1258                         * (height / p_jpeg->y_mbl) / p_jpeg->subsample_y[i];
1259         skip_mcu[i] = width/p_jpeg->x_mbl / p_jpeg->subsample_x[i];
1260     }
1261
1262     /* prepare offsets about where to store the different blocks */
1263     store_offs[p_jpeg->store_pos[0]] = 0;
1264     store_offs[p_jpeg->store_pos[1]] = 8 / downscale; /* to the right */
1265     store_offs[p_jpeg->store_pos[2]] = width * 8 / downscale; /* below */
1266     store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2]; /* r+b */
1267
1268     for(y=0; y<p_jpeg->y_mbl && bs.next_input_byte <= bs.input_end; y++)
1269     {
1270         for (i=0; i<3; i++) /* scan line init */
1271         {
1272             p_byte[i] = p_line[i];
1273             p_line[i] += skip_strip[i];
1274         }
1275         for (x=0; x<p_jpeg->x_mbl; x++)
1276         {
1277             int blkn;
1278
1279             /* Outer loop handles each block in the MCU */
1280             for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
1281             {   /* Decode a single block's worth of coefficients */
1282                 int k = 1; /* coefficient index */
1283                 int s, r; /* huffman values */
1284                 int ci = p_jpeg->mcu_membership[blkn]; /* component index */
1285                 int ti = p_jpeg->tab_membership[blkn]; /* table index */
1286                 struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
1287                 struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];
1288
1289                 /* Section F.2.2.1: decode the DC coefficient difference */
1290                 s = huff_decode_dc(&bs, dctbl);
1291
1292                 last_dc_val[ci] += s;
1293                 block[0] = last_dc_val[ci]; /* output it (assumes zag[0] = 0) */
1294
1295                 /* coefficient buffer must be cleared */
1296                 MEMSET(block+1, 0, zero_need*sizeof(block[0]));
1297
1298                 /* Section F.2.2.2: decode the AC coefficients */
1299                 for (; k < k_need; k++)
1300                 {
1301                     s = huff_decode_ac(&bs, actbl);
1302                     r = s >> 4;
1303                     s &= 15;
1304
1305                     if (s)
1306                     {
1307                         k += r;
1308                         check_bit_buffer(&bs, s);
1309                         r = get_bits(&bs, s);
1310                         block[zag[k]] = HUFF_EXTEND(r, s);
1311                     }
1312                     else
1313                     {
1314                         if (r != 15)
1315                         {
1316                             k = 64;
1317                             break;
1318                         }
1319                         k += r;
1320                     }
1321                 }  /* for k */
1322                 /* In this path we just discard the values */
1323                 for (; k < 64; k++)
1324                 {
1325                     s = huff_decode_ac(&bs, actbl);
1326                     r = s >> 4;
1327                     s &= 15;
1328
1329                     if (s)
1330                     {
1331                         k += r;
1332                         check_bit_buffer(&bs, s);
1333                         drop_bits(&bs, s);
1334                     }
1335                     else
1336                     {
1337                         if (r != 15)
1338                             break;
1339                         k += r;
1340                     }
1341                 }  /* for k */
1342
1343                 if (ci == 0)
1344                 {   /* Y component needs to bother about block store */
1345                     pf_idct(p_byte[0]+store_offs[blkn], block,
1346                         p_jpeg->qt_idct[ti], skip_line[0]);
1347                 }
1348                 else
1349                 {   /* chroma */
1350                     pf_idct(p_byte[ci], block, p_jpeg->qt_idct[ti],
1351                         skip_line[ci]);
1352                 }
1353             } /* for blkn */
1354             p_byte[0] += skip_mcu[0]; /* unrolled for (i=0; i<3; i++) loop */
1355             p_byte[1] += skip_mcu[1];
1356             p_byte[2] += skip_mcu[2];
1357             if (p_jpeg->restart_interval && --restart == 0)
1358             {   /* if a restart marker is due: */
1359                 restart = p_jpeg->restart_interval; /* count again */
1360                 search_restart(&bs); /* align the bitstream */
1361                 last_dc_val[0] = last_dc_val[1] =
1362                                  last_dc_val[2] = 0; /* reset decoder */
1363             }
1364         } /* for x */
1365         if (pf_progress != NULL)
1366             pf_progress(y, p_jpeg->y_mbl-1); /* notify about decoding progress */
1367     } /* for y */
1368
1369     return 0; /* success */
1370 }
1371 #else /* !HAVE_LCD_COLOR */
1372
1373 /* a JPEG decoder specialized in decoding only the luminance (b&w) */
1374 int jpeg_decode(struct jpeg* p_jpeg, unsigned char* p_pixel[1], int downscale,
1375                 void (*pf_progress)(int current, int total))
1376 {
1377     struct bitstream bs; /* bitstream "object" */
1378     int block[64]; /* decoded DCT coefficients */
1379
1380     int width, height;
1381     int skip_line; /* bytes from one line to the next (skip_line) */
1382     int skip_strip, skip_mcu; /* bytes to next DCT row / column */
1383
1384     int x, y; /* loop counter */
1385
1386     unsigned char* p_line = p_pixel[0];
1387     unsigned char* p_byte; /* bitmap pointer */
1388
1389     void (*pf_idct)(unsigned char*, int*, int*, int); /* selected IDCT */
1390     int k_need; /* AC coefficients needed up to here */
1391     int zero_need; /* init the block with this many zeros */
1392
1393     int last_dc_val = 0;
1394     int store_offs[4]; /* memory offsets: order of Y11 Y12 Y21 Y22 U V */
1395     int restart = p_jpeg->restart_interval; /* MCUs until restart marker */
1396
1397     /* pick the IDCT we want, determine how to work with coefs */
1398     if (downscale == 1)
1399     {
1400         pf_idct = idct8x8;
1401         k_need = 64; /* all */
1402         zero_need = 63; /* all */
1403     }
1404     else if (downscale == 2)
1405     {
1406         pf_idct = idct4x4;
1407         k_need = 25; /* this far in zig-zag to cover 4*4 */
1408         zero_need = 27; /* clear this far in linear order */
1409     }
1410     else if (downscale == 4)
1411     {
1412         pf_idct = idct2x2;
1413         k_need = 5; /* this far in zig-zag to cover 2*2 */
1414         zero_need = 9; /* clear this far in linear order */
1415     }
1416     else if (downscale == 8)
1417     {
1418         pf_idct = idct1x1;
1419         k_need = 0; /* no AC, not needed */
1420         zero_need = 0; /* no AC, not needed */
1421     }
1422     else return -1; /* not supported */
1423
1424     /* init bitstream, fake a restart to make it start */
1425     bs.next_input_byte = p_jpeg->p_entropy_data;
1426     bs.bits_left = 0;
1427     bs.input_end = p_jpeg->p_entropy_end;
1428
1429     width  = p_jpeg->x_phys / downscale;
1430     height = p_jpeg->y_phys / downscale;
1431     skip_line = width;
1432     skip_strip = skip_line * (height / p_jpeg->y_mbl);
1433     skip_mcu = (width/p_jpeg->x_mbl);
1434
1435     /* prepare offsets about where to store the different blocks */
1436     store_offs[p_jpeg->store_pos[0]] = 0;
1437     store_offs[p_jpeg->store_pos[1]] = 8 / downscale; /* to the right */
1438     store_offs[p_jpeg->store_pos[2]] = width * 8 / downscale; /* below */
1439     store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2]; /* r+b */
1440
1441     for(y=0; y<p_jpeg->y_mbl && bs.next_input_byte <= bs.input_end; y++)
1442     {
1443         p_byte = p_line;
1444         p_line += skip_strip;
1445         for (x=0; x<p_jpeg->x_mbl; x++)
1446         {
1447             int blkn;
1448
1449             /* Outer loop handles each block in the MCU */
1450             for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
1451             {   /* Decode a single block's worth of coefficients */
1452                 int k = 1; /* coefficient index */
1453                 int s, r; /* huffman values */
1454                 int ci = p_jpeg->mcu_membership[blkn]; /* component index */
1455                 int ti = p_jpeg->tab_membership[blkn]; /* table index */
1456                 struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
1457                 struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];
1458
1459                 /* Section F.2.2.1: decode the DC coefficient difference */
1460                 s = huff_decode_dc(&bs, dctbl);
1461
1462                 if (ci == 0) /* only for Y component */
1463                 {
1464                     last_dc_val += s;
1465                     block[0] = last_dc_val; /* output it (assumes zag[0] = 0) */
1466
1467                     /* coefficient buffer must be cleared */
1468                     MEMSET(block+1, 0, zero_need*sizeof(block[0]));
1469
1470                     /* Section F.2.2.2: decode the AC coefficients */
1471                     for (; k < k_need; k++)
1472                     {
1473                         s = huff_decode_ac(&bs, actbl);
1474                         r = s >> 4;
1475                         s &= 15;
1476
1477                         if (s)
1478                         {
1479                             k += r;
1480                             check_bit_buffer(&bs, s);
1481                             r = get_bits(&bs, s);
1482                             block[zag[k]] = HUFF_EXTEND(r, s);
1483                         }
1484                         else
1485                         {
1486                             if (r != 15)
1487                             {
1488                                 k = 64;
1489                                 break;
1490                             }
1491                             k += r;
1492                         }
1493                     }  /* for k */
1494                 }
1495                 /* In this path we just discard the values */
1496                 for (; k < 64; k++)
1497                 {
1498                     s = huff_decode_ac(&bs, actbl);
1499                     r = s >> 4;
1500                     s &= 15;
1501
1502                     if (s)
1503                     {
1504                         k += r;
1505                         check_bit_buffer(&bs, s);
1506                         drop_bits(&bs, s);
1507                     }
1508                     else
1509                     {
1510                         if (r != 15)
1511                             break;
1512                         k += r;
1513                     }
1514                 }  /* for k */
1515
1516                 if (ci == 0)
1517                 {   /* only for Y component */
1518                     pf_idct(p_byte+store_offs[blkn], block, p_jpeg->qt_idct[ti],
1519                         skip_line);
1520                 }
1521             } /* for blkn */
1522             p_byte += skip_mcu;
1523             if (p_jpeg->restart_interval && --restart == 0)
1524             {   /* if a restart marker is due: */
1525                 restart = p_jpeg->restart_interval; /* count again */
1526                 search_restart(&bs); /* align the bitstream */
1527                 last_dc_val = 0; /* reset decoder */
1528             }
1529         } /* for x */
1530         if (pf_progress != NULL)
1531             pf_progress(y, p_jpeg->y_mbl-1); /* notify about decoding progress */
1532     } /* for y */
1533
1534     return 0; /* success */
1535 }
1536 #endif /* !HAVE_LCD_COLOR */
1537
1538 /**************** end JPEG code ********************/