libvo/jpeg_enc.c

   1 /* Straightforward (to be) optimized JPEG encoder for the YUV422 format
   2  * based on mjpeg code from ffmpeg.
   3  *
   4  * Copyright (c) 2002, Rik Snel
   5  * Parts from ffmpeg Copyright (c) 2000-2002 Fabrice Bellard
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20  *
  21  * For an excellent introduction to the JPEG format, see:
  22  * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
  23  */
  24
  25
  26
  27 #include <sys/types.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include "config.h"
  31 #ifdef USE_FASTMEMCPY
  32 #include "fastmemcpy.h"
  33 #endif
  34 #include "../mp_msg.h"
  35 /* We need this #define because we need ../libavcodec/common.h to #define
  36  * be2me_32, otherwise the linker will complain that it doesn't exist */
  37 #define HAVE_AV_CONFIG_H
  38 #include "../libavcodec/avcodec.h"
  39 #include "../libavcodec/dsputil.h"
  40 #include "../libavcodec/mpegvideo.h"
  41
  42 #include "jpeg_enc.h"
  43
  44 extern int avcodec_inited;
  45
  46 /* zr_mjpeg_encode_mb needs access to these tables for the black & white
  47  * option */
/* Huffman tables reached through s->mjpeg_ctx.
 * NOTE(review): presumably this must mirror the private definition in
 * ../libavcodec/mjpeg.c field for field (same order, same sizes), since the
 * context is filled in there and only read here - confirm when updating
 * libavcodec. */
typedef struct MJpegContext {
    /* DC tables: code length in bits / code value, indexed by the number
     * of bits needed for the DC difference (see encode_dc) */
    uint8_t huff_size_dc_luminance[12];
    uint16_t huff_code_dc_luminance[12];
    uint8_t huff_size_dc_chrominance[12];
    uint16_t huff_code_dc_chrominance[12];

    /* AC tables: code length in bits / code value, indexed by
     * (run << 4) | nbits (see encode_block) */
    uint8_t huff_size_ac_luminance[256];
    uint16_t huff_code_ac_luminance[256];
    uint8_t huff_size_ac_chrominance[256];
    uint16_t huff_code_ac_chrominance[256];
} MJpegContext;
  59
  60
  61 /* Begin excessive code duplication ************************************/
  62 /* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
  63
/* Per-coefficient scale factors in fixed point with 14 fractional bits
 * (16384 corresponds to 1.0). convert_matrix() multiplies them into the
 * divisor when the forward DCT in use is fdct_ifast.
 * NOTE(review): presumably the AAN DCT post-scaling factors - confirm. */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
  75
  76 static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
  77                 uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
  78                 int bias, int qmin, int qmax)
  79 {
  80     int qscale;
  81
  82     for(qscale=qmin; qscale<qmax; qscale++){
  83         int i;
  84         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
  85                 for (i = 0; i < 64; i++) {
  86                         const int j = s->dsp.idct_permutation[i];
  87                         /* 16    <= qscale * quant_matrix[i] <= 7905
  88                          * 19952 <= aanscales[i] *  \
  89                          *              qscale * quant_matrix[i]     <= 205026
  90                          * (1<<36)/19952 >= (1<<36)/(aanscales[i] * \
  91                          *      qscale * quant_matrix[i]) >= (1<<36)/249205025
  92                          * 3444240       >= (1<<36)/(aanscales[i] *
  93                          *      qscale * quant_matrix[i]) >= 275              */
  94                         qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT-3))/
  95                                         (qscale * quant_matrix[j]));
  96                 }
  97         } else if (s->dsp.fdct == fdct_ifast) {
  98             for(i=0;i<64;i++) {
  99                 const int j = s->dsp.idct_permutation[i];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
 101                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
 102                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
 103                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
 106                                 (aanscales[i] * qscale * quant_matrix[j]));
 107             }
 108         } else {
 109             for(i=0;i<64;i++) {
 110                 const int j = s->dsp.idct_permutation[i];
 111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 112                    So 16           <= qscale * quant_matrix[i]             <= 7905
 113                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 114                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 115                 */
 116                 qmat  [qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]));
 117                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 118
 119                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
 120                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
 121             }
 122         }
 123     }
 124 }
 125
 126 static inline void encode_dc(MpegEncContext *s, int val,
 127                              uint8_t *huff_size, uint16_t *huff_code)
 128 {
 129     int mant, nbits;
 130
 131     if (val == 0) {
 132         put_bits(&s->pb, huff_size[0], huff_code[0]);
 133     } else {
 134         mant = val;
 135         if (val < 0) {
 136             val = -val;
 137             mant--;
 138         }
 139
 140         /* compute the log (XXX: optimize) */
 141         nbits = 0;
 142         while (val != 0) {
 143             val = val >> 1;
 144             nbits++;
 145         }
 146
 147         put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
 148
 149         put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
 150     }
 151 }
 152
 153 static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
 154 {
 155     int mant, nbits, code, i, j;
 156     int component, dc, run, last_index, val;
 157     MJpegContext *m = s->mjpeg_ctx;
 158     uint8_t *huff_size_ac;
 159     uint16_t *huff_code_ac;
 160
 161     /* DC coef */
 162     component = (n <= 3 ? 0 : n - 4 + 1);
 163     dc = block[0]; /* overflow is impossible */
 164     val = dc - s->last_dc[component];
 165     if (n < 4) {
 166         encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
 167         huff_size_ac = m->huff_size_ac_luminance;
 168         huff_code_ac = m->huff_code_ac_luminance;
 169     } else {
 170         encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
 171         huff_size_ac = m->huff_size_ac_chrominance;
 172         huff_code_ac = m->huff_code_ac_chrominance;
 173     }
 174     s->last_dc[component] = dc;
 175
 176     /* AC coefs */
 177
 178     run = 0;
 179     last_index = s->block_last_index[n];
 180     for(i=1;i<=last_index;i++) {
 181         j = s->intra_scantable.permutated[i];
 182         val = block[j];
 183         if (val == 0) {
 184             run++;
 185         } else {
 186             while (run >= 16) {
 187                 put_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
 188                 run -= 16;
 189             }
 190             mant = val;
 191             if (val < 0) {
 192                 val = -val;
 193                 mant--;
 194             }
 195
 196             /* compute the log (XXX: optimize) */
 197             nbits = 0;
 198             while (val != 0) {
 199                 val = val >> 1;
 200                 nbits++;
 201             }
 202             code = (run << 4) | nbits;
 203
 204             put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
 205
 206             put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
 207             run = 0;
 208         }
 209     }
 210
 211     /* output EOB only if not already 64 values */
 212     if (last_index < 63 || run != 0)
 213         put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
 214 }
 215
 216 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
 217 {
 218     int i;
 219     const int maxlevel= s->max_qcoeff;
 220     const int minlevel= s->min_qcoeff;
 221
 222     for(i=0; i<=last_index; i++){
 223         const int j = s->intra_scantable.permutated[i];
 224         int level = block[j];
 225
 226         if     (level>maxlevel) level=maxlevel;
 227         else if(level<minlevel) level=minlevel;
 228         block[j]= level;
 229     }
 230 }
 231
 232 /* End excessive code duplication **************************************/
 233
 234 /* this function is a reproduction of the one in mjpeg, it includes two
 235  * changes, it allows for black&white encoding (it skips the U and V
 236  * macroblocks and it outputs the huffman code for 'no change' (dc) and
 237  * 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
 238 static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
 239
 240         MJpegContext *m = j->s->mjpeg_ctx;
 241
 242         encode_block(j->s, j->s->block[0], 0);
 243         encode_block(j->s, j->s->block[1], 1);
 244         if (j->bw) {
 245                 /* U */
 246                 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
 247                                 m->huff_code_dc_chrominance[0]);
 248                 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
 249                                 m->huff_code_ac_chrominance[0]);
 250                 /* V */
 251                 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
 252                                 m->huff_code_dc_chrominance[0]);
 253                 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
 254                                 m->huff_code_ac_chrominance[0]);
 255         } else {
 256                 /* we trick encode_block here so that it uses
 257                  * chrominance huffman tables instead of luminance ones
 258                  * (see the effect of second argument of encode_block) */
 259                 encode_block(j->s, j->s->block[2], 4);
 260                 encode_block(j->s, j->s->block[3], 5);
 261         }
 262 }
 263
/* This function can take all kinds of YUV colorspaces:
 * YV12, YVYU, UYVY. The necessary parameters must be set up by the caller;
 * y_ps means "y pixel size", y_rs means "y row size".
 * For YUYV, for example: u_buf = y_buf + 1, v_buf = y_buf + 3,
 * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
 *
 *  The actual buffers must be passed to jpeg_enc_frame, this is
 *  to make it possible to call encode on the buffer provided by the
 *  codec in draw_frame.
 *
 * The data is straightened out at the moment it is put in DCT
 * blocks, there are therefore no spurious memcopies involved */
/* Notice that w must be a multiple of 16 and h must be a multiple of 8 */
/* We produce YUV422 jpegs, the colors must be subsampled horizontally,
 * if the colors are also subsampled vertically, then this function
 * performs cheap upsampling (a better solution would be a DCT that is
 * optimized for the case that every two rows are the same) */
/* cu = 0 means 'No cheap upsampling'
 * cu = 1 means 'perform cheap upsampling' */
/* The encoder doesn't know anything about interlacing: half the height
 * and double the rowstride need to be passed. Which field gets encoded
 * is decided by what buffers are passed to jpeg_enc_frame */
 286 jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
 287                 int u_psize, int u_rsize, int v_psize, int v_rsize,
 288                 int cu, int q, int b) {
 289         jpeg_enc_t *j;
 290         int i = 0;
 291         mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %d %d %d %d %d %d\n",
 292                         w, h, y_psize, y_rsize, u_psize,
 293                         u_rsize, v_psize, v_rsize);
 294
 295         j = av_malloc(sizeof(jpeg_enc_t));
 296         if (j == NULL) return NULL;
 297
 298         j->s = av_malloc(sizeof(MpegEncContext));
 299         memset(j->s,0x00,sizeof(MpegEncContext));
 300         if (j->s == NULL) {
 301                 av_free(j);
 302                 return NULL;
 303         }
 304
 305         /* info on how to access the pixels */
 306         j->y_ps = y_psize;
 307         j->u_ps = u_psize;
 308         j->v_ps = v_psize;
 309         j->y_rs = y_rsize;
 310         j->u_rs = u_rsize;
 311         j->v_rs = v_rsize;
 312
 313         j->s->width = w;
 314         j->s->height = h;
 315         j->s->qscale = q;
 316
 317         j->s->mjpeg_data_only_frames = 0;
 318         j->s->out_format = FMT_MJPEG;
 319         j->s->intra_only = 1;
 320         j->s->encoding = 1;
 321         j->s->pict_type = I_TYPE;
 322         j->s->y_dc_scale = 8;
 323         j->s->c_dc_scale = 8;
 324
 325         j->s->mjpeg_write_tables = 1;
 326         j->s->mjpeg_vsample[0] = 1;
 327         j->s->mjpeg_vsample[1] = 1;
 328         j->s->mjpeg_vsample[2] = 1;
 329         j->s->mjpeg_hsample[0] = 2;
 330         j->s->mjpeg_hsample[1] = 1;
 331         j->s->mjpeg_hsample[2] = 1;
 332
 333         j->cheap_upsample = cu;
 334         j->bw = b;
 335
 336         /* if libavcodec is used by the decoder then we must not
 337          * initialize again, but if it is not initialized then we must
 338          * initialize it here. */
 339         if (!avcodec_inited) {
 340                 /* we need to initialize libavcodec */
 341                 avcodec_init();
 342                 avcodec_register_all();
 343                 avcodec_inited=1;
 344         }
 345
 346         if (mjpeg_init(j->s) < 0) {
 347                 av_free(j->s);
 348                 av_free(j);
 349                 return NULL;
 350         }
 351
 352         /* alloc bogus avctx to keep MPV_common_init from segfaulting */
 353         j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
 354
 355         if (MPV_common_init(j->s) < 0) {
 356                 av_free(j->s);
 357                 av_free(j);
 358                 return NULL;
 359         }
 360
 361         /* correct the value for sc->mb_height */
 362         j->s->mb_height = j->s->height/8;
 363         j->s->mb_intra = 1;
 364
 365         j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
 366         for (i = 1; i < 64; i++)
 367                 j->s->intra_matrix[i] = CLAMP_TO_8BIT(
 368                         (ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
 369         convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
 370                         j->s->intra_matrix, j->s->intra_quant_bias, 1, 31);
 371         return j;
 372 }
 373
 374 int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
 375                 unsigned char *u_data, unsigned char *v_data, char *bufr) {
 376         int i, k, mb_x, mb_y, overflow;
 377         short int *dest;
 378         unsigned char *source;
 379         /* initialize the buffer */
 380
 381         init_put_bits(&j->s->pb, bufr, 1024*256);
 382
 383         mjpeg_picture_header(j->s);
 384
 385         j->s->header_bits = get_bit_count(&j->s->pb);
 386
 387         j->s->last_dc[0] = 128;
 388         j->s->last_dc[1] = 128;
 389         j->s->last_dc[2] = 128;
 390
 391         for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
 392                 for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
 393                         /* conversion 8 to 16 bit and filling of blocks
 394                          * must be mmx optimized */
 395                         /* fill 2 Y macroblocks and one U and one V */
 396                         source = mb_y * 8 * j->y_rs +
 397                                 16 * j->y_ps * mb_x + y_data;
 398                         dest = j->s->block[0];
 399                         for (i = 0; i < 8; i++) {
 400                                 for (k = 0; k < 8; k++) {
 401                                         dest[k] = source[k*j->y_ps];
 402                                 }
 403                                 dest += 8;
 404                                 source += j->y_rs;
 405                         }
 406                         source = mb_y * 8 * j->y_rs +
 407                                 (16*mb_x + 8)*j->y_ps + y_data;
 408                         dest = j->s->block[1];
 409                         for (i = 0; i < 8; i++) {
 410                                 for (k = 0; k < 8; k++) {
 411                                         dest[k] = source[k*j->y_ps];
 412                                 }
 413                                 dest += 8;
 414                                 source += j->y_rs;
 415                         }
 416                         if (!j->bw && j->cheap_upsample) {
 417                                 source = mb_y*4*j->u_rs +
 418                                         8*mb_x*j->u_ps + u_data;
 419                                 dest = j->s->block[2];
 420                                 for (i = 0; i < 4; i++) {
 421                                         for (k = 0; k < 8; k++) {
 422                                                 dest[k] = source[k*j->u_ps];
 423                                                 dest[k+8] = source[k*j->u_ps];
 424                                         }
 425                                         dest += 16;
 426                                         source += j->u_rs;
 427                                 }
 428                                 source = mb_y*4*j->v_rs +
 429                                         8*mb_x*j->v_ps + v_data;
 430                                 dest = j->s->block[3];
 431                                 for (i = 0; i < 4; i++) {
 432                                         for (k = 0; k < 8; k++) {
 433                                                 dest[k] = source[k*j->v_ps];
 434                                                 dest[k+8] = source[k*j->v_ps];
 435                                         }
 436                                         dest += 16;
 437                                         source += j->u_rs;
 438                                 }
 439                         } else if (!j->bw && !j->cheap_upsample) {
 440                                 source = mb_y*8*j->u_rs +
 441                                         8*mb_x*j->u_ps + u_data;
 442                                 dest = j->s->block[2];
 443                                 for (i = 0; i < 8; i++) {
 444                                         for (k = 0; k < 8; k++)
 445                                                 dest[k] = source[k*j->u_ps];
 446                                         dest += 8;
 447                                         source += j->u_rs;
 448                                 }
 449                                 source = mb_y*8*j->v_rs +
 450                                         8*mb_x*j->v_ps + v_data;
 451                                 dest = j->s->block[3];
 452                                 for (i = 0; i < 8; i++) {
 453                                         for (k = 0; k < 8; k++)
 454                                                 dest[k] = source[k*j->v_ps];
 455                                         dest += 8;
 456                                         source += j->u_rs;
 457                                 }
 458                         }
 459                         emms_c(); /* is this really needed? */
 460
 461                         j->s->block_last_index[0] =
 462                                 j->s->dct_quantize(j->s, j->s->block[0],
 463                                                 0, 8, &overflow);
 464                         if (overflow) clip_coeffs(j->s, j->s->block[0],
 465                                         j->s->block_last_index[0]);
 466                         j->s->block_last_index[1] =
 467                                 j->s->dct_quantize(j->s, j->s->block[1],
 468                                                 1, 8, &overflow);
 469                         if (overflow) clip_coeffs(j->s, j->s->block[1],
 470                                         j->s->block_last_index[1]);
 471
 472                         if (!j->bw) {
 473                                 j->s->block_last_index[4] =
 474                                         j->s->dct_quantize(j->s, j->s->block[2],
 475                                                         4, 8, &overflow);
 476                                 if (overflow) clip_coeffs(j->s, j->s->block[2],
 477                                                 j->s->block_last_index[2]);
 478                                 j->s->block_last_index[5] =
 479                                         j->s->dct_quantize(j->s, j->s->block[3],
 480                                                         5, 8, &overflow);
 481                                 if (overflow) clip_coeffs(j->s, j->s->block[3],
 482                                                 j->s->block_last_index[3]);
 483                         }
 484                         zr_mjpeg_encode_mb(j);
 485                 }
 486         }
 487         emms_c();
 488         mjpeg_picture_trailer(j->s);
 489         flush_put_bits(&j->s->pb);
 490
 491         if (j->s->mjpeg_write_tables == 1)
 492                 j->s->mjpeg_write_tables = 0;
 493
 494         return pbBufPtr(&(j->s->pb)) - j->s->pb.buf;
 495 }
 496
 497 void jpeg_enc_uninit(jpeg_enc_t *j) {
 498         mjpeg_close(j->s);
 499         av_free(j->s);
 500         av_free(j);
 501 }
 502
/* Disabled stand-alone test that encodes one 32x32 picture to test.jpg.
 * It is stale: it calls mjpeg_encoder_init() and mjpeg_encode_frame(),
 * neither of which is defined in this file (the functions here are
 * jpeg_enc_init() and jpeg_enc_frame(), and the latter takes the encoder
 * as its first argument), and it uses MBR and MBC, which are not defined
 * in the visible part of this file. It has to be updated before it can
 * be enabled. */
#if 0

#define         W       32
#define         H       32

int quant_store[MBR+1][MBC+1];
unsigned char buf[W*H*3/2];
char code[256*1024];


main() {
        int i, size;
        FILE *fp;

        /* Y plane all 0, U plane all 255, V plane all 0 */
        memset(buf, 0, W*H);
        memset(buf+W*H, 255, W*H/4);
        memset(buf+5*W*H/4, 0, W*H/4);
        mjpeg_encoder_init(W, H, 1, W, 1, W/2, 1, W/2, 1, 1, 0);

        size = mjpeg_encode_frame(buf, buf+W*H, buf+5*W*H/4, code);
        fp = fopen("test.jpg", "w");
        fwrite(code, 1, size, fp);
        fclose(fp);
}
#endif