libvo/jpeg_enc.c

   1 /*
   2  * straightforward (to be) optimized JPEG encoder for the YUV422 format
   3  * based on MJPEG code from FFmpeg
   4  *
   5  * For an excellent introduction to the JPEG format, see:
   6  * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
   7  *
   8  * Copyright (c) 2002, Rik Snel
   9  * parts from FFmpeg Copyright (c) 2000-2002 Fabrice Bellard
  10  *
  11  * This file is part of MPlayer.
  12  *
  13  * MPlayer is free software; you can redistribute it and/or modify
  14  * it under the terms of the GNU General Public License as published by
  15  * the Free Software Foundation; either version 2 of the License, or
  16  * (at your option) any later version.
  17  *
  18  * MPlayer is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU General Public License along
  24  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
  25  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  26  */
  27
  28
  29
  30 #include <sys/types.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include "config.h"
  35 #include "mp_msg.h"
  36 /* We need this #define because we need ../libavcodec/common.h to #define
  37  * be2me_32, otherwise the linker will complain that it doesn't exist */
  38 #define HAVE_AV_CONFIG_H
  39 #include "libavcodec/avcodec.h"
  40 #include "libavcodec/dsputil.h"
  41 #include "libavcodec/mpegvideo.h"
  42 #include "libavcodec/mjpegenc.h"
  43
  44 #include "jpeg_enc.h"
  45
  46 extern int avcodec_initialized;
  47
  48
  49 /* Begin excessive code duplication ************************************/
  50 /* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
  51
  52 static const unsigned short aanscales[64] = {
  53     /* precomputed values scaled up by 14 bits */
  54     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  55     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
  56     21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
  57     19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
  58     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
  59     12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
  60     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
  61     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
  62 };
  63
  64 static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
  65                 uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
  66                 int bias, int qmin, int qmax)
  67 {
  68     int qscale;
  69
  70     for(qscale=qmin; qscale<=qmax; qscale++){
  71         int i;
  72         if (s->dsp.fdct == ff_jpeg_fdct_islow) {
  73                 for (i = 0; i < 64; i++) {
  74                         const int j = s->dsp.idct_permutation[i];
  75                         /* 16    <= qscale * quant_matrix[i] <= 7905
  76                          * 19952 <= aanscales[i] *  \
  77                          *              qscale * quant_matrix[i]     <= 205026
  78                          * (1<<36)/19952 >= (1<<36)/(aanscales[i] * \
  79                          *      qscale * quant_matrix[i]) >= (1<<36)/249205025
  80                          * 3444240       >= (1<<36)/(aanscales[i] *
  81                          *      qscale * quant_matrix[i]) >= 275              */
  82                         qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT-3))/
  83                                         (qscale * quant_matrix[j]));
  84                 }
  85         } else if (s->dsp.fdct == fdct_ifast) {
  86             for(i=0;i<64;i++) {
  87                 const int j = s->dsp.idct_permutation[i];
  88                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
  89                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
  90                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
  91                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
  92
  93                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
  94                                 (aanscales[i] * qscale * quant_matrix[j]));
  95             }
  96         } else {
  97             for(i=0;i<64;i++) {
  98                 const int j = s->dsp.idct_permutation[i];
  99                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 100                    So 16           <= qscale * quant_matrix[i]             <= 7905
 101                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
 102                    so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
 103                 */
 104                 qmat  [qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]));
 105                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 106
 107                 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
 108                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
 109             }
 110         }
 111     }
 112 }
 113
 114 static inline void encode_dc(MpegEncContext *s, int val,
 115                              uint8_t *huff_size, uint16_t *huff_code)
 116 {
 117     int mant, nbits;
 118
 119     if (val == 0) {
 120         put_bits(&s->pb, huff_size[0], huff_code[0]);
 121     } else {
 122         mant = val;
 123         if (val < 0) {
 124             val = -val;
 125             mant--;
 126         }
 127
 128         /* compute the log (XXX: optimize) */
 129         nbits = 0;
 130         while (val != 0) {
 131             val = val >> 1;
 132             nbits++;
 133         }
 134
 135         put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
 136
 137         put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
 138     }
 139 }
 140
 141 static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
 142 {
 143     int mant, nbits, code, i, j;
 144     int component, dc, run, last_index, val;
 145     MJpegContext *m = s->mjpeg_ctx;
 146     uint8_t *huff_size_ac;
 147     uint16_t *huff_code_ac;
 148
 149     /* DC coef */
 150     component = (n <= 3 ? 0 : n - 4 + 1);
 151     dc = block[0]; /* overflow is impossible */
 152     val = dc - s->last_dc[component];
 153     if (n < 4) {
 154         encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
 155         huff_size_ac = m->huff_size_ac_luminance;
 156         huff_code_ac = m->huff_code_ac_luminance;
 157     } else {
 158         encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
 159         huff_size_ac = m->huff_size_ac_chrominance;
 160         huff_code_ac = m->huff_code_ac_chrominance;
 161     }
 162     s->last_dc[component] = dc;
 163
 164     /* AC coefs */
 165
 166     run = 0;
 167     last_index = s->block_last_index[n];
 168     for(i=1;i<=last_index;i++) {
 169         j = s->intra_scantable.permutated[i];
 170         val = block[j];
 171         if (val == 0) {
 172             run++;
 173         } else {
 174             while (run >= 16) {
 175                 put_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
 176                 run -= 16;
 177             }
 178             mant = val;
 179             if (val < 0) {
 180                 val = -val;
 181                 mant--;
 182             }
 183
 184             /* compute the log (XXX: optimize) */
 185             nbits = 0;
 186             while (val != 0) {
 187                 val = val >> 1;
 188                 nbits++;
 189             }
 190             code = (run << 4) | nbits;
 191
 192             put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
 193
 194             put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
 195             run = 0;
 196         }
 197     }
 198
 199     /* output EOB only if not already 64 values */
 200     if (last_index < 63 || run != 0)
 201         put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
 202 }
 203
 204 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
 205 {
 206     int i;
 207     const int maxlevel= s->max_qcoeff;
 208     const int minlevel= s->min_qcoeff;
 209
 210     for(i=0; i<=last_index; i++){
 211         const int j = s->intra_scantable.permutated[i];
 212         int level = block[j];
 213
 214         if     (level>maxlevel) level=maxlevel;
 215         else if(level<minlevel) level=minlevel;
 216         block[j]= level;
 217     }
 218 }
 219
 220 /* End excessive code duplication **************************************/
 221
 222 /* this function is a reproduction of the one in mjpeg, it includes two
 223  * changes, it allows for black&white encoding (it skips the U and V
 224  * macroblocks and it outputs the huffman code for 'no change' (dc) and
 225  * 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
 226 static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
 227
 228         MJpegContext *m = j->s->mjpeg_ctx;
 229
 230         encode_block(j->s, j->s->block[0], 0);
 231         encode_block(j->s, j->s->block[1], 1);
 232         if (j->bw) {
 233                 /* U */
 234                 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
 235                                 m->huff_code_dc_chrominance[0]);
 236                 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
 237                                 m->huff_code_ac_chrominance[0]);
 238                 /* V */
 239                 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
 240                                 m->huff_code_dc_chrominance[0]);
 241                 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
 242                                 m->huff_code_ac_chrominance[0]);
 243         } else {
 244                 /* we trick encode_block here so that it uses
 245                  * chrominance huffman tables instead of luminance ones
 246                  * (see the effect of second argument of encode_block) */
 247                 encode_block(j->s, j->s->block[2], 4);
 248                 encode_block(j->s, j->s->block[3], 5);
 249         }
 250 }
 251
 252 /* this function can take all kinds of YUV colorspaces
 253  * YV12, YVYU, UYVY. The necesary parameters must be set up by the caller
 254  * y_ps means "y pixel size", y_rs means "y row size".
 255  * For YUYV, for example, is u_buf = y_buf + 1, v_buf = y_buf + 3,
 256  * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
 257  *
 258  *  The actual buffers must be passed with mjpeg_encode_frame, this is
 259  *  to make it possible to call encode on the buffer provided by the
 260  *  codec in draw_frame.
 261  *
 262  * The data is straightened out at the moment it is put in DCT
 263  * blocks, there are therefore no spurious memcopies involved */
 264 /* Notice that w must be a multiple of 16 and h must be a multiple of 8 */
 265 /* We produce YUV422 jpegs, the colors must be subsampled horizontally,
 266  * if the colors are also subsampled vertically, then this function
 267  * performs cheap upsampling (better solution will be: a DCT that is
 268  * optimized in the case that every two rows are the same) */
 269 /* cu = 0 means 'No cheap upsampling'
 270  * cu = 1 means 'perform cheap upsampling' */
 271 /* The encoder doesn't know anything about interlacing, the halve height
 272  * needs to be passed and the double rowstride. Which field gets encoded
 273  * is decided by what buffers are passed to mjpeg_encode_frame */
 274 jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
 275                 int u_psize, int u_rsize, int v_psize, int v_rsize,
 276                 int cu, int q, int b) {
 277         jpeg_enc_t *j;
 278         int i = 0;
 279         mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %d %d %d %d %d %d\n",
 280                         w, h, y_psize, y_rsize, u_psize,
 281                         u_rsize, v_psize, v_rsize);
 282
 283         j = av_malloc(sizeof(jpeg_enc_t));
 284         if (j == NULL) return NULL;
 285
 286         j->s = av_malloc(sizeof(MpegEncContext));
 287         memset(j->s,0x00,sizeof(MpegEncContext));
 288         if (j->s == NULL) {
 289                 av_free(j);
 290                 return NULL;
 291         }
 292
 293         /* info on how to access the pixels */
 294         j->y_ps = y_psize;
 295         j->u_ps = u_psize;
 296         j->v_ps = v_psize;
 297         j->y_rs = y_rsize;
 298         j->u_rs = u_rsize;
 299         j->v_rs = v_rsize;
 300
 301         j->s->width = w;
 302         j->s->height = h;
 303         j->s->qscale = q;
 304
 305         j->s->out_format = FMT_MJPEG;
 306         j->s->intra_only = 1;
 307         j->s->encoding = 1;
 308         j->s->pict_type = FF_I_TYPE;
 309         j->s->y_dc_scale = 8;
 310         j->s->c_dc_scale = 8;
 311
 312         //FIXME j->s->mjpeg_write_tables = 1;
 313         j->s->mjpeg_vsample[0] = 1;
 314         j->s->mjpeg_vsample[1] = 1;
 315         j->s->mjpeg_vsample[2] = 1;
 316         j->s->mjpeg_hsample[0] = 2;
 317         j->s->mjpeg_hsample[1] = 1;
 318         j->s->mjpeg_hsample[2] = 1;
 319
 320         j->cheap_upsample = cu;
 321         j->bw = b;
 322
 323         /* if libavcodec is used by the decoder then we must not
 324          * initialize again, but if it is not initialized then we must
 325          * initialize it here. */
 326         if (!avcodec_initialized) {
 327                 /* we need to initialize libavcodec */
 328                 avcodec_init();
 329                 avcodec_register_all();
 330                 avcodec_initialized=1;
 331         }
 332
 333         if (ff_mjpeg_encode_init(j->s) < 0) {
 334                 av_free(j->s);
 335                 av_free(j);
 336                 return NULL;
 337         }
 338
 339         /* alloc bogus avctx to keep MPV_common_init from segfaulting */
 340         j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
 341         /* Set up to encode mjpeg */
 342         j->s->avctx->codec_id = CODEC_ID_MJPEG;
 343
 344         /* make MPV_common_init allocate important buffers, like s->block */
 345         j->s->avctx->thread_count = 1;
 346
 347         if (MPV_common_init(j->s) < 0) {
 348                 av_free(j->s);
 349                 av_free(j);
 350                 return NULL;
 351         }
 352
 353         /* correct the value for sc->mb_height */
 354         j->s->mb_height = j->s->height/8;
 355         j->s->mb_intra = 1;
 356
 357         j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
 358         for (i = 1; i < 64; i++)
 359                 j->s->intra_matrix[i] = av_clip_uint8(
 360                         (ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
 361         convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
 362                         j->s->intra_matrix, j->s->intra_quant_bias, 8, 8);
 363         return j;
 364 }
 365
 366 int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
 367                 unsigned char *u_data, unsigned char *v_data, char *bufr) {
 368         int i, k, mb_x, mb_y, overflow;
 369         short int *dest;
 370         unsigned char *source;
 371         /* initialize the buffer */
 372
 373         init_put_bits(&j->s->pb, bufr, 1024*256);
 374
 375         ff_mjpeg_encode_picture_header(j->s);
 376
 377         j->s->header_bits = put_bits_count(&j->s->pb);
 378
 379         j->s->last_dc[0] = 128;
 380         j->s->last_dc[1] = 128;
 381         j->s->last_dc[2] = 128;
 382
 383         for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
 384                 for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
 385                         /* conversion 8 to 16 bit and filling of blocks
 386                          * must be mmx optimized */
 387                         /* fill 2 Y macroblocks and one U and one V */
 388                         source = mb_y * 8 * j->y_rs +
 389                                 16 * j->y_ps * mb_x + y_data;
 390                         dest = j->s->block[0];
 391                         for (i = 0; i < 8; i++) {
 392                                 for (k = 0; k < 8; k++) {
 393                                         dest[k] = source[k*j->y_ps];
 394                                 }
 395                                 dest += 8;
 396                                 source += j->y_rs;
 397                         }
 398                         source = mb_y * 8 * j->y_rs +
 399                                 (16*mb_x + 8)*j->y_ps + y_data;
 400                         dest = j->s->block[1];
 401                         for (i = 0; i < 8; i++) {
 402                                 for (k = 0; k < 8; k++) {
 403                                         dest[k] = source[k*j->y_ps];
 404                                 }
 405                                 dest += 8;
 406                                 source += j->y_rs;
 407                         }
 408                         if (!j->bw && j->cheap_upsample) {
 409                                 source = mb_y*4*j->u_rs +
 410                                         8*mb_x*j->u_ps + u_data;
 411                                 dest = j->s->block[2];
 412                                 for (i = 0; i < 4; i++) {
 413                                         for (k = 0; k < 8; k++) {
 414                                                 dest[k] = source[k*j->u_ps];
 415                                                 dest[k+8] = source[k*j->u_ps];
 416                                         }
 417                                         dest += 16;
 418                                         source += j->u_rs;
 419                                 }
 420                                 source = mb_y*4*j->v_rs +
 421                                         8*mb_x*j->v_ps + v_data;
 422                                 dest = j->s->block[3];
 423                                 for (i = 0; i < 4; i++) {
 424                                         for (k = 0; k < 8; k++) {
 425                                                 dest[k] = source[k*j->v_ps];
 426                                                 dest[k+8] = source[k*j->v_ps];
 427                                         }
 428                                         dest += 16;
 429                                         source += j->u_rs;
 430                                 }
 431                         } else if (!j->bw && !j->cheap_upsample) {
 432                                 source = mb_y*8*j->u_rs +
 433                                         8*mb_x*j->u_ps + u_data;
 434                                 dest = j->s->block[2];
 435                                 for (i = 0; i < 8; i++) {
 436                                         for (k = 0; k < 8; k++)
 437                                                 dest[k] = source[k*j->u_ps];
 438                                         dest += 8;
 439                                         source += j->u_rs;
 440                                 }
 441                                 source = mb_y*8*j->v_rs +
 442                                         8*mb_x*j->v_ps + v_data;
 443                                 dest = j->s->block[3];
 444                                 for (i = 0; i < 8; i++) {
 445                                         for (k = 0; k < 8; k++)
 446                                                 dest[k] = source[k*j->v_ps];
 447                                         dest += 8;
 448                                         source += j->u_rs;
 449                                 }
 450                         }
 451                         emms_c(); /* is this really needed? */
 452
 453                         j->s->block_last_index[0] =
 454                                 j->s->dct_quantize(j->s, j->s->block[0],
 455                                                 0, 8, &overflow);
 456                         if (overflow) clip_coeffs(j->s, j->s->block[0],
 457                                         j->s->block_last_index[0]);
 458                         j->s->block_last_index[1] =
 459                                 j->s->dct_quantize(j->s, j->s->block[1],
 460                                                 1, 8, &overflow);
 461                         if (overflow) clip_coeffs(j->s, j->s->block[1],
 462                                         j->s->block_last_index[1]);
 463
 464                         if (!j->bw) {
 465                                 j->s->block_last_index[4] =
 466                                         j->s->dct_quantize(j->s, j->s->block[2],
 467                                                         4, 8, &overflow);
 468                                 if (overflow) clip_coeffs(j->s, j->s->block[2],
 469                                                 j->s->block_last_index[2]);
 470                                 j->s->block_last_index[5] =
 471                                         j->s->dct_quantize(j->s, j->s->block[3],
 472                                                         5, 8, &overflow);
 473                                 if (overflow) clip_coeffs(j->s, j->s->block[3],
 474                                                 j->s->block_last_index[3]);
 475                         }
 476                         zr_mjpeg_encode_mb(j);
 477                 }
 478         }
 479         emms_c();
 480         ff_mjpeg_encode_picture_trailer(j->s);
 481         flush_put_bits(&j->s->pb);
 482
 483         //FIXME
 484         //if (j->s->mjpeg_write_tables == 1)
 485         //      j->s->mjpeg_write_tables = 0;
 486
 487         return pbBufPtr(&(j->s->pb)) - j->s->pb.buf;
 488 }
 489
 490 void jpeg_enc_uninit(jpeg_enc_t *j) {
 491         ff_mjpeg_encode_close(j->s);
 492         av_free(j->s);
 493         av_free(j);
 494 }