main() --> main(void)
[mplayer/greg.git] / libvo / jpeg_enc.c
blobf5777f740138e9981891b047f4c2df7c8eb10bc2
/* Straightforward (to be) optimized JPEG encoder for the YUV422 format
 * based on mjpeg code from ffmpeg.
 *
 * Copyright (c) 2002, Rik Snel
 * Parts from ffmpeg Copyright (c) 2000-2002 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * For an excellent introduction to the JPEG format, see:
 * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
 */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include "config.h"
31 #include "mp_msg.h"
32 /* We need this #define because we need ../libavcodec/common.h to #define
33 * be2me_32, otherwise the linker will complain that it doesn't exist */
34 #define HAVE_AV_CONFIG_H
35 #include "libavcodec/avcodec.h"
36 #include "libavcodec/dsputil.h"
37 #include "libavcodec/mpegvideo.h"
39 #include "jpeg_enc.h"
/* Flag recording whether libavcodec has already been initialized.
 * jpeg_enc_init() reads it and, when it is zero, initializes libavcodec
 * and sets it to 1. The definition is not in the visible part of this
 * file. */
41 extern int avcodec_inited;
43 /* zr_mjpeg_encode_mb needs access to these tables for the black & white
44 * option */
/**
 * Huffman tables of the MJPEG entropy coder, as reached through
 * MpegEncContext.mjpeg_ctx by encode_block() and zr_mjpeg_encode_mb().
 *
 * Each table comes as a pair: huff_size_* holds the code length in bits,
 * huff_code_* the code word itself.  The DC tables are indexed by the bit
 * count of the DC difference, the AC tables by (run << 4) | nbits.
 *
 * NOTE(review): presumably this has to mirror libavcodec's own private
 * definition field for field -- do not reorder or resize the members
 * without confirming against libavcodec.
 */
typedef struct MJpegContext {
    uint8_t  huff_size_dc_luminance[12];
    uint16_t huff_code_dc_luminance[12];
    uint8_t  huff_size_dc_chrominance[12];
    uint16_t huff_code_dc_chrominance[12];

    uint8_t  huff_size_ac_luminance[256];
    uint16_t huff_code_ac_luminance[256];
    uint8_t  huff_size_ac_chrominance[256];
    uint16_t huff_code_ac_chrominance[256];
} MJpegContext;
58 /* Begin excessive code duplication ************************************/
59 /* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
/* Per-coefficient scale factors applied by convert_matrix() when the
 * forward DCT in use is fdct_ifast.
 * Precomputed values scaled up by 14 bits (so 1.0 is stored as 16384). */
static const unsigned short aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
73 static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
74 uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
75 int bias, int qmin, int qmax)
77 int qscale;
79 for(qscale=qmin; qscale<=qmax; qscale++){
80 int i;
81 if (s->dsp.fdct == ff_jpeg_fdct_islow) {
82 for (i = 0; i < 64; i++) {
83 const int j = s->dsp.idct_permutation[i];
84 /* 16 <= qscale * quant_matrix[i] <= 7905
85 * 19952 <= aanscales[i] * \
86 * qscale * quant_matrix[i] <= 205026
87 * (1<<36)/19952 >= (1<<36)/(aanscales[i] * \
88 * qscale * quant_matrix[i]) >= (1<<36)/249205025
89 * 3444240 >= (1<<36)/(aanscales[i] *
90 * qscale * quant_matrix[i]) >= 275 */
91 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT-3))/
92 (qscale * quant_matrix[j]));
94 } else if (s->dsp.fdct == fdct_ifast) {
95 for(i=0;i<64;i++) {
96 const int j = s->dsp.idct_permutation[i];
97 /* 16 <= qscale * quant_matrix[i] <= 7905 */
98 /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
99 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
100 /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
102 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
103 (aanscales[i] * qscale * quant_matrix[j]));
105 } else {
106 for(i=0;i<64;i++) {
107 const int j = s->dsp.idct_permutation[i];
108 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
109 So 16 <= qscale * quant_matrix[i] <= 7905
110 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
111 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
113 qmat [qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]));
114 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
116 if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
117 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
123 static inline void encode_dc(MpegEncContext *s, int val,
124 uint8_t *huff_size, uint16_t *huff_code)
126 int mant, nbits;
128 if (val == 0) {
129 put_bits(&s->pb, huff_size[0], huff_code[0]);
130 } else {
131 mant = val;
132 if (val < 0) {
133 val = -val;
134 mant--;
137 /* compute the log (XXX: optimize) */
138 nbits = 0;
139 while (val != 0) {
140 val = val >> 1;
141 nbits++;
144 put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
146 put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
150 static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
152 int mant, nbits, code, i, j;
153 int component, dc, run, last_index, val;
154 MJpegContext *m = s->mjpeg_ctx;
155 uint8_t *huff_size_ac;
156 uint16_t *huff_code_ac;
158 /* DC coef */
159 component = (n <= 3 ? 0 : n - 4 + 1);
160 dc = block[0]; /* overflow is impossible */
161 val = dc - s->last_dc[component];
162 if (n < 4) {
163 encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance);
164 huff_size_ac = m->huff_size_ac_luminance;
165 huff_code_ac = m->huff_code_ac_luminance;
166 } else {
167 encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance);
168 huff_size_ac = m->huff_size_ac_chrominance;
169 huff_code_ac = m->huff_code_ac_chrominance;
171 s->last_dc[component] = dc;
173 /* AC coefs */
175 run = 0;
176 last_index = s->block_last_index[n];
177 for(i=1;i<=last_index;i++) {
178 j = s->intra_scantable.permutated[i];
179 val = block[j];
180 if (val == 0) {
181 run++;
182 } else {
183 while (run >= 16) {
184 put_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]);
185 run -= 16;
187 mant = val;
188 if (val < 0) {
189 val = -val;
190 mant--;
193 /* compute the log (XXX: optimize) */
194 nbits = 0;
195 while (val != 0) {
196 val = val >> 1;
197 nbits++;
199 code = (run << 4) | nbits;
201 put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
203 put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
204 run = 0;
208 /* output EOB only if not already 64 values */
209 if (last_index < 63 || run != 0)
210 put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
213 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
215 int i;
216 const int maxlevel= s->max_qcoeff;
217 const int minlevel= s->min_qcoeff;
219 for(i=0; i<=last_index; i++){
220 const int j = s->intra_scantable.permutated[i];
221 int level = block[j];
223 if (level>maxlevel) level=maxlevel;
224 else if(level<minlevel) level=minlevel;
225 block[j]= level;
229 /* End excessive code duplication **************************************/
231 /* this function is a reproduction of the one in mjpeg, it includes two
232 * changes, it allows for black&white encoding (it skips the U and V
233 * macroblocks and it outputs the huffman code for 'no change' (dc) and
234 * 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
235 static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
237 MJpegContext *m = j->s->mjpeg_ctx;
239 encode_block(j->s, j->s->block[0], 0);
240 encode_block(j->s, j->s->block[1], 1);
241 if (j->bw) {
242 /* U */
243 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
244 m->huff_code_dc_chrominance[0]);
245 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
246 m->huff_code_ac_chrominance[0]);
247 /* V */
248 put_bits(&j->s->pb, m->huff_size_dc_chrominance[0],
249 m->huff_code_dc_chrominance[0]);
250 put_bits(&j->s->pb, m->huff_size_ac_chrominance[0],
251 m->huff_code_ac_chrominance[0]);
252 } else {
253 /* we trick encode_block here so that it uses
254 * chrominance huffman tables instead of luminance ones
255 * (see the effect of second argument of encode_block) */
256 encode_block(j->s, j->s->block[2], 4);
257 encode_block(j->s, j->s->block[3], 5);
261 /* this function can take all kinds of YUV colorspaces
262 * YV12, YVYU, UYVY. The necesary parameters must be set up by the caller
263 * y_ps means "y pixel size", y_rs means "y row size".
264 * For YUYV, for example, is u_buf = y_buf + 1, v_buf = y_buf + 3,
265 * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
267 * The actual buffers must be passed with mjpeg_encode_frame, this is
268 * to make it possible to call encode on the buffer provided by the
269 * codec in draw_frame.
271 * The data is straightened out at the moment it is put in DCT
272 * blocks, there are therefore no spurious memcopies involved */
273 /* Notice that w must be a multiple of 16 and h must be a multiple of 8 */
274 /* We produce YUV422 jpegs, the colors must be subsampled horizontally,
275 * if the colors are also subsampled vertically, then this function
276 * performs cheap upsampling (better solution will be: a DCT that is
277 * optimized in the case that every two rows are the same) */
278 /* cu = 0 means 'No cheap upsampling'
279 * cu = 1 means 'perform cheap upsampling' */
280 /* The encoder doesn't know anything about interlacing, the halve height
281 * needs to be passed and the double rowstride. Which field gets encoded
282 * is decided by what buffers are passed to mjpeg_encode_frame */
283 jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
284 int u_psize, int u_rsize, int v_psize, int v_rsize,
285 int cu, int q, int b) {
286 jpeg_enc_t *j;
287 int i = 0;
288 mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %d %d %d %d %d %d\n",
289 w, h, y_psize, y_rsize, u_psize,
290 u_rsize, v_psize, v_rsize);
292 j = av_malloc(sizeof(jpeg_enc_t));
293 if (j == NULL) return NULL;
295 j->s = av_malloc(sizeof(MpegEncContext));
296 memset(j->s,0x00,sizeof(MpegEncContext));
297 if (j->s == NULL) {
298 av_free(j);
299 return NULL;
302 /* info on how to access the pixels */
303 j->y_ps = y_psize;
304 j->u_ps = u_psize;
305 j->v_ps = v_psize;
306 j->y_rs = y_rsize;
307 j->u_rs = u_rsize;
308 j->v_rs = v_rsize;
310 j->s->width = w;
311 j->s->height = h;
312 j->s->qscale = q;
314 j->s->out_format = FMT_MJPEG;
315 j->s->intra_only = 1;
316 j->s->encoding = 1;
317 j->s->pict_type = I_TYPE;
318 j->s->y_dc_scale = 8;
319 j->s->c_dc_scale = 8;
321 //FIXME j->s->mjpeg_write_tables = 1;
322 j->s->mjpeg_vsample[0] = 1;
323 j->s->mjpeg_vsample[1] = 1;
324 j->s->mjpeg_vsample[2] = 1;
325 j->s->mjpeg_hsample[0] = 2;
326 j->s->mjpeg_hsample[1] = 1;
327 j->s->mjpeg_hsample[2] = 1;
329 j->cheap_upsample = cu;
330 j->bw = b;
332 /* if libavcodec is used by the decoder then we must not
333 * initialize again, but if it is not initialized then we must
334 * initialize it here. */
335 if (!avcodec_inited) {
336 /* we need to initialize libavcodec */
337 avcodec_init();
338 avcodec_register_all();
339 avcodec_inited=1;
342 if (ff_mjpeg_encode_init(j->s) < 0) {
343 av_free(j->s);
344 av_free(j);
345 return NULL;
348 /* alloc bogus avctx to keep MPV_common_init from segfaulting */
349 j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
350 /* Set up to encode mjpeg */
351 j->s->avctx->codec_id = CODEC_ID_MJPEG;
353 /* make MPV_common_init allocate important buffers, like s->block */
354 j->s->avctx->thread_count = 1;
356 if (MPV_common_init(j->s) < 0) {
357 av_free(j->s);
358 av_free(j);
359 return NULL;
362 /* correct the value for sc->mb_height */
363 j->s->mb_height = j->s->height/8;
364 j->s->mb_intra = 1;
366 j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
367 for (i = 1; i < 64; i++)
368 j->s->intra_matrix[i] = av_clip_uint8(
369 (ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
370 convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
371 j->s->intra_matrix, j->s->intra_quant_bias, 8, 8);
372 return j;
375 int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
376 unsigned char *u_data, unsigned char *v_data, char *bufr) {
377 int i, k, mb_x, mb_y, overflow;
378 short int *dest;
379 unsigned char *source;
380 /* initialize the buffer */
382 init_put_bits(&j->s->pb, bufr, 1024*256);
384 ff_mjpeg_encode_picture_header(j->s);
386 j->s->header_bits = put_bits_count(&j->s->pb);
388 j->s->last_dc[0] = 128;
389 j->s->last_dc[1] = 128;
390 j->s->last_dc[2] = 128;
392 for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
393 for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
394 /* conversion 8 to 16 bit and filling of blocks
395 * must be mmx optimized */
396 /* fill 2 Y macroblocks and one U and one V */
397 source = mb_y * 8 * j->y_rs +
398 16 * j->y_ps * mb_x + y_data;
399 dest = j->s->block[0];
400 for (i = 0; i < 8; i++) {
401 for (k = 0; k < 8; k++) {
402 dest[k] = source[k*j->y_ps];
404 dest += 8;
405 source += j->y_rs;
407 source = mb_y * 8 * j->y_rs +
408 (16*mb_x + 8)*j->y_ps + y_data;
409 dest = j->s->block[1];
410 for (i = 0; i < 8; i++) {
411 for (k = 0; k < 8; k++) {
412 dest[k] = source[k*j->y_ps];
414 dest += 8;
415 source += j->y_rs;
417 if (!j->bw && j->cheap_upsample) {
418 source = mb_y*4*j->u_rs +
419 8*mb_x*j->u_ps + u_data;
420 dest = j->s->block[2];
421 for (i = 0; i < 4; i++) {
422 for (k = 0; k < 8; k++) {
423 dest[k] = source[k*j->u_ps];
424 dest[k+8] = source[k*j->u_ps];
426 dest += 16;
427 source += j->u_rs;
429 source = mb_y*4*j->v_rs +
430 8*mb_x*j->v_ps + v_data;
431 dest = j->s->block[3];
432 for (i = 0; i < 4; i++) {
433 for (k = 0; k < 8; k++) {
434 dest[k] = source[k*j->v_ps];
435 dest[k+8] = source[k*j->v_ps];
437 dest += 16;
438 source += j->u_rs;
440 } else if (!j->bw && !j->cheap_upsample) {
441 source = mb_y*8*j->u_rs +
442 8*mb_x*j->u_ps + u_data;
443 dest = j->s->block[2];
444 for (i = 0; i < 8; i++) {
445 for (k = 0; k < 8; k++)
446 dest[k] = source[k*j->u_ps];
447 dest += 8;
448 source += j->u_rs;
450 source = mb_y*8*j->v_rs +
451 8*mb_x*j->v_ps + v_data;
452 dest = j->s->block[3];
453 for (i = 0; i < 8; i++) {
454 for (k = 0; k < 8; k++)
455 dest[k] = source[k*j->v_ps];
456 dest += 8;
457 source += j->u_rs;
460 emms_c(); /* is this really needed? */
462 j->s->block_last_index[0] =
463 j->s->dct_quantize(j->s, j->s->block[0],
464 0, 8, &overflow);
465 if (overflow) clip_coeffs(j->s, j->s->block[0],
466 j->s->block_last_index[0]);
467 j->s->block_last_index[1] =
468 j->s->dct_quantize(j->s, j->s->block[1],
469 1, 8, &overflow);
470 if (overflow) clip_coeffs(j->s, j->s->block[1],
471 j->s->block_last_index[1]);
473 if (!j->bw) {
474 j->s->block_last_index[4] =
475 j->s->dct_quantize(j->s, j->s->block[2],
476 4, 8, &overflow);
477 if (overflow) clip_coeffs(j->s, j->s->block[2],
478 j->s->block_last_index[2]);
479 j->s->block_last_index[5] =
480 j->s->dct_quantize(j->s, j->s->block[3],
481 5, 8, &overflow);
482 if (overflow) clip_coeffs(j->s, j->s->block[3],
483 j->s->block_last_index[3]);
485 zr_mjpeg_encode_mb(j);
488 emms_c();
489 ff_mjpeg_encode_picture_trailer(j->s);
490 flush_put_bits(&j->s->pb);
492 //FIXME
493 //if (j->s->mjpeg_write_tables == 1)
494 // j->s->mjpeg_write_tables = 0;
496 return pbBufPtr(&(j->s->pb)) - j->s->pb.buf;
499 void jpeg_enc_uninit(jpeg_enc_t *j) {
500 ff_mjpeg_encode_close(j->s);
501 av_free(j->s);
502 av_free(j);
#if 0
/* Disabled stand-alone smoke test: encodes one 32x32 picture with
 * vertically subsampled chroma (cheap upsampling enabled) and writes the
 * result to test.jpg. */

#define W 32
#define H 32

unsigned char buf[W*H*3/2];
char code[256*1024];

int main(void) {
    jpeg_enc_t *j;
    int size;
    FILE *fp;

    /* Y plane, then a quarter-size U plane and a quarter-size V plane */
    memset(buf, 0, W*H);
    memset(buf+W*H, 255, W*H/4);
    memset(buf+5*W*H/4, 0, W*H/4);

    j = jpeg_enc_init(W, H, 1, W, 1, W/2, 1, W/2, 1, 1, 0);
    if (j == NULL)
        return 1;

    size = jpeg_enc_frame(j, buf, buf+W*H, buf+5*W*H/4, code);
    jpeg_enc_uninit(j);

    /* JPEG data is binary, so do not open the file in text mode */
    fp = fopen("test.jpg", "wb");
    if (fp == NULL)
        return 1;
    if (fwrite(code, 1, size, fp) != (size_t)size) {
        fclose(fp);
        return 1;
    }
    return fclose(fp) == 0 ? 0 : 1;
}
#endif