r864: Merge 2.1:
[cinelerra_cv/ct.git] / mpeg2enc / transfrm.c
blobac4127ee53f4704eccc0a4d64b1054741af255b6
1 /* transfrm.c, forward / inverse transformation */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
5 /*
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
21 * patents.
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
26 * design.
#include "config.h"
#include "global.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cpu_accel.h"
#ifdef X86_CPU
/* MMX variants; only referenced when building for x86. */
extern void fdct_mmx(int16_t *blk);
extern void idct_mmx(int16_t *blk, unsigned char *temp);
extern void add_pred_mmx(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
extern void sub_pred_mmx(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
#endif

/* Forward / inverse DCT routines used when MMX is not selected. */
extern void fdct(int16_t *blk);
extern void idct(int16_t *blk, unsigned char *temp);

/* private prototypes */
static void add_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
static void sub_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
/*
 * Pointers to the versions of the transform and prediction manipulation
 * routines to be used.  They are filled in by init_transform_hv().
 */
static void (*pfdct)(int16_t *blk);
static void (*pidct)(int16_t *blk, unsigned char *temp);
static void (*padd_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
static void (*psub_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
70 Initialise DCT transformation routines
71 Currently just activates MMX routines if available
75 void init_transform_hv()
77 int flags;
78 flags = cpu_accel();
80 #ifdef X86_CPU
81 if( (flags & ACCEL_X86_MMX) ) /* MMX CPU */
83 if(verbose) fprintf( stderr, "SETTING MMX for TRANSFORM!\n");
84 pfdct = fdct_mmx;
85 pidct = idct_mmx;
86 padd_pred = add_pred_mmx;
87 psub_pred = sub_pred_mmx;
89 else
90 #endif
92 pfdct = fdct;
93 pidct = idct;
94 padd_pred = add_pred;
95 psub_pred = sub_pred;
100 /* add prediction and prediction error, saturate to 0...255 */
101 static void add_pred(unsigned char *pred,
102 unsigned char *cur,
103 int lx,
104 short *blk)
106 register int j;
108 for (j=0; j<8; j++)
111 * for (i=0; i<8; i++)
112 * cur[i] = clp[blk[i] + pred[i]];
114 cur[0] = clp[blk[0] + pred[0]];
115 cur[1] = clp[blk[1] + pred[1]];
116 cur[2] = clp[blk[2] + pred[2]];
117 cur[3] = clp[blk[3] + pred[3]];
118 cur[4] = clp[blk[4] + pred[4]];
119 cur[5] = clp[blk[5] + pred[5]];
120 cur[6] = clp[blk[6] + pred[6]];
121 cur[7] = clp[blk[7] + pred[7]];
123 blk += 8;
124 cur += lx;
125 pred += lx;
/*
 * Subtract prediction from block data for one 8x8 block.
 *
 *   pred  first sample of the prediction block
 *   cur   first sample of the source block
 *   lx    distance, in samples, between successive rows of pred and cur
 *   blk   receives the 64 differences cur - pred as 8 consecutive rows of 8
 *
 * The parameter types match the prototype (uint8_t / int16_t) rather than
 * spelling them as unsigned char / short.
 */
static void sub_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk)
{
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++)
			blk[i] = cur[i] - pred[i];

		blk += 8;	/* output block is stored contiguously */
		cur += lx;
		pred += lx;
	}
}
158 void transform_engine_loop(transform_engine_t *engine)
160 while(!engine->done)
162 pthread_mutex_lock(&(engine->input_lock));
164 if(!engine->done)
166 pict_data_s *picture = engine->picture;
167 uint8_t **pred = engine->pred;
168 uint8_t **cur = engine->cur;
169 mbinfo_s *mbi = picture->mbinfo;
170 int16_t (*blocks)[64] = picture->blocks;
171 int i, j, i1, j1, k, n, cc, offs, lx;
173 k = (engine->start_row / 16) * (width / 16);
175 for(j = engine->start_row; j < engine->end_row; j += 16)
176 for(i = 0; i < width; i += 16)
178 mbi[k].dctblocks = &blocks[k * block_count];
180 for(n = 0; n < block_count; n++)
182 /* color component index */
183 cc = (n < 4) ? 0 : (n & 1) + 1;
184 if(cc == 0)
186 /* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
187 /* We'll use this for quantisation calculations */
188 /* luminance */
189 if ((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
191 /* field DCT */
192 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
193 lx = width << 1;
195 else
197 /* frame DCT */
198 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
199 lx = width2;
202 if (picture->pict_struct == BOTTOM_FIELD)
203 offs += width;
205 else
207 /* chrominance */
208 /* scale coordinates */
209 i1 = (chroma_format == CHROMA444) ? i : i >> 1;
210 j1 = (chroma_format != CHROMA420) ? j : j >> 1;
212 if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type
213 && (chroma_format!=CHROMA420))
215 /* field DCT */
216 offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1));
217 lx = chrom_width<<1;
219 else
221 /* frame DCT */
222 offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2));
223 lx = chrom_width2;
226 if(picture->pict_struct==BOTTOM_FIELD)
227 offs += chrom_width;
230 (*psub_pred)(pred[cc]+offs,cur[cc]+offs,lx,
231 blocks[k*block_count+n]);
232 (*pfdct)(blocks[k*block_count+n]);
235 k++;
238 pthread_mutex_unlock(&(engine->output_lock));
242 /* subtract prediction and transform prediction error */
243 void transform(pict_data_s *picture,
244 uint8_t *pred[], uint8_t *cur[])
246 int i;
247 /* Start loop */
248 for(i = 0; i < processors; i++)
250 transform_engines[i].picture = picture;
251 transform_engines[i].pred = pred;
252 transform_engines[i].cur = cur;
253 pthread_mutex_unlock(&(transform_engines[i].input_lock));
256 /* Wait for completion */
257 for(i = 0; i < processors; i++)
259 pthread_mutex_lock(&(transform_engines[i].output_lock));
265 void start_transform_engines()
267 int i;
268 int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
269 int current_row = 0;
270 pthread_attr_t attr;
271 pthread_mutexattr_t mutex_attr;
273 pthread_mutexattr_init(&mutex_attr);
274 pthread_attr_init(&attr);
275 transform_engines = calloc(1, sizeof(transform_engine_t) * processors);
276 for(i = 0; i < processors; i++)
278 transform_engines[i].start_row = current_row * 16;
279 current_row += rows_per_processor;
280 if(current_row > height2 / 16) current_row = height2 / 16;
281 transform_engines[i].end_row = current_row * 16;
282 pthread_mutex_init(&(transform_engines[i].input_lock), &mutex_attr);
283 pthread_mutex_lock(&(transform_engines[i].input_lock));
284 pthread_mutex_init(&(transform_engines[i].output_lock), &mutex_attr);
285 pthread_mutex_lock(&(transform_engines[i].output_lock));
286 transform_engines[i].done = 0;
287 pthread_create(&(transform_engines[i].tid),
288 &attr,
289 (void*)transform_engine_loop,
290 &transform_engines[i]);
294 void stop_transform_engines()
296 int i;
297 for(i = 0; i < processors; i++)
299 transform_engines[i].done = 1;
300 pthread_mutex_unlock(&(transform_engines[i].input_lock));
301 pthread_join(transform_engines[i].tid, 0);
302 pthread_mutex_destroy(&(transform_engines[i].input_lock));
303 pthread_mutex_destroy(&(transform_engines[i].output_lock));
305 free(transform_engines);
316 /* inverse transform prediction error and add prediction */
317 void itransform_engine_loop(transform_engine_t *engine)
319 while(!engine->done)
321 pthread_mutex_lock(&(engine->input_lock));
323 if(!engine->done)
325 pict_data_s *picture = engine->picture;
326 uint8_t **pred = engine->pred;
327 uint8_t **cur = engine->cur;
328 int i, j, i1, j1, k, n, cc, offs, lx;
329 mbinfo_s *mbi = picture->mbinfo;
330 /* Its the quantised / inverse quantised blocks were interested in
331 for inverse transformation */
332 int16_t (*blocks)[64] = picture->qblocks;
334 k = (engine->start_row / 16) * (width / 16);
336 for(j = engine->start_row; j < engine->end_row; j += 16)
337 for(i = 0; i < width; i += 16)
339 for(n = 0; n < block_count; n++)
341 cc = (n < 4) ? 0 : (n & 1) + 1; /* color component index */
343 if(cc == 0)
345 /* luminance */
346 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
348 /* field DCT */
349 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
350 lx = width<<1;
352 else
354 /* frame DCT */
355 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
356 lx = width2;
359 if(picture->pict_struct == BOTTOM_FIELD)
360 offs += width;
362 else
364 /* chrominance */
366 /* scale coordinates */
367 i1 = (chroma_format==CHROMA444) ? i : i>>1;
368 j1 = (chroma_format!=CHROMA420) ? j : j>>1;
370 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type
371 && (chroma_format != CHROMA420))
373 /* field DCT */
374 offs = i1 + (n & 8) + chrom_width * (j1 + ((n & 2) >> 1));
375 lx = chrom_width << 1;
377 else
379 /* frame DCT */
380 offs = i1 + (n&8) + chrom_width2 * (j1 + ((n & 2) << 2));
381 lx = chrom_width2;
384 if(picture->pict_struct == BOTTOM_FIELD)
385 offs += chrom_width;
388 //pthread_mutex_lock(&test_lock);
389 (*pidct)(blocks[k*block_count+n], engine->temp);
390 (*padd_pred)(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]);
391 //pthread_mutex_unlock(&test_lock);
394 k++;
397 pthread_mutex_unlock(&(engine->output_lock));
401 void itransform(pict_data_s *picture,
402 uint8_t *pred[], uint8_t *cur[])
404 int i;
405 /* Start loop */
406 for(i = 0; i < processors; i++)
408 itransform_engines[i].picture = picture;
409 itransform_engines[i].cur = cur;
410 itransform_engines[i].pred = pred;
411 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
414 /* Wait for completion */
415 for(i = 0; i < processors; i++)
417 pthread_mutex_lock(&(itransform_engines[i].output_lock));
421 void start_itransform_engines()
423 int i;
424 int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
425 int current_row = 0;
426 pthread_attr_t attr;
427 pthread_mutexattr_t mutex_attr;
429 pthread_mutexattr_init(&mutex_attr);
430 pthread_attr_init(&attr);
431 itransform_engines = calloc(1, sizeof(transform_engine_t) * processors);
432 for(i = 0; i < processors; i++)
434 itransform_engines[i].start_row = current_row * 16;
435 current_row += rows_per_processor;
436 if(current_row > height2 / 16) current_row = height2 / 16;
437 itransform_engines[i].end_row = current_row * 16;
438 pthread_mutex_init(&(itransform_engines[i].input_lock), &mutex_attr);
439 pthread_mutex_lock(&(itransform_engines[i].input_lock));
440 pthread_mutex_init(&(itransform_engines[i].output_lock), &mutex_attr);
441 pthread_mutex_lock(&(itransform_engines[i].output_lock));
442 itransform_engines[i].done = 0;
443 pthread_create(&(itransform_engines[i].tid),
444 &attr,
445 (void*)itransform_engine_loop,
446 &itransform_engines[i]);
450 void stop_itransform_engines()
452 int i;
453 for(i = 0; i < processors; i++)
455 itransform_engines[i].done = 1;
456 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
457 pthread_join(itransform_engines[i].tid, 0);
458 pthread_mutex_destroy(&(itransform_engines[i].input_lock));
459 pthread_mutex_destroy(&(itransform_engines[i].output_lock));
461 free(itransform_engines);
468 * select between frame and field DCT
470 * preliminary version: based on inter-field correlation
473 void dct_type_estimation(
474 pict_data_s *picture,
475 uint8_t *pred, uint8_t *cur
479 struct mbinfo *mbi = picture->mbinfo;
481 int16_t blk0[128], blk1[128];
482 int i, j, i0, j0, k, offs, s0, s1, sq0, sq1, s01;
483 double d, r;
485 k = 0;
487 for (j0=0; j0<height2; j0+=16)
488 for (i0=0; i0<width; i0+=16)
490 if (picture->frame_pred_dct || picture->pict_struct!=FRAME_PICTURE)
491 mbi[k].dct_type = 0;
492 else
494 /* interlaced frame picture */
496 * calculate prediction error (cur-pred) for top (blk0)
497 * and bottom field (blk1)
499 for (j=0; j<8; j++)
501 offs = width*((j<<1)+j0) + i0;
502 for (i=0; i<16; i++)
504 blk0[16*j+i] = cur[offs] - pred[offs];
505 blk1[16*j+i] = cur[offs+width] - pred[offs+width];
506 offs++;
509 /* correlate fields */
510 s0=s1=sq0=sq1=s01=0;
512 for (i=0; i<128; i++)
514 s0+= blk0[i];
515 sq0+= blk0[i]*blk0[i];
516 s1+= blk1[i];
517 sq1+= blk1[i]*blk1[i];
518 s01+= blk0[i]*blk1[i];
521 d = (sq0-(s0*s0)/128.0)*(sq1-(s1*s1)/128.0);
523 if (d>0.0)
525 r = (s01-(s0*s1)/128.0)/sqrt(d);
526 if (r>0.5)
527 mbi[k].dct_type = 0; /* frame DCT */
528 else
529 mbi[k].dct_type = 1; /* field DCT */
531 else
532 mbi[k].dct_type = 1; /* field DCT */
534 k++;