1 /* transfrm.c, forward / inverse transformation */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
34 #include "cpu_accel.h"
/* MMX transform kernels; no definition appears in this file. */
extern void fdct_mmx(int16_t *blk);
extern void idct_mmx(int16_t *blk, unsigned char *temp);

/* MMX prediction add / subtract kernels; no definition appears in this file. */
void add_pred_mmx(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
void sub_pred_mmx(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);

/* Plain C transforms; no definition appears in this file. */
extern void fdct(int16_t *blk);
extern void idct(int16_t *blk, unsigned char *temp);

/* private prototypes */
static void add_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
static void sub_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
/*
 * Pointers to the versions of the transform and prediction manipulation
 * routines in use.  The engine loops in this file call the forward DCT,
 * inverse DCT, prediction add and prediction subtract only through these
 * pointers; init_transform_hv() is where they are assigned.
 */
static void (*pfdct)(int16_t *blk);
static void (*pidct)(int16_t *blk, unsigned char *temp);
static void (*padd_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
static void (*psub_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
70 Initialise DCT transformation routines
71 Currently just activates MMX routines if available
/*
 * init_transform_hv - choose the transform / prediction implementations.
 *
 * When the ACCEL_X86_MMX bit is set in `flags`, padd_pred and psub_pred are
 * pointed at the MMX routines; if `verbose` is non-zero a notice is written
 * to stderr first.
 *
 * NOTE(review): this listing is incomplete -- its embedded source line
 * numbers skip 76-80, 82, 84-85 and everything from 88 to 99.  Where `flags`
 * comes from, whether pfdct / pidct are assigned here, and what happens when
 * the MMX bit is clear are not visible; confirm against the complete file.
 */
75 void init_transform_hv()
81 if( (flags
& ACCEL_X86_MMX
) ) /* MMX CPU */
83 if(verbose
) fprintf( stderr
, "SETTING MMX for TRANSFORM!\n");
86 padd_pred
= add_pred_mmx
;
87 psub_pred
= sub_pred_mmx
;
100 /* add prediction and prediction error, saturate to 0...255 */
101 static void add_pred(unsigned char *pred
,
111 * for (i=0; i<8; i++)
112 * cur[i] = clp[blk[i] + pred[i]];
114 cur
[0] = clp
[blk
[0] + pred
[0]];
115 cur
[1] = clp
[blk
[1] + pred
[1]];
116 cur
[2] = clp
[blk
[2] + pred
[2]];
117 cur
[3] = clp
[blk
[3] + pred
[3]];
118 cur
[4] = clp
[blk
[4] + pred
[4]];
119 cur
[5] = clp
[blk
[5] + pred
[5]];
120 cur
[6] = clp
[blk
[6] + pred
[6]];
121 cur
[7] = clp
[blk
[7] + pred
[7]];
/*
 * sub_pred - subtract prediction from block data.
 *
 * pred  first sample of the 8x8 prediction area
 * cur   first sample of the 8x8 source area
 * lx    distance, in samples, from one row of pred / cur to the next
 * blk   receives the differences cur - pred, 8 values per row
 *
 * NOTE(review): the listing this was restored from had lost the tail of the
 * parameter list and the row-stepping code.  The signature follows the
 * file's own prototype; the eight-row walk (blk += 8, cur/pred += lx) is
 * reconstructed -- confirm against a pristine copy.
 */
static void sub_pred(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk)
{
	int row, col;

	for (row = 0; row < 8; row++) {
		for (col = 0; col < 8; col++)
			blk[col] = cur[col] - pred[col];

		blk += 8;
		cur += lx;
		pred += lx;
	}
}
158 void transform_engine_loop(transform_engine_t
*engine
)
162 pthread_mutex_lock(&(engine
->input_lock
));
166 pict_data_s
*picture
= engine
->picture
;
167 uint8_t **pred
= engine
->pred
;
168 uint8_t **cur
= engine
->cur
;
169 mbinfo_s
*mbi
= picture
->mbinfo
;
170 int16_t (*blocks
)[64] = picture
->blocks
;
171 int i
, j
, i1
, j1
, k
, n
, cc
, offs
, lx
;
173 k
= (engine
->start_row
/ 16) * (width
/ 16);
175 for(j
= engine
->start_row
; j
< engine
->end_row
; j
+= 16)
176 for(i
= 0; i
< width
; i
+= 16)
178 mbi
[k
].dctblocks
= &blocks
[k
* block_count
];
180 for(n
= 0; n
< block_count
; n
++)
182 /* color component index */
183 cc
= (n
< 4) ? 0 : (n
& 1) + 1;
186 /* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
187 /* We'll use this for quantisation calculations */
189 if ((picture
->pict_struct
== FRAME_PICTURE
) && mbi
[k
].dct_type
)
192 offs
= i
+ ((n
& 1) << 3) + width
* (j
+ ((n
& 2) >> 1));
198 offs
= i
+ ((n
& 1) << 3) + width2
* (j
+ ((n
& 2) << 2));
202 if (picture
->pict_struct
== BOTTOM_FIELD
)
208 /* scale coordinates */
209 i1
= (chroma_format
== CHROMA444
) ? i
: i
>> 1;
210 j1
= (chroma_format
!= CHROMA420
) ? j
: j
>> 1;
212 if ((picture
->pict_struct
==FRAME_PICTURE
) && mbi
[k
].dct_type
213 && (chroma_format
!=CHROMA420
))
216 offs
= i1
+ (n
&8) + chrom_width
*(j1
+((n
&2)>>1));
222 offs
= i1
+ (n
&8) + chrom_width2
*(j1
+((n
&2)<<2));
226 if(picture
->pict_struct
==BOTTOM_FIELD
)
230 (*psub_pred
)(pred
[cc
]+offs
,cur
[cc
]+offs
,lx
,
231 blocks
[k
*block_count
+n
]);
232 (*pfdct
)(blocks
[k
*block_count
+n
]);
238 pthread_mutex_unlock(&(engine
->output_lock
));
242 /* subtract prediction and transform prediction error */
243 void transform(pict_data_s
*picture
,
244 uint8_t *pred
[], uint8_t *cur
[])
248 for(i
= 0; i
< processors
; i
++)
250 transform_engines
[i
].picture
= picture
;
251 transform_engines
[i
].pred
= pred
;
252 transform_engines
[i
].cur
= cur
;
253 pthread_mutex_unlock(&(transform_engines
[i
].input_lock
));
256 /* Wait for completion */
257 for(i
= 0; i
< processors
; i
++)
259 pthread_mutex_lock(&(transform_engines
[i
].output_lock
));
265 void start_transform_engines()
268 int rows_per_processor
= (int)((float)height2
/ 16 / processors
+ 0.5);
271 pthread_mutexattr_t mutex_attr
;
273 pthread_mutexattr_init(&mutex_attr
);
274 pthread_attr_init(&attr
);
275 transform_engines
= calloc(1, sizeof(transform_engine_t
) * processors
);
276 for(i
= 0; i
< processors
; i
++)
278 transform_engines
[i
].start_row
= current_row
* 16;
279 current_row
+= rows_per_processor
;
280 if(current_row
> height2
/ 16) current_row
= height2
/ 16;
281 transform_engines
[i
].end_row
= current_row
* 16;
282 pthread_mutex_init(&(transform_engines
[i
].input_lock
), &mutex_attr
);
283 pthread_mutex_lock(&(transform_engines
[i
].input_lock
));
284 pthread_mutex_init(&(transform_engines
[i
].output_lock
), &mutex_attr
);
285 pthread_mutex_lock(&(transform_engines
[i
].output_lock
));
286 transform_engines
[i
].done
= 0;
287 pthread_create(&(transform_engines
[i
].tid
),
289 (void*)transform_engine_loop
,
290 &transform_engines
[i
]);
294 void stop_transform_engines()
297 for(i
= 0; i
< processors
; i
++)
299 transform_engines
[i
].done
= 1;
300 pthread_mutex_unlock(&(transform_engines
[i
].input_lock
));
301 pthread_join(transform_engines
[i
].tid
, 0);
302 pthread_mutex_destroy(&(transform_engines
[i
].input_lock
));
303 pthread_mutex_destroy(&(transform_engines
[i
].output_lock
));
305 free(transform_engines
);
316 /* inverse transform prediction error and add prediction */
317 void itransform_engine_loop(transform_engine_t
*engine
)
321 pthread_mutex_lock(&(engine
->input_lock
));
325 pict_data_s
*picture
= engine
->picture
;
326 uint8_t **pred
= engine
->pred
;
327 uint8_t **cur
= engine
->cur
;
328 int i
, j
, i1
, j1
, k
, n
, cc
, offs
, lx
;
329 mbinfo_s
*mbi
= picture
->mbinfo
;
330 /* Its the quantised / inverse quantised blocks were interested in
331 for inverse transformation */
332 int16_t (*blocks
)[64] = picture
->qblocks
;
334 k
= (engine
->start_row
/ 16) * (width
/ 16);
336 for(j
= engine
->start_row
; j
< engine
->end_row
; j
+= 16)
337 for(i
= 0; i
< width
; i
+= 16)
339 for(n
= 0; n
< block_count
; n
++)
341 cc
= (n
< 4) ? 0 : (n
& 1) + 1; /* color component index */
346 if((picture
->pict_struct
== FRAME_PICTURE
) && mbi
[k
].dct_type
)
349 offs
= i
+ ((n
& 1) << 3) + width
* (j
+ ((n
& 2) >> 1));
355 offs
= i
+ ((n
& 1) << 3) + width2
* (j
+ ((n
& 2) << 2));
359 if(picture
->pict_struct
== BOTTOM_FIELD
)
366 /* scale coordinates */
367 i1
= (chroma_format
==CHROMA444
) ? i
: i
>>1;
368 j1
= (chroma_format
!=CHROMA420
) ? j
: j
>>1;
370 if((picture
->pict_struct
== FRAME_PICTURE
) && mbi
[k
].dct_type
371 && (chroma_format
!= CHROMA420
))
374 offs
= i1
+ (n
& 8) + chrom_width
* (j1
+ ((n
& 2) >> 1));
375 lx
= chrom_width
<< 1;
380 offs
= i1
+ (n
&8) + chrom_width2
* (j1
+ ((n
& 2) << 2));
384 if(picture
->pict_struct
== BOTTOM_FIELD
)
388 //pthread_mutex_lock(&test_lock);
389 (*pidct
)(blocks
[k
*block_count
+n
], engine
->temp
);
390 (*padd_pred
)(pred
[cc
]+offs
,cur
[cc
]+offs
,lx
,blocks
[k
*block_count
+n
]);
391 //pthread_mutex_unlock(&test_lock);
397 pthread_mutex_unlock(&(engine
->output_lock
));
401 void itransform(pict_data_s
*picture
,
402 uint8_t *pred
[], uint8_t *cur
[])
406 for(i
= 0; i
< processors
; i
++)
408 itransform_engines
[i
].picture
= picture
;
409 itransform_engines
[i
].cur
= cur
;
410 itransform_engines
[i
].pred
= pred
;
411 pthread_mutex_unlock(&(itransform_engines
[i
].input_lock
));
414 /* Wait for completion */
415 for(i
= 0; i
< processors
; i
++)
417 pthread_mutex_lock(&(itransform_engines
[i
].output_lock
));
421 void start_itransform_engines()
424 int rows_per_processor
= (int)((float)height2
/ 16 / processors
+ 0.5);
427 pthread_mutexattr_t mutex_attr
;
429 pthread_mutexattr_init(&mutex_attr
);
430 pthread_attr_init(&attr
);
431 itransform_engines
= calloc(1, sizeof(transform_engine_t
) * processors
);
432 for(i
= 0; i
< processors
; i
++)
434 itransform_engines
[i
].start_row
= current_row
* 16;
435 current_row
+= rows_per_processor
;
436 if(current_row
> height2
/ 16) current_row
= height2
/ 16;
437 itransform_engines
[i
].end_row
= current_row
* 16;
438 pthread_mutex_init(&(itransform_engines
[i
].input_lock
), &mutex_attr
);
439 pthread_mutex_lock(&(itransform_engines
[i
].input_lock
));
440 pthread_mutex_init(&(itransform_engines
[i
].output_lock
), &mutex_attr
);
441 pthread_mutex_lock(&(itransform_engines
[i
].output_lock
));
442 itransform_engines
[i
].done
= 0;
443 pthread_create(&(itransform_engines
[i
].tid
),
445 (void*)itransform_engine_loop
,
446 &itransform_engines
[i
]);
450 void stop_itransform_engines()
453 for(i
= 0; i
< processors
; i
++)
455 itransform_engines
[i
].done
= 1;
456 pthread_mutex_unlock(&(itransform_engines
[i
].input_lock
));
457 pthread_join(itransform_engines
[i
].tid
, 0);
458 pthread_mutex_destroy(&(itransform_engines
[i
].input_lock
));
459 pthread_mutex_destroy(&(itransform_engines
[i
].output_lock
));
461 free(itransform_engines
);
468 * select between frame and field DCT
470 * preliminary version: based on inter-field correlation
473 void dct_type_estimation(
474 pict_data_s
*picture
,
475 uint8_t *pred
, uint8_t *cur
479 struct mbinfo
*mbi
= picture
->mbinfo
;
481 int16_t blk0
[128], blk1
[128];
482 int i
, j
, i0
, j0
, k
, offs
, s0
, s1
, sq0
, sq1
, s01
;
487 for (j0
=0; j0
<height2
; j0
+=16)
488 for (i0
=0; i0
<width
; i0
+=16)
490 if (picture
->frame_pred_dct
|| picture
->pict_struct
!=FRAME_PICTURE
)
494 /* interlaced frame picture */
496 * calculate prediction error (cur-pred) for top (blk0)
497 * and bottom field (blk1)
501 offs
= width
*((j
<<1)+j0
) + i0
;
504 blk0
[16*j
+i
] = cur
[offs
] - pred
[offs
];
505 blk1
[16*j
+i
] = cur
[offs
+width
] - pred
[offs
+width
];
509 /* correlate fields */
512 for (i
=0; i
<128; i
++)
515 sq0
+= blk0
[i
]*blk0
[i
];
517 sq1
+= blk1
[i
]*blk1
[i
];
518 s01
+= blk0
[i
]*blk1
[i
];
521 d
= (sq0
-(s0
*s0
)/128.0)*(sq1
-(s1
*s1
)/128.0);
525 r
= (s01
-(s0
*s1
)/128.0)/sqrt(d
);
527 mbi
[k
].dct_type
= 0; /* frame DCT */
529 mbi
[k
].dct_type
= 1; /* field DCT */
532 mbi
[k
].dct_type
= 1; /* field DCT */