Merge branch 'mirror' into vdpau
[FFMpeg-mirror/ffmpeg-vdpau.git] / libavcodec / dct-test.c
blob7a805056e5cfdec23a743be87c8b34f7714e0ff6
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file dct-test.c
 * DCT test. (c) 2001 Fabrice Bellard.
 * Started from sample code by Juan J. Sierralta P.
 */
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
35 #include "libavutil/common.h"
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
40 #include "i386/idct_xvid.h"
42 #undef printf
43 #undef random
/**
 * Stand-in for the fast_memcpy symbol that simply forwards to memcpy().
 *
 * @param a destination buffer
 * @param b source buffer (must not overlap a, as for memcpy())
 * @param c number of bytes to copy
 * @return a
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void init_fdct();
52 extern void ff_mmx_idct(DCTELEM *data);
53 extern void ff_mmxext_idct(DCTELEM *data);
55 extern void odivx_idct_c (short *block);
57 // BFIN
58 extern void ff_bfin_idct (DCTELEM *block) ;
59 extern void ff_bfin_fdct (DCTELEM *block) ;
61 // ALTIVEC
62 extern void fdct_altivec (DCTELEM *block);
63 //extern void idct_altivec (DCTELEM *block);?? no routine
66 struct algo {
67 const char *name;
68 enum { FDCT, IDCT } is_idct;
69 void (* func) (DCTELEM *block);
70 void (* ref) (DCTELEM *block);
71 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
72 int mm_support;
75 #ifndef FAAN_POSTSCALE
76 #define FAAN_SCALE SCALE_PERM
77 #else
78 #define FAAN_SCALE NO_PERM
79 #endif
81 static int cpu_flags;
83 struct algo algos[] = {
84 {"REF-DBL", 0, fdct, fdct, NO_PERM},
85 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
86 {"FAANI", 1, ff_faanidct, idct, NO_PERM},
87 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
88 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
89 {"REF-DBL", 1, idct, idct, NO_PERM},
90 {"INT", 1, j_rev_dct, idct, MMX_PERM},
91 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
93 #ifdef HAVE_MMX
94 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, FF_MM_MMX},
95 #ifdef HAVE_MMX2
96 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, FF_MM_MMXEXT},
97 {"SSE2", 0, ff_fdct_sse2, fdct, NO_PERM, FF_MM_SSE2},
98 #endif
100 #ifdef CONFIG_GPL
101 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, FF_MM_MMX},
102 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, FF_MM_MMXEXT},
103 #endif
104 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
105 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, FF_MM_MMX},
106 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, FF_MM_MMXEXT},
107 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, FF_MM_SSE2},
108 #endif
110 #ifdef HAVE_ALTIVEC
111 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, FF_MM_ALTIVEC},
112 #endif
114 #ifdef ARCH_BFIN
115 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
116 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
117 #endif
119 { 0 }
/* right shift applied by dct_error() after multiplying by the per-coefficient scale */
#define AANSCALE_BITS 12

/*
 * Per-coefficient AAN scale factors, 8x8 in row-major order.
 * precomputed values scaled up by 14 bits
 */
static const unsigned short aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
135 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
/**
 * Read the current time with gettimeofday().
 *
 * @return seconds and microseconds of the timeval combined into a single
 *         microsecond count
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
/* number of generated blocks per accuracy test */
#define NB_ITS 20000
/* number of transform calls per timing batch */
#define NB_ITS_SPEED 50000

/* coefficient order for MMX_PERM input; computed by idct_mmx_init() */
static short idct_mmx_perm[64];

/* coefficient order for MMX_SIMPLE_PERM input: entry i is the position input coefficient i is stored at */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};

/* column order within each row for SSE2_PERM input */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
162 void idct_mmx_init(void)
164 int i;
166 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
167 for (i = 0; i < 64; i++) {
168 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
169 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
173 static DCTELEM block[64] __attribute__ ((aligned (16)));
174 static DCTELEM block1[64] __attribute__ ((aligned (8)));
175 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
/**
 * Execute the "emms" instruction when built with HAVE_MMX and cpu_flags
 * reports FF_MM_MMX; does nothing otherwise.
 */
static inline void mmx_emms(void)
{
#ifdef HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
185 void dct_error(const char *name, int is_idct,
186 void (*fdct_func)(DCTELEM *block),
187 void (*fdct_ref)(DCTELEM *block), int form, int test)
189 int it, i, scale;
190 int err_inf, v;
191 int64_t err2, ti, ti1, it1;
192 int64_t sysErr[64], sysErrMax=0;
193 int maxout=0;
194 int blockSumErrMax=0, blockSumErr;
196 srandom(0);
198 err_inf = 0;
199 err2 = 0;
200 for(i=0; i<64; i++) sysErr[i]=0;
201 for(it=0;it<NB_ITS;it++) {
202 for(i=0;i<64;i++)
203 block1[i] = 0;
204 switch(test){
205 case 0:
206 for(i=0;i<64;i++)
207 block1[i] = (random() % 512) -256;
208 if (is_idct){
209 fdct(block1);
211 for(i=0;i<64;i++)
212 block1[i]>>=3;
214 break;
215 case 1:{
216 int num= (random()%10)+1;
217 for(i=0;i<num;i++)
218 block1[random()%64] = (random() % 512) -256;
219 }break;
220 case 2:
221 block1[0]= (random()%4096)-2048;
222 block1[63]= (block1[0]&1)^1;
223 break;
226 #if 0 // simulate mismatch control
227 { int sum=0;
228 for(i=0;i<64;i++)
229 sum+=block1[i];
231 if((sum&1)==0) block1[63]^=1;
233 #endif
235 for(i=0; i<64; i++)
236 block_org[i]= block1[i];
238 if (form == MMX_PERM) {
239 for(i=0;i<64;i++)
240 block[idct_mmx_perm[i]] = block1[i];
241 } else if (form == MMX_SIMPLE_PERM) {
242 for(i=0;i<64;i++)
243 block[idct_simple_mmx_perm[i]] = block1[i];
245 } else if (form == SSE2_PERM) {
246 for(i=0; i<64; i++)
247 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
248 } else {
249 for(i=0; i<64; i++)
250 block[i]= block1[i];
252 #if 0 // simulate mismatch control for tested IDCT but not the ref
253 { int sum=0;
254 for(i=0;i<64;i++)
255 sum+=block[i];
257 if((sum&1)==0) block[63]^=1;
259 #endif
261 fdct_func(block);
262 mmx_emms();
264 if (form == SCALE_PERM) {
265 for(i=0; i<64; i++) {
266 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
267 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
271 fdct_ref(block1);
273 blockSumErr=0;
274 for(i=0;i<64;i++) {
275 v = abs(block[i] - block1[i]);
276 if (v > err_inf)
277 err_inf = v;
278 err2 += v * v;
279 sysErr[i] += block[i] - block1[i];
280 blockSumErr += v;
281 if( abs(block[i])>maxout) maxout=abs(block[i]);
283 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
284 #if 0 // print different matrix pairs
285 if(blockSumErr){
286 printf("\n");
287 for(i=0; i<64; i++){
288 if((i&7)==0) printf("\n");
289 printf("%4d ", block_org[i]);
291 for(i=0; i<64; i++){
292 if((i&7)==0) printf("\n");
293 printf("%4d ", block[i] - block1[i]);
296 #endif
298 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
300 #if 1 // dump systematic errors
301 for(i=0; i<64; i++){
302 if(i%8==0) printf("\n");
303 printf("%5d ", (int)sysErr[i]);
305 printf("\n");
306 #endif
308 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
309 is_idct ? "IDCT" : "DCT",
310 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
311 #if 1 //Speed test
312 /* speed test */
313 for(i=0;i<64;i++)
314 block1[i] = 0;
315 switch(test){
316 case 0:
317 for(i=0;i<64;i++)
318 block1[i] = (random() % 512) -256;
319 if (is_idct){
320 fdct(block1);
322 for(i=0;i<64;i++)
323 block1[i]>>=3;
325 break;
326 case 1:{
327 case 2:
328 block1[0] = (random() % 512) -256;
329 block1[1] = (random() % 512) -256;
330 block1[2] = (random() % 512) -256;
331 block1[3] = (random() % 512) -256;
332 }break;
335 if (form == MMX_PERM) {
336 for(i=0;i<64;i++)
337 block[idct_mmx_perm[i]] = block1[i];
338 } else if(form == MMX_SIMPLE_PERM) {
339 for(i=0;i<64;i++)
340 block[idct_simple_mmx_perm[i]] = block1[i];
341 } else {
342 for(i=0; i<64; i++)
343 block[i]= block1[i];
346 ti = gettime();
347 it1 = 0;
348 do {
349 for(it=0;it<NB_ITS_SPEED;it++) {
350 for(i=0; i<64; i++)
351 block[i]= block1[i];
352 // memcpy(block, block1, sizeof(DCTELEM) * 64);
353 // do not memcpy especially not fastmemcpy because it does movntq !!!
354 fdct_func(block);
356 it1 += NB_ITS_SPEED;
357 ti1 = gettime() - ti;
358 } while (ti1 < 1000000);
359 mmx_emms();
361 printf("%s %s: %0.1f kdct/s\n",
362 is_idct ? "IDCT" : "DCT",
363 name, (double)it1 * 1000.0 / (double)ti1);
364 #endif
367 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
368 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
370 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
372 static int init;
373 static double c8[8][8];
374 static double c4[4][4];
375 double block1[64], block2[64], block3[64];
376 double s, sum, v;
377 int i, j, k;
379 if (!init) {
380 init = 1;
382 for(i=0;i<8;i++) {
383 sum = 0;
384 for(j=0;j<8;j++) {
385 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
386 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
387 sum += c8[i][j] * c8[i][j];
391 for(i=0;i<4;i++) {
392 sum = 0;
393 for(j=0;j<4;j++) {
394 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
395 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
396 sum += c4[i][j] * c4[i][j];
401 /* butterfly */
402 s = 0.5 * sqrt(2.0);
403 for(i=0;i<4;i++) {
404 for(j=0;j<8;j++) {
405 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
406 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
410 /* idct8 on lines */
411 for(i=0;i<8;i++) {
412 for(j=0;j<8;j++) {
413 sum = 0;
414 for(k=0;k<8;k++)
415 sum += c8[k][j] * block1[8*i+k];
416 block2[8*i+j] = sum;
420 /* idct4 */
421 for(i=0;i<8;i++) {
422 for(j=0;j<4;j++) {
423 /* top */
424 sum = 0;
425 for(k=0;k<4;k++)
426 sum += c4[k][j] * block2[8*(2*k)+i];
427 block3[8*(2*j)+i] = sum;
429 /* bottom */
430 sum = 0;
431 for(k=0;k<4;k++)
432 sum += c4[k][j] * block2[8*(2*k+1)+i];
433 block3[8*(2*j+1)+i] = sum;
437 /* clamp and store the result */
438 for(i=0;i<8;i++) {
439 for(j=0;j<8;j++) {
440 v = block3[8*i+j];
441 if (v < 0)
442 v = 0;
443 else if (v > 255)
444 v = 255;
445 dest[i * linesize + j] = (int)rint(v);
450 void idct248_error(const char *name,
451 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
453 int it, i, it1, ti, ti1, err_max, v;
455 srandom(0);
457 /* just one test to see if code is correct (precision is less
458 important here) */
459 err_max = 0;
460 for(it=0;it<NB_ITS;it++) {
462 /* XXX: use forward transform to generate values */
463 for(i=0;i<64;i++)
464 block1[i] = (random() % 256) - 128;
465 block1[0] += 1024;
467 for(i=0; i<64; i++)
468 block[i]= block1[i];
469 idct248_ref(img_dest1, 8, block);
471 for(i=0; i<64; i++)
472 block[i]= block1[i];
473 idct248_put(img_dest, 8, block);
475 for(i=0;i<64;i++) {
476 v = abs((int)img_dest[i] - (int)img_dest1[i]);
477 if (v == 255)
478 printf("%d %d\n", img_dest[i], img_dest1[i]);
479 if (v > err_max)
480 err_max = v;
482 #if 0
483 printf("ref=\n");
484 for(i=0;i<8;i++) {
485 int j;
486 for(j=0;j<8;j++) {
487 printf(" %3d", img_dest1[i*8+j]);
489 printf("\n");
492 printf("out=\n");
493 for(i=0;i<8;i++) {
494 int j;
495 for(j=0;j<8;j++) {
496 printf(" %3d", img_dest[i*8+j]);
498 printf("\n");
500 #endif
502 printf("%s %s: err_inf=%d\n",
503 1 ? "IDCT248" : "DCT248",
504 name, err_max);
506 ti = gettime();
507 it1 = 0;
508 do {
509 for(it=0;it<NB_ITS_SPEED;it++) {
510 for(i=0; i<64; i++)
511 block[i]= block1[i];
512 // memcpy(block, block1, sizeof(DCTELEM) * 64);
513 // do not memcpy especially not fastmemcpy because it does movntq !!!
514 idct248_put(img_dest, 8, block);
516 it1 += NB_ITS_SPEED;
517 ti1 = gettime() - ti;
518 } while (ti1 < 1000000);
519 mmx_emms();
521 printf("%s %s: %0.1f kdct/s\n",
522 1 ? "IDCT248" : "DCT248",
523 name, (double)it1 * 1000.0 / (double)ti1);
/** Print the command line summary on standard output. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        " 1 -> test with random sparse matrixes\n"
        " 2 -> do 3. test from mpeg4 std\n"
        "-i test IDCT implementations\n"
        "-4 test IDCT248 implementations\n";

    printf("%s", usage);
}
536 int main(int argc, char **argv)
538 int test_idct = 0, test_248_dct = 0;
539 int c,i;
540 int test=1;
541 cpu_flags = mm_support();
543 init_fdct();
544 idct_mmx_init();
546 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
547 for(i=0;i<MAX_NEG_CROP;i++) {
548 cropTbl[i] = 0;
549 cropTbl[i + MAX_NEG_CROP + 256] = 255;
552 for(;;) {
553 c = getopt(argc, argv, "ih4");
554 if (c == -1)
555 break;
556 switch(c) {
557 case 'i':
558 test_idct = 1;
559 break;
560 case '4':
561 test_248_dct = 1;
562 break;
563 default :
564 case 'h':
565 help();
566 return 0;
570 if(optind <argc) test= atoi(argv[optind]);
572 printf("ffmpeg DCT/IDCT test\n");
574 if (test_248_dct) {
575 idct248_error("SIMPLE-C", ff_simple_idct248_put);
576 } else {
577 for (i=0;algos[i].name;i++)
578 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
579 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
582 return 0;