2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
37 #include "simple_idct.h"
42 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
48 void *fast_memcpy(void *a
, const void *b
, size_t c
){return memcpy(a
,b
,c
);};
50 /* reference fdct/idct */
51 extern void fdct(DCTELEM
*block
);
52 extern void idct(DCTELEM
*block
);
53 extern void ff_idct_xvid_mmx(DCTELEM
*block
);
54 extern void ff_idct_xvid_mmx2(DCTELEM
*block
);
55 extern void init_fdct();
57 extern void ff_mmx_idct(DCTELEM
*data
);
58 extern void ff_mmxext_idct(DCTELEM
*data
);
60 extern void odivx_idct_c (short *block
);
63 extern void ff_bfin_idct (DCTELEM
*block
) ;
64 extern void ff_bfin_fdct (DCTELEM
*block
) ;
67 extern void fdct_altivec (DCTELEM
*block
);
68 //extern void idct_altivec (DCTELEM *block);?? no routine
73 enum { FDCT
, IDCT
} is_idct
;
74 void (* func
) (DCTELEM
*block
);
75 void (* ref
) (DCTELEM
*block
);
76 enum formattag
{ NO_PERM
,MMX_PERM
, MMX_SIMPLE_PERM
, SCALE_PERM
} format
;
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
82 #define FAAN_SCALE NO_PERM
85 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
88 struct algo algos
[] = {
89 DCT_ERROR( "REF-DBL", 0, fdct
, fdct
, NO_PERM
),
90 DCT_ERROR("FAAN", 0, ff_faandct
, fdct
, FAAN_SCALE
),
91 DCT_ERROR("FAANI", 1, ff_faanidct
, idct
, NO_PERM
),
92 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast
, fdct
, SCALE_PERM
),
93 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow
, fdct
, NO_PERM
),
94 DCT_ERROR("REF-DBL", 1, idct
, idct
, NO_PERM
),
95 DCT_ERROR("INT", 1, j_rev_dct
, idct
, MMX_PERM
),
96 DCT_ERROR("SIMPLE-C", 1, ff_simple_idct
, idct
, NO_PERM
),
99 DCT_ERROR("MMX", 0, ff_fdct_mmx
, fdct
, NO_PERM
),
101 DCT_ERROR("MMX2", 0, ff_fdct_mmx2
, fdct
, NO_PERM
),
105 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct
, idct
, MMX_PERM
),
106 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct
, idct
, MMX_PERM
),
108 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx
, idct
, MMX_SIMPLE_PERM
),
109 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx
, idct
, NO_PERM
),
110 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2
, idct
, NO_PERM
),
114 DCT_ERROR("altivecfdct", 0, fdct_altivec
, fdct
, NO_PERM
),
118 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct
, fdct
, NO_PERM
),
119 DCT_ERROR("BFINidct", 1, ff_bfin_idct
, idct
, NO_PERM
),
125 #define AANSCALE_BITS 12
126 static const unsigned short aanscales
[64] = {
127 /* precomputed values scaled up by 14 bits */
128 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
129 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
130 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
131 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
132 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
133 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
134 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
135 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
138 uint8_t cropTbl
[256 + 2 * MAX_NEG_CROP
];
140 int64_t gettime(void)
143 gettimeofday(&tv
,NULL
);
144 return (int64_t)tv
.tv_sec
* 1000000 + tv
.tv_usec
;
148 #define NB_ITS_SPEED 50000
150 static short idct_mmx_perm
[64];
152 static short idct_simple_mmx_perm
[64]={
153 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
154 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
155 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
156 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
157 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
158 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
159 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
160 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
163 void idct_mmx_init(void)
167 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
168 for (i
= 0; i
< 64; i
++) {
169 idct_mmx_perm
[i
] = (i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2);
170 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
174 static DCTELEM block
[64] __attribute__ ((aligned (8)));
175 static DCTELEM block1
[64] __attribute__ ((aligned (8)));
176 static DCTELEM block_org
[64] __attribute__ ((aligned (8)));
178 void dct_error(const char *name
, int is_idct
,
179 void (*fdct_func
)(DCTELEM
*block
),
180 void (*fdct_ref
)(DCTELEM
*block
), int form
, int test
)
184 int64_t err2
, ti
, ti1
, it1
;
185 int64_t sysErr
[64], sysErrMax
=0;
187 int blockSumErrMax
=0, blockSumErr
;
193 for(i
=0; i
<64; i
++) sysErr
[i
]=0;
194 for(it
=0;it
<NB_ITS
;it
++) {
200 block1
[i
] = (random() % 512) -256;
209 int num
= (random()%10)+1;
211 block1
[random()%64] = (random() % 512) -256;
214 block1
[0]= (random()%4096)-2048;
215 block1
[63]= (block1
[0]&1)^1;
219 #if 0 // simulate mismatch control
224 if((sum
&1)==0) block1
[63]^=1;
229 block_org
[i
]= block1
[i
];
231 if (form
== MMX_PERM
) {
233 block
[idct_mmx_perm
[i
]] = block1
[i
];
234 } else if (form
== MMX_SIMPLE_PERM
) {
236 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
242 #if 0 // simulate mismatch control for tested IDCT but not the ref
247 if((sum
&1)==0) block
[63]^=1;
252 emms_c(); /* for ff_mmx_idct */
254 if (form
== SCALE_PERM
) {
255 for(i
=0; i
<64; i
++) {
256 scale
= 8*(1 << (AANSCALE_BITS
+ 11)) / aanscales
[i
];
257 block
[i
] = (block
[i
] * scale
/*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS
;
265 v
= abs(block
[i
] - block1
[i
]);
269 sysErr
[i
] += block
[i
] - block1
[i
];
271 if( abs(block
[i
])>maxout
) maxout
=abs(block
[i
]);
273 if(blockSumErrMax
< blockSumErr
) blockSumErrMax
= blockSumErr
;
274 #if 0 // print different matrix pairs
278 if((i
&7)==0) printf("\n");
279 printf("%4d ", block_org
[i
]);
282 if((i
&7)==0) printf("\n");
283 printf("%4d ", block
[i
] - block1
[i
]);
288 for(i
=0; i
<64; i
++) sysErrMax
= MAX(sysErrMax
, FFABS(sysErr
[i
]));
290 #if 1 // dump systematic errors
292 if(i
%8==0) printf("\n");
293 printf("%5d ", (int)sysErr
[i
]);
298 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
299 is_idct
? "IDCT" : "DCT",
300 name
, err_inf
, (double)err2
/ NB_ITS
/ 64.0, (double)sysErrMax
/ NB_ITS
, maxout
, blockSumErrMax
);
308 block1
[i
] = (random() % 512) -256;
318 block1
[0] = (random() % 512) -256;
319 block1
[1] = (random() % 512) -256;
320 block1
[2] = (random() % 512) -256;
321 block1
[3] = (random() % 512) -256;
325 if (form
== MMX_PERM
) {
327 block
[idct_mmx_perm
[i
]] = block1
[i
];
328 } else if(form
== MMX_SIMPLE_PERM
) {
330 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
339 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
342 // memcpy(block, block1, sizeof(DCTELEM) * 64);
343 // do not memcpy especially not fastmemcpy because it does movntq !!!
347 ti1
= gettime() - ti
;
348 } while (ti1
< 1000000);
351 printf("%s %s: %0.1f kdct/s\n",
352 is_idct
? "IDCT" : "DCT",
353 name
, (double)it1
* 1000.0 / (double)ti1
);
357 static uint8_t img_dest
[64] __attribute__ ((aligned (8)));
358 static uint8_t img_dest1
[64] __attribute__ ((aligned (8)));
360 void idct248_ref(uint8_t *dest
, int linesize
, int16_t *block
)
363 static double c8
[8][8];
364 static double c4
[4][4];
365 double block1
[64], block2
[64], block3
[64];
375 s
= (i
==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
376 c8
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 8.0);
377 sum
+= c8
[i
][j
] * c8
[i
][j
];
384 s
= (i
==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
385 c4
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 4.0);
386 sum
+= c4
[i
][j
] * c4
[i
][j
];
395 block1
[8*(2*i
)+j
] = (block
[8*(2*i
)+j
] + block
[8*(2*i
+1)+j
]) * s
;
396 block1
[8*(2*i
+1)+j
] = (block
[8*(2*i
)+j
] - block
[8*(2*i
+1)+j
]) * s
;
405 sum
+= c8
[k
][j
] * block1
[8*i
+k
];
416 sum
+= c4
[k
][j
] * block2
[8*(2*k
)+i
];
417 block3
[8*(2*j
)+i
] = sum
;
422 sum
+= c4
[k
][j
] * block2
[8*(2*k
+1)+i
];
423 block3
[8*(2*j
+1)+i
] = sum
;
427 /* clamp and store the result */
435 dest
[i
* linesize
+ j
] = (int)rint(v
);
440 void idct248_error(const char *name
,
441 void (*idct248_put
)(uint8_t *dest
, int line_size
, int16_t *block
))
443 int it
, i
, it1
, ti
, ti1
, err_max
, v
;
447 /* just one test to see if code is correct (precision is less
450 for(it
=0;it
<NB_ITS
;it
++) {
452 /* XXX: use forward transform to generate values */
454 block1
[i
] = (random() % 256) - 128;
459 idct248_ref(img_dest1
, 8, block
);
463 idct248_put(img_dest
, 8, block
);
466 v
= abs((int)img_dest
[i
] - (int)img_dest1
[i
]);
468 printf("%d %d\n", img_dest
[i
], img_dest1
[i
]);
477 printf(" %3d", img_dest1
[i
*8+j
]);
486 printf(" %3d", img_dest
[i
*8+j
]);
492 printf("%s %s: err_inf=%d\n",
493 1 ? "IDCT248" : "DCT248",
499 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
502 // memcpy(block, block1, sizeof(DCTELEM) * 64);
503 // do not memcpy especially not fastmemcpy because it does movntq !!!
504 idct248_put(img_dest
, 8, block
);
507 ti1
= gettime() - ti
;
508 } while (ti1
< 1000000);
511 printf("%s %s: %0.1f kdct/s\n",
512 1 ? "IDCT248" : "DCT248",
513 name
, (double)it1
* 1000.0 / (double)ti1
);
518 printf("dct-test [-i] [<test-number>]\n"
519 "test-number 0 -> test with random matrixes\n"
520 " 1 -> test with random sparse matrixes\n"
521 " 2 -> do 3. test from mpeg4 std\n"
522 "-i test IDCT implementations\n"
523 "-4 test IDCT248 implementations\n");
526 int main(int argc
, char **argv
)
528 int test_idct
= 0, test_248_dct
= 0;
534 mm_flags
= mm_support();
536 for(i
=0;i
<256;i
++) cropTbl
[i
+ MAX_NEG_CROP
] = i
;
537 for(i
=0;i
<MAX_NEG_CROP
;i
++) {
539 cropTbl
[i
+ MAX_NEG_CROP
+ 256] = 255;
543 c
= getopt(argc
, argv
, "ih4");
560 if(optind
<argc
) test
= atoi(argv
[optind
]);
562 printf("ffmpeg DCT/IDCT test\n");
565 idct248_error("SIMPLE-C", ff_simple_idct248_put
);
567 for (i
=0;algos
[i
].name
;i
++)
568 if (algos
[i
].is_idct
== test_idct
) {
569 dct_error (algos
[i
].name
, algos
[i
].is_idct
, algos
[i
].func
, algos
[i
].ref
, algos
[i
].format
, test
);