additional protection from segmentation faults and memory access errors by
[ffmpeg-lucabe.git] / libavcodec / dct-test.c
blobdd8099b2e29bfdc7e0097eb4246840b8eb10c7b3
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file dct-test.c
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
35 #include "dsputil.h"
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
41 #ifndef MAX
42 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
43 #endif
45 #undef printf
46 #undef random
48 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
50 /* reference fdct/idct */
51 extern void fdct(DCTELEM *block);
52 extern void idct(DCTELEM *block);
53 extern void ff_idct_xvid_mmx(DCTELEM *block);
54 extern void ff_idct_xvid_mmx2(DCTELEM *block);
55 extern void init_fdct();
57 extern void ff_mmx_idct(DCTELEM *data);
58 extern void ff_mmxext_idct(DCTELEM *data);
60 extern void odivx_idct_c (short *block);
62 // BFIN
63 extern void ff_bfin_idct (DCTELEM *block) ;
64 extern void ff_bfin_fdct (DCTELEM *block) ;
66 // ALTIVEC
67 extern void fdct_altivec (DCTELEM *block);
68 //extern void idct_altivec (DCTELEM *block);?? no routine
71 struct algo {
72 char *name;
73 enum { FDCT, IDCT } is_idct;
74 void (* func) (DCTELEM *block);
75 void (* ref) (DCTELEM *block);
76 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
81 #else
82 #define FAAN_SCALE NO_PERM
83 #endif
85 #define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
88 struct algo algos[] = {
89 DCT_ERROR( "REF-DBL", 0, fdct, fdct, NO_PERM),
90 DCT_ERROR("FAAN", 0, ff_faandct, fdct, FAAN_SCALE),
91 DCT_ERROR("FAANI", 1, ff_faanidct, idct, NO_PERM),
92 DCT_ERROR("IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM),
93 DCT_ERROR("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM),
94 DCT_ERROR("REF-DBL", 1, idct, idct, NO_PERM),
95 DCT_ERROR("INT", 1, j_rev_dct, idct, MMX_PERM),
96 DCT_ERROR("SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM),
98 #ifdef HAVE_MMX
99 DCT_ERROR("MMX", 0, ff_fdct_mmx, fdct, NO_PERM),
100 #ifdef HAVE_MMX2
101 DCT_ERROR("MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM),
102 #endif
104 #ifdef CONFIG_GPL
105 DCT_ERROR("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM),
106 DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM),
107 #endif
108 DCT_ERROR("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
109 DCT_ERROR("XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM),
110 DCT_ERROR("XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM),
111 #endif
113 #ifdef HAVE_ALTIVEC
114 DCT_ERROR("altivecfdct", 0, fdct_altivec, fdct, NO_PERM),
115 #endif
117 #ifdef ARCH_BFIN
118 DCT_ERROR("BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM),
119 DCT_ERROR("BFINidct", 1, ff_bfin_idct, idct, NO_PERM),
120 #endif
122 { 0 }
125 #define AANSCALE_BITS 12
126 static const unsigned short aanscales[64] = {
127 /* precomputed values scaled up by 14 bits */
128 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
129 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
130 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
131 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
132 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
133 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
134 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
135 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
138 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
140 int64_t gettime(void)
142 struct timeval tv;
143 gettimeofday(&tv,NULL);
144 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
147 #define NB_ITS 20000
148 #define NB_ITS_SPEED 50000
150 static short idct_mmx_perm[64];
152 static short idct_simple_mmx_perm[64]={
153 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
154 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
155 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
156 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
157 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
158 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
159 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
160 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
163 void idct_mmx_init(void)
165 int i;
167 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
168 for (i = 0; i < 64; i++) {
169 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
170 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
174 static DCTELEM block[64] __attribute__ ((aligned (8)));
175 static DCTELEM block1[64] __attribute__ ((aligned (8)));
176 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
178 void dct_error(const char *name, int is_idct,
179 void (*fdct_func)(DCTELEM *block),
180 void (*fdct_ref)(DCTELEM *block), int form, int test)
182 int it, i, scale;
183 int err_inf, v;
184 int64_t err2, ti, ti1, it1;
185 int64_t sysErr[64], sysErrMax=0;
186 int maxout=0;
187 int blockSumErrMax=0, blockSumErr;
189 srandom(0);
191 err_inf = 0;
192 err2 = 0;
193 for(i=0; i<64; i++) sysErr[i]=0;
194 for(it=0;it<NB_ITS;it++) {
195 for(i=0;i<64;i++)
196 block1[i] = 0;
197 switch(test){
198 case 0:
199 for(i=0;i<64;i++)
200 block1[i] = (random() % 512) -256;
201 if (is_idct){
202 fdct(block1);
204 for(i=0;i<64;i++)
205 block1[i]>>=3;
207 break;
208 case 1:{
209 int num= (random()%10)+1;
210 for(i=0;i<num;i++)
211 block1[random()%64] = (random() % 512) -256;
212 }break;
213 case 2:
214 block1[0]= (random()%4096)-2048;
215 block1[63]= (block1[0]&1)^1;
216 break;
219 #if 0 // simulate mismatch control
220 { int sum=0;
221 for(i=0;i<64;i++)
222 sum+=block1[i];
224 if((sum&1)==0) block1[63]^=1;
226 #endif
228 for(i=0; i<64; i++)
229 block_org[i]= block1[i];
231 if (form == MMX_PERM) {
232 for(i=0;i<64;i++)
233 block[idct_mmx_perm[i]] = block1[i];
234 } else if (form == MMX_SIMPLE_PERM) {
235 for(i=0;i<64;i++)
236 block[idct_simple_mmx_perm[i]] = block1[i];
238 } else {
239 for(i=0; i<64; i++)
240 block[i]= block1[i];
242 #if 0 // simulate mismatch control for tested IDCT but not the ref
243 { int sum=0;
244 for(i=0;i<64;i++)
245 sum+=block[i];
247 if((sum&1)==0) block[63]^=1;
249 #endif
251 fdct_func(block);
252 emms_c(); /* for ff_mmx_idct */
254 if (form == SCALE_PERM) {
255 for(i=0; i<64; i++) {
256 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
257 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
261 fdct_ref(block1);
263 blockSumErr=0;
264 for(i=0;i<64;i++) {
265 v = abs(block[i] - block1[i]);
266 if (v > err_inf)
267 err_inf = v;
268 err2 += v * v;
269 sysErr[i] += block[i] - block1[i];
270 blockSumErr += v;
271 if( abs(block[i])>maxout) maxout=abs(block[i]);
273 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
274 #if 0 // print different matrix pairs
275 if(blockSumErr){
276 printf("\n");
277 for(i=0; i<64; i++){
278 if((i&7)==0) printf("\n");
279 printf("%4d ", block_org[i]);
281 for(i=0; i<64; i++){
282 if((i&7)==0) printf("\n");
283 printf("%4d ", block[i] - block1[i]);
286 #endif
288 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
290 #if 1 // dump systematic errors
291 for(i=0; i<64; i++){
292 if(i%8==0) printf("\n");
293 printf("%5d ", (int)sysErr[i]);
295 printf("\n");
296 #endif
298 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
299 is_idct ? "IDCT" : "DCT",
300 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
301 #if 1 //Speed test
302 /* speed test */
303 for(i=0;i<64;i++)
304 block1[i] = 0;
305 switch(test){
306 case 0:
307 for(i=0;i<64;i++)
308 block1[i] = (random() % 512) -256;
309 if (is_idct){
310 fdct(block1);
312 for(i=0;i<64;i++)
313 block1[i]>>=3;
315 break;
316 case 1:{
317 case 2:
318 block1[0] = (random() % 512) -256;
319 block1[1] = (random() % 512) -256;
320 block1[2] = (random() % 512) -256;
321 block1[3] = (random() % 512) -256;
322 }break;
325 if (form == MMX_PERM) {
326 for(i=0;i<64;i++)
327 block[idct_mmx_perm[i]] = block1[i];
328 } else if(form == MMX_SIMPLE_PERM) {
329 for(i=0;i<64;i++)
330 block[idct_simple_mmx_perm[i]] = block1[i];
331 } else {
332 for(i=0; i<64; i++)
333 block[i]= block1[i];
336 ti = gettime();
337 it1 = 0;
338 do {
339 for(it=0;it<NB_ITS_SPEED;it++) {
340 for(i=0; i<64; i++)
341 block[i]= block1[i];
342 // memcpy(block, block1, sizeof(DCTELEM) * 64);
343 // do not memcpy especially not fastmemcpy because it does movntq !!!
344 fdct_func(block);
346 it1 += NB_ITS_SPEED;
347 ti1 = gettime() - ti;
348 } while (ti1 < 1000000);
349 emms_c();
351 printf("%s %s: %0.1f kdct/s\n",
352 is_idct ? "IDCT" : "DCT",
353 name, (double)it1 * 1000.0 / (double)ti1);
354 #endif
357 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
358 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
360 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
362 static int init;
363 static double c8[8][8];
364 static double c4[4][4];
365 double block1[64], block2[64], block3[64];
366 double s, sum, v;
367 int i, j, k;
369 if (!init) {
370 init = 1;
372 for(i=0;i<8;i++) {
373 sum = 0;
374 for(j=0;j<8;j++) {
375 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
376 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
377 sum += c8[i][j] * c8[i][j];
381 for(i=0;i<4;i++) {
382 sum = 0;
383 for(j=0;j<4;j++) {
384 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
385 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
386 sum += c4[i][j] * c4[i][j];
391 /* butterfly */
392 s = 0.5 * sqrt(2.0);
393 for(i=0;i<4;i++) {
394 for(j=0;j<8;j++) {
395 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
396 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
400 /* idct8 on lines */
401 for(i=0;i<8;i++) {
402 for(j=0;j<8;j++) {
403 sum = 0;
404 for(k=0;k<8;k++)
405 sum += c8[k][j] * block1[8*i+k];
406 block2[8*i+j] = sum;
410 /* idct4 */
411 for(i=0;i<8;i++) {
412 for(j=0;j<4;j++) {
413 /* top */
414 sum = 0;
415 for(k=0;k<4;k++)
416 sum += c4[k][j] * block2[8*(2*k)+i];
417 block3[8*(2*j)+i] = sum;
419 /* bottom */
420 sum = 0;
421 for(k=0;k<4;k++)
422 sum += c4[k][j] * block2[8*(2*k+1)+i];
423 block3[8*(2*j+1)+i] = sum;
427 /* clamp and store the result */
428 for(i=0;i<8;i++) {
429 for(j=0;j<8;j++) {
430 v = block3[8*i+j];
431 if (v < 0)
432 v = 0;
433 else if (v > 255)
434 v = 255;
435 dest[i * linesize + j] = (int)rint(v);
440 void idct248_error(const char *name,
441 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
443 int it, i, it1, ti, ti1, err_max, v;
445 srandom(0);
447 /* just one test to see if code is correct (precision is less
448 important here) */
449 err_max = 0;
450 for(it=0;it<NB_ITS;it++) {
452 /* XXX: use forward transform to generate values */
453 for(i=0;i<64;i++)
454 block1[i] = (random() % 256) - 128;
455 block1[0] += 1024;
457 for(i=0; i<64; i++)
458 block[i]= block1[i];
459 idct248_ref(img_dest1, 8, block);
461 for(i=0; i<64; i++)
462 block[i]= block1[i];
463 idct248_put(img_dest, 8, block);
465 for(i=0;i<64;i++) {
466 v = abs((int)img_dest[i] - (int)img_dest1[i]);
467 if (v == 255)
468 printf("%d %d\n", img_dest[i], img_dest1[i]);
469 if (v > err_max)
470 err_max = v;
472 #if 0
473 printf("ref=\n");
474 for(i=0;i<8;i++) {
475 int j;
476 for(j=0;j<8;j++) {
477 printf(" %3d", img_dest1[i*8+j]);
479 printf("\n");
482 printf("out=\n");
483 for(i=0;i<8;i++) {
484 int j;
485 for(j=0;j<8;j++) {
486 printf(" %3d", img_dest[i*8+j]);
488 printf("\n");
490 #endif
492 printf("%s %s: err_inf=%d\n",
493 1 ? "IDCT248" : "DCT248",
494 name, err_max);
496 ti = gettime();
497 it1 = 0;
498 do {
499 for(it=0;it<NB_ITS_SPEED;it++) {
500 for(i=0; i<64; i++)
501 block[i]= block1[i];
502 // memcpy(block, block1, sizeof(DCTELEM) * 64);
503 // do not memcpy especially not fastmemcpy because it does movntq !!!
504 idct248_put(img_dest, 8, block);
506 it1 += NB_ITS_SPEED;
507 ti1 = gettime() - ti;
508 } while (ti1 < 1000000);
509 emms_c();
511 printf("%s %s: %0.1f kdct/s\n",
512 1 ? "IDCT248" : "DCT248",
513 name, (double)it1 * 1000.0 / (double)ti1);
516 void help(void)
518 printf("dct-test [-i] [<test-number>]\n"
519 "test-number 0 -> test with random matrixes\n"
520 " 1 -> test with random sparse matrixes\n"
521 " 2 -> do 3. test from mpeg4 std\n"
522 "-i test IDCT implementations\n"
523 "-4 test IDCT248 implementations\n");
526 int main(int argc, char **argv)
528 int test_idct = 0, test_248_dct = 0;
529 int c,i;
530 int test=1;
532 init_fdct();
533 idct_mmx_init();
534 mm_flags = mm_support();
536 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
537 for(i=0;i<MAX_NEG_CROP;i++) {
538 cropTbl[i] = 0;
539 cropTbl[i + MAX_NEG_CROP + 256] = 255;
542 for(;;) {
543 c = getopt(argc, argv, "ih4");
544 if (c == -1)
545 break;
546 switch(c) {
547 case 'i':
548 test_idct = 1;
549 break;
550 case '4':
551 test_248_dct = 1;
552 break;
553 default :
554 case 'h':
555 help();
556 return 0;
560 if(optind <argc) test= atoi(argv[optind]);
562 printf("ffmpeg DCT/IDCT test\n");
564 if (test_248_dct) {
565 idct248_error("SIMPLE-C", ff_simple_idct248_put);
566 } else {
567 for (i=0;algos[i].name;i++)
568 if (algos[i].is_idct == test_idct) {
569 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
572 return 0;