/*
 * Benchmark tool for the fast_memcpy code from libvo.
 *
 * NOTE: This code cannot be used on Pentium MMX / Pentium II because they
 * contain a bug in rdtsc. For Intel processors since P6 (PII), rdpmc should
 * be used instead. For PIII it is disputable: the bug seems to have been
 * fixed, but this was not confirmed through testing.
 */
13 #include <sys/ioctl.h>
21 #include "cpudetect.h"
/*
 * Build-time configuration, defined before the first inclusion of
 * libvo/aclib_template.c. Neither macro is referenced anywhere else in the
 * visible code; presumably the template reads them -- TODO confirm.
 */
23 #define BLOCK_SIZE 4096
24 #define CONFUSION_FACTOR 0
/* Tested by the #ifdef further down that precedes the 3DNow! instantiation. */
35 #define COMPILE_AMD3DNOW
/*
 * libvo/aclib_template.c is included four times. Before each inclusion
 * RENAME() is redefined so that it pastes a different suffix (_MMX, _MMX2,
 * _3DNow, _SSE) onto its argument, and HAVE_AMD3DNOW is set to 1 only for
 * the 3DNow! pass. The benchmark code later calls fast_memcpy_MMX,
 * fast_memcpy_MMX2, fast_memcpy_3DNow and fast_memcpy_SSE, so the template
 * presumably defines RENAME(fast_memcpy) -- confirm against the template.
 * NOTE(review): the #undef lines that must separate these redefinitions,
 * and the #endif matching the #ifdef below, are not visible here.
 */
51 #define HAVE_AMD3DNOW 0
54 #define RENAME(a) a ## _MMX
55 #include "libvo/aclib_template.c"
67 #define HAVE_AMD3DNOW 0
70 #define RENAME(a) a ## _MMX2
71 #include "libvo/aclib_template.c"
74 #ifdef COMPILE_AMD3DNOW
83 #define HAVE_AMD3DNOW 1
86 #define RENAME(a) a ## _3DNow
87 #include "libvo/aclib_template.c"
99 #define HAVE_AMD3DNOW 0
102 #define RENAME(a) a ## _SSE
103 #include "libvo/aclib_template.c"
/*
 * Size in bytes of each benchmark buffer (1024*768*2 = 1572864); also used
 * as the mga_vid frame size in mga_init().
 */
106 //#define ARR_SIZE 100000
107 #define ARR_SIZE (1024*768*2)
111 #include "drivers/mga_vid.h"
/* Driver configuration that mga_init() fills in and passes to the MGA_VID_CONFIG ioctl. */
113 static mga_vid_config_t mga_vid_config
;
/* Start of the mapping created by mmap() in mga_init(); NULL until then. */
114 static unsigned char* frame
= NULL
;
/*
 * mga_init: opens /dev/mga_vid, sends a fixed configuration to the driver,
 * switches the device on and maps its frame memory into `frame`.
 *
 * NOTE(review): several lines of this function (braces, the conditions that
 * guard the error messages, and the return statements) are not visible here,
 * so the exact return values and error paths cannot be documented from this
 * view.
 */
117 static int mga_init(void)
/* Open the device node for reading and writing; the descriptor is kept in f. */
119 f
= open("/dev/mga_vid", O_RDWR
);
/* Message reported on stderr for a failed open (guarding condition not shown). */
121 fprintf(stderr
, "Couldn't open /dev/mga_vid.\n");
/* Request a single frame of ARR_SIZE bytes in YUY2 format. */
125 mga_vid_config
.num_frames
= 1;
126 mga_vid_config
.frame_size
= ARR_SIZE
;
127 mga_vid_config
.format
= MGA_VID_FORMAT_YUY2
;
/* Colour keying off; 640x480 source shown as 320x200 at origin (0, 0). */
129 mga_vid_config
.colkey_on
= 0;
130 mga_vid_config
.src_width
= 640;
131 mga_vid_config
.src_height
= 480;
132 mga_vid_config
.dest_width
= 320;
133 mga_vid_config
.dest_height
= 200;
134 mga_vid_config
.x_org
= 0;
135 mga_vid_config
.y_org
= 0;
/* Pass the version constant from the header along with the configuration. */
137 mga_vid_config
.version
= MGA_VID_VERSION
;
/* A non-zero ioctl result is reported with perror() plus a hint on stdout. */
138 if (ioctl(f
, MGA_VID_CONFIG
, &mga_vid_config
)) {
139 perror("Error in mga_vid_config ioctl()");
140 printf("Your mga_vid driver version is incompatible with this MPlayer version!\n");
/* Switch the device on; the result of this ioctl is ignored. */
143 ioctl(f
, MGA_VID_ON
, 0);
/*
 * Map frame_size * num_frames bytes of the device, write-only and shared,
 * starting at offset 0.
 * NOTE(review): the result is cast to char* although `frame` is declared
 * unsigned char*. Also, mmap() signals failure with MAP_FAILED rather than
 * NULL -- confirm that the check guarding the message below tests for
 * MAP_FAILED.
 */
145 frame
= (char*)mmap(0, mga_vid_config
.frame_size
*mga_vid_config
.num_frames
,
146 PROT_WRITE
,MAP_SHARED
, f
, 0);
148 printf("Can't mmap MGA frame.\n");
/* Disabled buffer clear; note that it names `frames[0]`, not the `frame` pointer used above. */
153 //memset(frames[0], 0x80, mga_vid_config.frame_size*mga_vid_config.num_frames);
/*
 * Returns the current wall-clock time in microseconds.
 *
 * The value is reduced to unsigned int, so where unsigned int is 32 bits it
 * wraps about every 71.6 minutes (2^32 us). Because the arithmetic is
 * modular, the difference between two readings taken less than one wrap
 * apart is still the correct elapsed time.
 */
static unsigned int GetTimer(void)
{
    struct timeval tv;

    /* The timezone argument is obsolete; POSIX leaves the behaviour
     * unspecified when it is not a null pointer. */
    gettimeofday(&tv, NULL);

    /* Compute in unsigned int: multiplying tv_sec as a signed long overflows
     * (undefined behaviour) on targets where long is 32 bits. */
    return (unsigned int)tv.tv_sec * 1000000u + (unsigned int)tv.tv_usec;
}
/*
 * Reads the CPU time-stamp counter with the x86 rdtsc instruction and
 * returns it as one 64-bit value.
 */
static inline unsigned long long int read_tsc(void)
{
    unsigned int lo, hi;

    /*
     * rdtsc returns the counter split across EDX (high half) and EAX (low
     * half). The "=A" constraint names that register pair only on 32-bit
     * x86; on x86-64 it selects a single register and the high half is lost,
     * so the two halves are captured separately here. The "memory" clobber
     * keeps the compiler from moving memory accesses across the read.
     */
    __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi) : : "memory");

    return ((unsigned long long int)hi << 32) | lo;
}
178 unsigned char __attribute__((aligned(4096)))arr1
[ARR_SIZE
], arr2
[ARR_SIZE
];
/* v1 and v2 are printed by testblock() as the difference v2-v1, labelled "CPU clocks". */
182 unsigned long long int v1
, v2
;
/*
 * Buffer pointers handed to every benchmarked copy routine. How they are
 * derived is not visible here -- presumably from arr1/arr2; confirm.
 */
183 unsigned char *marr1
, *marr2
;
/*
 * Fill the first ARR_SIZE - 16 bytes of both buffers with the same pattern:
 * the loop index converted to unsigned char on assignment.
 * NOTE(review): the reason for leaving the last 16 bytes out is not visible
 * here.
 */
194 for (i
= 0; i
< ARR_SIZE
- 16; i
++)
195 marr1
[i
] = marr2
[i
] = i
;
/*
 * testblock(func, name): benchmark one copy routine.
 *
 * Calls func(marr1, marr2, ARR_SIZE - 16) 100 times, then prints one line
 * prefixed with `name` (which must be a string literal, as it is
 * concatenated with the format string). The line shows v2-v1 as CPU clocks,
 * the elapsed time t in microseconds, the number of copies per second
 * (100 copies / t us = 1e8 / t, printed as "fps") and the throughput in MB/s.
 *
 * NOTE(review): the statements that take the starting time and set v1 and
 * v2 are not visible here; presumably they call GetTimer() and read_tsc()
 * around the loop -- confirm. `t` is printed with %d, so it must have type
 * int; its declaration is not visible either.
 */
197 #define testblock(func, name) \
200 for (i = 0; i < 100; i++) \
201 func(marr1, marr2, ARR_SIZE - 16); \
203 t = GetTimer() - t; \
204 /* ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t */ \
205 printf(name "CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, \
206 100000000.0f / (float)t, (float)ARR_SIZE*95.36743f / (float)t);
/*
 * Benchmark libc memcpy first as the baseline, then each fast_memcpy variant
 * produced by the RENAME / #include "libvo/aclib_template.c" pairs above.
 * NOTE(review): the lines between these calls are not visible here;
 * presumably each variant is guarded by a CPU capability check
 * (cpudetect.h is included) -- confirm.
 */
208 testblock(memcpy
, "libc: ");
211 testblock(fast_memcpy_MMX
, "MMX: ");
215 testblock(fast_memcpy_3DNow
, "3DNow!: ");
219 testblock(fast_memcpy_MMX2
, "MMX2: ");
223 testblock(fast_memcpy_SSE
, "SSE: ");