2 * benchmark tool for fast_memcpy code from libvo
4 * NOTE: This code cannot be used on Pentium MMX / Pentium II because they contain
5 * a bug in rdtsc. On Intel processors since the P6 (PII), rdpmc should be used
6 * instead. For the PIII this is disputable: the bug seems to have been fixed, but
7 * this was not confirmed through testing.
13 #include <sys/ioctl.h>
21 #include "cpudetect.h"
/* Constants defined ahead of the template inclusions below. Their consumers
 * are not visible in this excerpt (presumably libvo/aclib_template.c reads
 * them -- TODO confirm). */
23 #define BLOCK_SIZE 4096
24 #define CONFUSION_FACTOR 0
/* Switch tested by the "#ifdef COMPILE_AMD3DNOW" further down, which guards
 * the 3DNow! inclusion of the template. */
35 #define COMPILE_AMD3DNOW
/* libvo/aclib_template.c is included once per instruction-set variant.
 * RENAME() pastes a per-variant suffix onto a name, so each inclusion can
 * produce distinctly named symbols; the benchmark calls fast_memcpy_MMX,
 * fast_memcpy_MMX2, fast_memcpy_3DNow and fast_memcpy_SSE.
 * Only HAVE_AMD3DNOW is visible among the feature macros here; it is 1 for the
 * 3DNow! variant and 0 for the others. The #undef lines that must separate
 * these redefinitions are not visible in this excerpt. */
/* Variant: MMX */
51 #define HAVE_AMD3DNOW 0
54 #define RENAME(a) a ## _MMX
55 #include "libvo/aclib_template.c"
/* Variant: MMX2 */
67 #define HAVE_AMD3DNOW 0
70 #define RENAME(a) a ## _MMX2
71 #include "libvo/aclib_template.c"
/* Variant: 3DNow! (conditional; the matching #endif is not visible here) */
74 #ifdef COMPILE_AMD3DNOW
83 #define HAVE_AMD3DNOW 1
86 #define RENAME(a) a ## _3DNow
87 #include "libvo/aclib_template.c"
/* Variant: SSE */
99 #define HAVE_AMD3DNOW 0
102 #define RENAME(a) a ## _SSE
103 #include "libvo/aclib_template.c"
/* Size in bytes of each benchmark buffer: 1024*768*2 = 1572864. It is also
 * used as the mga_vid frame size in mga_init(). The smaller value below is a
 * disabled alternative. */
106 //#define ARR_SIZE 100000
107 #define ARR_SIZE (1024*768*2)
111 #include "drivers/mga_vid.h"
/* Driver configuration filled in by mga_init() and handed to the device via
 * the MGA_VID_CONFIG ioctl. */
113 static mga_vid_config_t mga_vid_config
;
/* Address of the mmap()ed MGA frame memory; NULL until mga_init() assigns the
 * mmap() result to it. */
114 static unsigned char* frame
= NULL
;
/*
 * Opens the mga_vid device, configures it and maps its frame memory into the
 * file-scope pointer `frame`.
 *
 * Takes no parameters. The return statements, the declaration of `f`, the
 * conditions guarding the error messages and the braces are not visible in
 * this excerpt, so the meaning of the int result cannot be stated from here.
 */
117 static int mga_init(void)
/* Open the device node for reading and writing. */
119 f
= open("/dev/mga_vid", O_RDWR
);
/* Error report for a failed open(); the guarding condition is not visible. */
121 fprintf(stderr
, "Couldn't open /dev/mga_vid.\n");
/* Request a single frame of ARR_SIZE bytes in YUY2 format. */
125 mga_vid_config
.num_frames
= 1;
126 mga_vid_config
.frame_size
= ARR_SIZE
;
127 mga_vid_config
.format
= MGA_VID_FORMAT_YUY2
;
/* Colour keying off; 640x480 source, 320x200 destination at origin (0,0). */
129 mga_vid_config
.colkey_on
= 0;
130 mga_vid_config
.src_width
= 640;
131 mga_vid_config
.src_height
= 480;
132 mga_vid_config
.dest_width
= 320;
133 mga_vid_config
.dest_height
= 200;
134 mga_vid_config
.x_org
= 0;
135 mga_vid_config
.y_org
= 0;
/* Pass the interface version this file was compiled against; judging by the
 * message printed below, a failing ioctl is treated as a version mismatch. */
137 mga_vid_config
.version
= MGA_VID_VERSION
;
/* Submit the configuration; a non-zero ioctl() result is reported as an
 * error. */
138 if (ioctl(f
, MGA_VID_CONFIG
, &mga_vid_config
)) {
139 perror("Error in mga_vid_config ioctl()");
140 printf("Your mga_vid driver version is incompatible with this MPlayer version!\n");
/* Issue the MGA_VID_ON request with argument 0; its result is not checked. */
143 ioctl(f
, MGA_VID_ON
, 0);
/* Map frame_size * num_frames bytes of the device, write-only and shared,
 * starting at offset 0.
 * NOTE(review): the result is cast to (char*) but `frame` is declared as
 * unsigned char* -- the cast type does not match the destination. */
145 frame
= (char*)mmap(0, mga_vid_config
.frame_size
*mga_vid_config
.num_frames
,
146 PROT_WRITE
,MAP_SHARED
, f
, 0);
/* Error report for a failed mmap(); the guarding condition is not visible
 * (NOTE(review): confirm it compares against MAP_FAILED rather than NULL). */
148 printf("Can't mmap MGA frame.\n");
/* Disabled code: would have filled the mapped frames with 0x80. */
153 //memset(frames[0], 0x80, mga_vid_config.frame_size*mga_vid_config.num_frames);
160 // Returns current time in microseconds
// The value is tv_sec * 1000000 + tv_usec as reported by gettimeofday().
// NOTE(review): the return type is unsigned int, so the result is truncated
// on return and wraps (about every 71.6 minutes if unsigned int is 32 bits);
// presumably only differences between two nearby readings are meaningful.
// The declarations of tv and tz are not visible in this excerpt.
161 static unsigned int GetTimer(void)
166 gettimeofday(&tv
, &tz
);
// Disabled alternative that computed the time as fractional seconds.
167 //s = tv.tv_usec; s *= 0.000001; s += tv.tv_sec;
168 return tv
.tv_sec
* 1000000 + tv
.tv_usec
;
/*
 * Executes the x86 rdtsc instruction and captures its output in a 64-bit
 * local. The return statement is not visible in this excerpt; presumably
 * retval is returned.
 *
 * The "memory" clobber together with `volatile` prevents the compiler from
 * moving memory accesses across the instruction or dropping it.
 *
 * NOTE(review): the "=A" output constraint names the edx:eax register pair
 * only on 32-bit x86. On x86-64 it selects a single register, so the upper
 * half of the counter would be lost -- confirm the build target.
 */
171 static inline unsigned long long int read_tsc(void)
173 unsigned long long int retval
;
174 __asm__
volatile ("rdtsc":"=A" (retval
)::"memory");
/* Two buffers of ARR_SIZE bytes each, declared with a 4096-byte alignment
 * attribute. How they relate to the marr1/marr2 pointers used by the
 * benchmark loops is not visible in this excerpt. */
178 unsigned char __attribute__((aligned(4096)))arr1
[ARR_SIZE
], arr2
[ARR_SIZE
];
/*
 * Benchmark driver body. The enclosing function header, the declarations of
 * i and t, the assignments to marr1/marr2, and the code that samples
 * t, v1 and v2 around each loop are not visible in this excerpt.
 *
 * Each section below copies ARR_SIZE - 16 bytes from marr2 to marr1 100 times
 * with one memcpy implementation and then prints one result line.
 */
/* v1/v2: values whose difference is printed as "CPU clocks" (presumably
 * read_tsc() samples taken before and after each loop -- TODO confirm). */
182 unsigned long long int v1
, v2
;
/* Destination (marr1) and source (marr2) of every copy below. */
183 unsigned char *marr1
, *marr2
;
/* Initialise the first ARR_SIZE - 16 bytes of both buffers with the index;
 * the elements are unsigned char, so only the low byte of i is stored. */
194 for (i
= 0; i
< ARR_SIZE
- 16; i
++)
195 marr1
[i
] = marr2
[i
] = i
;
/* --- libc memcpy: 100 copies --- */
199 for (i
= 0; i
< 100; i
++)
200 memcpy(marr1
, marr2
, ARR_SIZE
- 16);
/* Output: clock delta, elapsed t (printed as microseconds), copies per second
 * (100 copies in t us = 1e8 / t) and throughput in MB/s per the formula below.
 * The formula uses ARR_SIZE although each copy moves ARR_SIZE - 16 bytes.
 * NOTE(review): t is printed with %d -- confirm its declared type is int
 * (GetTimer() returns unsigned int). */
203 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
204 printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2
-v1
, t
,
205 100000000.0f
/(float)t
, (float)ARR_SIZE
*95.36743f
/(float)t
);
/* --- MMX variant: 100 copies, same report format --- */
210 for (i
= 0; i
< 100; i
++)
211 fast_memcpy_MMX(marr1
, marr2
, ARR_SIZE
- 16);
214 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
215 printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2
-v1
, t
,
216 100000000.0f
/(float)t
, (float)ARR_SIZE
*95.36743f
/(float)t
);
/* --- 3DNow! variant: 100 copies, same report format ---
 * NOTE(review): fast_memcpy_3DNow is only generated under
 * #ifdef COMPILE_AMD3DNOW; whether this section is guarded the same way is
 * not visible here. */
222 for (i
= 0; i
< 100; i
++)
223 fast_memcpy_3DNow(marr1
, marr2
, ARR_SIZE
- 16);
226 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
227 printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2
-v1
, t
,
228 100000000.0f
/(float)t
, (float)ARR_SIZE
*95.36743f
/(float)t
);
/* --- MMX2 variant: 100 copies, same report format --- */
234 for (i
= 0; i
< 100; i
++)
235 fast_memcpy_MMX2(marr1
, marr2
, ARR_SIZE
- 16);
238 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
239 printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2
-v1
, t
,
240 100000000.0f
/(float)t
, (float)ARR_SIZE
*95.36743f
/(float)t
);
/* --- SSE variant: 100 copies, same report format --- */
246 for (i
= 0; i
< 100; i
++)
247 fast_memcpy_SSE(marr1
, marr2
, ARR_SIZE
- 16);
250 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
251 printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2
-v1
, t
,
252 100000000.0f
/(float)t
, (float)ARR_SIZE
*95.36743f
/(float)t
);