4 * (c)Copyright 2003 Matthew Dillon. This code is hereby placed in the public
7 * Attempt to figure out the L1 and L2 cache sizes and measure memory
8 * bandwidth for the L1 and L2 cache and for non-cache memory.
10 * $DragonFly: src/test/sysperf/mbwtest.c,v 1.1 2003/11/13 07:10:36 dillon Exp $
20 #define MAXBYTES (16*1024*1024)
22 static int bandwidth_test(char *buf
, int loops
, int bytes
, char *msg
);
23 static void start_timing(void);
24 static int stop_timing(char *str
, long long bytes
);
27 main(int ac
, char **av
)
42 buf
= malloc(MAXBYTES
* 2);
43 bzero(buf
, MAXBYTES
* 2);
46 * Get a baseline for 1/4 second L1 cache timing maximizing the number
47 * of loops. The minimum L1 cache size is 4K.
50 us1
= bandwidth_test(buf
, 1000, 4096, NULL
); /* uS per 1000 loops */
51 loops
= 1000000LL * 1000 / 4 / us1
; /* loops for 1/4 sec */
52 count1
= loops
* 4096LL;
54 us1
= bandwidth_test(buf
, loops
, 4096, NULL
); /* best case timing */
55 printf("."); fflush(stdout
); usleep(1000000 / 4);
58 * Search for the L1 cache size. Look for a 20% difference in bandwidth
62 us1
= bandwidth_test(buf
, count1
/ 4096 + 20, 4096, NULL
);
63 for (bytes1
= 8192; bytes1
< MAXBYTES
; bytes1
<<= 1) {
65 us2
= bandwidth_test(buf
, count1
/ bytes1
+ 20, bytes1
, NULL
);
66 if (us2
> us1
+ us1
/ 5)
69 bytes1
>>= 1; /* actual L1 cache size */
70 count2
= count1
* us1
/ us2
;
71 printf("."); fflush(stdout
); usleep(1000000 / 4);
76 us1
= bandwidth_test(buf
, count2
/ bytes2
+ 20, bytes2
, NULL
);
77 for (bytes2
<<= 1; bytes2
< MAXBYTES
; bytes2
<<= 1) {
79 us2
= bandwidth_test(buf
, count2
/ bytes2
+ 20, bytes2
, NULL
);
80 if (us2
> us1
+ us1
/ 5)
83 count3
= count2
* us1
/ us2
;
84 bytes2
>>= 1; /* actual L2 cache size */
89 us1
= bandwidth_test(buf
, count3
/ bytes3
+ 20, bytes3
, NULL
);
90 for (bytes3
<<= 1; bytes3
< MAXBYTES
; bytes3
<<= 1) {
92 us2
= bandwidth_test(buf
, count3
/ bytes3
+ 20, bytes3
, NULL
);
93 if (us2
> us1
+ us1
/ 5)
96 count4
= count3
* us1
/ us2
;
97 bytes3
>>= 1; /* actual L3 cache size */
100 * Final run to generate output
102 printf("\nL1 cache size: %d\n", bytes1
);
104 if (bytes2
== MAXBYTES
)
105 printf("L2 cache size: No L2 cache found\n");
107 printf("L2 cache size: %d\n", bytes2
);
109 if (bytes3
== MAXBYTES
)
110 printf("L3 cache size: No L3 cache found\n");
112 printf("L3 cache size: %d\n", bytes3
);
116 bandwidth_test(buf
, count1
/ bytes1
+ 20, bytes1
, "L1 cache bandwidth");
117 if (bytes2
!= MAXBYTES
) {
119 bandwidth_test(buf
, count2
/ bytes2
+ 20, bytes2
,
120 "L2 cache bandwidth");
122 if (bytes3
!= MAXBYTES
) {
124 bandwidth_test(buf
, count3
/ bytes3
+ 20, bytes3
,
125 "L3 cache bandwidth");
129 * Set bytes2 to exceed the L2 cache size
131 bytes4
= bytes3
<< 1;
132 if (bytes4
< MAXBYTES
)
135 bandwidth_test(buf
, count4
/ bytes4
+ 20, bytes4
, "non-cache bandwidth");
144 bandwidth_test(char *buf
, int loops
, int bytes
, char *msg
)
153 for (j
= 0; j
< loops
; ++j
) {
154 for (bptr
= buf
; bptr
< lptr
; bptr
+= 32) {
155 v
= *(volatile int *)(bptr
+ 0);
156 v
= *(volatile int *)(bptr
+ 4);
157 v
= *(volatile int *)(bptr
+ 8);
158 v
= *(volatile int *)(bptr
+ 12);
159 v
= *(volatile int *)(bptr
+ 16);
160 v
= *(volatile int *)(bptr
+ 20);
161 v
= *(volatile int *)(bptr
+ 24);
162 v
= *(volatile int *)(bptr
+ 28);
165 us
= stop_timing(msg
, (long long)bytes
* loops
);
173 gettimeofday(&tv1
, NULL
);
178 stop_timing(char *str
, long long bytes
)
182 gettimeofday(&tv2
, NULL
);
184 us
= tv2
.tv_usec
+ 1000000 - tv1
.tv_usec
+
185 (tv2
.tv_sec
- tv1
.tv_sec
- 1) * 1000000;
187 printf("%s: %4.2f Mbytes/sec\n",
189 (double)bytes
* 1000000.0 / ((double)us
* 1024.0 * 1024.0));