- Add lwkt_serialize_adaptive_enter(9), it is same as lwkt_serialize_enter(9)
[dragonfly.git] / test / sysperf / mbwtest.c
blob75a2756ea313fc495030237d9101dafd7f49c267
/*
 * MBWTEST.C
 *
 * (c)Copyright 2003 Matthew Dillon.  This code is hereby placed in the public
 * domain.
 *
 * Attempt to figure out the L1 and L2 cache sizes and measure memory
 * bandwidth for the L1 and L2 cache and for non-cache memory.
 *
 * $DragonFly: src/test/sysperf/mbwtest.c,v 1.1 2003/11/13 07:10:36 dillon Exp $
 */
13 #include <sys/file.h>
14 #include <sys/time.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <unistd.h>
/*
 * Upper bound (in bytes) for the cache-size search loops in main().  The
 * test buffer is allocated at twice this size.
 */
#define MAXBYTES	(16 * 1024 * 1024)

/* Forward declarations for the file-local helpers. */
static int	bandwidth_test(char *buf, int loops, int bytes, char *msg);
static void	start_timing(void);
static int	stop_timing(char *str, long long bytes);
25 int
26 main(int ac, char **av)
28 char *buf;
29 int loops;
30 int us1;
31 int us2;
32 long long count1;
33 long long count2;
34 long long count3;
35 int bytes1;
36 int bytes2;
37 int bytes3;
39 buf = malloc(MAXBYTES * 2);
40 bzero(buf, MAXBYTES * 2);
43 * Get a baseline for 1/4 second L1 cache timing maximizing the number
44 * of loops. The minimum L1 cache size is 4K.
46 start_timing();
47 us1 = bandwidth_test(buf, 1000, 4096, NULL); /* uS per 1000 loops */
48 loops = 1000000LL * 1000 / 4 / us1; /* loops for 1/4 sec */
49 count1 = loops * 4096LL;
50 start_timing();
51 us1 = bandwidth_test(buf, loops, 4096, NULL); /* best case timing */
52 printf("."); fflush(stdout); usleep(1000000 / 4);
55 * Search for the L1 cache size. Look for a 20% difference in bandwidth
57 bzero(buf, 4096);
58 start_timing();
59 us1 = bandwidth_test(buf, count1 / 4096 + 20, 4096, NULL);
60 for (bytes1 = 8192; bytes1 < MAXBYTES; bytes1 <<= 1) {
61 start_timing();
62 us2 = bandwidth_test(buf, count1 / bytes1 + 20, bytes1, NULL);
63 if (us2 > us1 + us1 / 5)
64 break;
66 bytes1 >>= 1; /* actual L1 cache size */
67 count2 = count1 * us1 / us2;
68 printf("."); fflush(stdout); usleep(1000000 / 4);
70 bytes2 = bytes1 << 1;
71 bzero(buf, bytes2);
72 start_timing();
73 us1 = bandwidth_test(buf, count2 / bytes2 + 20, bytes2, NULL);
74 for (bytes2 <<= 1; bytes2 < MAXBYTES; bytes2 <<= 1) {
75 start_timing();
76 us2 = bandwidth_test(buf, count2 / bytes2 + 20, bytes2, NULL);
77 if (us2 > us1 + us1 / 5)
78 break;
80 count3 = count2 * us1 / us2;
81 bytes2 >>= 1; /* actual L2 cache size */
84 * Final run to generate output
86 printf("\nL1 cache size: %d\n", bytes1);
87 if (bytes2 == MAXBYTES)
88 printf("L2 cache size: No L2 cache found\n");
89 else
90 printf("L2 cache size: %d\n", bytes2);
91 sleep(1);
92 start_timing();
93 bandwidth_test(buf, count1 / bytes1 + 20, bytes1, "L1 cache bandwidth");
94 if (bytes2 != MAXBYTES) {
95 start_timing();
96 bandwidth_test(buf, count2 / bytes2 + 20, bytes2,
97 "L2 cache bandwidth");
101 * Set bytes2 to exceed the L2 cache size
103 bytes2 <<= 1;
104 if (bytes2 < MAXBYTES)
105 bytes2 <<= 1;
106 start_timing();
107 bandwidth_test(buf, count3 / bytes2 + 20, bytes2, "non-cache bandwidth");
108 return(0);
/*
 * Timestamps bracketing one measurement: tv1 is filled in by
 * start_timing() and tv2 by stop_timing().
 */
struct timeval tv1, tv2;
/*
 * Read the first `bytes' bytes of buf `loops' times, then hand the total
 * byte count to stop_timing().
 *
 * Each inner step issues eight volatile int loads at 4-byte offsets and
 * advances 32 bytes.  If `bytes' is not a multiple of 32 the final step
 * reads past buf + bytes.
 *
 * The caller must call start_timing() first.  `msg' is passed through to
 * stop_timing() unchanged (NULL suppresses its output).  Returns the value
 * returned by stop_timing().
 */
static int
bandwidth_test(char *buf, int loops, int bytes, char *msg)
{
    char *bptr;
    char *lptr;
    int v = 0;
    int j;
    int us;

    lptr = buf + bytes;
    for (j = 0; j < loops; ++j) {
	for (bptr = buf; bptr < lptr; bptr += 32) {
	    /* volatile accesses keep the compiler from eliding the loads */
	    v = *(volatile int *)(bptr + 0);
	    v = *(volatile int *)(bptr + 4);
	    v = *(volatile int *)(bptr + 8);
	    v = *(volatile int *)(bptr + 12);
	    v = *(volatile int *)(bptr + 16);
	    v = *(volatile int *)(bptr + 20);
	    v = *(volatile int *)(bptr + 24);
	    v = *(volatile int *)(bptr + 28);
	}
    }
    (void)v;		/* loaded values are intentionally discarded */

    us = stop_timing(msg, (long long)bytes * loops);
    return(us);
}
141 static
142 void
143 start_timing(void)
145 gettimeofday(&tv1, NULL);
148 static
150 stop_timing(char *str, long long bytes)
152 int us;
154 gettimeofday(&tv2, NULL);
156 us = tv2.tv_usec + 1000000 - tv1.tv_usec +
157 (tv2.tv_sec - tv1.tv_sec - 1) * 1000000;
158 if (str) {
159 printf("%s: %4.2f Mbytes/sec\n",
160 str,
161 (double)bytes * 1000000.0 / ((double)us * 1024.0 * 1024.0));
163 return(us);