1 ;;; This is a stress test of multi-thread garbage collection without
2 ;;; introducing contrived factors into the system such as extra debugging
3 ;;; or manually invoked GC.
NB: You need to build with #+sb-devel (i.e. with the :sb-devel feature enabled)
so that compiling "src/compiler/srctran" does not fail quickly.
Legend: One line per statistic, measured in microseconds. One column per thread.
12 1. worst observed GC wait time per thread
13 2. average GC wait time per thread
14 3. total CPU time per thread
./run-sbcl.sh --dynamic-space-size 4GB
17 * (load "src/cold/chill")
18 * (load (compile-file "benchmarks/threads-compile"))
* (gc-benchmark 4 4) ; 4 threads, 4 iterations each
21 ;; worst-case stop-the-world pause = .528 sec
22 [ 161289 528922 528972 528953]
23 [ 67780 100901 93856 99925]
24 [ 4342694 3875017 4088501 3992092]
* (gc-benchmark 20 6) ; 20 threads, 6 iterations each
27 ;; worst-case stop-the-world pause = .905 sec
28 ;; but I've seen this parameter pair produce as much as 1.38 sec worst-case pause
29 [ 853493 905262 904932 904812 904660 904480 905302 904205 904040 904363 905253 905290 905231 903957 903644 903827 903432 903272 903607 905291]
30 [ 95906 98105 96955 96992 100130 98969 99631 96186 98073 96748 97830 97861 94608 94574 97282 95638 97308 96941 97169 95195]
31 [ 8638099 7620908 8408132 7783041 7411906 7616439 7550742 7881625 8042876 7627665 7090403 7322993 8996690 8231693 7415837 8477329 7745566 8130082 7640412 7891094]
* (gc-benchmark 30 4) ; 30 threads, 4 iterations each
34 ;; worst-case stop-the-world pause = 1.59 sec
35 [ 1589254 1589235 1589236 1589246 1589200 1589244 1589293 1589249 1589258 1589260 1589195 1589517 1589541 1589267 1589454 1589577 1589311 1589311 1589420 1589658 1589638 1589322 1589302 1589262 1426929 1589448 1589644 1589307 1589492 1589577]
36 [ 131124 133234 134216 132862 134032 133074 131811 133394 134221 133830 133337 135129 133034 131109 133957 130416 128010 133089 128650 131075 134138 133200 130342 132036 126419 133778 132877 135274 132027 132272]
37 [ 6463084 5699894 6391162 5323400 5510025 5425688 6288613 4886611 5456971 5394043 5564274 5639621 5054329 5722550 5208487 5986264 6858847 5267559 7030543 5811645 5656792 5012832 6000738 5682139 7220169 6433044 5468151 5295718 5333045 5908446]
;;; Addresses of the two foreign synchronization objects used to coordinate
;;; the benchmark driver with its workers. Both are looked up by symbol name
;;; and are passed as raw integers to the pthread wrappers in this file.
(defparameter *gcmetrics-condvar*
  (sb-sys:find-dynamic-foreign-symbol-address "gcmetrics_condvar")
  "Result of looking up the foreign symbol gcmetrics_condvar; used as the
condition-variable argument to PTHREAD-COND-WAIT and PTHREAD-COND-BROADCAST.")

(defparameter *gcmetrics-mutex*
  (sb-sys:find-dynamic-foreign-symbol-address "gcmetrics_mutex")
  "Result of looking up the foreign symbol gcmetrics_mutex; used as the
mutex argument to PTHREAD-MUTEX-LOCK, PTHREAD-MUTEX-UNLOCK and PTHREAD-COND-WAIT.")
;;; Minimal foreign-function bindings for the pthread primitives used by
;;; GC-BENCHMARK. Each pointer argument is passed as an unsigned integer
;;; address, and each routine returns the C function's int status code.
;;; The foreign names are spelled out explicitly; they are the same names
;;; DEFINE-ALIEN-ROUTINE would derive from the Lisp symbols.
(define-alien-routine ("pthread_mutex_lock" pthread-mutex-lock) int
  (m unsigned))

(define-alien-routine ("pthread_mutex_unlock" pthread-mutex-unlock) int
  (m unsigned))

(define-alien-routine ("pthread_cond_wait" pthread-cond-wait) int
  (cv unsigned)
  (m unsigned))

(define-alien-routine ("pthread_cond_broadcast" pthread-cond-broadcast) int
  (cv unsigned))
;;; THREAD-GCMETRICS thread => avg, worst, runtime
;;; Read three 64-bit values belonging to THREAD and return them as multiple
;;; values. The base address is the C-level thread pointer (C-THREAD, bound by
;;; WITH-DEATHLOK) plus the foreign variable "dynamic_values_bytes" plus
;;; 8*8 bytes (the inline comment attributes those 64 bytes to the interrupt
;;; context pointers). Relative to that base, per the inline comments:
;;;   offset  8 -> avg     (first value)
;;;   offset 16 -> worst   (second value)
;;;   offset  0 -> runtime (third value)
;;; NOTE(review): the lines between the WITH-DEATHLOK header and the SAP+
;;; expression (presumably the LET that binds SAP, and possibly a guard on
;;; C-THREAD) are missing from this copy of the file -- restore them from the
;;; upstream source before loading.
50 (defun thread-gcmetrics (thread)
51 (sb-thread:with-deathlok
(thread c-thread
)
55 (sb-sys:sap
+ (sb-sys:int-sap c-thread
)
56 (+ (sb-alien:extern-alien
"dynamic_values_bytes" (sb-alien:unsigned
32))
57 (* 8 8))))) ; interrupt context pointers
58 (values (sb-sys:sap-ref-64 sap
8) ; avg
59 (sb-sys:sap-ref-64 sap
16) ; worst
60 (sb-sys:sap-ref-64 sap
0)))))) ; runtime
62 ;;; Exercise COMPILE-FILE in many threads, which is representative of a
63 ;;; lispy workload. Any suitable workload should do.
64 ;;; It would be better to have each thread doing a different kind of work,
65 ;;; but I took the easy route.
;;; GC-BENCHMARK n-threads n-iter
;;; Summary of what the visible code does:
;;;  * Allocates four N-THREADS-long vectors: RUNNING (all T initially) and
;;;    AVG-GC-WAIT, WORST-GC-WAIT, RUNTIME for the per-thread statistics.
;;;  * Each worker, identified by ARG, rebinds *STANDARD-OUTPUT* and
;;;    *ERROR-OUTPUT* to "/tmp/foo<arg>.stdout" and compiles
;;;    "src/compiler/srctran" into "/tmp/foo<arg>.fasl"; afterwards it clears
;;;    its RUNNING entry and broadcasts on *GCMETRICS-CONDVAR*.
;;;  * The calling thread starts N-THREADS threads named "worker<i>", takes
;;;    *GCMETRICS-MUTEX*, and waits on the condition variable until no RUNNING
;;;    entry is T, collecting THREAD-GCMETRICS values and printing three rows
;;;    (worst wait, average wait, runtime) along the way.
;;;  * Finally it releases the mutex and prints the elapsed real time.
;;; NOTE(review): this copy of the file is missing several interior lines of
;;; this function (among them the binding form that introduces THREADS, the
;;; header of the worker function that binds ARG, the loop header that the
;;; (RETURN) exits, and the place where N-ITER is consumed). The comments here
;;; describe only what is visible; restore the gaps from upstream before use.
66 (defun gc-benchmark (n-threads n-iter
)
68 (running (make-array n-threads
:initial-element t
))
69 (avg-gc-wait (make-array n-threads
))
70 (worst-gc-wait (make-array n-threads
))
71 (runtime (make-array n-threads
)))
;; Worker body: send this thread's compiler chatter to a per-worker file so
;; the statistics printed by the driver are not interleaved with it.
73 (with-open-file (*standard-output
*
74 (format nil
"/tmp/foo~d.stdout" arg
)
77 :if-does-not-exist
:create
)
78 (let ((*error-output
* *standard-output
*))
80 (let ((file (format nil
"/tmp/foo~d.fasl" arg
)))
81 (compile-file "src/compiler/srctran"
83 :output-file file
)))))
;; Mark this worker as finished, then wake every thread waiting on the condvar.
84 (setf (aref running arg
) nil
)
85 (pthread-cond-broadcast *gcmetrics-condvar
*)))
;; Start the workers; each is named "worker<i>".
86 (dotimes (i n-threads
)
87 (push (sb-thread:make-thread
89 :name
(format nil
"worker~d" i
)
;; Driver: START is the wall-clock origin for the "all done" report below.
;; The mutex must be held before waiting on the condition variable; a nonzero
;; return from the pthread call trips the ASSERT.
92 (let ((start (get-internal-real-time)))
93 (assert (= 0 (pthread-mutex-lock *gcmetrics-mutex
*)))
;; COUNT is the number of workers whose RUNNING flag is still T; stop once
;; none remain, otherwise sleep until some worker broadcasts.
95 (let ((count (count t running
)))
96 (when (zerop count
) (return))
97 (assert (= 0 (pthread-cond-wait *gcmetrics-condvar
* *gcmetrics-mutex
*)))
;; Refresh the statistics vectors from every thread.
;; NOTE(review): the binding of I used as the vector index below is on a line
;; missing from this copy -- confirm it advances once per thread.
99 (dolist (thread threads
)
100 (multiple-value-bind (avg worst run
) (thread-gcmetrics thread
)
102 (setf (aref avg-gc-wait i
) avg
103 (aref worst-gc-wait i
) worst
104 (aref runtime i
) run
)))
;; Rows are printed in the order: worst wait, average wait, runtime.
106 (format t
"~D threads:~% [~{~8d~^ ~}]~% [~{~8d~^ ~}]~% [~{~8d~^ ~}]~%"
108 (coerce worst-gc-wait
'list
)
109 (coerce avg-gc-wait
'list
)
110 (coerce runtime
'list
))))
111 (pthread-mutex-unlock *gcmetrics-mutex
*)
;; Report elapsed real time in seconds.
112 (let ((end (get-internal-real-time)))
113 (format t
"~&all done: ~fs~%"
114 (/ (- end start
) internal-time-units-per-second
))))))
116 ;;; run this with a 16GB dynamic space
;;; ALLOCATOR-BENCHMARK n-threads n-iter
;;; Summary of what the visible code does:
;;;  * Creates a semaphore SEM and an (initially empty) THREADS list.
;;;  * Each worker compiles "src/compiler/node" to "/tmp/out<arg>.fasl" with
;;;    :PRINT NIL, :BLOCK-COMPILE T and :VERBOSE NIL, then signals SEM.
;;;  * Starts N-THREADS threads running WORK, named "worker<i>", each given I
;;;    as its argument.
;;;  * The calling thread waits on SEM; on each wakeup it walks THREADS and, for
;;;    every thread whose C-level thread pointer is nonzero, reads slots from
;;;    the thread structure and prints "<thread-name>: <times>".
;;;  * It returns after (* N-THREADS N-ITER) wakeups.
;;; NOTE(review): this copy of the file is missing several interior lines of
;;; this function (among them the header of the local WORK function that binds
;;; ARG, the name of the second local macro, the second operand of each
;;; division, the binding of TIMES and the loop header that (RETURN) exits).
;;; The comments here describe only what is visible; restore the gaps from
;;; upstream before use.
117 (defun allocator-benchmark (n-threads n-iter
)
118 (let (threads (sem (make-semaphore)))
;; Worker body: one COMPILE-FILE into a per-worker fasl, then notify the driver.
120 (let ((out (format nil
"/tmp/out~d.fasl" arg
)))
122 (compile-file "src/compiler/node"
123 :print nil
:block-compile t
:verbose nil
125 (signal-semaphore sem
))
;; NOTE(review): the form enclosing this VALUES is not visible in this copy;
;; it names the same three elapsed-time slots that the driver reports below.
127 (values (sb-vm::current-thread-offset-sap
128 sb-vm
::thread-et-allocator-mutex-acq-slot
)
129 (sb-vm::current-thread-offset-sap
130 sb-vm
::thread-et-find-freeish-page-slot
)
131 (sb-vm::current-thread-offset-sap
132 sb-vm
::thread-et-bzeroing-slot
))))
133 (dotimes (i n-threads
)
134 (push (make-thread #'work
:name
(format nil
"worker~d" i
) :arguments i
)
;; PUSH built the list newest-first; reverse it so threads are reported in
;; creation order.
137 (setq threads
(nreverse threads
))
;; INTMETRIC reads the word at slot index SLOT (scaled by the word shift)
;; relative to SAP. The second expansion does the same and converts to float;
;; presumably it is the FLOATMETRIC macro used below -- its name line is
;; missing from this copy.
138 (macrolet ((intmetric (slot)
139 `(sap-ref-word sap
(ash ,slot sb-vm
:word-shift
)))
141 `(float (sap-ref-word sap
(ash ,slot sb-vm
:word-shift
)))))
;; One semaphore signal is expected per completed COMPILE-FILE.
142 (let ((n-to-go (* n-threads n-iter
)))
144 ;; Wait for any thread to be done with one COMPILE-FILE
145 (wait-on-semaphore sem
)
146 (dolist (thread threads
)
147 (with-deathlok (thread c-thread
)
;; Skip threads whose C-level thread pointer is 0.
148 (unless (= c-thread
0)
149 (let* ((sap (int-sap c-thread
))
150 (divisor (intmetric sb-vm
::thread-slow-path-allocs-slot
))
;; Three ratios, one per elapsed-time slot. The second operand of each
;; division is on a missing line -- presumably DIVISOR; confirm upstream.
152 (list (/ (floatmetric sb-vm
::thread-et-allocator-mutex-acq-slot
)
154 (/ (floatmetric sb-vm
::thread-et-find-freeish-page-slot
)
156 (/ (floatmetric sb-vm
::thread-et-bzeroing-slot
)
158 (format t
"~a: ~a~%" (thread-name thread
) times
)))))
160 (when (zerop (decf n-to-go
)) (return))))))))
164 (ALLOCATOR-BENCHMARK 1 5)
165 worker0
: (330.69366
387.3285 6498.661)
167 (ALLOCATOR-BENCHMARK 2 5)
168 worker0
: (330.5141
267.6703 7333.836)
169 worker1
: (228.58601
165.74622 6578.589)
171 (ALLOCATOR-BENCHMARK 5 5)
172 worker0
: (690.2581
425.22952 5876.69)
173 worker1
: (710.41406
348.25806 6209.075)
174 worker2
: (839.3615
454.86133 7612.185)
175 worker3
: (885.43054
602.65674 10080.599)
176 worker4
: (610.4866
262.36072 8558.833)
178 (ALLOCATOR-BENCHMARK 10 5)
179 worker0
: (1223.6002
430.6594 7850.7573)
180 worker1
: (1330.8501
370.85773 6489.9937)
181 worker2
: (1253.6841
505.19583 5270.5938)
182 worker3
: (1490.959
715.54004 6404.7485)
183 worker4
: (1285.563
418.3966 4903.252)
184 worker5
: (1166.429
367.69632 4751.1025)
185 worker6
: (1516.6385
703.275 5229.6743)
186 worker7
: (1445.5946
435.18625 8682.394)
187 worker8
: (1445.0297
392.44226 6706.816)
188 worker9
: (1356.9069
461.00558 5664.2266)
190 (ALLOCATOR-BENCHMARK 20 3)
191 worker0
: (1556.1759
320.41278 7864.225)
192 worker1
: (2484.3042
380.25073 6287.422)
193 worker2
: (2330.8076
518.1103 6229.52)
194 worker3
: (1892.3644
413.4363 6322.3574)
195 worker4
: (2391.721
581.5211 5309.2114)
196 worker5
: (3180.5654
1101.414 5779.844)
197 worker6
: (2621.355
634.3344 4852.1455)
198 worker7
: (2378.809
440.01437 4085.8718)
199 worker8
: (2730.9878
432.23807 3691.8616)
200 worker9
: (2128.9807
376.76605 6020.571)
201 worker10
: (2715.6238
483.9466 7864.9487)
202 worker11
: (2880.8203
445.12094 5770.294)
203 worker12
: (3576.9197
767.5074 6190.4316)
204 worker13
: (3010.8503
437.47897 6542.27)
205 worker14
: (2961.2139
453.69385 6901.6504)
206 worker15
: (3242.9263
513.6723 6050.3047)
207 worker16
: (3760.2107
1017.8271 6511.578)
208 worker17
: (3949.1416
794.4195 5975.102)
209 worker18
: (3443.0042
444.75006 4557.97)
210 worker19
: (3430.517
806.4593 3539.3176)
212 (ALLOCATOR-BENCHMARK 30 2)
213 worker0
: (3228.2756
465.13016 8892.34)
214 worker1
: (3792.6448
770.495 7546.6333)
215 worker2
: (3741.0088
856.29407 9156.665)
216 worker3
: (3276.4631
410.84845 8926.436)
217 worker4
: (3618.4817
409.49045 6198.2173)
218 worker5
: (3677.3682
533.64966 6499.718)
219 worker6
: (3326.2502
426.92972 6894.4204)
220 worker7
: (4277.313
497.48938 8042.677)
221 worker8
: (4424.929
515.2159 8480.562)
222 worker9
: (4579.331
646.7453 7944.594)
223 worker10
: (5665.9673
585.96246 9082.217)
224 worker11
: (4093.323
536.14 8263.94)
225 worker12
: (5716.6953
636.16815 6921.578)
226 worker13
: (5787.886
771.44214 4725.5513)
227 worker14
: (7163.328
1685.777 5396.888)
228 worker15
: (5750.1753
584.4418 4869.9063)
229 worker16
: (5826.1787
653.7092 3785.243)
230 worker17
: (6162.1816
760.8072 3882.8232)
231 worker18
: (5333.0513
477.8418 4006.6885)
232 worker19
: (8481.007
597.1158 3250.377)
233 worker20
: (9162.3125
2120.3945 5063.6616)
234 worker21
: (5398.499
643.1221 11578.032)
235 worker22
: (7045.36
1039.0885 5842.894)
236 worker23
: (9666.884
543.9834 4494.3945)
237 worker24
: (9476.041
770.6879 4494.854)
238 worker25
: (4477.0054
348.83954 5587.9424)
239 worker26
: (5616.502
469.45154 5180.7173)
240 worker27
: (10800.295
481.92975 6047.9507)
241 worker28
: (11228.471
606.4268 4192.347)
242 worker29
: (19881.9
996.91534 157.6319)