Trust non-returning functions during sb-xc.
[sbcl.git] / benchmarks / threads-compile.lisp
blob1c3d9f10f9e245ffda47961a3a486fc11988ace8
1 ;;; This is a stress test of multi-thread garbage collection without
2 ;;; introducing contrived factors into the system such as extra debugging
3 ;;; or manually invoked GC.
5 #|
7 NB: You need to compile with #+sb-devel
8 so that compiling "src/compiler/srctran" does not fail quickly.
10 Legend: One line per statistic, measured in microsec. One column per thread.
11 The statistics:
12 1. worst observed GC wait time per thread
13 2. average GC wait time per thread
14 3. total CPU time per thread
16 ./run-sbcl.sh --dynamic-space-size 4GB
17 * (load "src/cold/chill")
18 * (load (compile-file "benchmarks/threads-compile"))
20 * (gc-benchmark 4 4) ; 4 threads, 4 iterations each
21 ;; worst-case stop-the-world pause = .528 sec
22 [ 161289 528922 528972 528953]
23 [ 67780 100901 93856 99925]
24 [ 4342694 3875017 4088501 3992092]
26 * (gc-benchmark 20 6) ; 20 threads, 6 iterations each
27 ;; worst-case stop-the-world pause = .905 sec
28 ;; but I've seen this parameter pair produce as much as 1.38 sec worst-case pause
29 [ 853493 905262 904932 904812 904660 904480 905302 904205 904040 904363 905253 905290 905231 903957 903644 903827 903432 903272 903607 905291]
30 [ 95906 98105 96955 96992 100130 98969 99631 96186 98073 96748 97830 97861 94608 94574 97282 95638 97308 96941 97169 95195]
31 [ 8638099 7620908 8408132 7783041 7411906 7616439 7550742 7881625 8042876 7627665 7090403 7322993 8996690 8231693 7415837 8477329 7745566 8130082 7640412 7891094]
33 * (gc-benchmark 30 4) ; 30 threads, 4 iterations each
34 ;; worst-case stop-the-world pause = 1.59 sec
35 [ 1589254 1589235 1589236 1589246 1589200 1589244 1589293 1589249 1589258 1589260 1589195 1589517 1589541 1589267 1589454 1589577 1589311 1589311 1589420 1589658 1589638 1589322 1589302 1589262 1426929 1589448 1589644 1589307 1589492 1589577]
36 [ 131124 133234 134216 132862 134032 133074 131811 133394 134221 133830 133337 135129 133034 131109 133957 130416 128010 133089 128650 131075 134138 133200 130342 132036 126419 133778 132877 135274 132027 132272]
37 [ 6463084 5699894 6391162 5323400 5510025 5425688 6288613 4886611 5456971 5394043 5564274 5639621 5054329 5722550 5208487 5986264 6858847 5267559 7030543 5811645 5656792 5012832 6000738 5682139 7220169 6433044 5468151 5295718 5333045 5908446]
(defparameter *gcmetrics-condvar*
  (sb-sys:find-dynamic-foreign-symbol-address "gcmetrics_condvar")
  "Result of looking up the foreign symbol gcmetrics_condvar.
Handed to PTHREAD-COND-WAIT and PTHREAD-COND-BROADCAST as the condition
variable argument.")

(defparameter *gcmetrics-mutex*
  (sb-sys:find-dynamic-foreign-symbol-address "gcmetrics_mutex")
  "Result of looking up the foreign symbol gcmetrics_mutex.
Handed to PTHREAD-MUTEX-LOCK, PTHREAD-MUTEX-UNLOCK and PTHREAD-COND-WAIT
as the mutex argument.")
;;; Thin Lisp entry points for the pthread primitives used below.
;;; Each takes the mutex / condition variable as a raw unsigned integer
;;; and returns the C function's int result.
(sb-alien:define-alien-routine ("pthread_mutex_lock" pthread-mutex-lock)
    sb-alien:int
  (m sb-alien:unsigned))

(sb-alien:define-alien-routine ("pthread_mutex_unlock" pthread-mutex-unlock)
    sb-alien:int
  (m sb-alien:unsigned))

(sb-alien:define-alien-routine ("pthread_cond_wait" pthread-cond-wait)
    sb-alien:int
  (cv sb-alien:unsigned)
  (m sb-alien:unsigned))

(sb-alien:define-alien-routine ("pthread_cond_broadcast" pthread-cond-broadcast)
    sb-alien:int
  (cv sb-alien:unsigned))
(defun thread-gcmetrics (thread)
  "Read the GC metrics words stored for THREAD.
Returns three values: average GC wait, worst GC wait, and runtime, each
read as a 64-bit word.  Returns (VALUES NIL NIL NIL) when the C-THREAD
value bound by WITH-DEATHLOK is 0 -- presumably meaning the thread has no
live C-side structure any more; confirm against WITH-DEATHLOK."
  (sb-thread:with-deathlok (thread c-thread)
    (cond ((zerop c-thread)
           (values nil nil nil))
          (t
           (let* ((thread-base (sb-sys:int-sap c-thread))
                  ;; The metrics sit past the dynamic-values area plus
                  ;; 8 words of 8 bytes (interrupt context pointers).
                  (metrics-offset
                    (+ (sb-alien:extern-alien "dynamic_values_bytes"
                                              (sb-alien:unsigned 32))
                       (* 8 8)))
                  (metrics (sb-sys:sap+ thread-base metrics-offset))
                  (avg (sb-sys:sap-ref-64 metrics 8))
                  (worst (sb-sys:sap-ref-64 metrics 16))
                  (runtime (sb-sys:sap-ref-64 metrics 0)))
             (values avg worst runtime))))))
62 ;;; Exercise COMPILE-FILE in many threads, which is representative of a
63 ;;; lispy workload. Any suitable workload should do.
64 ;;; It would be better to have each thread doing a different kind of work,
65 ;;; but I took the easy route.
(defun gc-benchmark (n-threads n-iter)
  "Run N-THREADS worker threads, each compiling src/compiler/srctran N-ITER
times, while this thread reports per-thread GC metrics.

Worker K writes its compiler output to /tmp/foo<K>.stdout and its fasl to
/tmp/foo<K>.fasl.  Each time the metrics condition variable is signalled,
the calling thread prints the number of workers still running followed by
three rows (worst GC wait, average GC wait, runtime), one column per
worker; column K belongs to worker K.  When every worker has finished, the
elapsed wall-clock time in seconds is printed."
  (let (threads
        (running (make-array n-threads :initial-element t))
        ;; Initialise to 0 so that a row printed before a worker has been
        ;; sampled has well-defined contents.
        (avg-gc-wait (make-array n-threads :initial-element 0))
        (worst-gc-wait (make-array n-threads :initial-element 0))
        (runtime (make-array n-threads :initial-element 0)))
    (flet ((work (arg)
             ;; The cleanup forms must run even if COMPILE-FILE unwinds,
             ;; otherwise the monitor below would wait for this worker
             ;; forever.
             ;; NOTE(review): the flag is cleared and the broadcast sent
             ;; without holding the metrics mutex, so a wakeup could in
             ;; principle fall between the monitor's COUNT and its wait.
             ;; Presumably other broadcasts on this condvar make that
             ;; harmless -- confirm against the runtime.
             (unwind-protect
                  (with-open-file (*standard-output*
                                   (format nil "/tmp/foo~d.stdout" arg)
                                   :direction :output
                                   :if-exists :supersede
                                   :if-does-not-exist :create)
                    (let ((*error-output* *standard-output*))
                      (dotimes (i n-iter)
                        (let ((file (format nil "/tmp/foo~d.fasl" arg)))
                          (compile-file "src/compiler/srctran"
                                        :print nil
                                        :output-file file)))))
               (setf (aref running arg) nil)
               (pthread-cond-broadcast *gcmetrics-condvar*))))
      (dotimes (i n-threads)
        (push (sb-thread:make-thread
               #'work
               :name (format nil "worker~d" i)
               :arguments i)
              threads)))
    ;; PUSH built the list newest-first; restore creation order so that
    ;; metric column K lines up with worker K (and with RUNNING's indexing).
    (setq threads (nreverse threads))
    (let ((start (get-internal-real-time)))
      (assert (= 0 (pthread-mutex-lock *gcmetrics-mutex*)))
      ;; Release the mutex on every exit path, including a failed ASSERT
      ;; or an error while printing.
      (unwind-protect
           (loop
             (let ((count (count t running)))
               (when (zerop count) (return))
               (assert (= 0 (pthread-cond-wait *gcmetrics-condvar*
                                               *gcmetrics-mutex*)))
               (let ((i 0))
                 (dolist (thread threads)
                   (multiple-value-bind (avg worst run) (thread-gcmetrics thread)
                     ;; AVG is NIL once the thread is gone; keep the last
                     ;; sample in that case.
                     (when avg
                       (setf (aref avg-gc-wait i) avg
                             (aref worst-gc-wait i) worst
                             (aref runtime i) run)))
                   (incf i)))
               (format t "~D threads:~% [~{~8d~^ ~}]~% [~{~8d~^ ~}]~% [~{~8d~^ ~}]~%"
                       count
                       (coerce worst-gc-wait 'list)
                       (coerce avg-gc-wait 'list)
                       (coerce runtime 'list))))
        (pthread-mutex-unlock *gcmetrics-mutex*))
      (let ((end (get-internal-real-time)))
        (format t "~&all done: ~fs~%"
                (/ (- end start) internal-time-units-per-second))))))
116 ;;; run this with a 16GB dynamic space
(defun allocator-benchmark (n-threads n-iter)
  "Run N-THREADS worker threads, each compiling src/compiler/node N-ITER
times, and report per-thread allocator timings.

Workers are started 0.25 seconds apart.  Worker K writes its fasl to
/tmp/out<K>.fasl and deletes it after its last iteration.  Each time any
worker finishes one COMPILE-FILE, the calling thread prints one line per
live worker: the thread name and a list of three ratios -- the
allocator-mutex-acquire, find-freeish-page and bzeroing slots, each divided
by the thread's slow-path allocation counter.  Returns after
N-THREADS * N-ITER compilations have been reported."
  (let (threads (sem (make-semaphore)))
    (flet ((work (arg)
             (let ((out (format nil "/tmp/out~d.fasl" arg)))
               (dotimes (i n-iter)
                 (compile-file "src/compiler/node"
                               :print nil :block-compile t :verbose nil
                               :output-file out)
                 ;; One signal per finished compilation drives the
                 ;; reporting loop below.
                 (signal-semaphore sem))
               (delete-file out))
             (values (sb-vm::current-thread-offset-sap
                      sb-vm::thread-et-allocator-mutex-acq-slot)
                     (sb-vm::current-thread-offset-sap
                      sb-vm::thread-et-find-freeish-page-slot)
                     (sb-vm::current-thread-offset-sap
                      sb-vm::thread-et-bzeroing-slot))))
      (dotimes (i n-threads)
        (push (make-thread #'work :name (format nil "worker~d" i) :arguments i)
              threads)
        (sleep .25))
      ;; Report workers in creation order.
      (setq threads (nreverse threads))
      (macrolet ((intmetric (slot)
                   `(sap-ref-word sap (ash ,slot sb-vm:word-shift)))
                 (floatmetric (slot)
                   `(float (sap-ref-word sap (ash ,slot sb-vm:word-shift)))))
        (let ((n-to-go (* n-threads n-iter)))
          (loop
            ;; Wait for any thread to be done with one COMPILE-FILE
            (wait-on-semaphore sem)
            (dolist (thread threads)
              (with-deathlok (thread c-thread)
                ;; A C-THREAD of 0 is skipped: there is nothing to read.
                (unless (= c-thread 0)
                  (let* ((sap (int-sap c-thread))
                         (divisor (intmetric sb-vm::thread-slow-path-allocs-slot)))
                    ;; A thread whose slow-path counter is still 0 has no
                    ;; meaningful average; skip it rather than signal
                    ;; DIVISION-BY-ZERO in the monitor.
                    (unless (zerop divisor)
                      (let ((times
                              (list (/ (floatmetric sb-vm::thread-et-allocator-mutex-acq-slot)
                                       divisor)
                                    (/ (floatmetric sb-vm::thread-et-find-freeish-page-slot)
                                       divisor)
                                    (/ (floatmetric sb-vm::thread-et-bzeroing-slot)
                                       divisor))))
                        (format t "~a: ~a~%" (thread-name thread) times)))))))
            (terpri)
            (when (zerop (decf n-to-go)) (return))))))))
163 typical results:
164 (ALLOCATOR-BENCHMARK 1 5)
165 worker0: (330.69366 387.3285 6498.661)
167 (ALLOCATOR-BENCHMARK 2 5)
168 worker0: (330.5141 267.6703 7333.836)
169 worker1: (228.58601 165.74622 6578.589)
171 (ALLOCATOR-BENCHMARK 5 5)
172 worker0: (690.2581 425.22952 5876.69)
173 worker1: (710.41406 348.25806 6209.075)
174 worker2: (839.3615 454.86133 7612.185)
175 worker3: (885.43054 602.65674 10080.599)
176 worker4: (610.4866 262.36072 8558.833)
178 (ALLOCATOR-BENCHMARK 10 5)
179 worker0: (1223.6002 430.6594 7850.7573)
180 worker1: (1330.8501 370.85773 6489.9937)
181 worker2: (1253.6841 505.19583 5270.5938)
182 worker3: (1490.959 715.54004 6404.7485)
183 worker4: (1285.563 418.3966 4903.252)
184 worker5: (1166.429 367.69632 4751.1025)
185 worker6: (1516.6385 703.275 5229.6743)
186 worker7: (1445.5946 435.18625 8682.394)
187 worker8: (1445.0297 392.44226 6706.816)
188 worker9: (1356.9069 461.00558 5664.2266)
190 (ALLOCATOR-BENCHMARK 20 3)
191 worker0: (1556.1759 320.41278 7864.225)
192 worker1: (2484.3042 380.25073 6287.422)
193 worker2: (2330.8076 518.1103 6229.52)
194 worker3: (1892.3644 413.4363 6322.3574)
195 worker4: (2391.721 581.5211 5309.2114)
196 worker5: (3180.5654 1101.414 5779.844)
197 worker6: (2621.355 634.3344 4852.1455)
198 worker7: (2378.809 440.01437 4085.8718)
199 worker8: (2730.9878 432.23807 3691.8616)
200 worker9: (2128.9807 376.76605 6020.571)
201 worker10: (2715.6238 483.9466 7864.9487)
202 worker11: (2880.8203 445.12094 5770.294)
203 worker12: (3576.9197 767.5074 6190.4316)
204 worker13: (3010.8503 437.47897 6542.27)
205 worker14: (2961.2139 453.69385 6901.6504)
206 worker15: (3242.9263 513.6723 6050.3047)
207 worker16: (3760.2107 1017.8271 6511.578)
208 worker17: (3949.1416 794.4195 5975.102)
209 worker18: (3443.0042 444.75006 4557.97)
210 worker19: (3430.517 806.4593 3539.3176)
212 (ALLOCATOR-BENCHMARK 30 2)
213 worker0: (3228.2756 465.13016 8892.34)
214 worker1: (3792.6448 770.495 7546.6333)
215 worker2: (3741.0088 856.29407 9156.665)
216 worker3: (3276.4631 410.84845 8926.436)
217 worker4: (3618.4817 409.49045 6198.2173)
218 worker5: (3677.3682 533.64966 6499.718)
219 worker6: (3326.2502 426.92972 6894.4204)
220 worker7: (4277.313 497.48938 8042.677)
221 worker8: (4424.929 515.2159 8480.562)
222 worker9: (4579.331 646.7453 7944.594)
223 worker10: (5665.9673 585.96246 9082.217)
224 worker11: (4093.323 536.14 8263.94)
225 worker12: (5716.6953 636.16815 6921.578)
226 worker13: (5787.886 771.44214 4725.5513)
227 worker14: (7163.328 1685.777 5396.888)
228 worker15: (5750.1753 584.4418 4869.9063)
229 worker16: (5826.1787 653.7092 3785.243)
230 worker17: (6162.1816 760.8072 3882.8232)
231 worker18: (5333.0513 477.8418 4006.6885)
232 worker19: (8481.007 597.1158 3250.377)
233 worker20: (9162.3125 2120.3945 5063.6616)
234 worker21: (5398.499 643.1221 11578.032)
235 worker22: (7045.36 1039.0885 5842.894)
236 worker23: (9666.884 543.9834 4494.3945)
237 worker24: (9476.041 770.6879 4494.854)
238 worker25: (4477.0054 348.83954 5587.9424)
239 worker26: (5616.502 469.45154 5180.7173)
240 worker27: (10800.295 481.92975 6047.9507)
241 worker28: (11228.471 606.4268 4192.347)
242 worker29: (19881.9 996.91534 157.6319)