Make *uncompacted-fun-maps* synchronized.
[sbcl.git] / tests / parallel-exec.lisp
blob79d87f50f6fe8d247cb7b7db8964abcfe5600585
1 (pop *posix-argv*)
2 (require :sb-posix)
3 #-sparc (push :test-sprof *features*)
4 #+test-sprof (require :sb-sprof)
6 ;; (push :tlsf-stress *features*)
7 #+tlsf-stress
8 (progn
9 (setq sb-c:*compile-to-memory-space* :immobile)
10 (with-alien ((tlsf-control system-area-pointer :extern)
11 (tlsf-mem-start system-area-pointer :extern))
12 (defun tlsf-checks ()
13 (alien-funcall (extern-alien "tlsf_check" (function void system-area-pointer))
14 tlsf-control)
15 (alien-funcall (extern-alien "tlsf_check_pool" (function void system-area-pointer))
16 tlsf-mem-start)
17 (let ((msg #.(format nil "TLSF checks passed~%")))
18 (sb-unix:unix-write 2 msg 0 (length msg))))
19 (defun tlsf-dump ()
20 (alien-funcall (extern-alien "tlsf_dump_pool"
21 (function void system-area-pointer system-area-pointer c-string))
22 tlsf-control tlsf-mem-start "/dev/stdout")))
23 (compile 'tlsf-checks)
24 (compile 'tlsf-dump)
25 (push #'tlsf-checks sb-ext:*after-gc-hooks*))
27 (let ((*evaluator-mode* :compile))
28 (with-compilation-unit () (load"run-tests")))
29 #+(and x86-64 linux sb-thread)
30 (unless (or (find :mark-region-gc sb-impl:+internal-features+)
31 (find :gs-seg sb-impl:+internal-features+))
32 (push :test-aprof *features*))
33 (in-package run-tests)
34 (import '(sb-alien:alien-funcall sb-alien:extern-alien
35 sb-alien:int sb-alien:c-string sb-alien:unsigned))
36 (setq *summarize-test-times* t)
37 ;;; Ordered approximately in descending order by running time
38 (defvar *timings* (with-open-file (s "timing") (read s)))
40 (defvar *filter* nil)
41 (defglobal *delete-logs* nil)
42 (defun choose-order (tests)
43 (when *filter*
44 (let (strings)
45 (with-open-file (file *filter*)
46 (loop (let ((line (read-line file nil)))
47 (if line (push line strings) (return)))))
48 (setq tests (remove-if (lambda (x) (not (find x strings :test #'string=)))
49 tests))))
50 (sort tests
51 #'> :key (lambda (file) (or (cadr (assoc file *timings* :test #'equal)) 0))))
53 (defvar *logdir* (posix-getenv "TEST_LOGDIR"))
55 (defun summarize-gc-times ()
56 ;; this function prints random numbers now, I don't know why
57 (return-from summarize-gc-times)
58 (let (observations)
59 (flet ((parse-triple (string pos)
60 (sb-int:binding* (((int1 end) (parse-integer string :start (1+ pos)
61 :junk-allowed t))
62 ((int2 end) (parse-integer string :start (1+ end)
63 :junk-allowed t))
64 ((int3) (parse-integer string :start (1+ end)
65 :junk-allowed t)))
66 (list int1 int2 int3))))
67 (dolist (pn (directory (format nil "~a/*.*" *logdir*)))
68 (with-open-file (f pn)
69 (let ((legend "GC: stw_delay"))
70 (loop
71 (let ((line (read-line f nil)))
72 (unless line (return))
73 (when (and (> (length line) (length legend))
74 (string= line legend :end1 (length legend)))
75 (let* ((p1 (position #\= line))
76 (p2 (position #\= line :start (1+ p1)))
77 (stw-dur (parse-triple line p1))
78 (gc-dur (parse-triple line p2))
79 (count (parse-integer line :start (+ (search "over " line) 5)
80 :junk-allowed t)))
81 (let ((name (concatenate 'string (pathname-name pn) "."
82 (pathname-type pn))))
83 (push (list count stw-dur gc-dur name) observations))))))))))
84 (let ((fmt " ~5d (~{~10d~^ ~}) (~{~7d~^ ~}) ~a~%"))
85 (format t "~&Top 15 worst by max time to stop-the-world:~%")
86 (let ((list (sort (copy-list observations) #'> :key (lambda (x) (third (second x))))))
87 (dotimes (i 15) (apply #'format t fmt (pop list))))
88 (format t "~&Top 15 worst by avg GC duration (excluding STW delay):~%")
89 (let ((list (sort (copy-list observations) #'> :key (lambda (x) (second (third x))))))
90 (dotimes (i 15) (apply #'format t fmt (pop list))))
91 (format t "~&Top 15 worst by max GC duration (excluding STW delay):~%")
92 (let ((list (sort (copy-list observations) #'> :key (lambda (x) (third (third x))))))
93 (dotimes (i 15) (apply #'format t fmt (pop list)))))))
94 (defun parallel-execute-tests (files max-jobs vop-summary-stats-p)
95 (format t "Using ~D processes~%" max-jobs)
96 ;; Interleave the order in which all tests are launched rather than
97 ;; starting them in the batches that filtering places them in.
98 (let ((subprocess-count 0)
99 (subprocess-list nil)
100 (aggregate-vop-usage (make-hash-table :test #'equal))
101 ;; Start timing only after all the DIRECTORY calls are done (above)
102 (start-time (get-internal-real-time))
103 (missing-usage)
104 (losing))
105 (labels ((wait ()
106 ;; Though far from elegant, this is an easy way to figure out
107 ;; which tests are getting stuck, if any are.
108 #+nil (format t "Runner is waiting on: ~S~%" subprocess-list)
109 (multiple-value-bind (pid status) (sb-posix:wait)
110 (decf subprocess-count)
111 (let ((process (assoc pid subprocess-list))
112 (code (ash status -8))
113 (et))
114 (unless process
115 (warn "Whoa! Process ~D is an unexpected child" pid)
116 (return-from wait (wait)))
117 (setq subprocess-list (delete process subprocess-list))
118 (destructuring-bind ((filename . iteration) start-time) (cdr process)
119 (setq et (elapsed-time-from start-time))
120 (when vop-summary-stats-p
121 (unless (sum-vop-usage (format nil "~a/~a.vop-usage" *logdir* filename) t)
122 (when (or (search ".pure" filename) (search ".impure" filename))
123 (push filename missing-usage))))
124 (cond ((eq code 104)
125 (format t "~A: success (~d msec)~%" filename et))
127 (format t "~A~@[[~d]~]: status ~D (~d msec)~%"
128 filename iteration code et)
129 (push (list filename iteration pid) losing)))))))
130 (elapsed-time-from (when) ; return value in milliseconds
131 (round (- (get-internal-real-time) when)
132 (/ internal-time-units-per-second 1000)))
133 (sum-vop-usage (input deletep)
134 (with-open-file (f input :if-does-not-exist nil)
135 ;; No vop coverage file from shell script tests or any test
136 ;; that internally uses (EXIT) for whatever reason.
137 (when f
138 (loop (let ((line (read-line f nil)))
139 (unless line (return))
140 (let ((count (read-from-string line))
141 (name (read-from-string line t nil :start 8)))
142 (incf (gethash name aggregate-vop-usage 0) count))))
143 (when deletep (delete-file f))))))
144 (dolist (file files)
145 (when (>= subprocess-count max-jobs)
146 (wait))
147 (let ((pid (sb-posix:fork)))
148 (when (zerop pid)
149 (let ((mylog (format nil "~a/~a~@[-~d~]" *logdir* (car file) (cdr file))))
150 ;; FILE is (filename . test-iteration)
151 (with-open-file (stream mylog :direction :output :if-exists :supersede)
152 (alien-funcall (extern-alien "dup2" (function int int int))
153 (sb-sys:fd-stream-fd stream) 1)
154 (alien-funcall (extern-alien "dup2" (function int int int)) 1 2))
155 (setq file (car file))
156 #+test-aprof
157 (unless (search "allocator.pure" file)
158 (sb-aprof::aprof-start)
159 (proclaim '(optimize sb-c:instrument-consing)))
160 ;; Send this to the log file, not the terminal
161 (setq *debug-io* (make-two-way-stream (make-concatenated-stream)
162 *error-output*))
163 (cond ((string= (pathname-type file) "test")
164 (let ((shell (or #+sunos (posix-getenv "SHELL") "/bin/sh")))
165 ;; exec the shell with the test and we'll pick up its exit code
166 (alien-funcall (extern-alien "execl" (function int c-string c-string
167 &optional c-string unsigned))
168 shell shell
169 (concatenate 'string file ".sh") 0))
170 ;; if exec fails, just exit with a wrong (not 104) status
171 (alien-funcall (extern-alien "_exit" (function (values) int)) 0))
173 #+test-sprof (sb-sprof:start-profiling :sample-interval .001)
174 (setq sb-c::*static-vop-usage-counts* (make-hash-table :synchronized t))
175 (let ((*features* (cons :parallel-test-runner *features*)))
176 (pure-runner (list (concatenate 'string file ".lisp"))
177 (if (search "-cload" file) 'cload-test 'load-test)
178 (make-broadcast-stream)))
179 (when vop-summary-stats-p
180 (with-open-file (output (format nil "~a/~a.vop-usage" *logdir* file)
181 :direction :output)
182 ;; There's an impure test that screws with the default pprint dispatch
183 ;; table such that integers don't print normally (and can't be parsed).
184 (let ((*print-pretty* nil))
185 (sb-int:dohash ((name count) sb-c::*static-vop-usage-counts*)
186 (format output "~7d \"~s\"~%" count name)))))
187 #+test-sprof (sb-sprof:stop-profiling)
188 #+test-aprof (progn (sb-aprof::aprof-stop) (sb-aprof:aprof-show))
189 (when (member :allocator-metrics sb-impl:+internal-features+)
190 (format t "~2&Allocator histogram:~%")
191 (funcall (intern "PRINT-ALLOCATOR-HISTOGRAM" "SB-THREAD")))
192 #+test-sprof (sb-sprof:report :type :flat)
193 #+tlsf-stress (cl-user::tlsf-dump)
194 (gc :gen 7)
195 (when (and (not (unexpected-failures)) *delete-logs*) (delete-file mylog))
196 (exit :code (if (unexpected-failures) 1 104))))))
197 (format t "~A: pid ~d~@[ (trial ~d)~]~%" (car file) pid (cdr file))
198 (incf subprocess-count)
199 (push (list pid file (get-internal-real-time)) subprocess-list)))
200 (loop (if (plusp subprocess-count) (wait) (return)))
202 (when vop-summary-stats-p
203 (dolist (result '("vop-usage.txt" "vop-usage-combined.txt"))
204 (let (list)
205 (sb-int:dohash ((name vop) sb-c::*backend-template-names*)
206 (declare (ignore vop))
207 (push (cons (gethash (prin1-to-string name) aggregate-vop-usage 0) name) list))
208 (with-open-file (output (format nil "~a/~a" *logdir* result)
209 :direction :output
210 :if-exists :supersede)
211 (dolist (cell (sort list #'> :key #'car))
212 (format output "~7d ~s~%" (car cell) (cdr cell)))))
213 (sum-vop-usage "../obj/from-self/warm-vop-usage.txt" nil)))
215 (format t "~&Total realtime: ~d msec~%" (elapsed-time-from start-time))
216 (when missing-usage
217 (format t "~&Missing vop-usage:~{ ~a~}~%" missing-usage))
219 (when losing
220 (format t "~&Failing files:~%")
221 (dolist (filename losing)
222 (format t "~A~%" filename))
223 (format t "==== Logs are in ~a ====~%" *logdir*)
224 (exit :code 1)))))
225 (when (string= (car *posix-argv*) "--filter")
226 (setq *filter* (cadr *posix-argv*))
227 (setq *posix-argv* (cddr *posix-argv*)))
228 (if (<= (length *posix-argv*) 1)
229 ;; short form - test all files. Argument N if specified is the number of
230 ;; tasks, defaulting to half the machine's reported cores
231 (let ((jobs
232 (if *posix-argv*
233 (parse-integer (car *posix-argv*))
234 (or #+unix
235 (floor (sb-alien:alien-funcall
236 (sb-alien:extern-alien "sysconf"
237 (function sb-alien:long sb-alien:int))
238 sb-unix::sc-nprocessors-onln)
240 (error "How many jobs?")))))
241 (parallel-execute-tests
242 (mapcar #'list
243 (choose-order
244 (mapcar #'pathname-name
245 (append (pure-load-files)
246 (pure-cload-files)
247 (impure-load-files)
248 (impure-cload-files)
249 (sh-files)))))
250 jobs
252 #+(and linux sb-thread 64-bit) (summarize-gc-times))
253 ;; long form
254 (let ((jobs 4)
255 (runs-per-test 1)
256 (argv *posix-argv*))
257 (loop (cond ((string= (car argv) "-j")
258 (setq jobs (parse-integer (cadr argv))
259 argv (cddr argv)))
260 ((string= (car argv) "--runs_per_test")
261 (setq runs-per-test (parse-integer (cadr argv))
262 argv (cddr argv)))
264 (return))))
265 (when (>= runs-per-test 10)
266 (format t "~&Note: will not keep logs of passing runs~%")
267 (setq *delete-logs* t))
268 (setq argv
269 (mapcar (lambda (file)
270 (probe-file file) ; for effect
271 (pathname-name file)) argv))
272 (parallel-execute-tests
273 (loop for trial-number from 1 to runs-per-test
274 nconc (mapcar (lambda (file)
275 (cons file
276 (when (> runs-per-test 1) trial-number)))
277 argv))
278 jobs
279 nil)))