Fix interrupt-thread error detection.
[sbcl.git] / tools-for-build / editcore.lisp
blobcd0d9568938f94e9f8210852d563e9b8d0efe52e
1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
11 ;;;;
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel :load-toplevel :execute)
19 (require :sb-posix)) ; for mmap
21 (defpackage "SB-EDITCORE"
22 (:use "CL" "SB-VM" "SB-INT" "SB-EXT" "SB-KERNEL" "SB-SYS"
23 "SB-ALIEN")
24 (:import-from "SB-ALIEN-INTERNALS"
25 #:alien-type-bits #:parse-alien-type
26 #:alien-value-sap #:alien-value-type)
27 (:import-from "SB-C" #:+backend-page-bytes+)
28 (:import-from "SB-VM" #:map-objects-in-range #:reconstitute-object
29 #:%closure-callee)
30 (:import-from "SB-DISASSEM" #:get-inst-space #:find-inst
31 #:make-dstate #:%make-segment
32 #:seg-virtual-location #:seg-length #:seg-sap-maker
33 #:map-segment-instructions
34 #:dstate-next-addr #:dstate-cur-offs)
35 (:import-from "SB-X86-64-ASM" #:near-jump-displacement)
36 (:import-from "SB-IMPL" #:package-hashtable #:package-%name
37 #:package-hashtable-cells
38 #:hash-table-table #:hash-table-number-entries))
40 (in-package "SB-EDITCORE")
42 (declaim (muffle-conditions compiler-note))
44 (eval-when (:execute)
45 (setq *evaluator-mode* :compile))
47 (defconstant core-magic
48 (logior (ash (char-code #\S) 24)
49 (ash (char-code #\B) 16)
50 (ash (char-code #\C) 8)
51 (char-code #\L)))
53 (defconstant build-id-core-entry-type-code 3860)
54 (defconstant new-directory-core-entry-type-code 3861)
55 (defconstant initial-fun-core-entry-type-code 3863)
56 (defconstant page-table-core-entry-type-code 3880)
57 (defconstant end-core-entry-type-code 3840)
59 (defconstant dynamic-core-space-id 1)
60 (defconstant static-core-space-id 2)
61 (defconstant immobile-fixedobj-core-space-id 4)
62 (defconstant immobile-varyobj-core-space-id 5)
64 (defglobal +noexec-stack-note+ ".section .note.GNU-stack, \"\", @progbits")
66 (defstruct (core-space ; "space" is a CL symbol
67 (:conc-name space-)
68 (:constructor make-space (id addr data-page page-adjust nwords)))
69 id addr data-page page-adjust nwords)
70 (defun space-size (space) (* (space-nwords space) n-word-bytes))
71 (defun space-end (space) (+ (space-addr space) (space-size space)))
72 (defun space-nbytes-aligned (space)
73 (logandc2 (+ (space-size space) (1- +backend-page-bytes+))
74 (1- +backend-page-bytes+)))
75 (defun space-physaddr (space spaces)
76 (sap+ (car spaces) (* (space-data-page space) +backend-page-bytes+)))
78 ;;; Given ADDR which is an address in the target core, return the address at which
79 ;;; ADDR is currently mapped while performing the split.
80 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
81 (defun translate-ptr (addr spaces)
82 (let ((space (find addr (cdr spaces) :key #'space-addr :test #'>=)))
83 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
84 ;; macroize or something.
85 (+ (sap-int (car spaces)) (* (space-data-page space) +backend-page-bytes+)
86 (- addr (space-addr space)))))
88 ;;;
89 (defun get-space (id spaces)
90 (find id (cdr spaces) :key #'space-id))
91 (defun compute-nil-object (spaces)
92 (let ((space (get-space static-core-space-id spaces)))
93 (%make-lisp-obj (logior (space-addr space) #x17))))
95 ;;; Given OBJ which is tagged pointer into the target core, translate it into
96 ;;; the range at which the core is now mapped during execution of this tool,
97 ;;; so that host accessors can dereference its slots.
98 ;;; Use extreme care: while it works to use host accessors on the target core,
99 ;;; we must avoid type checks on instances because LAYOUTs need translation.
100 ;;; Printing boxed objects from the target core will almost always crash.
101 (defun translate (obj spaces)
102 (%make-lisp-obj (translate-ptr (get-lisp-obj-address obj) spaces)))
104 (defstruct (core-sym (:copier nil) (:predicate nil)
105 (:constructor make-core-sym (package name external)))
106 (package nil)
107 (name nil :read-only t)
108 (external nil :read-only t))
110 (defun c-name (lispname pp-state)
111 (when (and (symbolp lispname)
112 (eq (symbol-package lispname) *cl-package*))
113 (return-from c-name (concatenate 'string "cl:" (string-downcase lispname))))
114 ;; Get rid of junk from LAMBDAs
115 (setq lispname
116 (named-let recurse ((x lispname))
117 (cond ((typep x '(cons (eql lambda)))
118 (let ((args (second x)))
119 `(lambda ,(if args sb-c::*debug-name-sharp* "()")
120 ,@(recurse (cddr x)))))
121 ((eq x :in) "in")
122 ((consp x)
123 (recons x (recurse (car x)) (recurse (cdr x))))
124 (t x))))
126 ;; Shorten obnoxiously long printed representations of methods
127 ;; by changing FAST-METHOD to METHOD (because who cares?)
128 ;; and shorten
129 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
130 ;; to
131 ;; (method my-method-name (type-name))
132 ;; I suspect that can use DWARF info to provide even more description,
133 ;; though I also suspect it's relatively unambiguous anyway
134 ;; especially given that file information is available separately.
135 (flet ((unpackageize (thing)
136 (when (typep thing 'core-sym)
137 (setf (core-sym-package thing) nil))
138 thing))
139 (when (typep lispname '(cons (eql sb-pcl::fast-method)))
140 (setq lispname `(method ,@(cdr lispname)))
141 (setf (second lispname) (unpackageize (second lispname)))
142 (dolist (qual (car (last lispname)))
143 (unpackageize qual))))
145 ;; Perform backslash escaping on the exploded string
146 ;; Strings were stringified without surrounding quotes,
147 ;; but there might be quotes embedded anywhere, so escape them,
148 ;; and also remove newlines and non-ASCII.
149 (let ((characters
150 (mapcan (lambda (char)
151 (cond ((not (base-char-p char)) (list #\?))
152 ((member char '(#\\ #\")) (list #\\ char))
153 ((eql char #\newline) (list #\_))
154 (t (list char))))
155 (coerce (if (and (stringp lispname)
156 ;; L denotes a symbol which can not be global on macOS.
157 (char= (char lispname 0) #\L))
158 (concatenate 'string "_" lispname)
159 (write-to-string lispname
160 :pretty t :pprint-dispatch (cdr pp-state)
161 ;; FIXME: should be :level 1, however see
162 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
163 :escape t :level 2 :length 5
164 :case :downcase :gensym nil
165 :right-margin 10000))
166 'list))))
167 (let* ((string (coerce characters 'string))
168 (occurs (incf (gethash string (car pp-state) 0))))
169 (if (> occurs 1)
170 (concatenate 'string string "_" (write-to-string occurs))
171 string))))
173 (defmethod print-object ((sym core-sym) stream)
174 (format stream "~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
175 (core-sym-package sym)
176 (core-sym-external sym)
177 (core-sym-name sym)))
179 (defun fun-name-from-core (fun spaces core-nil packages
180 &aux (name (%simple-fun-name fun)))
181 (named-let recurse ((depth 0) (x name))
182 (unless (= (logand (get-lisp-obj-address x) 3) 3)
183 (return-from recurse x)) ; immediate object
184 (when (eq x core-nil)
185 (return-from recurse nil))
186 (setq x (translate x spaces))
187 (ecase (lowtag-of x)
188 (#.list-pointer-lowtag
189 (cons (recurse (1+ depth) (car x))
190 (recurse (1+ depth) (cdr x))))
191 ((#.instance-pointer-lowtag #.fun-pointer-lowtag) "?")
192 (#.other-pointer-lowtag
193 (cond
194 ((symbolp x)
195 (let ((name (translate (symbol-name x) spaces)))
196 (if (eq (symbol-package x) core-nil) ; uninterned
197 (string-downcase name)
198 (let* ((package (truly-the package
199 (translate (symbol-package x) spaces)))
200 (package-name (translate (package-%name package) spaces))
201 (compute-externals
202 (not (or (string= package-name "KEYWORD")
203 (string= package-name "COMMON-LISP"))))
204 (externals (if compute-externals
205 (gethash package-name packages)
206 t)))
207 (unless externals
208 (dovector (x (translate
209 (package-hashtable-cells
210 (truly-the package-hashtable
211 (translate (package-external-symbols package)
212 spaces)))
213 spaces))
214 (unless (fixnump x)
215 (push (if (eq x core-nil) ; random packages can export NIL. wow.
216 "NIL"
217 (translate (symbol-name (translate x spaces)) spaces))
218 externals)))
219 (setf externals (coerce externals 'vector)
220 (gethash package-name packages) externals))
221 ;; The name-cleaning code wants to compare against symbols
222 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
223 ;; Other than that, we avoid finding host symbols
224 ;; because the externalness could be wrong and misleading.
225 ;; It's a very subtle point, but best to get it right.
226 (cond ((string= package-name "COMMON-LISP")
227 (find-symbol name *cl-package*))
228 ((and (string= package-name "KEYWORD")
229 (find-symbol name package-name))) ; if existing keyword, use it
230 ((string= package-name "SB-PCL")
231 (or (find-symbol name "SB-PCL")
232 (error "FIND-SYMBOL failed? ~S ~S" name package-name)))
234 (make-core-sym (if (string= package-name "KEYWORD") nil package-name)
235 name
236 (if compute-externals
237 (find name externals :test 'string=)
238 t))))))))
239 ((stringp x) x)
240 (t "?"))))))
242 (defstruct (core-state
243 (:conc-name "CS-")
244 (:predicate nil)
245 (:copier nil)
246 (:constructor make-core-state
247 (code-space-start code-space-end
248 fixedobj-space-start fixedobj-space-end
249 &aux (inst-space (get-inst-space))
250 (call-inst (find-inst #b11101000 inst-space))
251 (jmp-inst (find-inst #b11101001 inst-space))
252 (pop-inst (find-inst #x5d inst-space)))))
253 (code-space-start 0 :type fixnum :read-only t)
254 (code-space-end 0 :type fixnum :read-only t)
255 (fixedobj-space-start 0 :type fixnum :read-only t)
256 (fixedobj-space-end 0 :type fixnum :read-only t)
257 (dstate (make-dstate nil) :read-only t)
258 (seg (%make-segment :sap-maker (lambda () (error "Bad sap maker"))
259 :virtual-location 0) :read-only t)
260 (fixup-addrs nil)
261 (call-inst nil :read-only t)
262 (jmp-inst nil :read-only t)
263 (pop-inst nil :read-only t))
265 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
266 ;;; to STREAM. SIZE specifies which direcive to emit.
267 ;;; EXCEPTIONS specify offsets at which a specific string should be
268 ;;; written to the file in lieu of memory contents, useful for emitting
269 ;;; expressions involving the assembler '.' symbol (the current PC).
270 (defun emit-asm-directives (size sap count stream &optional exceptions)
271 (declare (optimize speed))
272 (declare (stream stream))
273 (let ((*print-base* 16)
274 (string-buffer (make-array 18 :element-type 'base-char))
275 (fmt #.(coerce "0x%lx" 'base-string))
276 (per-line 0))
277 (declare ((integer 0 32) per-line)
278 (fixnum count))
279 string-buffer fmt
280 (ecase size
281 (:qword
282 (format stream " .quad")
283 (dotimes (i count)
284 (declare ((unsigned-byte 20) i))
285 (declare (simple-vector exceptions))
286 (write-char (if (> per-line 0) #\, #\space) stream)
287 (acond ((and (< i (length exceptions)) (aref exceptions i))
288 (write-string it stream))
290 #+nil
291 (let ((len
292 ;; output-reasonable-integer-in-base is so slow comparated
293 ;; to printf() that the second-most amount of time spent
294 ;; writing the asm file occurs in that function.
295 ;; Unbelievable that we can't do better than that.
296 (with-pinned-objects (string-buffer fmt)
297 (alien-funcall
298 (extern-alien "snprintf"
299 (function int system-area-pointer unsigned system-area-pointer unsigned))
300 (vector-sap string-buffer)
301 (length string-buffer)
302 (vector-sap fmt)
303 (sap-ref-word sap (* i n-word-bytes))))))
304 (write-string string-buffer stream :end len))
305 (write-string "0x" stream)
306 (write (sap-ref-word sap (* i n-word-bytes)) :stream stream)))
307 (when (and (= (incf per-line) 16) (< (1+ i) count))
308 (format stream "~% .quad")
309 (setq per-line 0))))
310 (:byte
311 (aver (not exceptions))
312 (format stream " .byte")
313 (dotimes (i count)
314 (write-char (if (> per-line 0) #\, #\space) stream)
315 (write-string "0x" stream)
316 (write (sap-ref-8 sap i) :stream stream)
317 (when (and (= (incf per-line) 32) (< (1+ i) count))
318 (format stream "~% .byte")
319 (setq per-line 0))))))
320 (terpri stream))
322 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count)
323 (emit-asm-directives :qword
324 (sap+ (code-instructions code-component)
325 (- (* sb-vm:code-constants-offset sb-vm:n-word-bytes)))
326 sb-vm:code-constants-offset
327 output #())
328 (let ((list (loop for i from 2 by 2 repeat count
329 collect
330 (let* ((location (translate (svref vector (1+ i)) spaces))
331 (offset (car location))
332 (nbytes (- (1+ (cdr location)) offset))
333 (name (translate
334 (symbol-name (translate (svref vector i) spaces))
335 spaces)))
336 (list* offset name nbytes)))))
337 (loop for (offset name . nbytes) in (sort list #'< :key #'car)
338 do (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
339 (format nil "~(\"~a\"~)" name) (if emit-sizes nbytes))
340 (emit-asm-directives
341 :qword
342 (sap+ (code-instructions code-component) offset)
343 (ceiling nbytes sb-vm:n-word-bytes)
344 output #()))))
346 (defun code-fixup-locs (code spaces)
347 (let ((locs (sb-vm::%code-fixups code)))
348 (unless (eql locs 0)
349 (sb-c::unpack-code-fixup-locs
350 (if (fixnump locs) locs (translate locs spaces))))))
352 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
353 ;;; all instructions that should be emitted using assembly language
354 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
355 ;;; - function prologue instructions that setup the call frame
356 ;;; - jmp/call instructions that transfer control to the fixedoj space
357 ;;; delimited by bounds in STATE.
358 ;;; At execution time the function will have virtual address LOAD-ADDR.
359 (defun list-annotated-instructions (sap length state load-addr emit-cfi)
360 (let ((dstate (cs-dstate state))
361 (seg (cs-seg state))
362 (call-inst (cs-call-inst state))
363 (jmp-inst (cs-jmp-inst state))
364 (pop-inst (cs-pop-inst state))
365 (next-fixup-addr
366 (or (car (cs-fixup-addrs state)) most-positive-word))
367 (list))
368 (setf (seg-virtual-location seg) load-addr
369 (seg-length seg) length
370 (seg-sap-maker seg) (lambda () sap))
371 ;; KLUDGE: "8f 45 08" is the standard prologue
372 (when (and emit-cfi (= (logand (sap-ref-32 sap 0) #xFFFFFF) #x08458f))
373 (push (list* 0 3 "pop" "8(%rbp)") list))
374 (map-segment-instructions
375 (lambda (dchunk inst)
376 (cond
377 ((< next-fixup-addr (dstate-next-addr dstate))
378 (let ((operand (sap-ref-32 sap (- next-fixup-addr load-addr))))
379 (when (<= (cs-code-space-start state) operand (cs-code-space-end state))
380 (aver (eql (sap-ref-8 sap (- next-fixup-addr load-addr 1)) #xB8)) ; mov rax, imm32
381 (push (list* (dstate-cur-offs dstate) 5 "mov" operand) list)))
382 (pop (cs-fixup-addrs state))
383 (setq next-fixup-addr (or (car (cs-fixup-addrs state)) most-positive-word)))
384 ((or (eq inst jmp-inst) (eq inst call-inst))
385 (let ((target-addr (+ (near-jump-displacement dchunk dstate)
386 (dstate-next-addr dstate))))
387 (when (<= (cs-fixedobj-space-start state)
388 target-addr
389 (cs-fixedobj-space-end state))
390 (push (list* (dstate-cur-offs dstate)
391 5 ; length
392 (if (eq inst call-inst) "call" "jmp")
393 target-addr)
394 list))))
395 ((and (eq inst pop-inst) (eq (logand dchunk #xFF) #x5D))
396 (push (list* (dstate-cur-offs dstate) 1 "pop" "%rbp") list))))
398 dstate
399 nil)
400 (nreverse list)))
402 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
403 ;;; of memory at PADDR (physical addr) to STREAM.
404 ;;; The function's address as per the core file is VADDR.
405 ;;; (Its eventual address is indeterminate)
406 ;;; If EMIT-CFI is true, then also emit cfi directives.
408 ;;; Notice that we can use one fewer cfi directive than usual because
409 ;;; Lisp always carries a frame pointer as set up by the caller.
411 ;;; C convention
412 ;;; ============
413 ;;; pushq %rbp
414 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
415 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
416 ;;; movq %rsp, %rbp
417 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
419 ;;; Lisp convention
420 ;;; ===============
421 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
422 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
423 ;;; .cfi_def_cfa 6, 16
424 ;;; .cfi_offset 6, -16
426 ;;; Of course there is a flip-side to this: unwinders think that the new frame
427 ;;; is already begun in the caller. Interruption between these two instructions:
428 ;;; MOV RBP, RSP / CALL #xzzzzz
429 ;;; will show the backtrace as if two invocations of the caller are on stack.
430 ;;; This is tricky to fix because while we can relativize the CFA to the
431 ;;; known frame size, we can't do that based only on a disassembly.
433 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state)
434 (when emit-cfi
435 (format stream " .cfi_startproc~%"))
436 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
437 ;; bytes to be written as an assembly language instruction rather than opaquely,
438 ;; thereby affecting the ELF data (cfi or relocs) produced.
439 (let ((instructions
440 (list-annotated-instructions (int-sap paddr) count core-state vaddr emit-cfi))
441 (ptr paddr))
442 (symbol-macrolet ((cur-offset (- ptr paddr)))
443 (loop
444 (let ((until (if instructions (caar instructions) count)))
445 ;; if we're not aligned, then write some number of bytes
446 ;; to cause alignment. But do not write past the next offset
447 ;; that needs to be written as an instruction.
448 (when (logtest ptr #x7) ; unaligned
449 (let ((n (min (- (nth-value 1 (ceiling ptr 8)))
450 (- until cur-offset))))
451 (aver (<= 0 n 7))
452 (emit-asm-directives :byte (int-sap ptr) n stream)
453 (incf ptr n)))
454 ;; Now we're either aligned to a multiple of 8, or the current
455 ;; offset needs to be written as a textual instruction.
456 (let ((n (- until cur-offset)))
457 (aver (>= n 0))
458 (multiple-value-bind (qwords remainder) (floor n 8)
459 (when (plusp qwords)
460 (emit-asm-directives :qword (int-sap ptr) qwords stream #())
461 (incf ptr (* qwords 8)))
462 (when (plusp remainder)
463 (emit-asm-directives :byte (int-sap ptr) remainder stream)
464 (incf ptr remainder))))
465 ;; If the current offset is COUNT, we're done.
466 (when (= cur-offset count) (return))
467 (aver (= cur-offset until))
468 (destructuring-bind (length opcode . operand) (cdr (pop instructions))
469 (when (cond ((member opcode '("jmp" "call") :test #'string=)
470 (format stream " ~A 0x~X~%" opcode operand))
471 ((string= opcode "pop")
472 (format stream " ~A ~A~%" opcode operand)
473 (cond ((string= operand "8(%rbp)")
474 (format stream " .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
475 ((string= operand "%rbp")
476 ;(format stream " .cfi_def_cfa 7, 8~%")
477 nil)
478 (t)))
479 ((string= opcode "mov")
480 (format stream " mov $(__lisp_code_start+0x~x),%eax~%"
481 (- operand (cs-code-space-start core-state))))
482 (t))
483 (bug "Random annotated opcode ~S" opcode))
484 (incf ptr length))
485 (when (= cur-offset count) (return))))))
486 (when emit-cfi
487 (format stream " .cfi_endproc~%")))
489 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
490 ;;; TODO: relocate fdefns and instances of standard-generic-function
491 ;;; into the space that is dumped into an ELF section.
492 (defun write-assembler-text
493 (spaces fixedobj-range output
494 &optional emit-sizes (emit-cfi t)
495 &aux (code-space (get-space immobile-varyobj-core-space-id spaces))
496 (code-space-start (space-addr code-space)) ; target virtual address
497 (code-space-end (+ code-space-start (space-size code-space)))
498 (code-addr code-space-start)
499 (core-state
500 (make-core-state code-space-start code-space-end
501 (car fixedobj-range)
502 (+ (car fixedobj-range) (cdr fixedobj-range))))
503 (total-code-size 0)
504 (pp-state (cons (make-hash-table :test 'equal)
505 ;; copy no entries for macros/special-operators (flet, etc)
506 (sb-pretty::make-pprint-dispatch-table)))
507 (packages (make-hash-table :test 'equal))
508 (core-nil (compute-nil-object spaces))
509 (prev-namestring "")
510 (n-linker-relocs 0)
511 end-loc)
512 (set-pprint-dispatch 'string
513 ;; Write strings without string quotes
514 (lambda (stream string) (write-string string stream))
516 (cdr pp-state))
517 (labels ((ldsym-quote (name)
518 (concatenate 'string '(#\") name '(#\")))
519 (dumpwords (addr count stream &optional (exceptions #()) logical-addr)
520 (let ((sap (int-sap addr)))
521 (aver (sap>= sap (car spaces)))
522 ;; Make intra-code-space pointers computed at link time
523 (dotimes (i (if logical-addr count 0))
524 (unless (and (< i (length exceptions)) (svref exceptions i))
525 (let ((word (sap-ref-word sap (* i n-word-bytes))))
526 (when (and (= (logand word 3) 3) ; is a pointer
527 (<= code-space-start word (1- code-space-end))) ; to code space
528 #+nil
529 (format t "~&~(~x: ~x~)~%" (+ logical-addr (* i n-word-bytes))
530 word)
531 (incf n-linker-relocs)
532 (setf exceptions (adjust-array exceptions (max (length exceptions) (1+ i))
533 :initial-element nil)
534 (svref exceptions i)
535 (format nil "__lisp_code_start+0x~x"
536 (- word code-space-start)))))))
537 (emit-asm-directives :qword sap count stream exceptions)))
538 (make-code-obj (addr)
539 (let ((translation (translate-ptr addr spaces)))
540 (aver (= (%widetag-of (sap-ref-word (int-sap translation) 0))
541 code-header-widetag))
542 (%make-lisp-obj (logior translation other-pointer-lowtag))))
543 (calc-obj-size (code)
544 ;; No need to pin - it's not managed by GC
545 (nth-value 2
546 (reconstitute-object
547 (ash (logandc2 (get-lisp-obj-address code) lowtag-mask)
548 (- n-fixnum-tag-bits)))))
549 (%widetag-of (word)
550 (logand word widetag-mask)))
551 (format output " .text~% .file \"sbcl.core\"
552 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
554 ;; Scan the assembly routines.
555 (let* ((code-component (make-code-obj code-addr))
556 (size (calc-obj-size code-component))
557 (hashtable
558 (truly-the hash-table
559 (translate (car (translate (%code-debug-info code-component)
560 spaces))
561 spaces)))
562 (cells (translate (hash-table-table hashtable) spaces))
563 (count (hash-table-number-entries hashtable)))
564 (incf code-addr size)
565 (setf total-code-size size)
566 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count))
568 (loop
569 (when (>= code-addr code-space-end) (return))
570 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
571 (let* ((code (make-code-obj code-addr))
572 (objsize (calc-obj-size code)))
573 (setq end-loc (+ code-addr objsize))
574 (incf total-code-size objsize)
575 (cond
576 ((< (code-header-words code) 4) ; filler object
577 ;; Shouldn't occur unless defrag was not performed
578 (format output "#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
579 code-addr
580 simple-array-unsigned-byte-8-widetag
581 (ash (- objsize (* 2 n-word-bytes))
582 n-fixnum-tag-bits)
583 (- objsize (* 2 n-word-bytes))))
584 ((%instancep (%code-debug-info code)) ; assume it's a COMPILED-DEBUG-INFO
585 (aver (plusp (code-n-entries code)))
586 (let* ((source
587 (sb-c::compiled-debug-info-source
588 (truly-the sb-c::compiled-debug-info
589 (translate (%code-debug-info code) spaces))))
590 (namestring
591 (sb-c::debug-source-namestring
592 (truly-the sb-c::debug-source (translate source spaces)))))
593 (setq namestring (if (eq namestring core-nil)
594 "sbcl.core"
595 (translate namestring spaces)))
596 (unless (string= namestring prev-namestring)
597 (format output " .file \"~a\"~%" namestring)
598 (setq prev-namestring namestring)))
599 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code) lowtag-mask))
600 (boxed-end (+ code-physaddr
601 (ash (code-header-words code) word-shift)))
602 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code 0))
603 lowtag-mask)))
604 (format output "#x~x:~%" code-addr)
605 (dumpwords code-physaddr (code-header-words code) output #() code-addr)
606 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
607 (when (> first-fun boxed-end)
608 (dumpwords boxed-end (floor (- first-fun boxed-end) n-word-bytes)
609 output)))
610 (setf (cs-fixup-addrs core-state)
611 (mapcar (lambda (x)
612 (+ code-addr (ash (code-header-words code) word-shift) x))
613 (code-fixup-locs code spaces)))
614 ;; Loop over all embedded functions.
615 ;; Because simple-fun offsets are relative to the code start
616 ;; (and not in a linked list as they were in the past),
617 ;; iteratation in a "foreign" code object works just fine,
618 ;; subject to the caution about reading boxed words.
619 (dotimes (j (code-n-entries code))
620 (let* ((fun (%code-entry-point code j))
621 (fun-addr (logandc2 (get-lisp-obj-address fun) lowtag-mask))
622 (end (if (< (1+ j) (code-n-entries code))
623 (logandc2 (get-lisp-obj-address (%code-entry-point code (1+ j)))
624 lowtag-mask)
625 (+ (translate-ptr code-addr spaces) objsize)))
626 (entrypoint
627 (+ fun-addr (* simple-fun-code-offset n-word-bytes)))
628 (size (- end entrypoint))
629 (lispname (fun-name-from-core fun spaces core-nil packages))
630 (quotname (ldsym-quote (c-name lispname pp-state))))
631 ;; Globalize the C symbol only if the name is a legal function designator
632 ;; per the standard definition.
633 ;; This is a technique to try to avoid appending a uniquifying suffix
634 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
635 (format output "~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
636 (typep lispname '(or symbol core-sym (cons (eql setf))))
637 quotname emit-sizes)
638 (dumpwords fun-addr
639 simple-fun-code-offset output
640 (load-time-value
641 `#(nil ,(format nil ".+~D"
642 (* (1- simple-fun-code-offset)
643 n-word-bytes)))
645 nil)
646 (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
647 quotname (if emit-sizes size))
648 ;; entrypoint is the current physical address.
649 ;; Also pass in the virtual address in the core
650 ;; (which will differ from the actual load-time address)
651 (emit-lisp-function entrypoint
652 (+ code-addr (- entrypoint
653 (logandc2 (get-lisp-obj-address code)
654 lowtag-mask)))
655 size output emit-cfi core-state)))
656 (terpri output)
657 ;; All fixups should have been consumed by writing the code out
658 (aver (null (cs-fixup-addrs core-state))))
660 (error "Strange code component: ~S" code)))
661 (incf code-addr objsize))))
663 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
664 (format output "~:[~; .size __lisp_code_start, 0x~x~%~]__lisp_code_end:~%"
665 emit-sizes total-code-size)
667 ;; Pad so that non-lisp code can't be colocated on a GC page.
668 ;; (Lack of Lisp object headers in C code is the issue)
669 (let ((aligned-end (logandc2 (+ end-loc 4095) 4095)))
670 (when (> aligned-end end-loc)
671 (multiple-value-bind (nwords remainder)
672 (floor (- aligned-end end-loc) n-word-bytes)
673 (aver (>= nwords 2))
674 (aver (zerop remainder))
675 (decf nwords 2)
676 (format output " .quad ~d, ~d # (simple-array fixnum (~d))~%"
677 simple-array-fixnum-widetag
678 (ash nwords n-fixnum-tag-bits)
679 nwords)
680 (when (plusp nwords)
681 (format output " .fill ~d~%" (* nwords n-word-bytes))))))
682 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
683 (values total-code-size n-linker-relocs))
685 (defun extract-required-c-symbols (spaces fixedobj-range asm-file &optional (verbose nil))
686 (flet ((find-target-symbol (package-name symbol-name)
687 (let* ((physaddr (translate-ptr (car fixedobj-range) spaces))
688 (limit (+ physaddr (car fixedobj-range))))
689 (loop
690 (when (>= physaddr limit) (bug "Can't find symbol"))
691 (multiple-value-bind (obj tag size)
692 (reconstitute-object (ash physaddr (- n-fixnum-tag-bits)))
693 (when (and (= tag symbol-widetag)
694 (string= symbol-name (translate (symbol-name obj) spaces))
695 (%instancep (symbol-package obj))
696 (string= package-name
697 (translate
698 (package-%name
699 (truly-the package (translate (symbol-package obj) spaces)))
700 spaces)))
701 (return (%make-lisp-obj (logior physaddr other-pointer-lowtag))))
702 (incf physaddr size)))))
703 (symbol-fdefn-fun (symbol)
704 (let ((vector (translate (symbol-info-vector symbol) spaces)))
705 ;; TODO: allow for (plist . info-vector) in the info slot
706 (aver (simple-vector-p vector))
707 (translate (fdefn-fun (translate (info-vector-fdefn vector) spaces))
708 spaces))))
709 (let ((linkage-info
710 (translate (symbol-global-value (find-target-symbol "SB-SYS" "*LINKAGE-INFO*"))
711 spaces))
712 (dyn-syminfo
713 (symbol-fdefn-fun
714 (find-target-symbol "SB-SYS" "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"))))
715 (aver (= (get-closure-length dyn-syminfo) 3))
716 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo 1) spaces))
717 (ht2 (translate (%closure-index-ref dyn-syminfo 0) spaces))
718 (table0 (translate (hash-table-table (truly-the hash-table linkage-info))
719 spaces))
720 (table1 (translate (hash-table-table (truly-the hash-table ht1)) spaces))
721 (table2 (translate (hash-table-table (truly-the hash-table ht2)) spaces))
722 (linkage)
723 (foreign))
724 (declare (simple-vector table0 table1 table2))
725 (flet ((show (x)
726 (push x foreign)
727 (when verbose
728 (format t "~A~%" x)))
729 (scan-table (table name fun &aux (n 0) (end (length table)))
730 (when verbose
731 (format t "~&~A:~%~A~%"
732 name (make-string (1+ (length name)) :initial-element #\-)))
733 (do ((i 2 (+ i 2)))
734 ((= i end))
735 (let ((val (svref table i)))
736 (unless (unbound-marker-p val)
737 (funcall fun (translate val spaces))
738 (incf n))))
739 (when verbose
740 (format t "TOTAL: ~D entries~2%" n))))
741 (scan-table table0 "linkage info"
742 (lambda (x &aux (type #\T))
743 (when (consp x)
744 (setq x (translate (car x) spaces) type #\D))
745 (format asm-file " .long ~A~%" x)
746 (when verbose
747 (format t "~A ~A~%" type x))
748 (push x linkage)))
749 (scan-table table1 "defined" #'show)
750 (scan-table table2 "undefined" #'show)
751 (let ((diff1 ; linkage not in foreign
752 (remove-if (lambda (x) (member x foreign :test #'string=)) linkage))
753 (diff2 ; foreign not in linkage
754 (remove-if (lambda (x) (member x linkage :test #'string=)) foreign)))
755 (when verbose
756 (format t "~&Linkage not in foreign:~%~S~%" diff1)
757 (format t "~&Foreign not in linkage:~%~S~%" diff2))
758 )))))
759 (terpri asm-file))
761 ;;;; ELF file I/O
763 (defconstant +sht-null+ 0)
764 (defconstant +sht-progbits+ 1)
765 (defconstant +sht-symtab+ 2)
766 (defconstant +sht-strtab+ 3)
767 (defconstant +sht-rela+ 4)
768 (defconstant +sht-rel+ 9)
770 (define-alien-type elf64-ehdr
771 (struct elf64-edhr
772 (ident (array unsigned-char 16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
773 (type (unsigned 16)) ; 1 0
774 (machine (unsigned 16)) ; 3E 0
775 (version (unsigned 32)) ; 1 0 0 0
776 (entry unsigned) ; 0 0 0 0 0 0 0 0
777 (phoff unsigned) ; 0 0 0 0 0 0 0 0
778 (shoff unsigned) ;
779 (flags (unsigned 32)) ; 0 0 0 0
780 (ehsize (unsigned 16)) ; 40 0
781 (phentsize (unsigned 16)) ; 0 0
782 (phnum (unsigned 16)) ; 0 0
783 (shentsize (unsigned 16)) ; 40 0
784 (shnum (unsigned 16)) ; n 0
785 (shstrndx (unsigned 16)))) ; n 0
786 (define-alien-type elf64-shdr
787 (struct elf64-shdr
788 (name (unsigned 32))
789 (type (unsigned 32))
790 (flags (unsigned 64))
791 (addr (unsigned 64))
792 (off (unsigned 64))
793 (size (unsigned 64))
794 (link (unsigned 32))
795 (info (unsigned 32))
796 (addralign (unsigned 64))
797 (entsize (unsigned 64))))
798 (define-alien-type elf64-sym
799 (struct elf64-sym
800 (name (unsigned 32))
801 (info (unsigned 8))
802 (other (unsigned 8))
803 (shndx (unsigned 16))
804 (value unsigned)
805 (size unsigned)))
806 (define-alien-type elf64-rela
807 (struct elf64-rela
808 (offset (unsigned 64))
809 (info (unsigned 64))
810 (addend (signed 64))))
812 (defun make-elf64-sym (name info)
813 (let ((a (make-array 24 :element-type '(unsigned-byte 8))))
814 (with-pinned-objects (a)
815 (setf (sap-ref-32 (vector-sap a) 0) name
816 (sap-ref-8 (vector-sap a) 4) info))
819 ;;; Return two values: an octet vector comprising a string table
820 ;;; and an alist which maps string to offset in the table.
821 (defun string-table (strings)
822 (let* ((length (+ (1+ (length strings)) ; one more null than there are strings
823 (reduce #'+ strings :key #'length))) ; data length
824 (bytes (make-array length :element-type '(unsigned-byte 8)
825 :initial-element 0))
826 (index 1)
827 (alist))
828 (dolist (string strings)
829 (push (cons string index) alist)
830 (replace bytes (map 'vector #'char-code string) :start1 index)
831 (incf index (1+ (length string))))
832 (cons (nreverse alist) bytes)))
834 (defun write-alien (alien size stream)
835 (dotimes (i size)
836 (write-byte (sap-ref-8 (alien-value-sap alien) i) stream)))
838 (defun copy-bytes (in-stream out-stream nbytes
839 &optional (buffer
840 (make-array 1024 :element-type '(unsigned-byte 8))))
841 (loop (let ((chunksize (min (length buffer) nbytes)))
842 (aver (eql (read-sequence buffer in-stream :end chunksize) chunksize))
843 (write-sequence buffer out-stream :end chunksize)
844 (when (zerop (decf nbytes chunksize)) (return)))))
846 ;;; core header should be an array of words in '.rodata', not a 32K page
847 (defconstant core-header-size +backend-page-bytes+) ; stupidly large (FIXME)
849 ;;; Write everything except for the core file itself into OUTPUT-STREAM
850 ;;; and leave the stream padded to a 4K boundary ready to receive data.
851 (defun prepare-elf (core-size relocs output)
852 (let* ((sym-entry-size 24)
853 (reloc-entry-size 24)
854 (core-align 4096)
855 (sections
856 `#((:core "lisp.core" ,+sht-progbits+ 0 0 0 ,core-align 0)
857 (:sym ".symtab" ,+sht-symtab+ 0 3 1 8 ,sym-entry-size)
858 ; section with the strings -- ^ ^ -- 1+ highest local symbol
859 (:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
860 (:rel ".relalisp.core" ,+sht-rela+ 0 2 1 8 ,reloc-entry-size)
861 ; symbol table -- ^ ^ -- for which section
862 (:note ".note.GNU-stack" ,+sht-null+ 0 0 0 1 0)))
863 (string-table
864 (string-table (append '("__lisp_code_start") (map 'list #'second sections))))
865 (strings (cdr string-table))
866 (padded-strings-size (logandc2 (+ (length strings) 7) 7))
867 (ehdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil)) 8))
868 (shdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil)) 8))
869 (symbols-size (* 2 sym-entry-size))
870 (shdrs-start (+ ehdr-size symbols-size padded-strings-size))
871 (shdrs-end (+ shdrs-start (* (1+ (length sections)) shdr-size)))
872 (relocs-size (* (length relocs) reloc-entry-size))
873 (relocs-end (+ shdrs-end relocs-size))
874 (core-start (logandc2 (+ relocs-end (1- core-align)) (1- core-align)))
875 (ident #.(coerce '(#x7F #x45 #x4C #x46 2 1 1 0 0 0 0 0 0 0 0 0)
876 '(array (unsigned-byte 8) 1))))
878 (with-alien ((ehdr elf64-ehdr))
879 (dotimes (i (ceiling ehdr-size n-word-bytes))
880 (setf (sap-ref-word (alien-value-sap ehdr) (* i n-word-bytes)) 0))
881 (with-pinned-objects (ident)
882 (%byte-blt (vector-sap ident) 0 (alien-value-sap ehdr) 0 16))
883 (setf (slot ehdr 'type) 1
884 (slot ehdr 'machine) #x3E
885 (slot ehdr 'version) 1
886 (slot ehdr 'shoff) shdrs-start
887 (slot ehdr 'ehsize) ehdr-size
888 (slot ehdr 'shentsize) shdr-size
889 (slot ehdr 'shnum) (1+ (length sections)) ; section 0 is implied
890 (slot ehdr 'shstrndx) (1+ (position :str sections :key #'car)))
891 (write-alien ehdr ehdr-size output))
893 ;; Write symbol table
894 (aver (eql (file-position output) ehdr-size))
895 (write-sequence (make-elf64-sym 0 0) output)
896 ;; The symbol name index is always 1 by construction. The type is #x10
897 ;; given: #define STB_GLOBAL 1
898 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
899 ;; which places the binding in the high 4 bits of the low byte.
900 (write-sequence (make-elf64-sym 1 #x10) output)
902 ;; Write string table
903 (aver (eql (file-position output) (+ ehdr-size symbols-size)))
904 (write-sequence strings output) ; an octet vector at this point
905 (dotimes (i (- padded-strings-size (length strings)))
906 (write-byte 0 output))
908 ;; Write section headers
909 (aver (eql (file-position output) shdrs-start))
910 (with-alien ((shdr elf64-shdr))
911 (dotimes (i (ceiling shdr-size n-word-bytes)) ; Zero-fill
912 (setf (sap-ref-word (alien-value-sap shdr) (* i n-word-bytes)) 0))
913 (dotimes (i (1+ (length sections)))
914 (when (plusp i) ; Write the zero-filled header as section 0
915 (destructuring-bind (key name type flags link info alignment entsize)
916 (aref sections (1- i))
917 (multiple-value-bind (offset size)
918 (ecase key
919 (:sym (values ehdr-size symbols-size))
920 (:str (values (+ ehdr-size symbols-size) (length strings)))
921 (:rel (values shdrs-end relocs-size))
922 (:core (values core-start core-size))
923 (:note (values 0 0)))
924 (let ((name (cdr (assoc name (car string-table) :test #'string=))))
925 (setf (slot shdr 'name) name
926 (slot shdr 'type) type
927 (slot shdr 'flags) flags
928 (slot shdr 'off) offset
929 (slot shdr 'size) size
930 (slot shdr 'link) link
931 (slot shdr 'info) info
932 (slot shdr 'addralign) alignment
933 (slot shdr 'entsize) entsize)))))
934 (write-alien shdr shdr-size output)))
936 ;; Write relocations
937 (aver (eql (file-position output) shdrs-end))
938 (let ((buf (make-array relocs-size :element-type '(unsigned-byte 8)))
939 (ptr 0))
940 (with-alien ((rela elf64-rela))
941 (dovector (reloc relocs)
942 (destructuring-bind (place addend . kind) reloc
943 (setf (slot rela 'offset) place
944 (slot rela 'info) (logior (ash 1 32) kind) ; 1 = symbol index
945 (slot rela 'addend) addend))
946 (setf (%vector-raw-bits buf (+ ptr 0)) (sap-ref-word (alien-value-sap rela) 0)
947 (%vector-raw-bits buf (+ ptr 1)) (sap-ref-word (alien-value-sap rela) 8)
948 (%vector-raw-bits buf (+ ptr 2)) (sap-ref-word (alien-value-sap rela) 16))
949 (incf ptr 3)))
950 (write-sequence buf output))
952 ;; Write padding
953 (dotimes (i (- core-start (file-position output)))
954 (write-byte 0 output))
955 (aver (eq (file-position output) core-start))))
957 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
958 ;;; whose code space is subject to link-time relocation.
959 (defconstant R_X86_64_64 1) ; /* Direct 64 bit */
960 (defconstant R_X86_64_PC32 2) ; /* PC relative 32 bit signed */
961 (defconstant R_X86_64_32 10) ; /* Direct 32 bit zero extended */
963 (defun collect-relocations (spaces fixups &aux (print nil))
964 (binding* (((static-start static-end)
965 (let ((space (get-space static-core-space-id spaces)))
966 (values (space-addr space) (space-end space))))
967 ((code-start code-end)
968 (let ((space (get-space immobile-varyobj-core-space-id spaces)))
969 (values (space-addr space) (space-end space))))
970 ;; the distance between fixedobj space address (i.e following the pages of
971 ;; dynamic space) in the ELF section which has a presumptive address of 0
972 ;; due to being non-loaded, to where it will be later mapped by coreparse
973 (fixedobj-space-displacement
974 (let ((space (get-space immobile-fixedobj-core-space-id spaces)))
975 (- (* (1+ (space-data-page space)) +backend-page-bytes+) ; 1+ = core header
976 (space-addr space))))
977 (n-abs 0)
978 (n-rel 0))
979 (labels
980 ((abs-fixup (core-offs referent)
981 (incf n-abs)
982 (when print
983 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
984 (setf (sap-ref-word (car spaces) core-offs) 0)
985 (vector-push-extend `(,(+ core-header-size core-offs)
986 ,(- referent code-start) . ,R_X86_64_64)
987 fixups))
988 (abs32-fixup (core-offs referent)
989 (incf n-abs)
990 (when print
991 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
992 (setf (sap-ref-32 (car spaces) core-offs) 0)
993 (vector-push-extend `(,(+ core-header-size core-offs)
994 ,(- referent code-start) . ,R_X86_64_32)
995 fixups))
996 (rel-fixup (core-offs referent)
997 (incf n-rel)
998 (when print
999 (format t "~x = 0x~(~x~): (r)~%" core-offs (core-to-logical core-offs) #+nil referent))
1000 (setf (sap-ref-32 (car spaces) core-offs) 0)
1001 (vector-push-extend `(,(+ core-header-size core-offs)
1002 ,(- referent code-start) . ,R_X86_64_PC32)
1003 fixups))
1004 (in-code-space-p (ptr)
1005 (and (<= code-start ptr) (< ptr code-end)))
1006 ;; Given a address which is an offset into the data pages of the target core,
1007 ;; compute the logical address which that offset would be mapped to.
1008 ;; For example core address 0 is the virtual address of static space.
1009 (core-to-logical (core-offs &aux (page (floor core-offs +backend-page-bytes+)))
1010 (dolist (space (cdr spaces)
1011 (bug "Can't translate core offset ~x using ~x"
1012 core-offs spaces))
1013 (let* ((page0 (space-data-page space))
1014 (nwords (space-nwords space))
1015 (id (space-id space))
1016 (npages (ceiling nwords (/ +backend-page-bytes+ n-word-bytes))))
1017 (when (and (<= page0 page (+ page0 (1- npages)))
1018 (/= id immobile-varyobj-core-space-id))
1019 (return (+ (space-addr space)
1020 (* (- page page0) +backend-page-bytes+)
1021 (logand core-offs (1- +backend-page-bytes+))))))))
1022 (scanptrs (obj wordindex-min wordindex-max &aux (n-fixups 0))
1023 (do* ((base-addr (logandc2 (get-lisp-obj-address obj) lowtag-mask))
1024 (sap (int-sap base-addr))
1025 ;; core-offs is the offset in the lisp.core ELF section.
1026 (core-offs (- base-addr (sap-int (car spaces))))
1027 (i wordindex-min (1+ i)))
1028 ((> i wordindex-max) n-fixups)
1029 (let ((ptr (sap-ref-word sap (ash i word-shift))))
1030 (when (and (= (logand ptr 3) 3) (in-code-space-p ptr))
1031 (abs-fixup (+ core-offs (ash i word-shift)) ptr)
1032 (incf n-fixups)))))
1033 (scanptr (obj wordindex)
1034 (plusp (scanptrs obj wordindex wordindex))) ; trivial wrapper
1035 (scan-obj (obj widetag size vaddr
1036 &aux (core-offs (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1037 (sap-int (car spaces))))
1038 (nwords (ceiling size n-word-bytes)))
1039 (when (listp obj)
1040 (scanptrs obj 0 1)
1041 (return-from scan-obj))
1042 (case widetag
1043 (#.instance-widetag
1044 (let ((layout (truly-the layout
1045 (translate (%instance-layout obj) spaces))))
1046 ;; FIXME: even though the layout is supplied, it's not good enough,
1047 ;; because the macro references the layout-bitmap which might
1048 ;; be a bignum which is a pointer into the logical core address.
1049 (unless (fixnump (layout-bitmap layout))
1050 (error "Can't process bignum bitmap"))
1051 (do-instance-tagged-slot (i obj :layout layout)
1052 (scanptr obj (1+ i))))
1053 (return-from scan-obj))
1054 (#.simple-vector-widetag
1055 (let ((len (length (the simple-vector obj))))
1056 (when (eql (logand (get-header-data obj) #xFF) vector-valid-hashing-subtype)
1057 (do ((i 2 (+ i 2)) (needs-rehash))
1058 ((= i len)
1059 (when needs-rehash
1060 (setf (svref obj 1) 1)))
1061 (when (scanptr obj (+ vector-data-offset i))
1062 (format t "~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj))
1063 (setq needs-rehash t))
1064 (scanptr obj (+ vector-data-offset i 1)))
1065 (return-from scan-obj))
1066 (setq nwords (+ len 2))))
1067 (#.fdefn-widetag
1068 (scanptrs obj 1 2)
1069 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1070 (int-sap (+ (- (get-lisp-obj-address obj) other-pointer-lowtag)
1071 (ash fdefn-raw-addr-slot word-shift))))
1072 ;; what the fdefn's logical PC will be
1073 (fdefn-logical-pc (+ vaddr (ash fdefn-raw-addr-slot word-shift)))
1074 (rel32off (signed-sap-ref-32 fdefn-pc-sap 1))
1075 (target (+ fdefn-logical-pc 1 rel32off))
1076 (space-displacement
1077 (if (<= static-start vaddr static-end)
1078 (+ (- sb-vm:static-space-start) +backend-page-bytes+)
1079 fixedobj-space-displacement)))
1080 (when (in-code-space-p target)
1081 ;; This addend needs to account for the fact that the location
1082 ;; where fixup occurs is not where the fdefn will actually exist.
1083 (rel-fixup (+ core-offs (ash 3 word-shift) 1)
1084 (+ target space-displacement))))
1085 (return-from scan-obj))
1086 ((#.closure-widetag #.funcallable-instance-widetag)
1087 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj))
1088 (- n-word-bytes fun-pointer-lowtag))))
1089 (when (in-code-space-p word)
1090 (abs-fixup (+ core-offs (ash 1 word-shift)) word)))
1091 (when (eq widetag funcallable-instance-widetag)
1092 (let ((layout (truly-the layout
1093 (translate (%funcallable-instance-layout obj) spaces))))
1094 (unless (fixnump (layout-bitmap layout))
1095 (error "Can't process bignum bitmap"))
1096 (let ((bitmap (layout-bitmap layout)))
1097 (unless (eql bitmap -1)
1098 ;; tagged slots precede untagged slots,
1099 ;; so integer-length is the count of tagged slots.
1100 (setq nwords (1+ (integer-length bitmap))))))))
1101 ;; mixed boxed/unboxed objects
1102 (#.code-header-widetag
1103 (dolist (loc (code-fixup-locs obj spaces))
1104 (let ((val (sap-ref-32 (code-instructions obj) loc)))
1105 (when (in-code-space-p val)
1106 (abs32-fixup (sap- (sap+ (code-instructions obj) loc) (car spaces))
1107 val))))
1108 (dotimes (i (code-n-entries obj))
1109 (scanptrs (%code-entry-point obj i) 2 5))
1110 (setq nwords (code-header-words obj)))
1111 ;; boxed objects that can reference code/simple-funs
1112 ((#.value-cell-widetag #.symbol-widetag #.weak-pointer-widetag))
1114 (return-from scan-obj)))
1115 (scanptrs obj 1 (1- nwords))))
1116 (dolist (space (reverse (cdr spaces)))
1117 (let* ((logical-addr (space-addr space))
1118 (size (space-size space))
1119 (physical-addr (space-physaddr space spaces))
1120 (physical-end (sap+ physical-addr size))
1121 (vaddr-translation (+ (- (sap-int physical-addr)) logical-addr)))
1122 (unless (or (= (space-id space) immobile-varyobj-core-space-id)
1123 (= (space-id space) -92 #|dynamic-core-space-id|#))
1124 (format t "~&scan range vaddr=~12x:~12x paddr=~12x:~12x~%"
1125 logical-addr (+ logical-addr size)
1126 (sap-int physical-addr) (sap-int physical-end))
1127 (dx-flet ((visit (obj widetag size)
1128 ;; Compute the object's intended virtual address
1129 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1130 vaddr-translation)))
1131 (scan-obj obj widetag size vaddr))))
1132 (map-objects-in-range
1133 #'visit
1134 (ash (sap-int physical-addr) (- n-fixnum-tag-bits))
1135 (ash (sap-int physical-end) (- n-fixnum-tag-bits))))
1136 (when (plusp (logior n-abs n-rel))
1137 (format t "space @ ~x: ~d absolute + ~d relative fixups~%"
1138 logical-addr n-abs n-rel))
1139 (setq n-abs 0 n-rel 0))))))
1140 (format t "total of ~D linker fixups~%" (length fixups))
1141 fixups)
1143 ;;;;
1145 (macrolet ((do-core-header-entry (((id-var len-var ptr-var) buffer) &body body)
1146 `(let ((,ptr-var 1))
1147 (loop
1148 (let ((,id-var (%vector-raw-bits ,buffer ,ptr-var))
1149 (,len-var (%vector-raw-bits ,buffer (1+ ,ptr-var))))
1150 (incf ,ptr-var 2)
1151 (decf ,len-var 2)
1152 (when (= ,id-var end-core-entry-type-code) (return))
1153 ,@body
1154 (incf ,ptr-var ,len-var)))))
1155 (do-directory-entry (((index-var start-index input-nbytes) buffer) &body body)
1156 `(let ((words-per-dirent 5))
1157 (multiple-value-bind (n-entries remainder)
1158 (floor ,input-nbytes words-per-dirent)
1159 (aver (zerop remainder))
1160 (symbol-macrolet ((id (%vector-raw-bits ,buffer index))
1161 (nwords (%vector-raw-bits ,buffer (+ index 1)))
1162 (data-page (%vector-raw-bits ,buffer (+ index 2)))
1163 (addr (%vector-raw-bits ,buffer (+ index 3)))
1164 (npages (%vector-raw-bits ,buffer (+ index 4))))
1165 (do ((,index-var ,start-index (+ ,index-var words-per-dirent)))
1166 ((= ,index-var (+ ,start-index (* n-entries words-per-dirent))))
1167 ,@body)))))
1168 (with-mapped-core ((sap-var start npages stream) &body body)
1169 `(let (,sap-var)
1170 (unwind-protect
1171 (progn
1172 (setq ,sap-var
1173 (sb-posix:mmap nil
1174 (* ,npages +backend-page-bytes+)
1175 (logior sb-posix:prot-read sb-posix:prot-write)
1176 sb-posix:map-private
1177 (sb-sys:fd-stream-fd ,stream)
1178 ;; Skip the core header
1179 (+ ,start +backend-page-bytes+)))
1180 ,@body)
1181 (when ,sap-var
1182 (sb-posix:munmap ,sap-var (* ,npages +backend-page-bytes+)))))))
1184 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1185 ;;; separate it into pieces.
1186 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1187 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1188 ;;; The ".core" file is a native core file used for starting a binary that
1189 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1190 ;;; is for linking in to a binary that needs no "--core" argument.
1191 (defun split-core
1192 (input-pathname asm-pathname
1193 &key emit-sizes (verbose t)
1194 &aux (split-core-pathname
1195 (merge-pathnames (make-pathname :type "core") asm-pathname))
1196 (elf-core-pathname
1197 (merge-pathnames
1198 (make-pathname :name (concatenate 'string (pathname-name asm-pathname) "-core")
1199 :type "o")
1200 asm-pathname))
1201 (core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
1202 (original-total-npages 0)
1203 (core-offset 0)
1204 (page-adjust 0)
1205 (code-start-fixup-ofs 0) ; where to fixup the core header
1206 (spaces)
1207 (copy-actions)
1208 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1209 (relocs (make-array 100000 :adjustable t :fill-pointer 0)))
1211 ;; Remove old files
1212 (ignore-errors (delete-file asm-pathname))
1213 (ignore-errors (delete-file split-core-pathname))
1214 (ignore-errors (delete-file elf-core-pathname))
1215 ;; Ensure that all files can be opened
1216 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
1217 (with-open-file (asm-file asm-pathname :direction :output :if-exists :supersede)
1218 (with-open-file (split-core split-core-pathname :direction :output
1219 :element-type '(unsigned-byte 8) :if-exists :supersede)
1220 (read-sequence core-header input)
1221 (cond ((= (%vector-raw-bits core-header 0) core-magic))
1222 (t ; possible embedded core
1223 (file-position input (- (file-length input)
1224 (* 2 n-word-bytes)))
1225 (aver (eql (read-sequence core-header input) (* 2 n-word-bytes)))
1226 (aver (= (%vector-raw-bits core-header 1) core-magic))
1227 (setq core-offset (%vector-raw-bits core-header 0))
1228 (when verbose
1229 (format t "~&embedded core starts at #x~x into input~%" core-offset))
1230 (file-position input core-offset)
1231 (read-sequence core-header input)
1232 (aver (= (%vector-raw-bits core-header 0) core-magic))))
1233 (do-core-header-entry ((id len ptr) core-header)
1234 (case id
1235 (#.build-id-core-entry-type-code
1236 (when verbose
1237 (let ((string (make-string (%vector-raw-bits core-header ptr)
1238 :element-type 'base-char)))
1239 (%byte-blt core-header (* (1+ ptr) n-word-bytes) string 0 (length string))
1240 (format t "Build ID [~a]~%" string))))
1241 (#.new-directory-core-entry-type-code
1242 (do-directory-entry ((index ptr len) core-header)
1243 (incf original-total-npages npages)
1244 (push (make-space id addr data-page page-adjust nwords) spaces)
1245 (when verbose
1246 (format t "id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1247 id data-page npages addr nwords
1248 (= id immobile-varyobj-core-space-id)))
1249 (cond ((= id immobile-varyobj-core-space-id)
1250 (setq code-start-fixup-ofs (+ index 3))
1251 ;; Keep this entry but delete the page count. We need to know
1252 ;; where the space was supposed to be mapped and at what size.
1253 ;; Subsequent core entries will need to adjust their start page
1254 ;; downward (just the PTEs's start page now).
1255 (setq page-adjust npages data-page 0 npages 0))
1257 ;; Keep track of where the fixedobj space wants to be.
1258 (when (= id immobile-fixedobj-core-space-id)
1259 (setq fixedobj-range (cons addr (ash nwords word-shift))))
1260 (when (plusp npages) ; enqueue
1261 (push (cons data-page (* npages +backend-page-bytes+))
1262 copy-actions))
1263 ;; adjust this entry's start page in the new core
1264 (decf data-page page-adjust)))))
1265 (#.page-table-core-entry-type-code
1266 (aver (= len 3))
1267 (symbol-macrolet ((nbytes (%vector-raw-bits core-header (1+ ptr)))
1268 (data-page (%vector-raw-bits core-header (+ ptr 2))))
1269 (aver (= data-page original-total-npages))
1270 (aver (= (ceiling (space-nwords
1271 (find dynamic-core-space-id spaces :key #'space-id))
1272 (/ +backend-page-bytes+ n-word-bytes))
1273 (%vector-raw-bits core-header ptr))) ; number of PTEs
1274 (when verbose
1275 (format t "PTE: page=~5x~40tbytes=~8x~%" data-page nbytes))
1276 (push (cons data-page nbytes) copy-actions)
1277 (decf data-page page-adjust)))))
1278 (let ((buffer (make-array +backend-page-bytes+
1279 :element-type '(unsigned-byte 8)))
1280 (filepos))
1281 ;; Write the new core file
1282 (write-sequence core-header split-core)
1283 (dolist (action (reverse copy-actions)) ; nondestructive
1284 ;; page index convention assumes absence of core header.
1285 ;; i.e. data page 0 is the file page immediately following the core header
1286 (let ((offset (* (1+ (car action)) +backend-page-bytes+))
1287 (nbytes (cdr action)))
1288 (when verbose
1289 (format t "File offset ~10x: ~10x bytes~%" offset nbytes))
1290 (setq filepos (+ core-offset offset))
1291 (file-position input filepos)
1292 (copy-bytes input split-core nbytes buffer)))
1293 ;; Trailer (runtime options and magic number)
1294 (let ((nbytes (read-sequence buffer input)))
1295 ;; expect trailing magic number
1296 (let ((ptr (floor (- nbytes n-word-bytes) n-word-bytes)))
1297 (aver (= (%vector-raw-bits buffer ptr) core-magic)))
1298 ;; File position of the core header needs to be set to 0
1299 ;; regardless of what it was
1300 (setf (%vector-raw-bits buffer 4) 0)
1301 (when verbose
1302 (format t "Trailer words:(~{~X~^ ~})~%"
1303 (loop for i below (floor nbytes n-word-bytes)
1304 collect (%vector-raw-bits buffer i))))
1305 (write-sequence buffer split-core :end nbytes)
1306 (finish-output split-core))
1307 ;; Sanity test
1308 (aver (= (+ core-offset
1309 (* page-adjust +backend-page-bytes+)
1310 (file-length split-core))
1311 (file-length input)))
1312 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1313 (file-position input filepos)))
1315 ;; Map the original core file to memory
1316 (with-mapped-core (sap core-offset original-total-npages input)
1317 (let* ((data-spaces
1318 (delete immobile-varyobj-core-space-id (reverse spaces)
1319 :key #'space-id))
1320 (map (cons sap (sort (copy-list spaces) #'> :key #'space-addr)))
1321 (pte-nbytes (cdar copy-actions)))
1322 (collect-relocations map relocs)
1323 (with-open-file (output elf-core-pathname
1324 :direction :output :if-exists :supersede
1325 :element-type '(unsigned-byte 8))
1326 (vector-push-extend
1327 `(,(ash code-start-fixup-ofs word-shift) 0 . ,R_X86_64_64) relocs)
1328 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces))
1329 +backend-page-bytes+ ; core header
1330 pte-nbytes)
1331 relocs output)
1332 (setf (%vector-raw-bits core-header code-start-fixup-ofs) 0)
1333 (write-sequence core-header output) ; Copy prepared header
1334 (force-output output)
1335 (dolist (space data-spaces) ; Copy pages from memory
1336 (let ((start (space-physaddr space map))
1337 (size (space-nbytes-aligned space)))
1338 (aver (eql (sb-unix:unix-write (sb-sys:fd-stream-fd output)
1339 start 0 size)
1340 size))))
1341 (format t "Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1342 pte-nbytes pte-nbytes (floor pte-nbytes 10))
1343 (copy-bytes input output pte-nbytes)) ; Copy PTEs from input
1345 ;; There's no relation between emit-sizes and which section to put
1346 ;; C symbol references in, however it's a safe bet that if sizes
1347 ;; are supported then so is the .rodata directive.
1348 (format asm-file (if emit-sizes " .rodata~%" " .data~%"))
1349 (extract-required-c-symbols map fixedobj-range asm-file)
1350 (write-assembler-text map fixedobj-range asm-file emit-sizes)))
1352 (format asm-file "~% ~A~%" +noexec-stack-note+))))
1354 ) ; end MACROLET
1356 ;;;;
1358 (defun cl-user::elfinate (&optional (args (cdr sb-ext:*posix-argv*)))
1359 (cond ((string= (car args) "split")
1360 (pop args)
1361 (let ((sizes (string= (car args) "--sizes")))
1362 (when sizes
1363 (pop args))
1364 (destructuring-bind (input asm) args
1365 (split-core input asm :emit-sizes sizes))))
1366 #+nil
1367 ((string= (car args) "relocate")
1368 (destructuring-bind (input output binary start-sym) (cdr args)
1369 (relocate-core
1370 input output binary (parse-integer start-sym :radix 16))))
1372 (error "Unknown command: ~S" args))))
1374 ;; If loaded as a script, do this
1375 (eval-when (:execute)
1376 (let ((args (cdr sb-ext:*posix-argv*)))
1377 (when args
1378 (let ((*print-pretty* nil))
1379 (format t "Args: ~S~%" args)
1380 (cl-user::elfinate args)))))