1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
19 (require :sb-posix
)) ; for mmap
21 (defpackage "SB-EDITCORE"
22 (:use
"CL" "SB-VM" "SB-INT" "SB-EXT" "SB-KERNEL" "SB-SYS"
24 (:import-from
"SB-ALIEN-INTERNALS"
25 #:alien-type-bits
#:parse-alien-type
26 #:alien-value-sap
#:alien-value-type
)
27 (:import-from
"SB-C" #:+backend-page-bytes
+)
28 (:import-from
"SB-VM" #:map-objects-in-range
#:reconstitute-object
30 (:import-from
"SB-DISASSEM" #:get-inst-space
#:find-inst
31 #:make-dstate
#:%make-segment
32 #:seg-virtual-location
#:seg-length
#:seg-sap-maker
33 #:map-segment-instructions
34 #:dstate-next-addr
#:dstate-cur-offs
)
35 (:import-from
"SB-X86-64-ASM" #:near-jump-displacement
)
36 (:import-from
"SB-IMPL" #:package-hashtable
#:package-%name
37 #:package-hashtable-cells
38 #:hash-table-table
#:hash-table-number-entries
))
40 (in-package "SB-EDITCORE")
42 (declaim (muffle-conditions compiler-note
))
45 (setq *evaluator-mode
* :compile
))
47 (defconstant core-magic
48 (logior (ash (char-code #\S
) 24)
49 (ash (char-code #\B
) 16)
50 (ash (char-code #\C
) 8)
53 (defconstant build-id-core-entry-type-code
3860)
54 (defconstant new-directory-core-entry-type-code
3861)
55 (defconstant initial-fun-core-entry-type-code
3863)
56 (defconstant page-table-core-entry-type-code
3880)
57 (defconstant end-core-entry-type-code
3840)
59 (defconstant dynamic-core-space-id
1)
60 (defconstant static-core-space-id
2)
61 (defconstant immobile-fixedobj-core-space-id
4)
62 (defconstant immobile-varyobj-core-space-id
5)
64 (defglobal +noexec-stack-note
+ ".section .note.GNU-stack, \"\", @progbits")
66 (defstruct (core-space ; "space" is a CL symbol
68 (:constructor make-space
(id addr data-page page-adjust nwords
)))
69 id addr data-page page-adjust nwords
)
70 (defun space-size (space) (* (space-nwords space
) n-word-bytes
))
71 (defun space-end (space) (+ (space-addr space
) (space-size space
)))
72 (defun space-nbytes-aligned (space)
73 (logandc2 (+ (space-size space
) (1- +backend-page-bytes
+))
74 (1- +backend-page-bytes
+)))
75 (defun space-physaddr (space spaces
)
76 (sap+ (car spaces
) (* (space-data-page space
) +backend-page-bytes
+)))
78 ;;; Given ADDR which is an address in the target core, return the address at which
79 ;;; ADDR is currently mapped while performing the split.
80 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
81 (defun translate-ptr (addr spaces
)
82 (let ((space (find addr
(cdr spaces
) :key
#'space-addr
:test
#'>=)))
83 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
84 ;; macroize or something.
85 (+ (sap-int (car spaces
)) (* (space-data-page space
) +backend-page-bytes
+)
86 (- addr
(space-addr space
)))))
89 (defun get-space (id spaces
)
90 (find id
(cdr spaces
) :key
#'space-id
))
91 (defun compute-nil-object (spaces)
92 (let ((space (get-space static-core-space-id spaces
)))
93 (%make-lisp-obj
(logior (space-addr space
) #x17
))))
95 ;;; Given OBJ which is tagged pointer into the target core, translate it into
96 ;;; the range at which the core is now mapped during execution of this tool,
97 ;;; so that host accessors can dereference its slots.
98 ;;; Use extreme care: while it works to use host accessors on the target core,
99 ;;; we must avoid type checks on instances because LAYOUTs need translation.
100 ;;; Printing boxed objects from the target core will almost always crash.
101 (defun translate (obj spaces
)
102 (%make-lisp-obj
(translate-ptr (get-lisp-obj-address obj
) spaces
)))
104 (defstruct (core-sym (:copier nil
) (:predicate nil
)
105 (:constructor make-core-sym
(package name external
)))
107 (name nil
:read-only t
)
108 (external nil
:read-only t
))
110 (defun c-name (lispname pp-state
)
111 (when (and (symbolp lispname
)
112 (eq (symbol-package lispname
) *cl-package
*))
113 (return-from c-name
(concatenate 'string
"cl:" (string-downcase lispname
))))
114 ;; Get rid of junk from LAMBDAs
116 (named-let recurse
((x lispname
))
117 (cond ((typep x
'(cons (eql lambda
)))
118 (let ((args (second x
)))
119 `(lambda ,(if args sb-c
::*debug-name-sharp
* "()")
120 ,@(recurse (cddr x
)))))
123 (recons x
(recurse (car x
)) (recurse (cdr x
))))
126 ;; Shorten obnoxiously long printed representations of methods
127 ;; by changing FAST-METHOD to METHOD (because who cares?)
129 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
131 ;; (method my-method-name (type-name))
132 ;; I suspect that can use DWARF info to provide even more description,
133 ;; though I also suspect it's relatively unambiguous anyway
134 ;; especially given that file information is available separately.
135 (flet ((unpackageize (thing)
136 (when (typep thing
'core-sym
)
137 (setf (core-sym-package thing
) nil
))
139 (when (typep lispname
'(cons (eql sb-pcl
::fast-method
)))
140 (setq lispname
`(method ,@(cdr lispname
)))
141 (setf (second lispname
) (unpackageize (second lispname
)))
142 (dolist (qual (car (last lispname
)))
143 (unpackageize qual
))))
145 ;; Perform backslash escaping on the exploded string
146 ;; Strings were stringified without surrounding quotes,
147 ;; but there might be quotes embedded anywhere, so escape them,
148 ;; and also remove newlines and non-ASCII.
150 (mapcan (lambda (char)
151 (cond ((not (base-char-p char
)) (list #\?))
152 ((member char
'(#\\ #\")) (list #\\ char
))
153 ((eql char
#\newline
) (list #\_
))
155 (coerce (if (and (stringp lispname
)
156 ;; L denotes a symbol which can not be global on macOS.
157 (char= (char lispname
0) #\L
))
158 (concatenate 'string
"_" lispname
)
159 (write-to-string lispname
160 :pretty t
:pprint-dispatch
(cdr pp-state
)
161 ;; FIXME: should be :level 1, however see
162 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
163 :escape t
:level
2 :length
5
164 :case
:downcase
:gensym nil
165 :right-margin
10000))
167 (let* ((string (coerce characters
'string
))
168 (occurs (incf (gethash string
(car pp-state
) 0))))
170 (concatenate 'string string
"_" (write-to-string occurs
))
173 (defmethod print-object ((sym core-sym
) stream
)
174 (format stream
"~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
175 (core-sym-package sym
)
176 (core-sym-external sym
)
177 (core-sym-name sym
)))
179 (defun fun-name-from-core (fun spaces core-nil packages
180 &aux
(name (%simple-fun-name fun
)))
181 (named-let recurse
((depth 0) (x name
))
182 (unless (= (logand (get-lisp-obj-address x
) 3) 3)
183 (return-from recurse x
)) ; immediate object
184 (when (eq x core-nil
)
185 (return-from recurse nil
))
186 (setq x
(translate x spaces
))
188 (#.list-pointer-lowtag
189 (cons (recurse (1+ depth
) (car x
))
190 (recurse (1+ depth
) (cdr x
))))
191 ((#.instance-pointer-lowtag
#.fun-pointer-lowtag
) "?")
192 (#.other-pointer-lowtag
195 (let ((name (translate (symbol-name x
) spaces
)))
196 (if (eq (symbol-package x
) core-nil
) ; uninterned
197 (string-downcase name
)
198 (let* ((package (truly-the package
199 (translate (symbol-package x
) spaces
)))
200 (package-name (translate (package-%name package
) spaces
))
202 (not (or (string= package-name
"KEYWORD")
203 (string= package-name
"COMMON-LISP"))))
204 (externals (if compute-externals
205 (gethash package-name packages
)
208 (dovector (x (translate
209 (package-hashtable-cells
210 (truly-the package-hashtable
211 (translate (package-external-symbols package
)
215 (push (if (eq x core-nil
) ; random packages can export NIL. wow.
217 (translate (symbol-name (translate x spaces
)) spaces
))
219 (setf externals
(coerce externals
'vector
)
220 (gethash package-name packages
) externals
))
221 ;; The name-cleaning code wants to compare against symbols
222 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
223 ;; Other than that, we avoid finding host symbols
224 ;; because the externalness could be wrong and misleading.
225 ;; It's a very subtle point, but best to get it right.
226 (cond ((string= package-name
"COMMON-LISP")
227 (find-symbol name
*cl-package
*))
228 ((and (string= package-name
"KEYWORD")
229 (find-symbol name package-name
))) ; if existing keyword, use it
230 ((string= package-name
"SB-PCL")
231 (or (find-symbol name
"SB-PCL")
232 (error "FIND-SYMBOL failed? ~S ~S" name package-name
)))
234 (make-core-sym (if (string= package-name
"KEYWORD") nil package-name
)
236 (if compute-externals
237 (find name externals
:test
'string
=)
242 (defstruct (core-state
246 (:constructor make-core-state
247 (code-space-start code-space-end
248 fixedobj-space-start fixedobj-space-end
249 &aux
(inst-space (get-inst-space))
250 (call-inst (find-inst #b11101000 inst-space
))
251 (jmp-inst (find-inst #b11101001 inst-space
))
252 (pop-inst (find-inst #x5d inst-space
)))))
253 (code-space-start 0 :type fixnum
:read-only t
)
254 (code-space-end 0 :type fixnum
:read-only t
)
255 (fixedobj-space-start 0 :type fixnum
:read-only t
)
256 (fixedobj-space-end 0 :type fixnum
:read-only t
)
257 (dstate (make-dstate nil
) :read-only t
)
258 (seg (%make-segment
:sap-maker
(lambda () (error "Bad sap maker"))
259 :virtual-location
0) :read-only t
)
261 (call-inst nil
:read-only t
)
262 (jmp-inst nil
:read-only t
)
263 (pop-inst nil
:read-only t
))
265 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
266 ;;; to STREAM. SIZE specifies which direcive to emit.
267 ;;; EXCEPTIONS specify offsets at which a specific string should be
268 ;;; written to the file in lieu of memory contents, useful for emitting
269 ;;; expressions involving the assembler '.' symbol (the current PC).
270 (defun emit-asm-directives (size sap count stream
&optional exceptions
)
271 (declare (optimize speed
))
272 (declare (stream stream
))
273 (let ((*print-base
* 16)
274 (string-buffer (make-array 18 :element-type
'base-char
))
275 (fmt #.
(coerce "0x%lx" 'base-string
))
277 (declare ((integer 0 32) per-line
)
282 (format stream
" .quad")
284 (declare ((unsigned-byte 20) i
))
285 (declare (simple-vector exceptions
))
286 (write-char (if (> per-line
0) #\
, #\space
) stream
)
287 (acond ((and (< i
(length exceptions
)) (aref exceptions i
))
288 (write-string it stream
))
292 ;; output-reasonable-integer-in-base is so slow comparated
293 ;; to printf() that the second-most amount of time spent
294 ;; writing the asm file occurs in that function.
295 ;; Unbelievable that we can't do better than that.
296 (with-pinned-objects (string-buffer fmt
)
298 (extern-alien "snprintf"
299 (function int system-area-pointer unsigned system-area-pointer unsigned
))
300 (vector-sap string-buffer
)
301 (length string-buffer
)
303 (sap-ref-word sap
(* i n-word-bytes
))))))
304 (write-string string-buffer stream
:end len
))
305 (write-string "0x" stream
)
306 (write (sap-ref-word sap
(* i n-word-bytes
)) :stream stream
)))
307 (when (and (= (incf per-line
) 16) (< (1+ i
) count
))
308 (format stream
"~% .quad")
311 (aver (not exceptions
))
312 (format stream
" .byte")
314 (write-char (if (> per-line
0) #\
, #\space
) stream
)
315 (write-string "0x" stream
)
316 (write (sap-ref-8 sap i
) :stream stream
)
317 (when (and (= (incf per-line
) 32) (< (1+ i
) count
))
318 (format stream
"~% .byte")
319 (setq per-line
0))))))
322 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count
)
323 (emit-asm-directives :qword
324 (sap+ (code-instructions code-component
)
325 (- (* sb-vm
:code-constants-offset sb-vm
:n-word-bytes
)))
326 sb-vm
:code-constants-offset
328 (let ((list (loop for i from
2 by
2 repeat count
330 (let* ((location (translate (svref vector
(1+ i
)) spaces
))
331 (offset (car location
))
332 (nbytes (- (1+ (cdr location
)) offset
))
334 (symbol-name (translate (svref vector i
) spaces
))
336 (list* offset name nbytes
)))))
337 (loop for
(offset name . nbytes
) in
(sort list
#'< :key
#'car
)
338 do
(format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
339 (format nil
"~(\"~a\"~)" name
) (if emit-sizes nbytes
))
342 (sap+ (code-instructions code-component
) offset
)
343 (ceiling nbytes sb-vm
:n-word-bytes
)
346 (defun code-fixup-locs (code spaces
)
347 (let ((locs (sb-vm::%code-fixups code
)))
349 (sb-c::unpack-code-fixup-locs
350 (if (fixnump locs
) locs
(translate locs spaces
))))))
352 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
353 ;;; all instructions that should be emitted using assembly language
354 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
355 ;;; - function prologue instructions that setup the call frame
356 ;;; - jmp/call instructions that transfer control to the fixedoj space
357 ;;; delimited by bounds in STATE.
358 ;;; At execution time the function will have virtual address LOAD-ADDR.
359 (defun list-annotated-instructions (sap length state load-addr emit-cfi
)
360 (let ((dstate (cs-dstate state
))
362 (call-inst (cs-call-inst state
))
363 (jmp-inst (cs-jmp-inst state
))
364 (pop-inst (cs-pop-inst state
))
366 (or (car (cs-fixup-addrs state
)) most-positive-word
))
368 (setf (seg-virtual-location seg
) load-addr
369 (seg-length seg
) length
370 (seg-sap-maker seg
) (lambda () sap
))
371 ;; KLUDGE: "8f 45 08" is the standard prologue
372 (when (and emit-cfi
(= (logand (sap-ref-32 sap
0) #xFFFFFF
) #x08458f
))
373 (push (list* 0 3 "pop" "8(%rbp)") list
))
374 (map-segment-instructions
375 (lambda (dchunk inst
)
377 ((< next-fixup-addr
(dstate-next-addr dstate
))
378 (let ((operand (sap-ref-32 sap
(- next-fixup-addr load-addr
))))
379 (when (<= (cs-code-space-start state
) operand
(cs-code-space-end state
))
380 (aver (eql (sap-ref-8 sap
(- next-fixup-addr load-addr
1)) #xB8
)) ; mov rax, imm32
381 (push (list* (dstate-cur-offs dstate
) 5 "mov" operand
) list
)))
382 (pop (cs-fixup-addrs state
))
383 (setq next-fixup-addr
(or (car (cs-fixup-addrs state
)) most-positive-word
)))
384 ((or (eq inst jmp-inst
) (eq inst call-inst
))
385 (let ((target-addr (+ (near-jump-displacement dchunk dstate
)
386 (dstate-next-addr dstate
))))
387 (when (<= (cs-fixedobj-space-start state
)
389 (cs-fixedobj-space-end state
))
390 (push (list* (dstate-cur-offs dstate
)
392 (if (eq inst call-inst
) "call" "jmp")
395 ((and (eq inst pop-inst
) (eq (logand dchunk
#xFF
) #x5D
))
396 (push (list* (dstate-cur-offs dstate
) 1 "pop" "%rbp") list
))))
402 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
403 ;;; of memory at PADDR (physical addr) to STREAM.
404 ;;; The function's address as per the core file is VADDR.
405 ;;; (Its eventual address is indeterminate)
406 ;;; If EMIT-CFI is true, then also emit cfi directives.
408 ;;; Notice that we can use one fewer cfi directive than usual because
409 ;;; Lisp always carries a frame pointer as set up by the caller.
414 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
415 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
417 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
421 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
422 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
423 ;;; .cfi_def_cfa 6, 16
424 ;;; .cfi_offset 6, -16
426 ;;; Of course there is a flip-side to this: unwinders think that the new frame
427 ;;; is already begun in the caller. Interruption between these two instructions:
428 ;;; MOV RBP, RSP / CALL #xzzzzz
429 ;;; will show the backtrace as if two invocations of the caller are on stack.
430 ;;; This is tricky to fix because while we can relativize the CFA to the
431 ;;; known frame size, we can't do that based only on a disassembly.
433 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state
)
435 (format stream
" .cfi_startproc~%"))
436 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
437 ;; bytes to be written as an assembly language instruction rather than opaquely,
438 ;; thereby affecting the ELF data (cfi or relocs) produced.
440 (list-annotated-instructions (int-sap paddr
) count core-state vaddr emit-cfi
))
442 (symbol-macrolet ((cur-offset (- ptr paddr
)))
444 (let ((until (if instructions
(caar instructions
) count
)))
445 ;; if we're not aligned, then write some number of bytes
446 ;; to cause alignment. But do not write past the next offset
447 ;; that needs to be written as an instruction.
448 (when (logtest ptr
#x7
) ; unaligned
449 (let ((n (min (- (nth-value 1 (ceiling ptr
8)))
450 (- until cur-offset
))))
452 (emit-asm-directives :byte
(int-sap ptr
) n stream
)
454 ;; Now we're either aligned to a multiple of 8, or the current
455 ;; offset needs to be written as a textual instruction.
456 (let ((n (- until cur-offset
)))
458 (multiple-value-bind (qwords remainder
) (floor n
8)
460 (emit-asm-directives :qword
(int-sap ptr
) qwords stream
#())
461 (incf ptr
(* qwords
8)))
462 (when (plusp remainder
)
463 (emit-asm-directives :byte
(int-sap ptr
) remainder stream
)
464 (incf ptr remainder
))))
465 ;; If the current offset is COUNT, we're done.
466 (when (= cur-offset count
) (return))
467 (aver (= cur-offset until
))
468 (destructuring-bind (length opcode . operand
) (cdr (pop instructions
))
469 (when (cond ((member opcode
'("jmp" "call") :test
#'string
=)
470 (format stream
" ~A 0x~X~%" opcode operand
))
471 ((string= opcode
"pop")
472 (format stream
" ~A ~A~%" opcode operand
)
473 (cond ((string= operand
"8(%rbp)")
474 (format stream
" .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
475 ((string= operand
"%rbp")
476 ;(format stream " .cfi_def_cfa 7, 8~%")
479 ((string= opcode
"mov")
480 (format stream
" mov $(__lisp_code_start+0x~x),%eax~%"
481 (- operand
(cs-code-space-start core-state
))))
483 (bug "Random annotated opcode ~S" opcode
))
485 (when (= cur-offset count
) (return))))))
487 (format stream
" .cfi_endproc~%")))
489 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
490 ;;; TODO: relocate fdefns and instances of standard-generic-function
491 ;;; into the space that is dumped into an ELF section.
492 (defun write-assembler-text
493 (spaces fixedobj-range output
494 &optional emit-sizes
(emit-cfi t
)
495 &aux
(code-space (get-space immobile-varyobj-core-space-id spaces
))
496 (code-space-start (space-addr code-space
)) ; target virtual address
497 (code-space-end (+ code-space-start
(space-size code-space
)))
498 (code-addr code-space-start
)
500 (make-core-state code-space-start code-space-end
502 (+ (car fixedobj-range
) (cdr fixedobj-range
))))
504 (pp-state (cons (make-hash-table :test
'equal
)
505 ;; copy no entries for macros/special-operators (flet, etc)
506 (sb-pretty::make-pprint-dispatch-table
)))
507 (packages (make-hash-table :test
'equal
))
508 (core-nil (compute-nil-object spaces
))
512 (set-pprint-dispatch 'string
513 ;; Write strings without string quotes
514 (lambda (stream string
) (write-string string stream
))
517 (labels ((ldsym-quote (name)
518 (concatenate 'string
'(#\") name
'(#\")))
519 (dumpwords (addr count stream
&optional
(exceptions #()) logical-addr
)
520 (let ((sap (int-sap addr
)))
521 (aver (sap>= sap
(car spaces
)))
522 ;; Make intra-code-space pointers computed at link time
523 (dotimes (i (if logical-addr count
0))
524 (unless (and (< i
(length exceptions
)) (svref exceptions i
))
525 (let ((word (sap-ref-word sap
(* i n-word-bytes
))))
526 (when (and (= (logand word
3) 3) ; is a pointer
527 (<= code-space-start word
(1- code-space-end
))) ; to code space
529 (format t
"~&~(~x: ~x~)~%" (+ logical-addr
(* i n-word-bytes
))
531 (incf n-linker-relocs
)
532 (setf exceptions
(adjust-array exceptions
(max (length exceptions
) (1+ i
))
533 :initial-element nil
)
535 (format nil
"__lisp_code_start+0x~x"
536 (- word code-space-start
)))))))
537 (emit-asm-directives :qword sap count stream exceptions
)))
538 (make-code-obj (addr)
539 (let ((translation (translate-ptr addr spaces
)))
540 (aver (= (%widetag-of
(sap-ref-word (int-sap translation
) 0))
541 code-header-widetag
))
542 (%make-lisp-obj
(logior translation other-pointer-lowtag
))))
543 (calc-obj-size (code)
544 ;; No need to pin - it's not managed by GC
547 (ash (logandc2 (get-lisp-obj-address code
) lowtag-mask
)
548 (- n-fixnum-tag-bits
)))))
550 (logand word widetag-mask
)))
551 (format output
" .text~% .file \"sbcl.core\"
552 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
554 ;; Scan the assembly routines.
555 (let* ((code-component (make-code-obj code-addr
))
556 (size (calc-obj-size code-component
))
558 (truly-the hash-table
559 (translate (car (translate (%code-debug-info code-component
)
562 (cells (translate (hash-table-table hashtable
) spaces
))
563 (count (hash-table-number-entries hashtable
)))
564 (incf code-addr size
)
565 (setf total-code-size size
)
566 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count
))
569 (when (>= code-addr code-space-end
) (return))
570 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
571 (let* ((code (make-code-obj code-addr
))
572 (objsize (calc-obj-size code
)))
573 (setq end-loc
(+ code-addr objsize
))
574 (incf total-code-size objsize
)
576 ((< (code-header-words code
) 4) ; filler object
577 ;; Shouldn't occur unless defrag was not performed
578 (format output
"#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
580 simple-array-unsigned-byte-8-widetag
581 (ash (- objsize
(* 2 n-word-bytes
))
583 (- objsize
(* 2 n-word-bytes
))))
584 ((%instancep
(%code-debug-info code
)) ; assume it's a COMPILED-DEBUG-INFO
585 (aver (plusp (code-n-entries code
)))
587 (sb-c::compiled-debug-info-source
588 (truly-the sb-c
::compiled-debug-info
589 (translate (%code-debug-info code
) spaces
))))
591 (sb-c::debug-source-namestring
592 (truly-the sb-c
::debug-source
(translate source spaces
)))))
593 (setq namestring
(if (eq namestring core-nil
)
595 (translate namestring spaces
)))
596 (unless (string= namestring prev-namestring
)
597 (format output
" .file \"~a\"~%" namestring
)
598 (setq prev-namestring namestring
)))
599 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code
) lowtag-mask
))
600 (boxed-end (+ code-physaddr
601 (ash (code-header-words code
) word-shift
)))
602 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code
0))
604 (format output
"#x~x:~%" code-addr
)
605 (dumpwords code-physaddr
(code-header-words code
) output
#() code-addr
)
606 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
607 (when (> first-fun boxed-end
)
608 (dumpwords boxed-end
(floor (- first-fun boxed-end
) n-word-bytes
)
610 (setf (cs-fixup-addrs core-state
)
612 (+ code-addr
(ash (code-header-words code
) word-shift
) x
))
613 (code-fixup-locs code spaces
)))
614 ;; Loop over all embedded functions.
615 ;; Because simple-fun offsets are relative to the code start
616 ;; (and not in a linked list as they were in the past),
617 ;; iteratation in a "foreign" code object works just fine,
618 ;; subject to the caution about reading boxed words.
619 (dotimes (j (code-n-entries code
))
620 (let* ((fun (%code-entry-point code j
))
621 (fun-addr (logandc2 (get-lisp-obj-address fun
) lowtag-mask
))
622 (end (if (< (1+ j
) (code-n-entries code
))
623 (logandc2 (get-lisp-obj-address (%code-entry-point code
(1+ j
)))
625 (+ (translate-ptr code-addr spaces
) objsize
)))
627 (+ fun-addr
(* simple-fun-code-offset n-word-bytes
)))
628 (size (- end entrypoint
))
629 (lispname (fun-name-from-core fun spaces core-nil packages
))
630 (quotname (ldsym-quote (c-name lispname pp-state
))))
631 ;; Globalize the C symbol only if the name is a legal function designator
632 ;; per the standard definition.
633 ;; This is a technique to try to avoid appending a uniquifying suffix
634 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
635 (format output
"~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
636 (typep lispname
'(or symbol core-sym
(cons (eql setf
))))
639 simple-fun-code-offset output
641 `#(nil ,(format nil
".+~D"
642 (* (1- simple-fun-code-offset
)
646 (format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
647 quotname
(if emit-sizes size
))
648 ;; entrypoint is the current physical address.
649 ;; Also pass in the virtual address in the core
650 ;; (which will differ from the actual load-time address)
651 (emit-lisp-function entrypoint
652 (+ code-addr
(- entrypoint
653 (logandc2 (get-lisp-obj-address code
)
655 size output emit-cfi core-state
)))
657 ;; All fixups should have been consumed by writing the code out
658 (aver (null (cs-fixup-addrs core-state
))))
660 (error "Strange code component: ~S" code
)))
661 (incf code-addr objsize
))))
663 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
664 (format output
"~:[~; .size __lisp_code_start, 0x~x~%~]__lisp_code_end:~%"
665 emit-sizes total-code-size
)
667 ;; Pad so that non-lisp code can't be colocated on a GC page.
668 ;; (Lack of Lisp object headers in C code is the issue)
669 (let ((aligned-end (logandc2 (+ end-loc
4095) 4095)))
670 (when (> aligned-end end-loc
)
671 (multiple-value-bind (nwords remainder
)
672 (floor (- aligned-end end-loc
) n-word-bytes
)
674 (aver (zerop remainder
))
676 (format output
" .quad ~d, ~d # (simple-array fixnum (~d))~%"
677 simple-array-fixnum-widetag
678 (ash nwords n-fixnum-tag-bits
)
681 (format output
" .fill ~d~%" (* nwords n-word-bytes
))))))
682 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
683 (values total-code-size n-linker-relocs
))
685 (defun extract-required-c-symbols (spaces fixedobj-range asm-file
&optional
(verbose nil
))
686 (flet ((find-target-symbol (package-name symbol-name
)
687 (let* ((physaddr (translate-ptr (car fixedobj-range
) spaces
))
688 (limit (+ physaddr
(car fixedobj-range
))))
690 (when (>= physaddr limit
) (bug "Can't find symbol"))
691 (multiple-value-bind (obj tag size
)
692 (reconstitute-object (ash physaddr
(- n-fixnum-tag-bits
)))
693 (when (and (= tag symbol-widetag
)
694 (string= symbol-name
(translate (symbol-name obj
) spaces
))
695 (%instancep
(symbol-package obj
))
696 (string= package-name
699 (truly-the package
(translate (symbol-package obj
) spaces
)))
701 (return (%make-lisp-obj
(logior physaddr other-pointer-lowtag
))))
702 (incf physaddr size
)))))
703 (symbol-fdefn-fun (symbol)
704 (let ((vector (translate (symbol-info-vector symbol
) spaces
)))
705 ;; TODO: allow for (plist . info-vector) in the info slot
706 (aver (simple-vector-p vector
))
707 (translate (fdefn-fun (translate (info-vector-fdefn vector
) spaces
))
710 (translate (symbol-global-value (find-target-symbol "SB-SYS" "*LINKAGE-INFO*"))
714 (find-target-symbol "SB-SYS" "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"))))
715 (aver (= (get-closure-length dyn-syminfo
) 3))
716 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo
1) spaces
))
717 (ht2 (translate (%closure-index-ref dyn-syminfo
0) spaces
))
718 (table0 (translate (hash-table-table (truly-the hash-table linkage-info
))
720 (table1 (translate (hash-table-table (truly-the hash-table ht1
)) spaces
))
721 (table2 (translate (hash-table-table (truly-the hash-table ht2
)) spaces
))
724 (declare (simple-vector table0 table1 table2
))
728 (format t
"~A~%" x
)))
729 (scan-table (table name fun
&aux
(n 0) (end (length table
)))
731 (format t
"~&~A:~%~A~%"
732 name
(make-string (1+ (length name
)) :initial-element
#\-
)))
735 (let ((val (svref table i
)))
736 (unless (unbound-marker-p val
)
737 (funcall fun
(translate val spaces
))
740 (format t
"TOTAL: ~D entries~2%" n
))))
741 (scan-table table0
"linkage info"
742 (lambda (x &aux
(type #\T
))
744 (setq x
(translate (car x
) spaces
) type
#\D
))
745 (format asm-file
" .long ~A~%" x
)
747 (format t
"~A ~A~%" type x
))
749 (scan-table table1
"defined" #'show
)
750 (scan-table table2
"undefined" #'show
)
751 (let ((diff1 ; linkage not in foreign
752 (remove-if (lambda (x) (member x foreign
:test
#'string
=)) linkage
))
753 (diff2 ; foreign not in linkage
754 (remove-if (lambda (x) (member x linkage
:test
#'string
=)) foreign
)))
756 (format t
"~&Linkage not in foreign:~%~S~%" diff1
)
757 (format t
"~&Foreign not in linkage:~%~S~%" diff2
))
763 (defconstant +sht-null
+ 0)
764 (defconstant +sht-progbits
+ 1)
765 (defconstant +sht-symtab
+ 2)
766 (defconstant +sht-strtab
+ 3)
767 (defconstant +sht-rela
+ 4)
768 (defconstant +sht-rel
+ 9)
770 (define-alien-type elf64-ehdr
772 (ident (array unsigned-char
16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
773 (type (unsigned 16)) ; 1 0
774 (machine (unsigned 16)) ; 3E 0
775 (version (unsigned 32)) ; 1 0 0 0
776 (entry unsigned
) ; 0 0 0 0 0 0 0 0
777 (phoff unsigned
) ; 0 0 0 0 0 0 0 0
779 (flags (unsigned 32)) ; 0 0 0 0
780 (ehsize (unsigned 16)) ; 40 0
781 (phentsize (unsigned 16)) ; 0 0
782 (phnum (unsigned 16)) ; 0 0
783 (shentsize (unsigned 16)) ; 40 0
784 (shnum (unsigned 16)) ; n 0
785 (shstrndx (unsigned 16)))) ; n 0
786 (define-alien-type elf64-shdr
790 (flags (unsigned 64))
796 (addralign (unsigned 64))
797 (entsize (unsigned 64))))
798 (define-alien-type elf64-sym
803 (shndx (unsigned 16))
806 (define-alien-type elf64-rela
808 (offset (unsigned 64))
810 (addend (signed 64))))
812 (defun make-elf64-sym (name info
)
813 (let ((a (make-array 24 :element-type
'(unsigned-byte 8))))
814 (with-pinned-objects (a)
815 (setf (sap-ref-32 (vector-sap a
) 0) name
816 (sap-ref-8 (vector-sap a
) 4) info
))
819 ;;; Return two values: an octet vector comprising a string table
820 ;;; and an alist which maps string to offset in the table.
821 (defun string-table (strings)
822 (let* ((length (+ (1+ (length strings
)) ; one more null than there are strings
823 (reduce #'+ strings
:key
#'length
))) ; data length
824 (bytes (make-array length
:element-type
'(unsigned-byte 8)
828 (dolist (string strings
)
829 (push (cons string index
) alist
)
830 (replace bytes
(map 'vector
#'char-code string
) :start1 index
)
831 (incf index
(1+ (length string
))))
832 (cons (nreverse alist
) bytes
)))
834 (defun write-alien (alien size stream
)
836 (write-byte (sap-ref-8 (alien-value-sap alien
) i
) stream
)))
838 (defun copy-bytes (in-stream out-stream nbytes
840 (make-array 1024 :element-type
'(unsigned-byte 8))))
841 (loop (let ((chunksize (min (length buffer
) nbytes
)))
842 (aver (eql (read-sequence buffer in-stream
:end chunksize
) chunksize
))
843 (write-sequence buffer out-stream
:end chunksize
)
844 (when (zerop (decf nbytes chunksize
)) (return)))))
846 ;;; core header should be an array of words in '.rodata', not a 32K page
847 (defconstant core-header-size
+backend-page-bytes
+) ; stupidly large (FIXME)
849 ;;; Write everything except for the core file itself into OUTPUT-STREAM
850 ;;; and leave the stream padded to a 4K boundary ready to receive data.
851 (defun prepare-elf (core-size relocs output
)
852 (let* ((sym-entry-size 24)
853 (reloc-entry-size 24)
856 `#((:core
"lisp.core" ,+sht-progbits
+ 0 0 0 ,core-align
0)
857 (:sym
".symtab" ,+sht-symtab
+ 0 3 1 8 ,sym-entry-size
)
858 ; section with the strings -- ^ ^ -- 1+ highest local symbol
859 (:str
".strtab" ,+sht-strtab
+ 0 0 0 1 0)
860 (:rel
".relalisp.core" ,+sht-rela
+ 0 2 1 8 ,reloc-entry-size
)
861 ; symbol table -- ^ ^ -- for which section
862 (:note
".note.GNU-stack" ,+sht-null
+ 0 0 0 1 0)))
864 (string-table (append '("__lisp_code_start") (map 'list
#'second sections
))))
865 (strings (cdr string-table
))
866 (padded-strings-size (logandc2 (+ (length strings
) 7) 7))
867 (ehdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil
)) 8))
868 (shdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil
)) 8))
869 (symbols-size (* 2 sym-entry-size
))
870 (shdrs-start (+ ehdr-size symbols-size padded-strings-size
))
871 (shdrs-end (+ shdrs-start
(* (1+ (length sections
)) shdr-size
)))
872 (relocs-size (* (length relocs
) reloc-entry-size
))
873 (relocs-end (+ shdrs-end relocs-size
))
874 (core-start (logandc2 (+ relocs-end
(1- core-align
)) (1- core-align
)))
875 (ident #.
(coerce '(#x7F
#x45
#x4C
#x46
2 1 1 0 0 0 0 0 0 0 0 0)
876 '(array (unsigned-byte 8) 1))))
878 (with-alien ((ehdr elf64-ehdr
))
879 (dotimes (i (ceiling ehdr-size n-word-bytes
))
880 (setf (sap-ref-word (alien-value-sap ehdr
) (* i n-word-bytes
)) 0))
881 (with-pinned-objects (ident)
882 (%byte-blt
(vector-sap ident
) 0 (alien-value-sap ehdr
) 0 16))
883 (setf (slot ehdr
'type
) 1
884 (slot ehdr
'machine
) #x3E
885 (slot ehdr
'version
) 1
886 (slot ehdr
'shoff
) shdrs-start
887 (slot ehdr
'ehsize
) ehdr-size
888 (slot ehdr
'shentsize
) shdr-size
889 (slot ehdr
'shnum
) (1+ (length sections
)) ; section 0 is implied
890 (slot ehdr
'shstrndx
) (1+ (position :str sections
:key
#'car
)))
891 (write-alien ehdr ehdr-size output
))
893 ;; Write symbol table
894 (aver (eql (file-position output
) ehdr-size
))
895 (write-sequence (make-elf64-sym 0 0) output
)
896 ;; The symbol name index is always 1 by construction. The type is #x10
897 ;; given: #define STB_GLOBAL 1
898 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
899 ;; which places the binding in the high 4 bits of the low byte.
900 (write-sequence (make-elf64-sym 1 #x10
) output
)
902 ;; Write string table
903 (aver (eql (file-position output
) (+ ehdr-size symbols-size
)))
904 (write-sequence strings output
) ; an octet vector at this point
905 (dotimes (i (- padded-strings-size
(length strings
)))
906 (write-byte 0 output
))
908 ;; Write section headers
909 (aver (eql (file-position output
) shdrs-start
))
910 (with-alien ((shdr elf64-shdr
))
911 (dotimes (i (ceiling shdr-size n-word-bytes
)) ; Zero-fill
912 (setf (sap-ref-word (alien-value-sap shdr
) (* i n-word-bytes
)) 0))
913 (dotimes (i (1+ (length sections
)))
914 (when (plusp i
) ; Write the zero-filled header as section 0
915 (destructuring-bind (key name type flags link info alignment entsize
)
916 (aref sections
(1- i
))
917 (multiple-value-bind (offset size
)
919 (:sym
(values ehdr-size symbols-size
))
920 (:str
(values (+ ehdr-size symbols-size
) (length strings
)))
921 (:rel
(values shdrs-end relocs-size
))
922 (:core
(values core-start core-size
))
923 (:note
(values 0 0)))
924 (let ((name (cdr (assoc name
(car string-table
) :test
#'string
=))))
925 (setf (slot shdr
'name
) name
926 (slot shdr
'type
) type
927 (slot shdr
'flags
) flags
928 (slot shdr
'off
) offset
929 (slot shdr
'size
) size
930 (slot shdr
'link
) link
931 (slot shdr
'info
) info
932 (slot shdr
'addralign
) alignment
933 (slot shdr
'entsize
) entsize
)))))
934 (write-alien shdr shdr-size output
)))
937 (aver (eql (file-position output
) shdrs-end
))
938 (let ((buf (make-array relocs-size
:element-type
'(unsigned-byte 8)))
940 (with-alien ((rela elf64-rela
))
941 (dovector (reloc relocs
)
942 (destructuring-bind (place addend . kind
) reloc
943 (setf (slot rela
'offset
) place
944 (slot rela
'info
) (logior (ash 1 32) kind
) ; 1 = symbol index
945 (slot rela
'addend
) addend
))
946 (setf (%vector-raw-bits buf
(+ ptr
0)) (sap-ref-word (alien-value-sap rela
) 0)
947 (%vector-raw-bits buf
(+ ptr
1)) (sap-ref-word (alien-value-sap rela
) 8)
948 (%vector-raw-bits buf
(+ ptr
2)) (sap-ref-word (alien-value-sap rela
) 16))
950 (write-sequence buf output
))
953 (dotimes (i (- core-start
(file-position output
)))
954 (write-byte 0 output
))
955 (aver (eq (file-position output
) core-start
))))
957 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
958 ;;; whose code space is subject to link-time relocation.
959 (defconstant R_X86_64_64
1) ; /* Direct 64 bit */
960 (defconstant R_X86_64_PC32
2) ; /* PC relative 32 bit signed */
961 (defconstant R_X86_64_32
10) ; /* Direct 32 bit zero extended */
963 (defun collect-relocations (spaces fixups
&aux
(print nil
))
964 (binding* (((static-start static-end
)
965 (let ((space (get-space static-core-space-id spaces
)))
966 (values (space-addr space
) (space-end space
))))
967 ((code-start code-end
)
968 (let ((space (get-space immobile-varyobj-core-space-id spaces
)))
969 (values (space-addr space
) (space-end space
))))
970 ;; the distance between fixedobj space address (i.e following the pages of
971 ;; dynamic space) in the ELF section which has a presumptive address of 0
972 ;; due to being non-loaded, to where it will be later mapped by coreparse
973 (fixedobj-space-displacement
974 (let ((space (get-space immobile-fixedobj-core-space-id spaces
)))
975 (- (* (1+ (space-data-page space
)) +backend-page-bytes
+) ; 1+ = core header
976 (space-addr space
))))
980 ((abs-fixup (core-offs referent
)
983 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
984 (setf (sap-ref-word (car spaces
) core-offs
) 0)
985 (vector-push-extend `(,(+ core-header-size core-offs
)
986 ,(- referent code-start
) .
,R_X86_64_64
)
988 (abs32-fixup (core-offs referent
)
991 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
992 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
993 (vector-push-extend `(,(+ core-header-size core-offs
)
994 ,(- referent code-start
) .
,R_X86_64_32
)
996 (rel-fixup (core-offs referent
)
999 (format t
"~x = 0x~(~x~): (r)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
1000 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
1001 (vector-push-extend `(,(+ core-header-size core-offs
)
1002 ,(- referent code-start
) .
,R_X86_64_PC32
)
1004 (in-code-space-p (ptr)
1005 (and (<= code-start ptr
) (< ptr code-end
)))
1006 ;; Given a address which is an offset into the data pages of the target core,
1007 ;; compute the logical address which that offset would be mapped to.
1008 ;; For example core address 0 is the virtual address of static space.
1009 (core-to-logical (core-offs &aux
(page (floor core-offs
+backend-page-bytes
+)))
1010 (dolist (space (cdr spaces
)
1011 (bug "Can't translate core offset ~x using ~x"
1013 (let* ((page0 (space-data-page space
))
1014 (nwords (space-nwords space
))
1015 (id (space-id space
))
1016 (npages (ceiling nwords
(/ +backend-page-bytes
+ n-word-bytes
))))
1017 (when (and (<= page0 page
(+ page0
(1- npages
)))
1018 (/= id immobile-varyobj-core-space-id
))
1019 (return (+ (space-addr space
)
1020 (* (- page page0
) +backend-page-bytes
+)
1021 (logand core-offs
(1- +backend-page-bytes
+))))))))
1022 (scanptrs (obj wordindex-min wordindex-max
&aux
(n-fixups 0))
1023 (do* ((base-addr (logandc2 (get-lisp-obj-address obj
) lowtag-mask
))
1024 (sap (int-sap base-addr
))
1025 ;; core-offs is the offset in the lisp.core ELF section.
1026 (core-offs (- base-addr
(sap-int (car spaces
))))
1027 (i wordindex-min
(1+ i
)))
1028 ((> i wordindex-max
) n-fixups
)
1029 (let ((ptr (sap-ref-word sap
(ash i word-shift
))))
1030 (when (and (= (logand ptr
3) 3) (in-code-space-p ptr
))
1031 (abs-fixup (+ core-offs
(ash i word-shift
)) ptr
)
1033 (scanptr (obj wordindex
)
1034 (plusp (scanptrs obj wordindex wordindex
))) ; trivial wrapper
1035 (scan-obj (obj widetag size vaddr
1036 &aux
(core-offs (- (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1037 (sap-int (car spaces
))))
1038 (nwords (ceiling size n-word-bytes
)))
1041 (return-from scan-obj
))
1044 (let ((layout (truly-the layout
1045 (translate (%instance-layout obj
) spaces
))))
1046 ;; FIXME: even though the layout is supplied, it's not good enough,
1047 ;; because the macro references the layout-bitmap which might
1048 ;; be a bignum which is a pointer into the logical core address.
1049 (unless (fixnump (layout-bitmap layout
))
1050 (error "Can't process bignum bitmap"))
1051 (do-instance-tagged-slot (i obj
:layout layout
)
1052 (scanptr obj
(1+ i
))))
1053 (return-from scan-obj
))
1054 (#.simple-vector-widetag
1055 (let ((len (length (the simple-vector obj
))))
1056 (when (eql (logand (get-header-data obj
) #xFF
) vector-valid-hashing-subtype
)
1057 (do ((i 2 (+ i
2)) (needs-rehash))
1060 (setf (svref obj
1) 1)))
1061 (when (scanptr obj
(+ vector-data-offset i
))
1062 (format t
"~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj
))
1063 (setq needs-rehash t
))
1064 (scanptr obj
(+ vector-data-offset i
1)))
1065 (return-from scan-obj
))
1066 (setq nwords
(+ len
2))))
1069 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1070 (int-sap (+ (- (get-lisp-obj-address obj
) other-pointer-lowtag
)
1071 (ash fdefn-raw-addr-slot word-shift
))))
1072 ;; what the fdefn's logical PC will be
1073 (fdefn-logical-pc (+ vaddr
(ash fdefn-raw-addr-slot word-shift
)))
1074 (rel32off (signed-sap-ref-32 fdefn-pc-sap
1))
1075 (target (+ fdefn-logical-pc
1 rel32off
))
1077 (if (<= static-start vaddr static-end
)
1078 (+ (- sb-vm
:static-space-start
) +backend-page-bytes
+)
1079 fixedobj-space-displacement
)))
1080 (when (in-code-space-p target
)
1081 ;; This addend needs to account for the fact that the location
1082 ;; where fixup occurs is not where the fdefn will actually exist.
1083 (rel-fixup (+ core-offs
(ash 3 word-shift
) 1)
1084 (+ target space-displacement
))))
1085 (return-from scan-obj
))
1086 ((#.closure-widetag
#.funcallable-instance-widetag
)
1087 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj
))
1088 (- n-word-bytes fun-pointer-lowtag
))))
1089 (when (in-code-space-p word
)
1090 (abs-fixup (+ core-offs
(ash 1 word-shift
)) word
)))
1091 (when (eq widetag funcallable-instance-widetag
)
1092 (let ((layout (truly-the layout
1093 (translate (%funcallable-instance-layout obj
) spaces
))))
1094 (unless (fixnump (layout-bitmap layout
))
1095 (error "Can't process bignum bitmap"))
1096 (let ((bitmap (layout-bitmap layout
)))
1097 (unless (eql bitmap -
1)
1098 ;; tagged slots precede untagged slots,
1099 ;; so integer-length is the count of tagged slots.
1100 (setq nwords
(1+ (integer-length bitmap
))))))))
1101 ;; mixed boxed/unboxed objects
1102 (#.code-header-widetag
1103 (dolist (loc (code-fixup-locs obj spaces
))
1104 (let ((val (sap-ref-32 (code-instructions obj
) loc
)))
1105 (when (in-code-space-p val
)
1106 (abs32-fixup (sap- (sap+ (code-instructions obj
) loc
) (car spaces
))
1108 (dotimes (i (code-n-entries obj
))
1109 (scanptrs (%code-entry-point obj i
) 2 5))
1110 (setq nwords
(code-header-words obj
)))
1111 ;; boxed objects that can reference code/simple-funs
1112 ((#.value-cell-widetag
#.symbol-widetag
#.weak-pointer-widetag
))
1114 (return-from scan-obj
)))
1115 (scanptrs obj
1 (1- nwords
))))
1116 (dolist (space (reverse (cdr spaces
)))
1117 (let* ((logical-addr (space-addr space
))
1118 (size (space-size space
))
1119 (physical-addr (space-physaddr space spaces
))
1120 (physical-end (sap+ physical-addr size
))
1121 (vaddr-translation (+ (- (sap-int physical-addr
)) logical-addr
)))
1122 (unless (or (= (space-id space
) immobile-varyobj-core-space-id
)
1123 (= (space-id space
) -
92 #|dynamic-core-space-id|
#))
1124 (format t
"~&scan range vaddr=~12x:~12x paddr=~12x:~12x~%"
1125 logical-addr
(+ logical-addr size
)
1126 (sap-int physical-addr
) (sap-int physical-end
))
1127 (dx-flet ((visit (obj widetag size
)
1128 ;; Compute the object's intended virtual address
1129 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1130 vaddr-translation
)))
1131 (scan-obj obj widetag size vaddr
))))
1132 (map-objects-in-range
1134 (ash (sap-int physical-addr
) (- n-fixnum-tag-bits
))
1135 (ash (sap-int physical-end
) (- n-fixnum-tag-bits
))))
1136 (when (plusp (logior n-abs n-rel
))
1137 (format t
"space @ ~x: ~d absolute + ~d relative fixups~%"
1138 logical-addr n-abs n-rel
))
1139 (setq n-abs
0 n-rel
0))))))
1140 (format t
"total of ~D linker fixups~%" (length fixups
))
1145 (macrolet ((do-core-header-entry (((id-var len-var ptr-var
) buffer
) &body body
)
1146 `(let ((,ptr-var
1))
1148 (let ((,id-var
(%vector-raw-bits
,buffer
,ptr-var
))
1149 (,len-var
(%vector-raw-bits
,buffer
(1+ ,ptr-var
))))
1152 (when (= ,id-var end-core-entry-type-code
) (return))
1154 (incf ,ptr-var
,len-var
)))))
1155 (do-directory-entry (((index-var start-index input-nbytes
) buffer
) &body body
)
1156 `(let ((words-per-dirent 5))
1157 (multiple-value-bind (n-entries remainder
)
1158 (floor ,input-nbytes words-per-dirent
)
1159 (aver (zerop remainder
))
1160 (symbol-macrolet ((id (%vector-raw-bits
,buffer index
))
1161 (nwords (%vector-raw-bits
,buffer
(+ index
1)))
1162 (data-page (%vector-raw-bits
,buffer
(+ index
2)))
1163 (addr (%vector-raw-bits
,buffer
(+ index
3)))
1164 (npages (%vector-raw-bits
,buffer
(+ index
4))))
1165 (do ((,index-var
,start-index
(+ ,index-var words-per-dirent
)))
1166 ((= ,index-var
(+ ,start-index
(* n-entries words-per-dirent
))))
1168 (with-mapped-core ((sap-var start npages stream
) &body body
)
1174 (* ,npages
+backend-page-bytes
+)
1175 (logior sb-posix
:prot-read sb-posix
:prot-write
)
1176 sb-posix
:map-private
1177 (sb-sys:fd-stream-fd
,stream
)
1178 ;; Skip the core header
1179 (+ ,start
+backend-page-bytes
+)))
1182 (sb-posix:munmap
,sap-var
(* ,npages
+backend-page-bytes
+)))))))
1184 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1185 ;;; separate it into pieces.
1186 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1187 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1188 ;;; The ".core" file is a native core file used for starting a binary that
1189 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1190 ;;; is for linking in to a binary that needs no "--core" argument.
1192 (input-pathname asm-pathname
1193 &key emit-sizes
(verbose t
)
1194 &aux
(split-core-pathname
1195 (merge-pathnames (make-pathname :type
"core") asm-pathname
))
1198 (make-pathname :name
(concatenate 'string
(pathname-name asm-pathname
) "-core")
1201 (core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
1202 (original-total-npages 0)
1205 (code-start-fixup-ofs 0) ; where to fixup the core header
1208 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1209 (relocs (make-array 100000 :adjustable t
:fill-pointer
0)))
1212 (ignore-errors (delete-file asm-pathname
))
1213 (ignore-errors (delete-file split-core-pathname
))
1214 (ignore-errors (delete-file elf-core-pathname
))
1215 ;; Ensure that all files can be opened
1216 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
1217 (with-open-file (asm-file asm-pathname
:direction
:output
:if-exists
:supersede
)
1218 (with-open-file (split-core split-core-pathname
:direction
:output
1219 :element-type
'(unsigned-byte 8) :if-exists
:supersede
)
1220 (read-sequence core-header input
)
1221 (cond ((= (%vector-raw-bits core-header
0) core-magic
))
1222 (t ; possible embedded core
1223 (file-position input
(- (file-length input
)
1224 (* 2 n-word-bytes
)))
1225 (aver (eql (read-sequence core-header input
) (* 2 n-word-bytes
)))
1226 (aver (= (%vector-raw-bits core-header
1) core-magic
))
1227 (setq core-offset
(%vector-raw-bits core-header
0))
1229 (format t
"~&embedded core starts at #x~x into input~%" core-offset
))
1230 (file-position input core-offset
)
1231 (read-sequence core-header input
)
1232 (aver (= (%vector-raw-bits core-header
0) core-magic
))))
1233 (do-core-header-entry ((id len ptr
) core-header
)
1235 (#.build-id-core-entry-type-code
1237 (let ((string (make-string (%vector-raw-bits core-header ptr
)
1238 :element-type
'base-char
)))
1239 (%byte-blt core-header
(* (1+ ptr
) n-word-bytes
) string
0 (length string
))
1240 (format t
"Build ID [~a]~%" string
))))
1241 (#.new-directory-core-entry-type-code
1242 (do-directory-entry ((index ptr len
) core-header
)
1243 (incf original-total-npages npages
)
1244 (push (make-space id addr data-page page-adjust nwords
) spaces
)
1246 (format t
"id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1247 id data-page npages addr nwords
1248 (= id immobile-varyobj-core-space-id
)))
1249 (cond ((= id immobile-varyobj-core-space-id
)
1250 (setq code-start-fixup-ofs
(+ index
3))
1251 ;; Keep this entry but delete the page count. We need to know
1252 ;; where the space was supposed to be mapped and at what size.
1253 ;; Subsequent core entries will need to adjust their start page
1254 ;; downward (just the PTEs's start page now).
1255 (setq page-adjust npages data-page
0 npages
0))
1257 ;; Keep track of where the fixedobj space wants to be.
1258 (when (= id immobile-fixedobj-core-space-id
)
1259 (setq fixedobj-range
(cons addr
(ash nwords word-shift
))))
1260 (when (plusp npages
) ; enqueue
1261 (push (cons data-page
(* npages
+backend-page-bytes
+))
1263 ;; adjust this entry's start page in the new core
1264 (decf data-page page-adjust
)))))
1265 (#.page-table-core-entry-type-code
1267 (symbol-macrolet ((nbytes (%vector-raw-bits core-header
(1+ ptr
)))
1268 (data-page (%vector-raw-bits core-header
(+ ptr
2))))
1269 (aver (= data-page original-total-npages
))
1270 (aver (= (ceiling (space-nwords
1271 (find dynamic-core-space-id spaces
:key
#'space-id
))
1272 (/ +backend-page-bytes
+ n-word-bytes
))
1273 (%vector-raw-bits core-header ptr
))) ; number of PTEs
1275 (format t
"PTE: page=~5x~40tbytes=~8x~%" data-page nbytes
))
1276 (push (cons data-page nbytes
) copy-actions
)
1277 (decf data-page page-adjust
)))))
1278 (let ((buffer (make-array +backend-page-bytes
+
1279 :element-type
'(unsigned-byte 8)))
1281 ;; Write the new core file
1282 (write-sequence core-header split-core
)
1283 (dolist (action (reverse copy-actions
)) ; nondestructive
1284 ;; page index convention assumes absence of core header.
1285 ;; i.e. data page 0 is the file page immediately following the core header
1286 (let ((offset (* (1+ (car action
)) +backend-page-bytes
+))
1287 (nbytes (cdr action
)))
1289 (format t
"File offset ~10x: ~10x bytes~%" offset nbytes
))
1290 (setq filepos
(+ core-offset offset
))
1291 (file-position input filepos
)
1292 (copy-bytes input split-core nbytes buffer
)))
1293 ;; Trailer (runtime options and magic number)
1294 (let ((nbytes (read-sequence buffer input
)))
1295 ;; expect trailing magic number
1296 (let ((ptr (floor (- nbytes n-word-bytes
) n-word-bytes
)))
1297 (aver (= (%vector-raw-bits buffer ptr
) core-magic
)))
1298 ;; File position of the core header needs to be set to 0
1299 ;; regardless of what it was
1300 (setf (%vector-raw-bits buffer
4) 0)
1302 (format t
"Trailer words:(~{~X~^ ~})~%"
1303 (loop for i below
(floor nbytes n-word-bytes
)
1304 collect
(%vector-raw-bits buffer i
))))
1305 (write-sequence buffer split-core
:end nbytes
)
1306 (finish-output split-core
))
1308 (aver (= (+ core-offset
1309 (* page-adjust
+backend-page-bytes
+)
1310 (file-length split-core
))
1311 (file-length input
)))
1312 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1313 (file-position input filepos
)))
1315 ;; Map the original core file to memory
1316 (with-mapped-core (sap core-offset original-total-npages input
)
1318 (delete immobile-varyobj-core-space-id
(reverse spaces
)
1320 (map (cons sap
(sort (copy-list spaces
) #'> :key
#'space-addr
)))
1321 (pte-nbytes (cdar copy-actions
)))
1322 (collect-relocations map relocs
)
1323 (with-open-file (output elf-core-pathname
1324 :direction
:output
:if-exists
:supersede
1325 :element-type
'(unsigned-byte 8))
1327 `(,(ash code-start-fixup-ofs word-shift
) 0 .
,R_X86_64_64
) relocs
)
1328 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces
))
1329 +backend-page-bytes
+ ; core header
1332 (setf (%vector-raw-bits core-header code-start-fixup-ofs
) 0)
1333 (write-sequence core-header output
) ; Copy prepared header
1334 (force-output output
)
1335 (dolist (space data-spaces
) ; Copy pages from memory
1336 (let ((start (space-physaddr space map
))
1337 (size (space-nbytes-aligned space
)))
1338 (aver (eql (sb-unix:unix-write
(sb-sys:fd-stream-fd output
)
1341 (format t
"Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1342 pte-nbytes pte-nbytes
(floor pte-nbytes
10))
1343 (copy-bytes input output pte-nbytes
)) ; Copy PTEs from input
1345 ;; There's no relation between emit-sizes and which section to put
1346 ;; C symbol references in, however it's a safe bet that if sizes
1347 ;; are supported then so is the .rodata directive.
1348 (format asm-file
(if emit-sizes
" .rodata~%" " .data~%"))
1349 (extract-required-c-symbols map fixedobj-range asm-file
)
1350 (write-assembler-text map fixedobj-range asm-file emit-sizes
)))
1352 (format asm-file
"~% ~A~%" +noexec-stack-note
+))))
1358 (defun cl-user::elfinate
(&optional
(args (cdr sb-ext
:*posix-argv
*)))
1359 (cond ((string= (car args
) "split")
1361 (let ((sizes (string= (car args
) "--sizes")))
1364 (destructuring-bind (input asm
) args
1365 (split-core input asm
:emit-sizes sizes
))))
1367 ((string= (car args
) "relocate")
1368 (destructuring-bind (input output binary start-sym
) (cdr args
)
1370 input output binary
(parse-integer start-sym
:radix
16))))
1372 (error "Unknown command: ~S" args
))))
1374 ;; If loaded as a script, do this
1375 (eval-when (:execute
)
1376 (let ((args (cdr sb-ext
:*posix-argv
*)))
1378 (let ((*print-pretty
* nil
))
1379 (format t
"Args: ~S~%" args
)
1380 (cl-user::elfinate args
)))))