1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
19 (require :sb-posix
)) ; for mmap
21 (defpackage "SB-EDITCORE"
22 (:use
"CL" "SB-VM" "SB-INT" "SB-EXT" "SB-KERNEL" "SB-SYS"
24 (:import-from
"SB-ALIEN-INTERNALS"
25 #:alien-type-bits
#:parse-alien-type
26 #:alien-value-sap
#:alien-value-type
)
27 (:import-from
"SB-C" #:+backend-page-bytes
+)
28 (:import-from
"SB-VM" #:map-objects-in-range
#:reconstitute-object
30 (:import-from
"SB-DISASSEM" #:get-inst-space
#:find-inst
31 #:make-dstate
#:%make-segment
32 #:seg-virtual-location
#:seg-length
#:seg-sap-maker
33 #:map-segment-instructions
34 #:dstate-next-addr
#:dstate-cur-offs
)
35 (:import-from
"SB-X86-64-ASM" #:near-jump-displacement
)
36 (:import-from
"SB-IMPL" #:package-hashtable
#:package-%name
37 #:package-hashtable-cells
38 #:hash-table-table
#:hash-table-number-entries
))
40 (in-package "SB-EDITCORE")
42 (declaim (muffle-conditions compiler-note
))
45 (setq *evaluator-mode
* :compile
))
47 (defconstant core-magic
48 (logior (ash (char-code #\S
) 24)
49 (ash (char-code #\B
) 16)
50 (ash (char-code #\C
) 8)
53 (defconstant build-id-core-entry-type-code
3860)
54 (defconstant new-directory-core-entry-type-code
3861)
55 (defconstant initial-fun-core-entry-type-code
3863)
56 (defconstant page-table-core-entry-type-code
3880)
57 (defconstant end-core-entry-type-code
3840)
59 (defconstant dynamic-core-space-id
1)
60 (defconstant static-core-space-id
2)
61 (defconstant immobile-fixedobj-core-space-id
4)
62 (defconstant immobile-varyobj-core-space-id
5)
64 (defglobal +noexec-stack-note
+ ".section .note.GNU-stack, \"\", @progbits")
66 (defstruct (core-space ; "space" is a CL symbol
68 (:constructor make-space
(id addr data-page page-adjust nwords
)))
69 id addr data-page page-adjust nwords
)
70 (defun space-size (space) (* (space-nwords space
) n-word-bytes
))
71 (defun space-end (space) (+ (space-addr space
) (space-size space
)))
72 (defun space-nbytes-aligned (space)
73 (logandc2 (+ (space-size space
) (1- +backend-page-bytes
+))
74 (1- +backend-page-bytes
+)))
75 (defun space-physaddr (space spaces
)
76 (sap+ (car spaces
) (* (space-data-page space
) +backend-page-bytes
+)))
78 ;;; Given ADDR which is an address in the target core, return the address at which
79 ;;; ADDR is currently mapped while performing the split.
80 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
81 (defun translate-ptr (addr spaces
)
82 (let ((space (find addr
(cdr spaces
) :key
#'space-addr
:test
#'>=)))
83 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
84 ;; macroize or something.
85 (+ (sap-int (car spaces
)) (* (space-data-page space
) +backend-page-bytes
+)
86 (- addr
(space-addr space
)))))
89 (defun get-space (id spaces
)
90 (find id
(cdr spaces
) :key
#'space-id
))
91 (defun compute-nil-object (spaces)
92 (let ((space (get-space static-core-space-id spaces
)))
93 (%make-lisp-obj
(logior (space-addr space
) #x17
))))
95 ;;; Given OBJ which is tagged pointer into the target core, translate it into
96 ;;; the range at which the core is now mapped during execution of this tool,
97 ;;; so that host accessors can dereference its slots.
98 ;;; Use extreme care: while it works to use host accessors on the target core,
99 ;;; we must avoid type checks on instances because LAYOUTs need translation.
100 ;;; Printing boxed objects from the target core will almost always crash.
101 (defun translate (obj spaces
)
102 (%make-lisp-obj
(translate-ptr (get-lisp-obj-address obj
) spaces
)))
104 (defstruct (core-sym (:copier nil
) (:predicate nil
)
105 (:constructor make-core-sym
(package name external
)))
107 (name nil
:read-only t
)
108 (external nil
:read-only t
))
110 (defun c-name (lispname pp-state
)
111 (when (and (symbolp lispname
)
112 (eq (symbol-package lispname
) *cl-package
*))
113 (return-from c-name
(concatenate 'string
"cl:" (string-downcase lispname
))))
114 ;; Get rid of junk from LAMBDAs
116 (named-let recurse
((x lispname
))
117 (cond ((typep x
'(cons (eql lambda
)))
118 (let ((args (second x
)))
119 `(lambda ,(if args sb-c
::*debug-name-sharp
* "()")
120 ,@(recurse (cddr x
)))))
123 (recons x
(recurse (car x
)) (recurse (cdr x
))))
126 ;; Shorten obnoxiously long printed representations of methods
127 ;; by changing FAST-METHOD to METHOD (because who cares?)
129 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
131 ;; (method my-method-name (type-name))
132 ;; I suspect that can use DWARF info to provide even more description,
133 ;; though I also suspect it's relatively unambiguous anyway
134 ;; especially given that file information is available separately.
135 (flet ((unpackageize (thing)
136 (when (typep thing
'core-sym
)
137 (setf (core-sym-package thing
) nil
))
139 (when (typep lispname
'(cons (eql sb-pcl
::fast-method
)))
140 (setq lispname
`(method ,@(cdr lispname
)))
141 (setf (second lispname
) (unpackageize (second lispname
)))
142 (dolist (qual (car (last lispname
)))
143 (unpackageize qual
))))
145 ;; Perform backslash escaping on the exploded string
146 ;; Strings were stringified without surrounding quotes,
147 ;; but there might be quotes embedded anywhere, so escape them,
148 ;; and also remove newlines and non-ASCII.
150 (mapcan (lambda (char)
151 (cond ((not (base-char-p char
)) (list #\?))
152 ((member char
'(#\\ #\")) (list #\\ char
))
153 ((eql char
#\newline
) (list #\_
))
155 (coerce (if (and (stringp lispname
)
156 ;; L denotes a symbol which can not be global on macOS.
157 (char= (char lispname
0) #\L
))
158 (concatenate 'string
"_" lispname
)
159 (write-to-string lispname
160 :pretty t
:pprint-dispatch
(cdr pp-state
)
161 ;; FIXME: should be :level 1, however see
162 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
163 :escape t
:level
2 :length
5
164 :case
:downcase
:gensym nil
165 :right-margin
10000))
167 (let* ((string (coerce characters
'string
))
168 (occurs (incf (gethash string
(car pp-state
) 0))))
170 (concatenate 'string string
"_" (write-to-string occurs
))
173 (defmethod print-object ((sym core-sym
) stream
)
174 (format stream
"~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
175 (core-sym-package sym
)
176 (core-sym-external sym
)
177 (core-sym-name sym
)))
179 (defun fun-name-from-core (fun spaces core-nil packages
180 &aux
(name (%simple-fun-name fun
)))
181 (named-let recurse
((depth 0) (x name
))
182 (unless (= (logand (get-lisp-obj-address x
) 3) 3)
183 (return-from recurse x
)) ; immediate object
184 (when (eq x core-nil
)
185 (return-from recurse nil
))
186 (setq x
(translate x spaces
))
188 (#.list-pointer-lowtag
189 (cons (recurse (1+ depth
) (car x
))
190 (recurse (1+ depth
) (cdr x
))))
191 ((#.instance-pointer-lowtag
#.fun-pointer-lowtag
) "?")
192 (#.other-pointer-lowtag
195 (let ((name (translate (symbol-name x
) spaces
)))
196 (if (eq (symbol-package x
) core-nil
) ; uninterned
197 (string-downcase name
)
198 (let* ((package (truly-the package
199 (translate (symbol-package x
) spaces
)))
200 (package-name (translate (package-%name package
) spaces
))
202 (not (or (string= package-name
"KEYWORD")
203 (string= package-name
"COMMON-LISP"))))
204 (externals (if compute-externals
205 (gethash package-name packages
)
208 (dovector (x (translate
209 (package-hashtable-cells
210 (truly-the package-hashtable
211 (translate (package-external-symbols package
)
215 (push (if (eq x core-nil
) ; random packages can export NIL. wow.
217 (translate (symbol-name (translate x spaces
)) spaces
))
219 (setf externals
(coerce externals
'vector
)
220 (gethash package-name packages
) externals
))
221 ;; The name-cleaning code wants to compare against symbols
222 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
223 ;; Other than that, we avoid finding host symbols
224 ;; because the externalness could be wrong and misleading.
225 ;; It's a very subtle point, but best to get it right.
226 (if (member package-name
'("COMMON-LISP" "KEYWORD" "SB-PCL")
228 ; NIL can't occur, because it has list-pointer-lowtag
229 (find-symbol name package-name
) ; if existing symbol, use it
230 (make-core-sym (if (string= package-name
"KEYWORD") nil package-name
)
232 (if compute-externals
233 (find name externals
:test
'string
=)
238 (defstruct (core-state
242 (:constructor make-core-state
243 (code-space-start code-space-end
244 fixedobj-space-start fixedobj-space-end
245 &aux
(inst-space (get-inst-space))
246 (call-inst (find-inst #b11101000 inst-space
))
247 (jmp-inst (find-inst #b11101001 inst-space
))
248 (pop-inst (find-inst #x5d inst-space
)))))
249 (code-space-start 0 :type fixnum
:read-only t
)
250 (code-space-end 0 :type fixnum
:read-only t
)
251 (fixedobj-space-start 0 :type fixnum
:read-only t
)
252 (fixedobj-space-end 0 :type fixnum
:read-only t
)
253 (dstate (make-dstate nil
) :read-only t
)
254 (seg (%make-segment
:sap-maker
(lambda () (error "Bad sap maker"))
255 :virtual-location
0) :read-only t
)
257 (call-inst nil
:read-only t
)
258 (jmp-inst nil
:read-only t
)
259 (pop-inst nil
:read-only t
))
261 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
262 ;;; to STREAM. SIZE specifies which direcive to emit.
263 ;;; EXCEPTIONS specify offsets at which a specific string should be
264 ;;; written to the file in lieu of memory contents, useful for emitting
265 ;;; expressions involving the assembler '.' symbol (the current PC).
266 (defun emit-asm-directives (size sap count stream
&optional exceptions
)
267 (declare (optimize speed
))
268 (declare (stream stream
))
269 (let ((*print-base
* 16)
270 (string-buffer (make-array 18 :element-type
'base-char
))
271 (fmt #.
(coerce "0x%lx" 'base-string
))
273 (declare ((integer 0 32) per-line
)
278 (format stream
" .quad")
280 (declare ((unsigned-byte 20) i
))
281 (declare (simple-vector exceptions
))
282 (write-char (if (> per-line
0) #\
, #\space
) stream
)
283 (acond ((and (< i
(length exceptions
)) (aref exceptions i
))
284 (write-string it stream
))
288 ;; output-reasonable-integer-in-base is so slow comparated
289 ;; to printf() that the second-most amount of time spent
290 ;; writing the asm file occurs in that function.
291 ;; Unbelievable that we can't do better than that.
292 (with-pinned-objects (string-buffer fmt
)
294 (extern-alien "snprintf"
295 (function int system-area-pointer unsigned system-area-pointer unsigned
))
296 (vector-sap string-buffer
)
297 (length string-buffer
)
299 (sap-ref-word sap
(* i n-word-bytes
))))))
300 (write-string string-buffer stream
:end len
))
301 (write-string "0x" stream
)
302 (write (sap-ref-word sap
(* i n-word-bytes
)) :stream stream
)))
303 (when (and (= (incf per-line
) 16) (< (1+ i
) count
))
304 (format stream
"~% .quad")
307 (aver (not exceptions
))
308 (format stream
" .byte")
310 (write-char (if (> per-line
0) #\
, #\space
) stream
)
311 (write-string "0x" stream
)
312 (write (sap-ref-8 sap i
) :stream stream
)
313 (when (and (= (incf per-line
) 32) (< (1+ i
) count
))
314 (format stream
"~% .byte")
315 (setq per-line
0))))))
318 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count
)
319 (emit-asm-directives :qword
320 (sap+ (code-instructions code-component
)
321 (- (* sb-vm
:code-constants-offset sb-vm
:n-word-bytes
)))
322 sb-vm
:code-constants-offset
324 (let ((list (loop for i from
2 by
2 repeat count
326 (let* ((location (translate (svref vector
(1+ i
)) spaces
))
327 (offset (car location
))
328 (nbytes (- (1+ (cdr location
)) offset
))
330 (symbol-name (translate (svref vector i
) spaces
))
332 (list* offset name nbytes
)))))
333 (loop for
(offset name . nbytes
) in
(sort list
#'< :key
#'car
)
334 do
(format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
335 (format nil
"~(\"~a\"~)" name
) (if emit-sizes nbytes
))
338 (sap+ (code-instructions code-component
) offset
)
339 (ceiling nbytes sb-vm
:n-word-bytes
)
342 (defun code-fixup-locs (code spaces
)
343 (let ((locs (sb-vm::%code-fixups code
)))
345 (sb-c::unpack-code-fixup-locs
346 (if (fixnump locs
) locs
(translate locs spaces
))))))
348 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
349 ;;; all instructions that should be emitted using assembly language
350 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
351 ;;; - function prologue instructions that setup the call frame
352 ;;; - jmp/call instructions that transfer control to the fixedoj space
353 ;;; delimited by bounds in STATE.
354 ;;; At execution time the function will have virtual address LOAD-ADDR.
355 (defun list-annotated-instructions (sap length state load-addr emit-cfi
)
356 (let ((dstate (cs-dstate state
))
358 (call-inst (cs-call-inst state
))
359 (jmp-inst (cs-jmp-inst state
))
360 (pop-inst (cs-pop-inst state
))
362 (or (car (cs-fixup-addrs state
)) most-positive-word
))
364 (setf (seg-virtual-location seg
) load-addr
365 (seg-length seg
) length
366 (seg-sap-maker seg
) (lambda () sap
))
367 ;; KLUDGE: "8f 45 08" is the standard prologue
368 (when (and emit-cfi
(= (logand (sap-ref-32 sap
0) #xFFFFFF
) #x08458f
))
369 (push (list* 0 3 "pop" "8(%rbp)") list
))
370 (map-segment-instructions
371 (lambda (dchunk inst
)
373 ((< next-fixup-addr
(dstate-next-addr dstate
))
374 (let ((operand (sap-ref-32 sap
(- next-fixup-addr load-addr
))))
375 (when (<= (cs-code-space-start state
) operand
(cs-code-space-end state
))
376 (aver (eql (sap-ref-8 sap
(- next-fixup-addr load-addr
1)) #xB8
)) ; mov rax, imm32
377 (push (list* (dstate-cur-offs dstate
) 5 "mov" operand
) list
)))
378 (pop (cs-fixup-addrs state
))
379 (setq next-fixup-addr
(or (car (cs-fixup-addrs state
)) most-positive-word
)))
380 ((or (eq inst jmp-inst
) (eq inst call-inst
))
381 (let ((target-addr (+ (near-jump-displacement dchunk dstate
)
382 (dstate-next-addr dstate
))))
383 (when (<= (cs-fixedobj-space-start state
)
385 (cs-fixedobj-space-end state
))
386 (push (list* (dstate-cur-offs dstate
)
388 (if (eq inst call-inst
) "call" "jmp")
391 ((and (eq inst pop-inst
) (eq (logand dchunk
#xFF
) #x5D
))
392 (push (list* (dstate-cur-offs dstate
) 1 "pop" "%rbp") list
))))
398 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
399 ;;; of memory at PADDR (physical addr) to STREAM.
400 ;;; The function's address as per the core file is VADDR.
401 ;;; (Its eventual address is indeterminate)
402 ;;; If EMIT-CFI is true, then also emit cfi directives.
404 ;;; Notice that we can use one fewer cfi directive than usual because
405 ;;; Lisp always carries a frame pointer as set up by the caller.
410 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
411 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
413 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
417 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
418 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
419 ;;; .cfi_def_cfa 6, 16
420 ;;; .cfi_offset 6, -16
422 ;;; Of course there is a flip-side to this: unwinders think that the new frame
423 ;;; is already begun in the caller. Interruption between these two instructions:
424 ;;; MOV RBP, RSP / CALL #xzzzzz
425 ;;; will show the backtrace as if two invocations of the caller are on stack.
426 ;;; This is tricky to fix because while we can relativize the CFA to the
427 ;;; known frame size, we can't do that based only on a disassembly.
429 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state
)
431 (format stream
" .cfi_startproc~%"))
432 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
433 ;; bytes to be written as an assembly language instruction rather than opaquely,
434 ;; thereby affecting the ELF data (cfi or relocs) produced.
436 (list-annotated-instructions (int-sap paddr
) count core-state vaddr emit-cfi
))
438 (symbol-macrolet ((cur-offset (- ptr paddr
)))
440 (let ((until (if instructions
(caar instructions
) count
)))
441 ;; if we're not aligned, then write some number of bytes
442 ;; to cause alignment. But do not write past the next offset
443 ;; that needs to be written as an instruction.
444 (when (logtest ptr
#x7
) ; unaligned
445 (let ((n (min (- (nth-value 1 (ceiling ptr
8)))
446 (- until cur-offset
))))
448 (emit-asm-directives :byte
(int-sap ptr
) n stream
)
450 ;; Now we're either aligned to a multiple of 8, or the current
451 ;; offset needs to be written as a textual instruction.
452 (let ((n (- until cur-offset
)))
454 (multiple-value-bind (qwords remainder
) (floor n
8)
456 (emit-asm-directives :qword
(int-sap ptr
) qwords stream
#())
457 (incf ptr
(* qwords
8)))
458 (when (plusp remainder
)
459 (emit-asm-directives :byte
(int-sap ptr
) remainder stream
)
460 (incf ptr remainder
))))
461 ;; If the current offset is COUNT, we're done.
462 (when (= cur-offset count
) (return))
463 (aver (= cur-offset until
))
464 (destructuring-bind (length opcode . operand
) (cdr (pop instructions
))
465 (when (cond ((member opcode
'("jmp" "call") :test
#'string
=)
466 (format stream
" ~A 0x~X~%" opcode operand
))
467 ((string= opcode
"pop")
468 (format stream
" ~A ~A~%" opcode operand
)
469 (cond ((string= operand
"8(%rbp)")
470 (format stream
" .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
471 ((string= operand
"%rbp")
472 ;(format stream " .cfi_def_cfa 7, 8~%")
475 ((string= opcode
"mov")
476 (format stream
" mov $(__lisp_code_start+0x~x),%eax~%"
477 (- operand
(cs-code-space-start core-state
))))
479 (bug "Random annotated opcode ~S" opcode
))
481 (when (= cur-offset count
) (return))))))
483 (format stream
" .cfi_endproc~%")))
485 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
486 ;;; TODO: relocate fdefns and instances of standard-generic-function
487 ;;; into the space that is dumped into an ELF section.
488 (defun write-assembler-text
489 (spaces fixedobj-range output
490 &optional emit-sizes
(emit-cfi t
)
491 &aux
(code-space (get-space immobile-varyobj-core-space-id spaces
))
492 (code-space-start (space-addr code-space
)) ; target virtual address
493 (code-space-end (+ code-space-start
(space-size code-space
)))
494 (code-addr code-space-start
)
496 (make-core-state code-space-start code-space-end
498 (+ (car fixedobj-range
) (cdr fixedobj-range
))))
500 (pp-state (cons (make-hash-table :test
'equal
)
501 ;; copy no entries for macros/special-operators (flet, etc)
502 (sb-pretty::make-pprint-dispatch-table
)))
503 (packages (make-hash-table :test
'equal
))
504 (core-nil (compute-nil-object spaces
))
508 (set-pprint-dispatch 'string
509 ;; Write strings without string quotes
510 (lambda (stream string
) (write-string string stream
))
513 (labels ((ldsym-quote (name)
514 (concatenate 'string
'(#\") name
'(#\")))
515 (dumpwords (addr count stream
&optional
(exceptions #()) logical-addr
)
516 (let ((sap (int-sap addr
)))
517 (aver (sap>= sap
(car spaces
)))
518 ;; Make intra-code-space pointers computed at link time
519 (dotimes (i (if logical-addr count
0))
520 (unless (and (< i
(length exceptions
)) (svref exceptions i
))
521 (let ((word (sap-ref-word sap
(* i n-word-bytes
))))
522 (when (and (= (logand word
3) 3) ; is a pointer
523 (<= code-space-start word
(1- code-space-end
))) ; to code space
525 (format t
"~&~(~x: ~x~)~%" (+ logical-addr
(* i n-word-bytes
))
527 (incf n-linker-relocs
)
528 (setf exceptions
(adjust-array exceptions
(max (length exceptions
) (1+ i
))
529 :initial-element nil
)
531 (format nil
"__lisp_code_start+0x~x"
532 (- word code-space-start
)))))))
533 (emit-asm-directives :qword sap count stream exceptions
)))
534 (make-code-obj (addr)
535 (let ((translation (translate-ptr addr spaces
)))
536 (aver (= (%widetag-of
(sap-ref-word (int-sap translation
) 0))
537 code-header-widetag
))
538 (%make-lisp-obj
(logior translation other-pointer-lowtag
))))
539 (calc-obj-size (code)
540 ;; No need to pin - it's not managed by GC
543 (ash (logandc2 (get-lisp-obj-address code
) lowtag-mask
)
544 (- n-fixnum-tag-bits
)))))
546 (logand word widetag-mask
)))
547 (format output
" .text~% .file \"sbcl.core\"
548 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
550 ;; Scan the assembly routines.
551 (let* ((code-component (make-code-obj code-addr
))
552 (size (calc-obj-size code-component
))
554 (truly-the hash-table
555 (translate (car (translate (%code-debug-info code-component
)
558 (cells (translate (hash-table-table hashtable
) spaces
))
559 (count (hash-table-number-entries hashtable
)))
560 (incf code-addr size
)
561 (setf total-code-size size
)
562 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count
))
565 (when (>= code-addr code-space-end
) (return))
566 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
567 (let* ((code (make-code-obj code-addr
))
568 (objsize (calc-obj-size code
)))
569 (setq end-loc
(+ code-addr objsize
))
570 (incf total-code-size objsize
)
572 ((< (code-header-words code
) 4) ; filler object
573 ;; Shouldn't occur unless defrag was not performed
574 (format output
"#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
576 simple-array-unsigned-byte-8-widetag
577 (ash (- objsize
(* 2 n-word-bytes
))
579 (- objsize
(* 2 n-word-bytes
))))
580 ((%instancep
(%code-debug-info code
)) ; assume it's a COMPILED-DEBUG-INFO
581 (aver (plusp (code-n-entries code
)))
583 (sb-c::compiled-debug-info-source
584 (truly-the sb-c
::compiled-debug-info
585 (translate (%code-debug-info code
) spaces
))))
587 (sb-c::debug-source-namestring
588 (truly-the sb-c
::debug-source
(translate source spaces
)))))
589 (setq namestring
(if (eq namestring core-nil
)
591 (translate namestring spaces
)))
592 (unless (string= namestring prev-namestring
)
593 (format output
" .file \"~a\"~%" namestring
)
594 (setq prev-namestring namestring
)))
595 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code
) lowtag-mask
))
596 (boxed-end (+ code-physaddr
597 (ash (code-header-words code
) word-shift
)))
598 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code
0))
600 (format output
"#x~x:~%" code-addr
)
601 (dumpwords code-physaddr
(code-header-words code
) output
#() code-addr
)
602 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
603 (when (> first-fun boxed-end
)
604 (dumpwords boxed-end
(floor (- first-fun boxed-end
) n-word-bytes
)
606 (setf (cs-fixup-addrs core-state
)
608 (+ code-addr
(ash (code-header-words code
) word-shift
) x
))
609 (code-fixup-locs code spaces
)))
610 ;; Loop over all embedded functions.
611 ;; Because simple-fun offsets are relative to the code start
612 ;; (and not in a linked list as they were in the past),
613 ;; iteratation in a "foreign" code object works just fine,
614 ;; subject to the caution about reading boxed words.
615 (dotimes (j (code-n-entries code
))
616 (let* ((fun (%code-entry-point code j
))
617 (fun-addr (logandc2 (get-lisp-obj-address fun
) lowtag-mask
))
618 (end (if (< (1+ j
) (code-n-entries code
))
619 (logandc2 (get-lisp-obj-address (%code-entry-point code
(1+ j
)))
621 (+ (translate-ptr code-addr spaces
) objsize
)))
623 (+ fun-addr
(* simple-fun-code-offset n-word-bytes
)))
624 (size (- end entrypoint
))
625 (lispname (fun-name-from-core fun spaces core-nil packages
))
626 (quotname (ldsym-quote (c-name lispname pp-state
))))
627 ;; Globalize the C symbol only if the name is a legal function designator
628 ;; per the standard definition.
629 ;; This is a technique to try to avoid appending a uniquifying suffix
630 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
631 (format output
"~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
632 (typep lispname
'(or symbol core-sym
(cons (eql setf
))))
635 simple-fun-code-offset output
637 `#(nil ,(format nil
".+~D"
638 (* (1- simple-fun-code-offset
)
642 (format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
643 quotname
(if emit-sizes size
))
644 ;; entrypoint is the current physical address.
645 ;; Also pass in the virtual address in the core
646 ;; (which will differ from the actual load-time address)
647 (emit-lisp-function entrypoint
648 (+ code-addr
(- entrypoint
649 (logandc2 (get-lisp-obj-address code
)
651 size output emit-cfi core-state
)))
653 ;; All fixups should have been consumed by writing the code out
654 (aver (null (cs-fixup-addrs core-state
))))
656 (error "Strange code component: ~S" code
)))
657 (incf code-addr objsize
))))
659 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
660 (format output
"__lisp_code_end:~%")
662 ;; Pad so that non-lisp code can't be colocated on a GC page.
663 ;; (Lack of Lisp object headers in C code is the issue)
664 (let ((aligned-end (logandc2 (+ end-loc
4095) 4095)))
665 (when (> aligned-end end-loc
)
666 (multiple-value-bind (nwords remainder
)
667 (floor (- aligned-end end-loc
) n-word-bytes
)
669 (aver (zerop remainder
))
671 (format output
" .quad ~d, ~d # (simple-array fixnum (~d))~%"
672 simple-array-fixnum-widetag
673 (ash nwords n-fixnum-tag-bits
)
676 (format output
" .fill ~d~%" (* nwords n-word-bytes
))))))
677 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
678 (values total-code-size n-linker-relocs
))
680 ;;; Return either the physical or logical address of the specified symbol.
681 (defun find-target-symbol (package-name symbol-name spaces
682 &optional
(address-mode :physical
))
683 (let* ((space (find immobile-fixedobj-core-space-id
(cdr spaces
) :key
#'space-id
))
684 (start (translate-ptr (space-addr space
) spaces
))
685 (end (+ start
(space-size space
)))
688 (when (>= physaddr end
) (bug "Can't find symbol"))
689 (multiple-value-bind (obj tag size
)
690 (reconstitute-object (ash physaddr
(- n-fixnum-tag-bits
)))
691 (when (and (= tag symbol-widetag
)
692 (string= symbol-name
(translate (symbol-name obj
) spaces
))
693 (%instancep
(symbol-package obj
))
694 (string= package-name
697 (truly-the package
(translate (symbol-package obj
) spaces
)))
699 (return (%make-lisp-obj
700 (logior (ecase address-mode
702 (:logical
(+ (space-addr space
) (- physaddr start
))))
703 other-pointer-lowtag
))))
704 (incf physaddr size
)))))
706 (defun extract-required-c-symbols (spaces asm-file
&optional
(verbose nil
))
707 (flet ((symbol-fdefn-fun (symbol)
708 (let ((vector (translate (symbol-info-vector symbol
) spaces
)))
709 ;; TODO: allow for (plist . info-vector) in the info slot
710 (aver (simple-vector-p vector
))
711 (translate (fdefn-fun (translate (info-vector-fdefn vector
) spaces
))
714 (translate (symbol-global-value
715 (find-target-symbol "SB-SYS" "*LINKAGE-INFO*" spaces
))
719 (find-target-symbol "SB-SYS"
720 "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"
722 (aver (= (get-closure-length dyn-syminfo
) 3))
723 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo
1) spaces
))
724 (ht2 (translate (%closure-index-ref dyn-syminfo
0) spaces
))
725 (table0 (translate (hash-table-table (truly-the hash-table linkage-info
))
727 (table1 (translate (hash-table-table (truly-the hash-table ht1
)) spaces
))
728 (table2 (translate (hash-table-table (truly-the hash-table ht2
)) spaces
))
731 (declare (simple-vector table0 table1 table2
))
735 (format t
"~A~%" x
)))
736 (scan-table (table name fun
&aux
(n 0) (end (length table
)))
738 (format t
"~&~A:~%~A~%"
739 name
(make-string (1+ (length name
)) :initial-element
#\-
)))
742 (let ((val (svref table i
)))
743 (unless (unbound-marker-p val
)
744 (funcall fun
(translate val spaces
))
747 (format t
"TOTAL: ~D entries~2%" n
))))
748 (scan-table table0
"linkage info"
749 (lambda (x &aux
(type #\T
))
751 (setq x
(translate (car x
) spaces
) type
#\D
))
752 (format asm-file
" .long ~A~%" x
)
754 (format t
"~A ~A~%" type x
))
756 (scan-table table1
"defined" #'show
)
757 (scan-table table2
"undefined" #'show
)
758 (let ((diff1 ; linkage not in foreign
759 (remove-if (lambda (x) (member x foreign
:test
#'string
=)) linkage
))
760 (diff2 ; foreign not in linkage
761 (remove-if (lambda (x) (member x linkage
:test
#'string
=)) foreign
)))
763 (format t
"~&Linkage not in foreign:~%~S~%" diff1
)
764 (format t
"~&Foreign not in linkage:~%~S~%" diff2
))
770 (defconstant +sht-null
+ 0)
771 (defconstant +sht-progbits
+ 1)
772 (defconstant +sht-symtab
+ 2)
773 (defconstant +sht-strtab
+ 3)
774 (defconstant +sht-rela
+ 4)
775 (defconstant +sht-rel
+ 9)
777 (define-alien-type elf64-ehdr
779 (ident (array unsigned-char
16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
780 (type (unsigned 16)) ; 1 0
781 (machine (unsigned 16)) ; 3E 0
782 (version (unsigned 32)) ; 1 0 0 0
783 (entry unsigned
) ; 0 0 0 0 0 0 0 0
784 (phoff unsigned
) ; 0 0 0 0 0 0 0 0
786 (flags (unsigned 32)) ; 0 0 0 0
787 (ehsize (unsigned 16)) ; 40 0
788 (phentsize (unsigned 16)) ; 0 0
789 (phnum (unsigned 16)) ; 0 0
790 (shentsize (unsigned 16)) ; 40 0
791 (shnum (unsigned 16)) ; n 0
792 (shstrndx (unsigned 16)))) ; n 0
793 (define-alien-type elf64-shdr
797 (flags (unsigned 64))
803 (addralign (unsigned 64))
804 (entsize (unsigned 64))))
805 (define-alien-type elf64-sym
810 (shndx (unsigned 16))
813 (define-alien-type elf64-rela
815 (offset (unsigned 64))
817 (addend (signed 64))))
819 (defun make-elf64-sym (name info
)
820 (let ((a (make-array 24 :element-type
'(unsigned-byte 8))))
821 (with-pinned-objects (a)
822 (setf (sap-ref-32 (vector-sap a
) 0) name
823 (sap-ref-8 (vector-sap a
) 4) info
))
826 ;;; Return two values: an octet vector comprising a string table
827 ;;; and an alist which maps string to offset in the table.
828 (defun string-table (strings)
829 (let* ((length (+ (1+ (length strings
)) ; one more null than there are strings
830 (reduce #'+ strings
:key
#'length
))) ; data length
831 (bytes (make-array length
:element-type
'(unsigned-byte 8)
835 (dolist (string strings
)
836 (push (cons string index
) alist
)
837 (replace bytes
(map 'vector
#'char-code string
) :start1 index
)
838 (incf index
(1+ (length string
))))
839 (cons (nreverse alist
) bytes
)))
841 (defun write-alien (alien size stream
)
843 (write-byte (sap-ref-8 (alien-value-sap alien
) i
) stream
)))
845 (defun copy-bytes (in-stream out-stream nbytes
847 (make-array 1024 :element-type
'(unsigned-byte 8))))
848 (loop (let ((chunksize (min (length buffer
) nbytes
)))
849 (aver (eql (read-sequence buffer in-stream
:end chunksize
) chunksize
))
850 (write-sequence buffer out-stream
:end chunksize
)
851 (when (zerop (decf nbytes chunksize
)) (return)))))
853 ;;; core header should be an array of words in '.rodata', not a 32K page
854 (defconstant core-header-size
+backend-page-bytes
+) ; stupidly large (FIXME)
856 ;;; Write everything except for the core file itself into OUTPUT-STREAM
857 ;;; and leave the stream padded to a 4K boundary ready to receive data.
858 (defun prepare-elf (core-size relocs output
)
859 (let* ((sym-entry-size 24)
860 (reloc-entry-size 24)
863 `#((:core
"lisp.core" ,+sht-progbits
+ 0 0 0 ,core-align
0)
864 (:sym
".symtab" ,+sht-symtab
+ 0 3 1 8 ,sym-entry-size
)
865 ; section with the strings -- ^ ^ -- 1+ highest local symbol
866 (:str
".strtab" ,+sht-strtab
+ 0 0 0 1 0)
867 (:rel
".relalisp.core" ,+sht-rela
+ 0 2 1 8 ,reloc-entry-size
)
868 ; symbol table -- ^ ^ -- for which section
869 (:note
".note.GNU-stack" ,+sht-null
+ 0 0 0 1 0)))
871 (string-table (append '("__lisp_code_start") (map 'list
#'second sections
))))
872 (strings (cdr string-table
))
873 (padded-strings-size (logandc2 (+ (length strings
) 7) 7))
874 (ehdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil
)) 8))
875 (shdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil
)) 8))
876 (symbols-size (* 2 sym-entry-size
))
877 (shdrs-start (+ ehdr-size symbols-size padded-strings-size
))
878 (shdrs-end (+ shdrs-start
(* (1+ (length sections
)) shdr-size
)))
879 (relocs-size (* (length relocs
) reloc-entry-size
))
880 (relocs-end (+ shdrs-end relocs-size
))
881 (core-start (logandc2 (+ relocs-end
(1- core-align
)) (1- core-align
)))
882 (ident #.
(coerce '(#x7F
#x45
#x4C
#x46
2 1 1 0 0 0 0 0 0 0 0 0)
883 '(array (unsigned-byte 8) 1))))
885 (with-alien ((ehdr elf64-ehdr
))
886 (dotimes (i (ceiling ehdr-size n-word-bytes
))
887 (setf (sap-ref-word (alien-value-sap ehdr
) (* i n-word-bytes
)) 0))
888 (with-pinned-objects (ident)
889 (%byte-blt
(vector-sap ident
) 0 (alien-value-sap ehdr
) 0 16))
890 (setf (slot ehdr
'type
) 1
891 (slot ehdr
'machine
) #x3E
892 (slot ehdr
'version
) 1
893 (slot ehdr
'shoff
) shdrs-start
894 (slot ehdr
'ehsize
) ehdr-size
895 (slot ehdr
'shentsize
) shdr-size
896 (slot ehdr
'shnum
) (1+ (length sections
)) ; section 0 is implied
897 (slot ehdr
'shstrndx
) (1+ (position :str sections
:key
#'car
)))
898 (write-alien ehdr ehdr-size output
))
900 ;; Write symbol table
901 (aver (eql (file-position output
) ehdr-size
))
902 (write-sequence (make-elf64-sym 0 0) output
)
903 ;; The symbol name index is always 1 by construction. The type is #x10
904 ;; given: #define STB_GLOBAL 1
905 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
906 ;; which places the binding in the high 4 bits of the low byte.
907 (write-sequence (make-elf64-sym 1 #x10
) output
)
909 ;; Write string table
910 (aver (eql (file-position output
) (+ ehdr-size symbols-size
)))
911 (write-sequence strings output
) ; an octet vector at this point
912 (dotimes (i (- padded-strings-size
(length strings
)))
913 (write-byte 0 output
))
915 ;; Write section headers
916 (aver (eql (file-position output
) shdrs-start
))
917 (with-alien ((shdr elf64-shdr
))
918 (dotimes (i (ceiling shdr-size n-word-bytes
)) ; Zero-fill
919 (setf (sap-ref-word (alien-value-sap shdr
) (* i n-word-bytes
)) 0))
920 (dotimes (i (1+ (length sections
)))
921 (when (plusp i
) ; Write the zero-filled header as section 0
922 (destructuring-bind (key name type flags link info alignment entsize
)
923 (aref sections
(1- i
))
924 (multiple-value-bind (offset size
)
926 (:sym
(values ehdr-size symbols-size
))
927 (:str
(values (+ ehdr-size symbols-size
) (length strings
)))
928 (:rel
(values shdrs-end relocs-size
))
929 (:core
(values core-start core-size
))
930 (:note
(values 0 0)))
931 (let ((name (cdr (assoc name
(car string-table
) :test
#'string
=))))
932 (setf (slot shdr
'name
) name
933 (slot shdr
'type
) type
934 (slot shdr
'flags
) flags
935 (slot shdr
'off
) offset
936 (slot shdr
'size
) size
937 (slot shdr
'link
) link
938 (slot shdr
'info
) info
939 (slot shdr
'addralign
) alignment
940 (slot shdr
'entsize
) entsize
)))))
941 (write-alien shdr shdr-size output
)))
944 (aver (eql (file-position output
) shdrs-end
))
945 (let ((buf (make-array relocs-size
:element-type
'(unsigned-byte 8)))
947 (with-alien ((rela elf64-rela
))
948 (dovector (reloc relocs
)
949 (destructuring-bind (place addend . kind
) reloc
950 (setf (slot rela
'offset
) place
951 (slot rela
'info
) (logior (ash 1 32) kind
) ; 1 = symbol index
952 (slot rela
'addend
) addend
))
953 (setf (%vector-raw-bits buf
(+ ptr
0)) (sap-ref-word (alien-value-sap rela
) 0)
954 (%vector-raw-bits buf
(+ ptr
1)) (sap-ref-word (alien-value-sap rela
) 8)
955 (%vector-raw-bits buf
(+ ptr
2)) (sap-ref-word (alien-value-sap rela
) 16))
957 (write-sequence buf output
))
960 (dotimes (i (- core-start
(file-position output
)))
961 (write-byte 0 output
))
962 (aver (eq (file-position output
) core-start
))))
964 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
965 ;;; whose code space is subject to link-time relocation.
966 (defconstant R_X86_64_64
1) ; /* Direct 64 bit */
967 (defconstant R_X86_64_PC32
2) ; /* PC relative 32 bit signed */
968 (defconstant R_X86_64_32
10) ; /* Direct 32 bit zero extended */
970 (defun collect-relocations (spaces fixups
&aux
(print nil
))
971 (binding* (((static-start static-end
)
972 (let ((space (get-space static-core-space-id spaces
)))
973 (values (space-addr space
) (space-end space
))))
974 ((code-start code-end
)
975 (let ((space (get-space immobile-varyobj-core-space-id spaces
)))
976 (values (space-addr space
) (space-end space
))))
977 ;; the distance between fixedobj space address (i.e following the pages of
978 ;; dynamic space) in the ELF section which has a presumptive address of 0
979 ;; due to being non-loaded, to where it will be later mapped by coreparse
980 (fixedobj-space-displacement
981 (let ((space (get-space immobile-fixedobj-core-space-id spaces
)))
982 (- (* (1+ (space-data-page space
)) +backend-page-bytes
+) ; 1+ = core header
983 (space-addr space
))))
987 ((abs-fixup (core-offs referent
)
990 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
991 (setf (sap-ref-word (car spaces
) core-offs
) 0)
992 (vector-push-extend `(,(+ core-header-size core-offs
)
993 ,(- referent code-start
) .
,R_X86_64_64
)
995 (abs32-fixup (core-offs referent
)
998 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
999 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
1000 (vector-push-extend `(,(+ core-header-size core-offs
)
1001 ,(- referent code-start
) .
,R_X86_64_32
)
1003 (rel-fixup (core-offs referent
)
1006 (format t
"~x = 0x~(~x~): (r)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
1007 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
1008 (vector-push-extend `(,(+ core-header-size core-offs
)
1009 ,(- referent code-start
) .
,R_X86_64_PC32
)
1011 (in-code-space-p (ptr)
1012 (and (<= code-start ptr
) (< ptr code-end
)))
1013 ;; Given a address which is an offset into the data pages of the target core,
1014 ;; compute the logical address which that offset would be mapped to.
1015 ;; For example core address 0 is the virtual address of static space.
1016 (core-to-logical (core-offs &aux
(page (floor core-offs
+backend-page-bytes
+)))
1017 (dolist (space (cdr spaces
)
1018 (bug "Can't translate core offset ~x using ~x"
1020 (let* ((page0 (space-data-page space
))
1021 (nwords (space-nwords space
))
1022 (id (space-id space
))
1023 (npages (ceiling nwords
(/ +backend-page-bytes
+ n-word-bytes
))))
1024 (when (and (<= page0 page
(+ page0
(1- npages
)))
1025 (/= id immobile-varyobj-core-space-id
))
1026 (return (+ (space-addr space
)
1027 (* (- page page0
) +backend-page-bytes
+)
1028 (logand core-offs
(1- +backend-page-bytes
+))))))))
1029 (scanptrs (obj wordindex-min wordindex-max
&aux
(n-fixups 0))
1030 (do* ((base-addr (logandc2 (get-lisp-obj-address obj
) lowtag-mask
))
1031 (sap (int-sap base-addr
))
1032 ;; core-offs is the offset in the lisp.core ELF section.
1033 (core-offs (- base-addr
(sap-int (car spaces
))))
1034 (i wordindex-min
(1+ i
)))
1035 ((> i wordindex-max
) n-fixups
)
1036 (let ((ptr (sap-ref-word sap
(ash i word-shift
))))
1037 (when (and (= (logand ptr
3) 3) (in-code-space-p ptr
))
1038 (abs-fixup (+ core-offs
(ash i word-shift
)) ptr
)
1040 (scanptr (obj wordindex
)
1041 (plusp (scanptrs obj wordindex wordindex
))) ; trivial wrapper
1042 (scan-obj (obj widetag size vaddr
1043 &aux
(core-offs (- (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1044 (sap-int (car spaces
))))
1045 (nwords (ceiling size n-word-bytes
)))
1048 (return-from scan-obj
))
1051 (let ((layout (truly-the layout
1052 (translate (%instance-layout obj
) spaces
))))
1053 ;; FIXME: even though the layout is supplied, it's not good enough,
1054 ;; because the macro references the layout-bitmap which might
1055 ;; be a bignum which is a pointer into the logical core address.
1056 (unless (fixnump (layout-bitmap layout
))
1057 (error "Can't process bignum bitmap"))
1058 (do-instance-tagged-slot (i obj
:layout layout
)
1059 (scanptr obj
(1+ i
))))
1060 (return-from scan-obj
))
1061 (#.simple-vector-widetag
1062 (let ((len (length (the simple-vector obj
))))
1063 (when (eql (logand (get-header-data obj
) #xFF
) vector-valid-hashing-subtype
)
1064 (do ((i 2 (+ i
2)) (needs-rehash))
1067 (setf (svref obj
1) 1)))
1068 (when (scanptr obj
(+ vector-data-offset i
))
1069 (format t
"~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj
))
1070 (setq needs-rehash t
))
1071 (scanptr obj
(+ vector-data-offset i
1)))
1072 (return-from scan-obj
))
1073 (setq nwords
(+ len
2))))
1076 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1077 (int-sap (+ (- (get-lisp-obj-address obj
) other-pointer-lowtag
)
1078 (ash fdefn-raw-addr-slot word-shift
))))
1079 ;; what the fdefn's logical PC will be
1080 (fdefn-logical-pc (+ vaddr
(ash fdefn-raw-addr-slot word-shift
)))
1081 (rel32off (signed-sap-ref-32 fdefn-pc-sap
1))
1082 (target (+ fdefn-logical-pc
1 rel32off
))
1084 (if (<= static-start vaddr static-end
)
1085 (+ (- sb-vm
:static-space-start
) +backend-page-bytes
+)
1086 fixedobj-space-displacement
)))
1087 (when (in-code-space-p target
)
1088 ;; This addend needs to account for the fact that the location
1089 ;; where fixup occurs is not where the fdefn will actually exist.
1090 (rel-fixup (+ core-offs
(ash 3 word-shift
) 1)
1091 (+ target space-displacement
))))
1092 (return-from scan-obj
))
1093 ((#.closure-widetag
#.funcallable-instance-widetag
)
1094 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj
))
1095 (- n-word-bytes fun-pointer-lowtag
))))
1096 (when (in-code-space-p word
)
1097 (abs-fixup (+ core-offs
(ash 1 word-shift
)) word
)))
1098 (when (eq widetag funcallable-instance-widetag
)
1099 (let ((layout (truly-the layout
1100 (translate (%funcallable-instance-layout obj
) spaces
))))
1101 (unless (fixnump (layout-bitmap layout
))
1102 (error "Can't process bignum bitmap"))
1103 (let ((bitmap (layout-bitmap layout
)))
1104 (unless (eql bitmap -
1)
1105 ;; tagged slots precede untagged slots,
1106 ;; so integer-length is the count of tagged slots.
1107 (setq nwords
(1+ (integer-length bitmap
))))))))
1108 ;; mixed boxed/unboxed objects
1109 (#.code-header-widetag
1110 (dolist (loc (code-fixup-locs obj spaces
))
1111 (let ((val (sap-ref-32 (code-instructions obj
) loc
)))
1112 (when (in-code-space-p val
)
1113 (abs32-fixup (sap- (sap+ (code-instructions obj
) loc
) (car spaces
))
1115 (dotimes (i (code-n-entries obj
))
1116 (scanptrs (%code-entry-point obj i
) 2 5))
1117 (setq nwords
(code-header-words obj
)))
1118 ;; boxed objects that can reference code/simple-funs
1119 ((#.value-cell-widetag
#.symbol-widetag
#.weak-pointer-widetag
))
1121 (return-from scan-obj
)))
1122 (scanptrs obj
1 (1- nwords
))))
1123 (dolist (space (reverse (cdr spaces
)))
1124 (let* ((logical-addr (space-addr space
))
1125 (size (space-size space
))
1126 (physical-addr (space-physaddr space spaces
))
1127 (physical-end (sap+ physical-addr size
))
1128 (vaddr-translation (+ (- (sap-int physical-addr
)) logical-addr
)))
1129 (unless (= (space-id space
) immobile-varyobj-core-space-id
)
1130 (dx-flet ((visit (obj widetag size
)
1131 ;; Compute the object's intended virtual address
1132 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1133 vaddr-translation
)))
1134 (scan-obj obj widetag size vaddr
))))
1135 (map-objects-in-range
1137 (ash (sap-int physical-addr
) (- n-fixnum-tag-bits
))
1138 (ash (sap-int physical-end
) (- n-fixnum-tag-bits
))))
1139 (when (and (plusp (logior n-abs n-rel
)) print
)
1140 (format t
"space @ ~x: ~d absolute + ~d relative fixups~%"
1141 logical-addr n-abs n-rel
))
1142 (setq n-abs
0 n-rel
0))))))
1144 (format t
"total of ~D linker fixups~%" (length fixups
)))
1149 (macrolet ((do-core-header-entry (((id-var len-var ptr-var
) buffer
) &body body
)
1150 `(let ((,ptr-var
1))
1152 (let ((,id-var
(%vector-raw-bits
,buffer
,ptr-var
))
1153 (,len-var
(%vector-raw-bits
,buffer
(1+ ,ptr-var
))))
1156 (when (= ,id-var end-core-entry-type-code
) (return))
1158 (incf ,ptr-var
,len-var
)))))
1159 (do-directory-entry (((index-var start-index input-nbytes
) buffer
) &body body
)
1160 `(let ((words-per-dirent 5))
1161 (multiple-value-bind (n-entries remainder
)
1162 (floor ,input-nbytes words-per-dirent
)
1163 (aver (zerop remainder
))
1164 (symbol-macrolet ((id (%vector-raw-bits
,buffer index
))
1165 (nwords (%vector-raw-bits
,buffer
(+ index
1)))
1166 (data-page (%vector-raw-bits
,buffer
(+ index
2)))
1167 (addr (%vector-raw-bits
,buffer
(+ index
3)))
1168 (npages (%vector-raw-bits
,buffer
(+ index
4))))
1169 (do ((,index-var
,start-index
(+ ,index-var words-per-dirent
)))
1170 ((= ,index-var
(+ ,start-index
(* n-entries words-per-dirent
))))
1172 (with-mapped-core ((sap-var start npages stream
) &body body
)
1178 (* ,npages
+backend-page-bytes
+)
1179 (logior sb-posix
:prot-read sb-posix
:prot-write
)
1180 sb-posix
:map-private
1181 (sb-sys:fd-stream-fd
,stream
)
1182 ;; Skip the core header
1183 (+ ,start
+backend-page-bytes
+)))
1186 (sb-posix:munmap
,sap-var
(* ,npages
+backend-page-bytes
+)))))))
1188 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1189 ;;; separate it into pieces.
1190 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1191 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1192 ;;; The ".core" file is a native core file used for starting a binary that
1193 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1194 ;;; is for linking in to a binary that needs no "--core" argument.
1196 (input-pathname asm-pathname
1197 &key emit-sizes
(verbose nil
)
1198 &aux
(split-core-pathname
1199 (merge-pathnames (make-pathname :type
"core") asm-pathname
))
1202 (make-pathname :name
(concatenate 'string
(pathname-name asm-pathname
) "-core")
1205 (core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
1206 (original-total-npages 0)
1209 (code-start-fixup-ofs 0) ; where to fixup the core header
1212 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1213 (relocs (make-array 100000 :adjustable t
:fill-pointer
0)))
1216 (ignore-errors (delete-file asm-pathname
))
1217 (ignore-errors (delete-file split-core-pathname
))
1218 (ignore-errors (delete-file elf-core-pathname
))
1219 ;; Ensure that all files can be opened
1220 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
1221 (with-open-file (asm-file asm-pathname
:direction
:output
:if-exists
:supersede
)
1222 (with-open-file (split-core split-core-pathname
:direction
:output
1223 :element-type
'(unsigned-byte 8) :if-exists
:supersede
)
1224 (read-sequence core-header input
)
1225 (cond ((= (%vector-raw-bits core-header
0) core-magic
))
1226 (t ; possible embedded core
1227 (file-position input
(- (file-length input
)
1228 (* 2 n-word-bytes
)))
1229 (aver (eql (read-sequence core-header input
) (* 2 n-word-bytes
)))
1230 (aver (= (%vector-raw-bits core-header
1) core-magic
))
1231 (setq core-offset
(%vector-raw-bits core-header
0))
1233 (format t
"~&embedded core starts at #x~x into input~%" core-offset
))
1234 (file-position input core-offset
)
1235 (read-sequence core-header input
)
1236 (aver (= (%vector-raw-bits core-header
0) core-magic
))))
1237 (do-core-header-entry ((id len ptr
) core-header
)
1239 (#.build-id-core-entry-type-code
1241 (let ((string (make-string (%vector-raw-bits core-header ptr
)
1242 :element-type
'base-char
)))
1243 (%byte-blt core-header
(* (1+ ptr
) n-word-bytes
) string
0 (length string
))
1244 (format t
"Build ID [~a]~%" string
))))
1245 (#.new-directory-core-entry-type-code
1246 (do-directory-entry ((index ptr len
) core-header
)
1247 (incf original-total-npages npages
)
1248 (push (make-space id addr data-page page-adjust nwords
) spaces
)
1250 (format t
"id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1251 id data-page npages addr nwords
1252 (= id immobile-varyobj-core-space-id
)))
1253 (cond ((= id immobile-varyobj-core-space-id
)
1254 (setq code-start-fixup-ofs
(+ index
3))
1255 ;; Keep this entry but delete the page count. We need to know
1256 ;; where the space was supposed to be mapped and at what size.
1257 ;; Subsequent core entries will need to adjust their start page
1258 ;; downward (just the PTEs's start page now).
1259 (setq page-adjust npages data-page
0 npages
0))
1261 ;; Keep track of where the fixedobj space wants to be.
1262 (when (= id immobile-fixedobj-core-space-id
)
1263 (setq fixedobj-range
(cons addr
(ash nwords word-shift
))))
1264 (when (plusp npages
) ; enqueue
1265 (push (cons data-page
(* npages
+backend-page-bytes
+))
1267 ;; adjust this entry's start page in the new core
1268 (decf data-page page-adjust
)))))
1269 (#.page-table-core-entry-type-code
1271 (symbol-macrolet ((nbytes (%vector-raw-bits core-header
(1+ ptr
)))
1272 (data-page (%vector-raw-bits core-header
(+ ptr
2))))
1273 (aver (= data-page original-total-npages
))
1274 (aver (= (ceiling (space-nwords
1275 (find dynamic-core-space-id spaces
:key
#'space-id
))
1276 (/ +backend-page-bytes
+ n-word-bytes
))
1277 (%vector-raw-bits core-header ptr
))) ; number of PTEs
1279 (format t
"PTE: page=~5x~40tbytes=~8x~%" data-page nbytes
))
1280 (push (cons data-page nbytes
) copy-actions
)
1281 (decf data-page page-adjust
)))))
1282 (let ((buffer (make-array +backend-page-bytes
+
1283 :element-type
'(unsigned-byte 8)))
1285 ;; Write the new core file
1286 (write-sequence core-header split-core
)
1287 (dolist (action (reverse copy-actions
)) ; nondestructive
1288 ;; page index convention assumes absence of core header.
1289 ;; i.e. data page 0 is the file page immediately following the core header
1290 (let ((offset (* (1+ (car action
)) +backend-page-bytes
+))
1291 (nbytes (cdr action
)))
1293 (format t
"File offset ~10x: ~10x bytes~%" offset nbytes
))
1294 (setq filepos
(+ core-offset offset
))
1295 (file-position input filepos
)
1296 (copy-bytes input split-core nbytes buffer
)))
1297 ;; Trailer (runtime options and magic number)
1298 (let ((nbytes (read-sequence buffer input
)))
1299 ;; expect trailing magic number
1300 (let ((ptr (floor (- nbytes n-word-bytes
) n-word-bytes
)))
1301 (aver (= (%vector-raw-bits buffer ptr
) core-magic
)))
1302 ;; File position of the core header needs to be set to 0
1303 ;; regardless of what it was
1304 (setf (%vector-raw-bits buffer
4) 0)
1306 (format t
"Trailer words:(~{~X~^ ~})~%"
1307 (loop for i below
(floor nbytes n-word-bytes
)
1308 collect
(%vector-raw-bits buffer i
))))
1309 (write-sequence buffer split-core
:end nbytes
)
1310 (finish-output split-core
))
1312 (aver (= (+ core-offset
1313 (* page-adjust
+backend-page-bytes
+)
1314 (file-length split-core
))
1315 (file-length input
)))
1316 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1317 (file-position input filepos
)))
1319 ;; Map the original core file to memory
1320 (with-mapped-core (sap core-offset original-total-npages input
)
1322 (delete immobile-varyobj-core-space-id
(reverse spaces
)
1324 (map (cons sap
(sort (copy-list spaces
) #'> :key
#'space-addr
)))
1325 (pte-nbytes (cdar copy-actions
)))
1326 (collect-relocations map relocs
)
1327 (with-open-file (output elf-core-pathname
1328 :direction
:output
:if-exists
:supersede
1329 :element-type
'(unsigned-byte 8))
1331 `(,(ash code-start-fixup-ofs word-shift
) 0 .
,R_X86_64_64
) relocs
)
1332 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces
))
1333 +backend-page-bytes
+ ; core header
1336 (setf (%vector-raw-bits core-header code-start-fixup-ofs
) 0)
1337 (write-sequence core-header output
) ; Copy prepared header
1338 (force-output output
)
1339 ;; Change SB-C::*COMPILE[-FILE]-TO-MEMORY-SPACE* to :DYNAMIC
1340 ;; in case the resulting executable needs to compile anything.
1341 ;; (Call frame info will be missing, but at least it's something.)
1342 (dolist (name '("*COMPILE-FILE-TO-MEMORY-SPACE*"
1343 "*COMPILE-TO-MEMORY-SPACE*"))
1344 (%set-symbol-global-value
1345 (find-target-symbol "SB-C" name map
)
1346 (find-target-symbol "KEYWORD" "DYNAMIC" map
:logical
)))
1348 (dolist (space data-spaces
) ; Copy pages from memory
1349 (let ((start (space-physaddr space map
))
1350 (size (space-nbytes-aligned space
)))
1351 (aver (eql (sb-unix:unix-write
(sb-sys:fd-stream-fd output
)
1355 (format t
"Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1356 pte-nbytes pte-nbytes
(floor pte-nbytes
10)))
1357 (copy-bytes input output pte-nbytes
)) ; Copy PTEs from input
1359 ;; There's no relation between emit-sizes and which section to put
1360 ;; C symbol references in, however it's a safe bet that if sizes
1361 ;; are supported then so is the .rodata directive.
1362 (format asm-file
(if emit-sizes
" .rodata~%" " .data~%"))
1363 (extract-required-c-symbols map asm-file
)
1364 (write-assembler-text map fixedobj-range asm-file emit-sizes
)))
1366 (format asm-file
"~% ~A~%" +noexec-stack-note
+))))
1372 (defun cl-user::elfinate
(&optional
(args (cdr sb-ext
:*posix-argv
*)))
1373 (cond ((string= (car args
) "split")
1375 (let ((sizes (string= (car args
) "--sizes")))
1378 (destructuring-bind (input asm
) args
1379 (split-core input asm
:emit-sizes sizes
))))
1381 ((string= (car args
) "relocate")
1382 (destructuring-bind (input output binary start-sym
) (cdr args
)
1384 input output binary
(parse-integer start-sym
:radix
16))))
1386 (error "Unknown command: ~S" args
))))
1388 ;; If loaded as a script, do this
1389 (eval-when (:execute
)
1390 (let ((args (cdr sb-ext
:*posix-argv
*)))
1392 (let ((*print-pretty
* nil
))
1393 (format t
"Args: ~S~%" args
)
1394 (cl-user::elfinate args
)))))