1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
19 (require :sb-posix
)) ; for mmap
21 (load (merge-pathnames "corefile.lisp" *load-pathname
*))
23 (defpackage "SB-EDITCORE"
24 (:use
"CL" "SB-ALIEN" "SB-COREFILE" "SB-INT" "SB-EXT"
25 "SB-KERNEL" "SB-SYS" "SB-VM")
26 (:import-from
"SB-ALIEN-INTERNALS"
27 #:alien-type-bits
#:parse-alien-type
28 #:alien-value-sap
#:alien-value-type
)
29 (:import-from
"SB-C" #:+backend-page-bytes
+)
30 (:import-from
"SB-VM" #:map-objects-in-range
#:reconstitute-object
32 (:import-from
"SB-DISASSEM" #:get-inst-space
#:find-inst
33 #:make-dstate
#:%make-segment
34 #:seg-virtual-location
#:seg-length
#:seg-sap-maker
35 #:map-segment-instructions
36 #:dstate-next-addr
#:dstate-cur-offs
)
37 (:import-from
"SB-X86-64-ASM" #:near-jump-displacement
)
38 (:import-from
"SB-IMPL" #:package-hashtable
#:package-%name
39 #:package-hashtable-cells
40 #:hash-table-table
#:hash-table-number-entries
))
42 (in-package "SB-EDITCORE")
44 (declaim (muffle-conditions compiler-note
))
47 (setq *evaluator-mode
* :compile
))
49 (defconstant core-magic
50 (logior (ash (char-code #\S
) 24)
51 (ash (char-code #\B
) 16)
52 (ash (char-code #\C
) 8)
55 (defglobal +noexec-stack-note
+ ".section .note.GNU-stack, \"\", @progbits")
57 (defstruct (core-space ; "space" is a CL symbol
59 (:constructor make-space
(id addr data-page page-adjust nwords
)))
60 id addr data-page page-adjust nwords
)
61 (defun space-size (space) (* (space-nwords space
) n-word-bytes
))
62 (defun space-end (space) (+ (space-addr space
) (space-size space
)))
63 (defun space-nbytes-aligned (space)
64 (logandc2 (+ (space-size space
) (1- +backend-page-bytes
+))
65 (1- +backend-page-bytes
+)))
66 (defun space-physaddr (space spaces
)
67 (sap+ (car spaces
) (* (space-data-page space
) +backend-page-bytes
+)))
69 ;;; Given ADDR which is an address in the target core, return the address at which
70 ;;; ADDR is currently mapped while performing the split.
71 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
72 (defun translate-ptr (addr spaces
)
73 (let ((space (find addr
(cdr spaces
) :key
#'space-addr
:test
#'>=)))
74 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
75 ;; macroize or something.
76 (+ (sap-int (car spaces
)) (* (space-data-page space
) +backend-page-bytes
+)
77 (- addr
(space-addr space
)))))
80 (defun get-space (id spaces
)
81 (find id
(cdr spaces
) :key
#'space-id
))
82 (defun compute-nil-object (spaces)
83 (let ((space (get-space static-core-space-id spaces
)))
84 (%make-lisp-obj
(logior (space-addr space
) #x17
))))
86 ;;; Given OBJ which is tagged pointer into the target core, translate it into
87 ;;; the range at which the core is now mapped during execution of this tool,
88 ;;; so that host accessors can dereference its slots.
89 ;;; Use extreme care: while it works to use host accessors on the target core,
90 ;;; we must avoid type checks on instances because LAYOUTs need translation.
91 ;;; Printing boxed objects from the target core will almost always crash.
92 (defun translate (obj spaces
)
93 (%make-lisp-obj
(translate-ptr (get-lisp-obj-address obj
) spaces
)))
95 (defstruct (core-sym (:copier nil
) (:predicate nil
)
96 (:constructor make-core-sym
(package name external
)))
98 (name nil
:read-only t
)
99 (external nil
:read-only t
))
101 (defun c-name (lispname pp-state
)
102 (when (and (symbolp lispname
)
103 (eq (symbol-package lispname
) *cl-package
*))
104 (return-from c-name
(concatenate 'string
"cl:" (string-downcase lispname
))))
105 ;; Get rid of junk from LAMBDAs
107 (named-let recurse
((x lispname
))
108 (cond ((typep x
'(cons (eql lambda
)))
109 (let ((args (second x
)))
110 `(lambda ,(if args sb-c
::*debug-name-sharp
* "()")
111 ,@(recurse (cddr x
)))))
114 (recons x
(recurse (car x
)) (recurse (cdr x
))))
117 ;; Shorten obnoxiously long printed representations of methods
118 ;; by changing FAST-METHOD to METHOD (because who cares?)
120 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
122 ;; (method my-method-name (type-name))
123 ;; I suspect that can use DWARF info to provide even more description,
124 ;; though I also suspect it's relatively unambiguous anyway
125 ;; especially given that file information is available separately.
126 (flet ((unpackageize (thing)
127 (when (typep thing
'core-sym
)
128 (setf (core-sym-package thing
) nil
))
130 (when (typep lispname
'(cons (eql sb-pcl
::fast-method
)))
131 (setq lispname
`(method ,@(cdr lispname
)))
132 (setf (second lispname
) (unpackageize (second lispname
)))
133 (dolist (qual (car (last lispname
)))
134 (unpackageize qual
))))
136 ;; Perform backslash escaping on the exploded string
137 ;; Strings were stringified without surrounding quotes,
138 ;; but there might be quotes embedded anywhere, so escape them,
139 ;; and also remove newlines and non-ASCII.
141 (mapcan (lambda (char)
142 (cond ((not (base-char-p char
)) (list #\?))
143 ((member char
'(#\\ #\")) (list #\\ char
))
144 ((eql char
#\newline
) (list #\_
))
146 (coerce (if (and (stringp lispname
)
147 ;; L denotes a symbol which can not be global on macOS.
148 (char= (char lispname
0) #\L
))
149 (concatenate 'string
"_" lispname
)
150 (write-to-string lispname
151 :pretty t
:pprint-dispatch
(cdr pp-state
)
152 ;; FIXME: should be :level 1, however see
153 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
154 :escape t
:level
2 :length
5
155 :case
:downcase
:gensym nil
156 :right-margin
10000))
158 (let* ((string (coerce characters
'string
))
159 (occurs (incf (gethash string
(car pp-state
) 0))))
161 (concatenate 'string string
"_" (write-to-string occurs
))
164 (defmethod print-object ((sym core-sym
) stream
)
165 (format stream
"~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
166 (core-sym-package sym
)
167 (core-sym-external sym
)
168 (core-sym-name sym
)))
170 (defun fun-name-from-core (fun spaces core-nil packages
171 &aux
(name (%simple-fun-name fun
)))
172 (named-let recurse
((depth 0) (x name
))
173 (unless (= (logand (get-lisp-obj-address x
) 3) 3)
174 (return-from recurse x
)) ; immediate object
175 (when (eq x core-nil
)
176 (return-from recurse nil
))
177 (setq x
(translate x spaces
))
179 (#.list-pointer-lowtag
180 (cons (recurse (1+ depth
) (car x
))
181 (recurse (1+ depth
) (cdr x
))))
182 ((#.instance-pointer-lowtag
#.fun-pointer-lowtag
) "?")
183 (#.other-pointer-lowtag
186 (let ((name (translate (symbol-name x
) spaces
)))
187 (if (eq (symbol-package x
) core-nil
) ; uninterned
188 (string-downcase name
)
189 (let* ((package (truly-the package
190 (translate (symbol-package x
) spaces
)))
191 (package-name (translate (package-%name package
) spaces
))
193 (not (or (string= package-name
"KEYWORD")
194 (string= package-name
"COMMON-LISP"))))
195 (externals (if compute-externals
196 (gethash package-name packages
)
199 (dovector (x (translate
200 (package-hashtable-cells
201 (truly-the package-hashtable
202 (translate (package-external-symbols package
)
206 (push (if (eq x core-nil
) ; random packages can export NIL. wow.
208 (translate (symbol-name (translate x spaces
)) spaces
))
210 (setf externals
(coerce externals
'vector
)
211 (gethash package-name packages
) externals
))
212 ;; The name-cleaning code wants to compare against symbols
213 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
214 ;; Other than that, we avoid finding host symbols
215 ;; because the externalness could be wrong and misleading.
216 ;; It's a very subtle point, but best to get it right.
217 (if (member package-name
'("COMMON-LISP" "KEYWORD" "SB-PCL")
219 ; NIL can't occur, because it has list-pointer-lowtag
220 (find-symbol name package-name
) ; if existing symbol, use it
221 (make-core-sym (if (string= package-name
"KEYWORD") nil package-name
)
223 (if compute-externals
224 (find name externals
:test
'string
=)
229 (defstruct (core-state
233 (:constructor make-core-state
234 (code-space-start code-space-end
235 fixedobj-space-start fixedobj-space-end
236 &aux
(inst-space (get-inst-space))
237 (call-inst (find-inst #b11101000 inst-space
))
238 (jmp-inst (find-inst #b11101001 inst-space
))
239 (pop-inst (find-inst #x5d inst-space
)))))
240 (code-space-start 0 :type fixnum
:read-only t
)
241 (code-space-end 0 :type fixnum
:read-only t
)
242 (fixedobj-space-start 0 :type fixnum
:read-only t
)
243 (fixedobj-space-end 0 :type fixnum
:read-only t
)
244 (dstate (make-dstate nil
) :read-only t
)
245 (seg (%make-segment
:sap-maker
(lambda () (error "Bad sap maker"))
246 :virtual-location
0) :read-only t
)
248 (call-inst nil
:read-only t
)
249 (jmp-inst nil
:read-only t
)
250 (pop-inst nil
:read-only t
))
252 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
253 ;;; to STREAM. SIZE specifies which direcive to emit.
254 ;;; EXCEPTIONS specify offsets at which a specific string should be
255 ;;; written to the file in lieu of memory contents, useful for emitting
256 ;;; expressions involving the assembler '.' symbol (the current PC).
257 (defun emit-asm-directives (size sap count stream
&optional exceptions
)
258 (declare (optimize speed
))
259 (declare (stream stream
))
260 (let ((*print-base
* 16)
261 (string-buffer (make-array 18 :element-type
'base-char
))
262 (fmt #.
(coerce "0x%lx" 'base-string
))
264 (declare ((integer 0 32) per-line
)
269 (format stream
" .quad")
271 (declare ((unsigned-byte 20) i
))
272 (declare (simple-vector exceptions
))
273 (write-char (if (> per-line
0) #\
, #\space
) stream
)
274 (acond ((and (< i
(length exceptions
)) (aref exceptions i
))
275 (write-string it stream
))
279 ;; output-reasonable-integer-in-base is so slow comparated
280 ;; to printf() that the second-most amount of time spent
281 ;; writing the asm file occurs in that function.
282 ;; Unbelievable that we can't do better than that.
283 (with-pinned-objects (string-buffer fmt
)
285 (extern-alien "snprintf"
286 (function int system-area-pointer unsigned system-area-pointer unsigned
))
287 (vector-sap string-buffer
)
288 (length string-buffer
)
290 (sap-ref-word sap
(* i n-word-bytes
))))))
291 (write-string string-buffer stream
:end len
))
292 (write-string "0x" stream
)
293 (write (sap-ref-word sap
(* i n-word-bytes
)) :stream stream
)))
294 (when (and (= (incf per-line
) 16) (< (1+ i
) count
))
295 (format stream
"~% .quad")
298 (aver (not exceptions
))
299 (format stream
" .byte")
301 (write-char (if (> per-line
0) #\
, #\space
) stream
)
302 (write-string "0x" stream
)
303 (write (sap-ref-8 sap i
) :stream stream
)
304 (when (and (= (incf per-line
) 32) (< (1+ i
) count
))
305 (format stream
"~% .byte")
306 (setq per-line
0))))))
309 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count
)
310 (emit-asm-directives :qword
311 (sap+ (code-instructions code-component
)
312 (- (* sb-vm
:code-constants-offset sb-vm
:n-word-bytes
)))
313 sb-vm
:code-constants-offset
315 (let ((list (loop for i from
2 by
2 repeat count
317 (let* ((location (translate (svref vector
(1+ i
)) spaces
))
318 (offset (car location
))
319 (nbytes (- (1+ (cdr location
)) offset
))
321 (symbol-name (translate (svref vector i
) spaces
))
323 (list* offset name nbytes
)))))
324 (loop for
(offset name . nbytes
) in
(sort list
#'< :key
#'car
)
325 do
(format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
326 (format nil
"~(\"~a\"~)" name
) (if emit-sizes nbytes
))
329 (sap+ (code-instructions code-component
) offset
)
330 (ceiling nbytes sb-vm
:n-word-bytes
)
333 (defun code-fixup-locs (code spaces
)
334 (let ((locs (sb-vm::%code-fixups code
)))
336 (sb-c::unpack-code-fixup-locs
337 (if (fixnump locs
) locs
(translate locs spaces
))))))
339 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
340 ;;; all instructions that should be emitted using assembly language
341 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
342 ;;; - function prologue instructions that setup the call frame
343 ;;; - jmp/call instructions that transfer control to the fixedoj space
344 ;;; delimited by bounds in STATE.
345 ;;; At execution time the function will have virtual address LOAD-ADDR.
346 (defun list-annotated-instructions (sap length state load-addr emit-cfi
)
347 (let ((dstate (cs-dstate state
))
349 (call-inst (cs-call-inst state
))
350 (jmp-inst (cs-jmp-inst state
))
351 (pop-inst (cs-pop-inst state
))
353 (or (car (cs-fixup-addrs state
)) most-positive-word
))
355 (setf (seg-virtual-location seg
) load-addr
356 (seg-length seg
) length
357 (seg-sap-maker seg
) (lambda () sap
))
358 ;; KLUDGE: "8f 45 08" is the standard prologue
359 (when (and emit-cfi
(= (logand (sap-ref-32 sap
0) #xFFFFFF
) #x08458f
))
360 (push (list* 0 3 "pop" "8(%rbp)") list
))
361 (map-segment-instructions
362 (lambda (dchunk inst
)
364 ((< next-fixup-addr
(dstate-next-addr dstate
))
365 (let ((operand (sap-ref-32 sap
(- next-fixup-addr load-addr
))))
366 (when (<= (cs-code-space-start state
) operand
(cs-code-space-end state
))
367 (aver (eql (sap-ref-8 sap
(- next-fixup-addr load-addr
1)) #xB8
)) ; mov rax, imm32
368 (push (list* (dstate-cur-offs dstate
) 5 "mov" operand
) list
)))
369 (pop (cs-fixup-addrs state
))
370 (setq next-fixup-addr
(or (car (cs-fixup-addrs state
)) most-positive-word
)))
371 ((or (eq inst jmp-inst
) (eq inst call-inst
))
372 (let ((target-addr (+ (near-jump-displacement dchunk dstate
)
373 (dstate-next-addr dstate
))))
374 (when (<= (cs-fixedobj-space-start state
)
376 (cs-fixedobj-space-end state
))
377 (push (list* (dstate-cur-offs dstate
)
379 (if (eq inst call-inst
) "call" "jmp")
382 ((and (eq inst pop-inst
) (eq (logand dchunk
#xFF
) #x5D
))
383 (push (list* (dstate-cur-offs dstate
) 1 "pop" "%rbp") list
))))
389 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
390 ;;; of memory at PADDR (physical addr) to STREAM.
391 ;;; The function's address as per the core file is VADDR.
392 ;;; (Its eventual address is indeterminate)
393 ;;; If EMIT-CFI is true, then also emit cfi directives.
395 ;;; Notice that we can use one fewer cfi directive than usual because
396 ;;; Lisp always carries a frame pointer as set up by the caller.
401 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
402 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
404 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
408 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
409 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
410 ;;; .cfi_def_cfa 6, 16
411 ;;; .cfi_offset 6, -16
413 ;;; Of course there is a flip-side to this: unwinders think that the new frame
414 ;;; is already begun in the caller. Interruption between these two instructions:
415 ;;; MOV RBP, RSP / CALL #xzzzzz
416 ;;; will show the backtrace as if two invocations of the caller are on stack.
417 ;;; This is tricky to fix because while we can relativize the CFA to the
418 ;;; known frame size, we can't do that based only on a disassembly.
420 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state
)
422 (format stream
" .cfi_startproc~%"))
423 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
424 ;; bytes to be written as an assembly language instruction rather than opaquely,
425 ;; thereby affecting the ELF data (cfi or relocs) produced.
427 (list-annotated-instructions (int-sap paddr
) count core-state vaddr emit-cfi
))
429 (symbol-macrolet ((cur-offset (- ptr paddr
)))
431 (let ((until (if instructions
(caar instructions
) count
)))
432 ;; if we're not aligned, then write some number of bytes
433 ;; to cause alignment. But do not write past the next offset
434 ;; that needs to be written as an instruction.
435 (when (logtest ptr
#x7
) ; unaligned
436 (let ((n (min (- (nth-value 1 (ceiling ptr
8)))
437 (- until cur-offset
))))
439 (emit-asm-directives :byte
(int-sap ptr
) n stream
)
441 ;; Now we're either aligned to a multiple of 8, or the current
442 ;; offset needs to be written as a textual instruction.
443 (let ((n (- until cur-offset
)))
445 (multiple-value-bind (qwords remainder
) (floor n
8)
447 (emit-asm-directives :qword
(int-sap ptr
) qwords stream
#())
448 (incf ptr
(* qwords
8)))
449 (when (plusp remainder
)
450 (emit-asm-directives :byte
(int-sap ptr
) remainder stream
)
451 (incf ptr remainder
))))
452 ;; If the current offset is COUNT, we're done.
453 (when (= cur-offset count
) (return))
454 (aver (= cur-offset until
))
455 (destructuring-bind (length opcode . operand
) (cdr (pop instructions
))
456 (when (cond ((member opcode
'("jmp" "call") :test
#'string
=)
457 (format stream
" ~A 0x~X~%" opcode operand
))
458 ((string= opcode
"pop")
459 (format stream
" ~A ~A~%" opcode operand
)
460 (cond ((string= operand
"8(%rbp)")
461 (format stream
" .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
462 ((string= operand
"%rbp")
463 ;(format stream " .cfi_def_cfa 7, 8~%")
466 ((string= opcode
"mov")
467 (format stream
" mov $(__lisp_code_start+0x~x),%eax~%"
468 (- operand
(cs-code-space-start core-state
))))
470 (bug "Random annotated opcode ~S" opcode
))
472 (when (= cur-offset count
) (return))))))
474 (format stream
" .cfi_endproc~%")))
476 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
477 ;;; TODO: relocate fdefns and instances of standard-generic-function
478 ;;; into the space that is dumped into an ELF section.
479 (defun write-assembler-text
480 (spaces fixedobj-range output
481 &optional emit-sizes
(emit-cfi t
)
482 &aux
(code-space (get-space immobile-varyobj-core-space-id spaces
))
483 (code-space-start (space-addr code-space
)) ; target virtual address
484 (code-space-end (+ code-space-start
(space-size code-space
)))
485 (code-addr code-space-start
)
487 (make-core-state code-space-start code-space-end
489 (+ (car fixedobj-range
) (cdr fixedobj-range
))))
491 (pp-state (cons (make-hash-table :test
'equal
)
492 ;; copy no entries for macros/special-operators (flet, etc)
493 (sb-pretty::make-pprint-dispatch-table
)))
494 (packages (make-hash-table :test
'equal
))
495 (core-nil (compute-nil-object spaces
))
499 (set-pprint-dispatch 'string
500 ;; Write strings without string quotes
501 (lambda (stream string
) (write-string string stream
))
504 (labels ((ldsym-quote (name)
505 (concatenate 'string
'(#\") name
'(#\")))
506 (dumpwords (addr count stream
&optional
(exceptions #()) logical-addr
)
507 (let ((sap (int-sap addr
)))
508 (aver (sap>= sap
(car spaces
)))
509 ;; Make intra-code-space pointers computed at link time
510 (dotimes (i (if logical-addr count
0))
511 (unless (and (< i
(length exceptions
)) (svref exceptions i
))
512 (let ((word (sap-ref-word sap
(* i n-word-bytes
))))
513 (when (and (= (logand word
3) 3) ; is a pointer
514 (<= code-space-start word
(1- code-space-end
))) ; to code space
516 (format t
"~&~(~x: ~x~)~%" (+ logical-addr
(* i n-word-bytes
))
518 (incf n-linker-relocs
)
519 (setf exceptions
(adjust-array exceptions
(max (length exceptions
) (1+ i
))
520 :initial-element nil
)
522 (format nil
"__lisp_code_start+0x~x"
523 (- word code-space-start
)))))))
524 (emit-asm-directives :qword sap count stream exceptions
)))
525 (make-code-obj (addr)
526 (let ((translation (translate-ptr addr spaces
)))
527 (aver (= (%widetag-of
(sap-ref-word (int-sap translation
) 0))
528 code-header-widetag
))
529 (%make-lisp-obj
(logior translation other-pointer-lowtag
))))
530 (calc-obj-size (code)
531 ;; No need to pin - it's not managed by GC
534 (ash (logandc2 (get-lisp-obj-address code
) lowtag-mask
)
535 (- n-fixnum-tag-bits
)))))
537 (logand word widetag-mask
)))
538 (format output
" .text~% .file \"sbcl.core\"
539 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
541 ;; Scan the assembly routines.
542 (let* ((code-component (make-code-obj code-addr
))
543 (size (calc-obj-size code-component
))
545 (truly-the hash-table
546 (translate (car (translate (%code-debug-info code-component
)
549 (cells (translate (hash-table-table hashtable
) spaces
))
550 (count (hash-table-number-entries hashtable
)))
551 (incf code-addr size
)
552 (setf total-code-size size
)
553 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count
))
556 (when (>= code-addr code-space-end
) (return))
557 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
558 (let* ((code (make-code-obj code-addr
))
559 (objsize (calc-obj-size code
)))
560 (setq end-loc
(+ code-addr objsize
))
561 (incf total-code-size objsize
)
563 ((< (code-header-words code
) 4) ; filler object
564 ;; Shouldn't occur unless defrag was not performed
565 (format output
"#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
567 simple-array-unsigned-byte-8-widetag
568 (ash (- objsize
(* 2 n-word-bytes
))
570 (- objsize
(* 2 n-word-bytes
))))
571 ((%instancep
(%code-debug-info code
)) ; assume it's a COMPILED-DEBUG-INFO
572 (aver (plusp (code-n-entries code
)))
574 (sb-c::compiled-debug-info-source
575 (truly-the sb-c
::compiled-debug-info
576 (translate (%code-debug-info code
) spaces
))))
578 (sb-c::debug-source-namestring
579 (truly-the sb-c
::debug-source
(translate source spaces
)))))
580 (setq namestring
(if (eq namestring core-nil
)
582 (translate namestring spaces
)))
583 (unless (string= namestring prev-namestring
)
584 (format output
" .file \"~a\"~%" namestring
)
585 (setq prev-namestring namestring
)))
586 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code
) lowtag-mask
))
587 (boxed-end (+ code-physaddr
588 (ash (code-header-words code
) word-shift
)))
589 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code
0))
591 (format output
"#x~x:~%" code-addr
)
592 (dumpwords code-physaddr
(code-header-words code
) output
#() code-addr
)
593 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
594 (when (> first-fun boxed-end
)
595 (dumpwords boxed-end
(floor (- first-fun boxed-end
) n-word-bytes
)
597 (setf (cs-fixup-addrs core-state
)
599 (+ code-addr
(ash (code-header-words code
) word-shift
) x
))
600 (code-fixup-locs code spaces
)))
601 ;; Loop over all embedded functions.
602 ;; Because simple-fun offsets are relative to the code start
603 ;; (and not in a linked list as they were in the past),
604 ;; iteratation in a "foreign" code object works just fine,
605 ;; subject to the caution about reading boxed words.
606 (dotimes (j (code-n-entries code
))
607 (let* ((fun (%code-entry-point code j
))
608 (fun-addr (logandc2 (get-lisp-obj-address fun
) lowtag-mask
))
609 (end (if (< (1+ j
) (code-n-entries code
))
610 (logandc2 (get-lisp-obj-address (%code-entry-point code
(1+ j
)))
612 (+ (translate-ptr code-addr spaces
) objsize
)))
614 (+ fun-addr
(* simple-fun-code-offset n-word-bytes
)))
615 (size (- end entrypoint
))
616 (lispname (fun-name-from-core fun spaces core-nil packages
))
617 (quotname (ldsym-quote (c-name lispname pp-state
))))
618 ;; Globalize the C symbol only if the name is a legal function designator
619 ;; per the standard definition.
620 ;; This is a technique to try to avoid appending a uniquifying suffix
621 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
622 (format output
"~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
623 (typep lispname
'(or symbol core-sym
(cons (eql setf
))))
626 simple-fun-code-offset output
628 `#(nil ,(format nil
".+~D"
629 (* (1- simple-fun-code-offset
)
633 (format output
" .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
634 quotname
(if emit-sizes size
))
635 ;; entrypoint is the current physical address.
636 ;; Also pass in the virtual address in the core
637 ;; (which will differ from the actual load-time address)
638 (emit-lisp-function entrypoint
639 (+ code-addr
(- entrypoint
640 (logandc2 (get-lisp-obj-address code
)
642 size output emit-cfi core-state
)))
644 ;; All fixups should have been consumed by writing the code out
645 (aver (null (cs-fixup-addrs core-state
))))
647 (error "Strange code component: ~S" code
)))
648 (incf code-addr objsize
))))
650 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
651 (format output
"__lisp_code_end:~%")
653 ;; Pad so that non-lisp code can't be colocated on a GC page.
654 ;; (Lack of Lisp object headers in C code is the issue)
655 (let ((aligned-end (logandc2 (+ end-loc
4095) 4095)))
656 (when (> aligned-end end-loc
)
657 (multiple-value-bind (nwords remainder
)
658 (floor (- aligned-end end-loc
) n-word-bytes
)
660 (aver (zerop remainder
))
662 (format output
" .quad ~d, ~d # (simple-array fixnum (~d))~%"
663 simple-array-fixnum-widetag
664 (ash nwords n-fixnum-tag-bits
)
667 (format output
" .fill ~d~%" (* nwords n-word-bytes
))))))
668 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
669 (values total-code-size n-linker-relocs
))
671 ;;; Return either the physical or logical address of the specified symbol.
672 (defun find-target-symbol (package-name symbol-name spaces
673 &optional
(address-mode :physical
))
674 (let* ((space (find immobile-fixedobj-core-space-id
(cdr spaces
) :key
#'space-id
))
675 (start (translate-ptr (space-addr space
) spaces
))
676 (end (+ start
(space-size space
)))
679 (when (>= physaddr end
) (bug "Can't find symbol"))
680 (multiple-value-bind (obj tag size
)
681 (reconstitute-object (ash physaddr
(- n-fixnum-tag-bits
)))
682 (when (and (= tag symbol-widetag
)
683 (string= symbol-name
(translate (symbol-name obj
) spaces
))
684 (%instancep
(symbol-package obj
))
685 (string= package-name
688 (truly-the package
(translate (symbol-package obj
) spaces
)))
690 (return (%make-lisp-obj
691 (logior (ecase address-mode
693 (:logical
(+ (space-addr space
) (- physaddr start
))))
694 other-pointer-lowtag
))))
695 (incf physaddr size
)))))
697 (defun extract-required-c-symbols (spaces asm-file
&optional
(verbose nil
))
698 (flet ((symbol-fdefn-fun (symbol)
699 (let ((vector (translate (symbol-info-vector symbol
) spaces
)))
700 ;; TODO: allow for (plist . info-vector) in the info slot
701 (aver (simple-vector-p vector
))
702 (translate (fdefn-fun (translate (info-vector-fdefn vector
) spaces
))
705 (translate (symbol-global-value
706 (find-target-symbol "SB-SYS" "*LINKAGE-INFO*" spaces
))
710 (find-target-symbol "SB-SYS"
711 "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"
713 (aver (= (get-closure-length dyn-syminfo
) 3))
714 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo
1) spaces
))
715 (ht2 (translate (%closure-index-ref dyn-syminfo
0) spaces
))
716 (table0 (translate (hash-table-table (truly-the hash-table linkage-info
))
718 (table1 (translate (hash-table-table (truly-the hash-table ht1
)) spaces
))
719 (table2 (translate (hash-table-table (truly-the hash-table ht2
)) spaces
))
722 (declare (simple-vector table0 table1 table2
))
726 (format t
"~A~%" x
)))
727 (scan-table (table name fun
&aux
(n 0) (end (length table
)))
729 (format t
"~&~A:~%~A~%"
730 name
(make-string (1+ (length name
)) :initial-element
#\-
)))
733 (let ((val (svref table i
)))
734 (unless (unbound-marker-p val
)
735 (funcall fun
(translate val spaces
))
738 (format t
"TOTAL: ~D entries~2%" n
))))
739 (scan-table table0
"linkage info"
740 (lambda (x &aux
(type #\T
))
742 (setq x
(translate (car x
) spaces
) type
#\D
))
743 (format asm-file
" .long ~A~%" x
)
745 (format t
"~A ~A~%" type x
))
747 (scan-table table1
"defined" #'show
)
748 (scan-table table2
"undefined" #'show
)
749 (let ((diff1 ; linkage not in foreign
750 (remove-if (lambda (x) (member x foreign
:test
#'string
=)) linkage
))
751 (diff2 ; foreign not in linkage
752 (remove-if (lambda (x) (member x linkage
:test
#'string
=)) foreign
)))
754 (format t
"~&Linkage not in foreign:~%~S~%" diff1
)
755 (format t
"~&Foreign not in linkage:~%~S~%" diff2
))
761 (defconstant +sht-null
+ 0)
762 (defconstant +sht-progbits
+ 1)
763 (defconstant +sht-symtab
+ 2)
764 (defconstant +sht-strtab
+ 3)
765 (defconstant +sht-rela
+ 4)
766 (defconstant +sht-rel
+ 9)
768 (define-alien-type elf64-ehdr
770 (ident (array unsigned-char
16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
771 (type (unsigned 16)) ; 1 0
772 (machine (unsigned 16)) ; 3E 0
773 (version (unsigned 32)) ; 1 0 0 0
774 (entry unsigned
) ; 0 0 0 0 0 0 0 0
775 (phoff unsigned
) ; 0 0 0 0 0 0 0 0
777 (flags (unsigned 32)) ; 0 0 0 0
778 (ehsize (unsigned 16)) ; 40 0
779 (phentsize (unsigned 16)) ; 0 0
780 (phnum (unsigned 16)) ; 0 0
781 (shentsize (unsigned 16)) ; 40 0
782 (shnum (unsigned 16)) ; n 0
783 (shstrndx (unsigned 16)))) ; n 0
784 (define-alien-type elf64-shdr
788 (flags (unsigned 64))
794 (addralign (unsigned 64))
795 (entsize (unsigned 64))))
796 (define-alien-type elf64-sym
801 (shndx (unsigned 16))
804 (define-alien-type elf64-rela
806 (offset (unsigned 64))
808 (addend (signed 64))))
810 (defun make-elf64-sym (name info
)
811 (let ((a (make-array 24 :element-type
'(unsigned-byte 8))))
812 (with-pinned-objects (a)
813 (setf (sap-ref-32 (vector-sap a
) 0) name
814 (sap-ref-8 (vector-sap a
) 4) info
))
817 ;;; Return two values: an octet vector comprising a string table
818 ;;; and an alist which maps string to offset in the table.
819 (defun string-table (strings)
820 (let* ((length (+ (1+ (length strings
)) ; one more null than there are strings
821 (reduce #'+ strings
:key
#'length
))) ; data length
822 (bytes (make-array length
:element-type
'(unsigned-byte 8)
826 (dolist (string strings
)
827 (push (cons string index
) alist
)
828 (replace bytes
(map 'vector
#'char-code string
) :start1 index
)
829 (incf index
(1+ (length string
))))
830 (cons (nreverse alist
) bytes
)))
832 (defun write-alien (alien size stream
)
834 (write-byte (sap-ref-8 (alien-value-sap alien
) i
) stream
)))
836 (defun copy-bytes (in-stream out-stream nbytes
838 (make-array 1024 :element-type
'(unsigned-byte 8))))
839 (loop (let ((chunksize (min (length buffer
) nbytes
)))
840 (aver (eql (read-sequence buffer in-stream
:end chunksize
) chunksize
))
841 (write-sequence buffer out-stream
:end chunksize
)
842 (when (zerop (decf nbytes chunksize
)) (return)))))
844 ;;; core header should be an array of words in '.rodata', not a 32K page
845 (defconstant core-header-size
+backend-page-bytes
+) ; stupidly large (FIXME)
847 ;;; Write everything except for the core file itself into OUTPUT-STREAM
848 ;;; and leave the stream padded to a 4K boundary ready to receive data.
849 (defun prepare-elf (core-size relocs output
)
850 (let* ((sym-entry-size 24)
851 (reloc-entry-size 24)
854 `#((:core
"lisp.core" ,+sht-progbits
+ 0 0 0 ,core-align
0)
855 (:sym
".symtab" ,+sht-symtab
+ 0 3 1 8 ,sym-entry-size
)
856 ; section with the strings -- ^ ^ -- 1+ highest local symbol
857 (:str
".strtab" ,+sht-strtab
+ 0 0 0 1 0)
858 (:rel
".relalisp.core" ,+sht-rela
+ 0 2 1 8 ,reloc-entry-size
)
859 ; symbol table -- ^ ^ -- for which section
860 (:note
".note.GNU-stack" ,+sht-null
+ 0 0 0 1 0)))
862 (string-table (append '("__lisp_code_start") (map 'list
#'second sections
))))
863 (strings (cdr string-table
))
864 (padded-strings-size (logandc2 (+ (length strings
) 7) 7))
865 (ehdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil
)) 8))
866 (shdr-size #.
(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil
)) 8))
867 (symbols-size (* 2 sym-entry-size
))
868 (shdrs-start (+ ehdr-size symbols-size padded-strings-size
))
869 (shdrs-end (+ shdrs-start
(* (1+ (length sections
)) shdr-size
)))
870 (relocs-size (* (length relocs
) reloc-entry-size
))
871 (relocs-end (+ shdrs-end relocs-size
))
872 (core-start (logandc2 (+ relocs-end
(1- core-align
)) (1- core-align
)))
873 (ident #.
(coerce '(#x7F
#x45
#x4C
#x46
2 1 1 0 0 0 0 0 0 0 0 0)
874 '(array (unsigned-byte 8) 1))))
876 (with-alien ((ehdr elf64-ehdr
))
877 (dotimes (i (ceiling ehdr-size n-word-bytes
))
878 (setf (sap-ref-word (alien-value-sap ehdr
) (* i n-word-bytes
)) 0))
879 (with-pinned-objects (ident)
880 (%byte-blt
(vector-sap ident
) 0 (alien-value-sap ehdr
) 0 16))
881 (setf (slot ehdr
'type
) 1
882 (slot ehdr
'machine
) #x3E
883 (slot ehdr
'version
) 1
884 (slot ehdr
'shoff
) shdrs-start
885 (slot ehdr
'ehsize
) ehdr-size
886 (slot ehdr
'shentsize
) shdr-size
887 (slot ehdr
'shnum
) (1+ (length sections
)) ; section 0 is implied
888 (slot ehdr
'shstrndx
) (1+ (position :str sections
:key
#'car
)))
889 (write-alien ehdr ehdr-size output
))
891 ;; Write symbol table
892 (aver (eql (file-position output
) ehdr-size
))
893 (write-sequence (make-elf64-sym 0 0) output
)
894 ;; The symbol name index is always 1 by construction. The type is #x10
895 ;; given: #define STB_GLOBAL 1
896 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
897 ;; which places the binding in the high 4 bits of the low byte.
898 (write-sequence (make-elf64-sym 1 #x10
) output
)
900 ;; Write string table
901 (aver (eql (file-position output
) (+ ehdr-size symbols-size
)))
902 (write-sequence strings output
) ; an octet vector at this point
903 (dotimes (i (- padded-strings-size
(length strings
)))
904 (write-byte 0 output
))
906 ;; Write section headers
907 (aver (eql (file-position output
) shdrs-start
))
908 (with-alien ((shdr elf64-shdr
))
909 (dotimes (i (ceiling shdr-size n-word-bytes
)) ; Zero-fill
910 (setf (sap-ref-word (alien-value-sap shdr
) (* i n-word-bytes
)) 0))
911 (dotimes (i (1+ (length sections
)))
912 (when (plusp i
) ; Write the zero-filled header as section 0
913 (destructuring-bind (key name type flags link info alignment entsize
)
914 (aref sections
(1- i
))
915 (multiple-value-bind (offset size
)
917 (:sym
(values ehdr-size symbols-size
))
918 (:str
(values (+ ehdr-size symbols-size
) (length strings
)))
919 (:rel
(values shdrs-end relocs-size
))
920 (:core
(values core-start core-size
))
921 (:note
(values 0 0)))
922 (let ((name (cdr (assoc name
(car string-table
) :test
#'string
=))))
923 (setf (slot shdr
'name
) name
924 (slot shdr
'type
) type
925 (slot shdr
'flags
) flags
926 (slot shdr
'off
) offset
927 (slot shdr
'size
) size
928 (slot shdr
'link
) link
929 (slot shdr
'info
) info
930 (slot shdr
'addralign
) alignment
931 (slot shdr
'entsize
) entsize
)))))
932 (write-alien shdr shdr-size output
)))
935 (aver (eql (file-position output
) shdrs-end
))
936 (let ((buf (make-array relocs-size
:element-type
'(unsigned-byte 8)))
938 (with-alien ((rela elf64-rela
))
939 (dovector (reloc relocs
)
940 (destructuring-bind (place addend . kind
) reloc
941 (setf (slot rela
'offset
) place
942 (slot rela
'info
) (logior (ash 1 32) kind
) ; 1 = symbol index
943 (slot rela
'addend
) addend
))
944 (setf (%vector-raw-bits buf
(+ ptr
0)) (sap-ref-word (alien-value-sap rela
) 0)
945 (%vector-raw-bits buf
(+ ptr
1)) (sap-ref-word (alien-value-sap rela
) 8)
946 (%vector-raw-bits buf
(+ ptr
2)) (sap-ref-word (alien-value-sap rela
) 16))
948 (write-sequence buf output
))
951 (dotimes (i (- core-start
(file-position output
)))
952 (write-byte 0 output
))
953 (aver (eq (file-position output
) core-start
))))
955 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
956 ;;; whose code space is subject to link-time relocation.
957 (defconstant R_X86_64_64
1) ; /* Direct 64 bit */
958 (defconstant R_X86_64_PC32
2) ; /* PC relative 32 bit signed */
959 (defconstant R_X86_64_32
10) ; /* Direct 32 bit zero extended */
961 (defun collect-relocations (spaces fixups
&aux
(print nil
))
962 (binding* (((static-start static-end
)
963 (let ((space (get-space static-core-space-id spaces
)))
964 (values (space-addr space
) (space-end space
))))
965 ((code-start code-end
)
966 (let ((space (get-space immobile-varyobj-core-space-id spaces
)))
967 (values (space-addr space
) (space-end space
))))
968 ;; the distance between fixedobj space address (i.e following the pages of
969 ;; dynamic space) in the ELF section which has a presumptive address of 0
970 ;; due to being non-loaded, to where it will be later mapped by coreparse
971 (fixedobj-space-displacement
972 (let ((space (get-space immobile-fixedobj-core-space-id spaces
)))
973 (- (* (1+ (space-data-page space
)) +backend-page-bytes
+) ; 1+ = core header
974 (space-addr space
))))
978 ((abs-fixup (core-offs referent
)
981 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
982 (setf (sap-ref-word (car spaces
) core-offs
) 0)
983 (vector-push-extend `(,(+ core-header-size core-offs
)
984 ,(- referent code-start
) .
,R_X86_64_64
)
986 (abs32-fixup (core-offs referent
)
989 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
990 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
991 (vector-push-extend `(,(+ core-header-size core-offs
)
992 ,(- referent code-start
) .
,R_X86_64_32
)
994 (rel-fixup (core-offs referent
)
997 (format t
"~x = 0x~(~x~): (r)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
998 (setf (sap-ref-32 (car spaces
) core-offs
) 0)
999 (vector-push-extend `(,(+ core-header-size core-offs
)
1000 ,(- referent code-start
) .
,R_X86_64_PC32
)
1002 (in-code-space-p (ptr)
1003 (and (<= code-start ptr
) (< ptr code-end
)))
1004 ;; Given a address which is an offset into the data pages of the target core,
1005 ;; compute the logical address which that offset would be mapped to.
1006 ;; For example core address 0 is the virtual address of static space.
1007 (core-to-logical (core-offs &aux
(page (floor core-offs
+backend-page-bytes
+)))
1008 (dolist (space (cdr spaces
)
1009 (bug "Can't translate core offset ~x using ~x"
1011 (let* ((page0 (space-data-page space
))
1012 (nwords (space-nwords space
))
1013 (id (space-id space
))
1014 (npages (ceiling nwords
(/ +backend-page-bytes
+ n-word-bytes
))))
1015 (when (and (<= page0 page
(+ page0
(1- npages
)))
1016 (/= id immobile-varyobj-core-space-id
))
1017 (return (+ (space-addr space
)
1018 (* (- page page0
) +backend-page-bytes
+)
1019 (logand core-offs
(1- +backend-page-bytes
+))))))))
1020 (scanptrs (obj wordindex-min wordindex-max
&aux
(n-fixups 0))
1021 (do* ((base-addr (logandc2 (get-lisp-obj-address obj
) lowtag-mask
))
1022 (sap (int-sap base-addr
))
1023 ;; core-offs is the offset in the lisp.core ELF section.
1024 (core-offs (- base-addr
(sap-int (car spaces
))))
1025 (i wordindex-min
(1+ i
)))
1026 ((> i wordindex-max
) n-fixups
)
1027 (let ((ptr (sap-ref-word sap
(ash i word-shift
))))
1028 (when (and (= (logand ptr
3) 3) (in-code-space-p ptr
))
1029 (abs-fixup (+ core-offs
(ash i word-shift
)) ptr
)
1031 (scanptr (obj wordindex
)
1032 (plusp (scanptrs obj wordindex wordindex
))) ; trivial wrapper
1033 (scan-obj (obj widetag size vaddr
1034 &aux
(core-offs (- (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1035 (sap-int (car spaces
))))
1036 (nwords (ceiling size n-word-bytes
)))
1039 (return-from scan-obj
))
1042 (let ((layout (truly-the layout
1043 (translate (%instance-layout obj
) spaces
))))
1044 ;; FIXME: even though the layout is supplied, it's not good enough,
1045 ;; because the macro references the layout-bitmap which might
1046 ;; be a bignum which is a pointer into the logical core address.
1047 (unless (fixnump (layout-bitmap layout
))
1048 (error "Can't process bignum bitmap"))
1049 (do-instance-tagged-slot (i obj
:layout layout
)
1050 (scanptr obj
(1+ i
))))
1051 (return-from scan-obj
))
1052 (#.simple-vector-widetag
1053 (let ((len (length (the simple-vector obj
))))
1054 (when (eql (logand (get-header-data obj
) #xFF
) vector-valid-hashing-subtype
)
1055 (do ((i 2 (+ i
2)) (needs-rehash))
1058 (setf (svref obj
1) 1)))
1059 (when (scanptr obj
(+ vector-data-offset i
))
1060 (format t
"~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj
))
1061 (setq needs-rehash t
))
1062 (scanptr obj
(+ vector-data-offset i
1)))
1063 (return-from scan-obj
))
1064 (setq nwords
(+ len
2))))
1067 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1068 (int-sap (+ (- (get-lisp-obj-address obj
) other-pointer-lowtag
)
1069 (ash fdefn-raw-addr-slot word-shift
))))
1070 ;; what the fdefn's logical PC will be
1071 (fdefn-logical-pc (+ vaddr
(ash fdefn-raw-addr-slot word-shift
)))
1072 (rel32off (signed-sap-ref-32 fdefn-pc-sap
1))
1073 (target (+ fdefn-logical-pc
1 rel32off
))
1075 (if (<= static-start vaddr static-end
)
1076 (+ (- sb-vm
:static-space-start
) +backend-page-bytes
+)
1077 fixedobj-space-displacement
)))
1078 (when (in-code-space-p target
)
1079 ;; This addend needs to account for the fact that the location
1080 ;; where fixup occurs is not where the fdefn will actually exist.
1081 (rel-fixup (+ core-offs
(ash 3 word-shift
) 1)
1082 (+ target space-displacement
))))
1083 (return-from scan-obj
))
1084 ((#.closure-widetag
#.funcallable-instance-widetag
)
1085 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj
))
1086 (- n-word-bytes fun-pointer-lowtag
))))
1087 (when (in-code-space-p word
)
1088 (abs-fixup (+ core-offs
(ash 1 word-shift
)) word
)))
1089 (when (eq widetag funcallable-instance-widetag
)
1090 (let ((layout (truly-the layout
1091 (translate (%funcallable-instance-layout obj
) spaces
))))
1092 (unless (fixnump (layout-bitmap layout
))
1093 (error "Can't process bignum bitmap"))
1094 (let ((bitmap (layout-bitmap layout
)))
1095 (unless (eql bitmap -
1)
1096 ;; tagged slots precede untagged slots,
1097 ;; so integer-length is the count of tagged slots.
1098 (setq nwords
(1+ (integer-length bitmap
))))))))
1099 ;; mixed boxed/unboxed objects
1100 (#.code-header-widetag
1101 (dolist (loc (code-fixup-locs obj spaces
))
1102 (let ((val (sap-ref-32 (code-instructions obj
) loc
)))
1103 (when (in-code-space-p val
)
1104 (abs32-fixup (sap- (sap+ (code-instructions obj
) loc
) (car spaces
))
1106 (dotimes (i (code-n-entries obj
))
1107 (scanptrs (%code-entry-point obj i
) 2 5))
1108 (setq nwords
(code-header-words obj
)))
1109 ;; boxed objects that can reference code/simple-funs
1110 ((#.value-cell-widetag
#.symbol-widetag
#.weak-pointer-widetag
))
1112 (return-from scan-obj
)))
1113 (scanptrs obj
1 (1- nwords
))))
1114 (dolist (space (reverse (cdr spaces
)))
1115 (let* ((logical-addr (space-addr space
))
1116 (size (space-size space
))
1117 (physical-addr (space-physaddr space spaces
))
1118 (physical-end (sap+ physical-addr size
))
1119 (vaddr-translation (+ (- (sap-int physical-addr
)) logical-addr
)))
1120 (unless (= (space-id space
) immobile-varyobj-core-space-id
)
1121 (dx-flet ((visit (obj widetag size
)
1122 ;; Compute the object's intended virtual address
1123 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1124 vaddr-translation
)))
1125 (scan-obj obj widetag size vaddr
))))
1126 (map-objects-in-range
1128 (ash (sap-int physical-addr
) (- n-fixnum-tag-bits
))
1129 (ash (sap-int physical-end
) (- n-fixnum-tag-bits
))))
1130 (when (and (plusp (logior n-abs n-rel
)) print
)
1131 (format t
"space @ ~x: ~d absolute + ~d relative fixups~%"
1132 logical-addr n-abs n-rel
))
1133 (setq n-abs
0 n-rel
0))))))
1135 (format t
"total of ~D linker fixups~%" (length fixups
)))
1140 (macrolet ((do-core-header-entry (((id-var len-var ptr-var
) buffer
) &body body
)
1141 `(let ((,ptr-var
1))
1143 (let ((,id-var
(%vector-raw-bits
,buffer
,ptr-var
))
1144 (,len-var
(%vector-raw-bits
,buffer
(1+ ,ptr-var
))))
1147 (when (= ,id-var end-core-entry-type-code
) (return))
1149 (incf ,ptr-var
,len-var
)))))
1150 (do-directory-entry (((index-var start-index input-nbytes
) buffer
) &body body
)
1151 `(let ((words-per-dirent 5))
1152 (multiple-value-bind (n-entries remainder
)
1153 (floor ,input-nbytes words-per-dirent
)
1154 (aver (zerop remainder
))
1155 (symbol-macrolet ((id (%vector-raw-bits
,buffer index
))
1156 (nwords (%vector-raw-bits
,buffer
(+ index
1)))
1157 (data-page (%vector-raw-bits
,buffer
(+ index
2)))
1158 (addr (%vector-raw-bits
,buffer
(+ index
3)))
1159 (npages (%vector-raw-bits
,buffer
(+ index
4))))
1160 (do ((,index-var
,start-index
(+ ,index-var words-per-dirent
)))
1161 ((= ,index-var
(+ ,start-index
(* n-entries words-per-dirent
))))
1163 (with-mapped-core ((sap-var start npages stream
) &body body
)
1169 (* ,npages
+backend-page-bytes
+)
1170 (logior sb-posix
:prot-read sb-posix
:prot-write
)
1171 sb-posix
:map-private
1172 (sb-sys:fd-stream-fd
,stream
)
1173 ;; Skip the core header
1174 (+ ,start
+backend-page-bytes
+)))
1177 (sb-posix:munmap
,sap-var
(* ,npages
+backend-page-bytes
+)))))))
1179 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1180 ;;; separate it into pieces.
1181 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1182 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1183 ;;; The ".core" file is a native core file used for starting a binary that
1184 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1185 ;;; is for linking in to a binary that needs no "--core" argument.
1187 (input-pathname asm-pathname
1188 &key emit-sizes
(verbose nil
)
1189 &aux
(split-core-pathname
1190 (merge-pathnames (make-pathname :type
"core") asm-pathname
))
1193 (make-pathname :name
(concatenate 'string
(pathname-name asm-pathname
) "-core")
1196 (core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
1197 (original-total-npages 0)
1200 (code-start-fixup-ofs 0) ; where to fixup the core header
1203 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1204 (relocs (make-array 100000 :adjustable t
:fill-pointer
0)))
1207 (ignore-errors (delete-file asm-pathname
))
1208 (ignore-errors (delete-file split-core-pathname
))
1209 (ignore-errors (delete-file elf-core-pathname
))
1210 ;; Ensure that all files can be opened
1211 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
1212 (with-open-file (asm-file asm-pathname
:direction
:output
:if-exists
:supersede
)
1213 (with-open-file (split-core split-core-pathname
:direction
:output
1214 :element-type
'(unsigned-byte 8) :if-exists
:supersede
)
1215 (read-sequence core-header input
)
1216 (cond ((= (%vector-raw-bits core-header
0) core-magic
))
1217 (t ; possible embedded core
1218 (file-position input
(- (file-length input
)
1219 (* 2 n-word-bytes
)))
1220 (aver (eql (read-sequence core-header input
) (* 2 n-word-bytes
)))
1221 (aver (= (%vector-raw-bits core-header
1) core-magic
))
1222 (setq core-offset
(%vector-raw-bits core-header
0))
1224 (format t
"~&embedded core starts at #x~x into input~%" core-offset
))
1225 (file-position input core-offset
)
1226 (read-sequence core-header input
)
1227 (aver (= (%vector-raw-bits core-header
0) core-magic
))))
1228 (do-core-header-entry ((id len ptr
) core-header
)
1230 (#.build-id-core-entry-type-code
1232 (let ((string (make-string (%vector-raw-bits core-header ptr
)
1233 :element-type
'base-char
)))
1234 (%byte-blt core-header
(* (1+ ptr
) n-word-bytes
) string
0 (length string
))
1235 (format t
"Build ID [~a]~%" string
))))
1236 (#.directory-core-entry-type-code
1237 (do-directory-entry ((index ptr len
) core-header
)
1238 (incf original-total-npages npages
)
1239 (push (make-space id addr data-page page-adjust nwords
) spaces
)
1241 (format t
"id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1242 id data-page npages addr nwords
1243 (= id immobile-varyobj-core-space-id
)))
1244 (cond ((= id immobile-varyobj-core-space-id
)
1245 (setq code-start-fixup-ofs
(+ index
3))
1246 ;; Keep this entry but delete the page count. We need to know
1247 ;; where the space was supposed to be mapped and at what size.
1248 ;; Subsequent core entries will need to adjust their start page
1249 ;; downward (just the PTEs's start page now).
1250 (setq page-adjust npages data-page
0 npages
0))
1252 ;; Keep track of where the fixedobj space wants to be.
1253 (when (= id immobile-fixedobj-core-space-id
)
1254 (setq fixedobj-range
(cons addr
(ash nwords word-shift
))))
1255 (when (plusp npages
) ; enqueue
1256 (push (cons data-page
(* npages
+backend-page-bytes
+))
1258 ;; adjust this entry's start page in the new core
1259 (decf data-page page-adjust
)))))
1260 (#.page-table-core-entry-type-code
1262 (symbol-macrolet ((nbytes (%vector-raw-bits core-header
(1+ ptr
)))
1263 (data-page (%vector-raw-bits core-header
(+ ptr
2))))
1264 (aver (= data-page original-total-npages
))
1265 (aver (= (ceiling (space-nwords
1266 (find dynamic-core-space-id spaces
:key
#'space-id
))
1267 (/ +backend-page-bytes
+ n-word-bytes
))
1268 (%vector-raw-bits core-header ptr
))) ; number of PTEs
1270 (format t
"PTE: page=~5x~40tbytes=~8x~%" data-page nbytes
))
1271 (push (cons data-page nbytes
) copy-actions
)
1272 (decf data-page page-adjust
)))))
1273 (let ((buffer (make-array +backend-page-bytes
+
1274 :element-type
'(unsigned-byte 8)))
1276 ;; Write the new core file
1277 (write-sequence core-header split-core
)
1278 (dolist (action (reverse copy-actions
)) ; nondestructive
1279 ;; page index convention assumes absence of core header.
1280 ;; i.e. data page 0 is the file page immediately following the core header
1281 (let ((offset (* (1+ (car action
)) +backend-page-bytes
+))
1282 (nbytes (cdr action
)))
1284 (format t
"File offset ~10x: ~10x bytes~%" offset nbytes
))
1285 (setq filepos
(+ core-offset offset
))
1286 (file-position input filepos
)
1287 (copy-bytes input split-core nbytes buffer
)))
1288 ;; Trailer (runtime options and magic number)
1289 (let ((nbytes (read-sequence buffer input
)))
1290 ;; expect trailing magic number
1291 (let ((ptr (floor (- nbytes n-word-bytes
) n-word-bytes
)))
1292 (aver (= (%vector-raw-bits buffer ptr
) core-magic
)))
1293 ;; File position of the core header needs to be set to 0
1294 ;; regardless of what it was
1295 (setf (%vector-raw-bits buffer
4) 0)
1297 (format t
"Trailer words:(~{~X~^ ~})~%"
1298 (loop for i below
(floor nbytes n-word-bytes
)
1299 collect
(%vector-raw-bits buffer i
))))
1300 (write-sequence buffer split-core
:end nbytes
)
1301 (finish-output split-core
))
1303 (aver (= (+ core-offset
1304 (* page-adjust
+backend-page-bytes
+)
1305 (file-length split-core
))
1306 (file-length input
)))
1307 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1308 (file-position input filepos
)))
1310 ;; Map the original core file to memory
1311 (with-mapped-core (sap core-offset original-total-npages input
)
1313 (delete immobile-varyobj-core-space-id
(reverse spaces
)
1315 (map (cons sap
(sort (copy-list spaces
) #'> :key
#'space-addr
)))
1316 (pte-nbytes (cdar copy-actions
)))
1317 (collect-relocations map relocs
)
1318 (with-open-file (output elf-core-pathname
1319 :direction
:output
:if-exists
:supersede
1320 :element-type
'(unsigned-byte 8))
1322 `(,(ash code-start-fixup-ofs word-shift
) 0 .
,R_X86_64_64
) relocs
)
1323 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces
))
1324 +backend-page-bytes
+ ; core header
1327 (setf (%vector-raw-bits core-header code-start-fixup-ofs
) 0)
1328 (write-sequence core-header output
) ; Copy prepared header
1329 (force-output output
)
1330 ;; Change SB-C::*COMPILE[-FILE]-TO-MEMORY-SPACE* to :DYNAMIC
1331 ;; in case the resulting executable needs to compile anything.
1332 ;; (Call frame info will be missing, but at least it's something.)
1333 (dolist (name '("*COMPILE-FILE-TO-MEMORY-SPACE*"
1334 "*COMPILE-TO-MEMORY-SPACE*"))
1335 (%set-symbol-global-value
1336 (find-target-symbol "SB-C" name map
)
1337 (find-target-symbol "KEYWORD" "DYNAMIC" map
:logical
)))
1339 (dolist (space data-spaces
) ; Copy pages from memory
1340 (let ((start (space-physaddr space map
))
1341 (size (space-nbytes-aligned space
)))
1342 (aver (eql (sb-unix:unix-write
(sb-sys:fd-stream-fd output
)
1346 (format t
"Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1347 pte-nbytes pte-nbytes
(floor pte-nbytes
10)))
1348 (copy-bytes input output pte-nbytes
)) ; Copy PTEs from input
1350 ;; There's no relation between emit-sizes and which section to put
1351 ;; C symbol references in, however it's a safe bet that if sizes
1352 ;; are supported then so is the .rodata directive.
1353 (format asm-file
(if emit-sizes
" .rodata~%" " .data~%"))
1354 (extract-required-c-symbols map asm-file
)
1355 (write-assembler-text map fixedobj-range asm-file emit-sizes
)))
1357 (format asm-file
"~% ~A~%" +noexec-stack-note
+))))
1363 (defun cl-user::elfinate
(&optional
(args (cdr sb-ext
:*posix-argv
*)))
1364 (cond ((string= (car args
) "split")
1366 (let ((sizes (string= (car args
) "--sizes")))
1369 (destructuring-bind (input asm
) args
1370 (split-core input asm
:emit-sizes sizes
))))
1372 ((string= (car args
) "relocate")
1373 (destructuring-bind (input output binary start-sym
) (cdr args
)
1375 input output binary
(parse-integer start-sym
:radix
16))))
1377 (error "Unknown command: ~S" args
))))
1379 ;; If loaded as a script, do this
1380 (eval-when (:execute
)
1381 (let ((args (cdr sb-ext
:*posix-argv
*)))
1383 (let ((*print-pretty
* nil
))
1384 (format t
"Args: ~S~%" args
)
1385 (cl-user::elfinate args
)))))