Share magic constants between genesis and editcore
[sbcl.git] / tools-for-build / editcore.lisp
blobafe25729075e17d8cdad9d16430ca1f29e7b2b98
1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
11 ;;;;
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel :load-toplevel :execute)
19 (require :sb-posix)) ; for mmap
21 (load (merge-pathnames "corefile.lisp" *load-pathname*))
23 (defpackage "SB-EDITCORE"
24 (:use "CL" "SB-ALIEN" "SB-COREFILE" "SB-INT" "SB-EXT"
25 "SB-KERNEL" "SB-SYS" "SB-VM")
26 (:import-from "SB-ALIEN-INTERNALS"
27 #:alien-type-bits #:parse-alien-type
28 #:alien-value-sap #:alien-value-type)
29 (:import-from "SB-C" #:+backend-page-bytes+)
30 (:import-from "SB-VM" #:map-objects-in-range #:reconstitute-object
31 #:%closure-callee)
32 (:import-from "SB-DISASSEM" #:get-inst-space #:find-inst
33 #:make-dstate #:%make-segment
34 #:seg-virtual-location #:seg-length #:seg-sap-maker
35 #:map-segment-instructions
36 #:dstate-next-addr #:dstate-cur-offs)
37 (:import-from "SB-X86-64-ASM" #:near-jump-displacement)
38 (:import-from "SB-IMPL" #:package-hashtable #:package-%name
39 #:package-hashtable-cells
40 #:hash-table-table #:hash-table-number-entries))
42 (in-package "SB-EDITCORE")
44 (declaim (muffle-conditions compiler-note))
46 (eval-when (:execute)
47 (setq *evaluator-mode* :compile))
49 (defconstant core-magic
50 (logior (ash (char-code #\S) 24)
51 (ash (char-code #\B) 16)
52 (ash (char-code #\C) 8)
53 (char-code #\L)))
55 (defglobal +noexec-stack-note+ ".section .note.GNU-stack, \"\", @progbits")
57 (defstruct (core-space ; "space" is a CL symbol
58 (:conc-name space-)
59 (:constructor make-space (id addr data-page page-adjust nwords)))
60 id addr data-page page-adjust nwords)
61 (defun space-size (space) (* (space-nwords space) n-word-bytes))
62 (defun space-end (space) (+ (space-addr space) (space-size space)))
63 (defun space-nbytes-aligned (space)
64 (logandc2 (+ (space-size space) (1- +backend-page-bytes+))
65 (1- +backend-page-bytes+)))
66 (defun space-physaddr (space spaces)
67 (sap+ (car spaces) (* (space-data-page space) +backend-page-bytes+)))
69 ;;; Given ADDR which is an address in the target core, return the address at which
70 ;;; ADDR is currently mapped while performing the split.
71 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
72 (defun translate-ptr (addr spaces)
73 (let ((space (find addr (cdr spaces) :key #'space-addr :test #'>=)))
74 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
75 ;; macroize or something.
76 (+ (sap-int (car spaces)) (* (space-data-page space) +backend-page-bytes+)
77 (- addr (space-addr space)))))
79 ;;;
80 (defun get-space (id spaces)
81 (find id (cdr spaces) :key #'space-id))
82 (defun compute-nil-object (spaces)
83 (let ((space (get-space static-core-space-id spaces)))
84 (%make-lisp-obj (logior (space-addr space) #x17))))
86 ;;; Given OBJ which is tagged pointer into the target core, translate it into
87 ;;; the range at which the core is now mapped during execution of this tool,
88 ;;; so that host accessors can dereference its slots.
89 ;;; Use extreme care: while it works to use host accessors on the target core,
90 ;;; we must avoid type checks on instances because LAYOUTs need translation.
91 ;;; Printing boxed objects from the target core will almost always crash.
92 (defun translate (obj spaces)
93 (%make-lisp-obj (translate-ptr (get-lisp-obj-address obj) spaces)))
95 (defstruct (core-sym (:copier nil) (:predicate nil)
96 (:constructor make-core-sym (package name external)))
97 (package nil)
98 (name nil :read-only t)
99 (external nil :read-only t))
101 (defun c-name (lispname pp-state)
102 (when (and (symbolp lispname)
103 (eq (symbol-package lispname) *cl-package*))
104 (return-from c-name (concatenate 'string "cl:" (string-downcase lispname))))
105 ;; Get rid of junk from LAMBDAs
106 (setq lispname
107 (named-let recurse ((x lispname))
108 (cond ((typep x '(cons (eql lambda)))
109 (let ((args (second x)))
110 `(lambda ,(if args sb-c::*debug-name-sharp* "()")
111 ,@(recurse (cddr x)))))
112 ((eq x :in) "in")
113 ((consp x)
114 (recons x (recurse (car x)) (recurse (cdr x))))
115 (t x))))
117 ;; Shorten obnoxiously long printed representations of methods
118 ;; by changing FAST-METHOD to METHOD (because who cares?)
119 ;; and shorten
120 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
121 ;; to
122 ;; (method my-method-name (type-name))
123 ;; I suspect that can use DWARF info to provide even more description,
124 ;; though I also suspect it's relatively unambiguous anyway
125 ;; especially given that file information is available separately.
126 (flet ((unpackageize (thing)
127 (when (typep thing 'core-sym)
128 (setf (core-sym-package thing) nil))
129 thing))
130 (when (typep lispname '(cons (eql sb-pcl::fast-method)))
131 (setq lispname `(method ,@(cdr lispname)))
132 (setf (second lispname) (unpackageize (second lispname)))
133 (dolist (qual (car (last lispname)))
134 (unpackageize qual))))
136 ;; Perform backslash escaping on the exploded string
137 ;; Strings were stringified without surrounding quotes,
138 ;; but there might be quotes embedded anywhere, so escape them,
139 ;; and also remove newlines and non-ASCII.
140 (let ((characters
141 (mapcan (lambda (char)
142 (cond ((not (base-char-p char)) (list #\?))
143 ((member char '(#\\ #\")) (list #\\ char))
144 ((eql char #\newline) (list #\_))
145 (t (list char))))
146 (coerce (if (and (stringp lispname)
147 ;; L denotes a symbol which can not be global on macOS.
148 (char= (char lispname 0) #\L))
149 (concatenate 'string "_" lispname)
150 (write-to-string lispname
151 :pretty t :pprint-dispatch (cdr pp-state)
152 ;; FIXME: should be :level 1, however see
153 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
154 :escape t :level 2 :length 5
155 :case :downcase :gensym nil
156 :right-margin 10000))
157 'list))))
158 (let* ((string (coerce characters 'string))
159 (occurs (incf (gethash string (car pp-state) 0))))
160 (if (> occurs 1)
161 (concatenate 'string string "_" (write-to-string occurs))
162 string))))
164 (defmethod print-object ((sym core-sym) stream)
165 (format stream "~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
166 (core-sym-package sym)
167 (core-sym-external sym)
168 (core-sym-name sym)))
170 (defun fun-name-from-core (fun spaces core-nil packages
171 &aux (name (%simple-fun-name fun)))
172 (named-let recurse ((depth 0) (x name))
173 (unless (= (logand (get-lisp-obj-address x) 3) 3)
174 (return-from recurse x)) ; immediate object
175 (when (eq x core-nil)
176 (return-from recurse nil))
177 (setq x (translate x spaces))
178 (ecase (lowtag-of x)
179 (#.list-pointer-lowtag
180 (cons (recurse (1+ depth) (car x))
181 (recurse (1+ depth) (cdr x))))
182 ((#.instance-pointer-lowtag #.fun-pointer-lowtag) "?")
183 (#.other-pointer-lowtag
184 (cond
185 ((symbolp x)
186 (let ((name (translate (symbol-name x) spaces)))
187 (if (eq (symbol-package x) core-nil) ; uninterned
188 (string-downcase name)
189 (let* ((package (truly-the package
190 (translate (symbol-package x) spaces)))
191 (package-name (translate (package-%name package) spaces))
192 (compute-externals
193 (not (or (string= package-name "KEYWORD")
194 (string= package-name "COMMON-LISP"))))
195 (externals (if compute-externals
196 (gethash package-name packages)
197 t)))
198 (unless externals
199 (dovector (x (translate
200 (package-hashtable-cells
201 (truly-the package-hashtable
202 (translate (package-external-symbols package)
203 spaces)))
204 spaces))
205 (unless (fixnump x)
206 (push (if (eq x core-nil) ; random packages can export NIL. wow.
207 "NIL"
208 (translate (symbol-name (translate x spaces)) spaces))
209 externals)))
210 (setf externals (coerce externals 'vector)
211 (gethash package-name packages) externals))
212 ;; The name-cleaning code wants to compare against symbols
213 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
214 ;; Other than that, we avoid finding host symbols
215 ;; because the externalness could be wrong and misleading.
216 ;; It's a very subtle point, but best to get it right.
217 (if (member package-name '("COMMON-LISP" "KEYWORD" "SB-PCL")
218 :test #'string=)
219 ; NIL can't occur, because it has list-pointer-lowtag
220 (find-symbol name package-name) ; if existing symbol, use it
221 (make-core-sym (if (string= package-name "KEYWORD") nil package-name)
222 name
223 (if compute-externals
224 (find name externals :test 'string=)
225 t)))))))
226 ((stringp x) x)
227 (t "?"))))))
229 (defstruct (core-state
230 (:conc-name "CS-")
231 (:predicate nil)
232 (:copier nil)
233 (:constructor make-core-state
234 (code-space-start code-space-end
235 fixedobj-space-start fixedobj-space-end
236 &aux (inst-space (get-inst-space))
237 (call-inst (find-inst #b11101000 inst-space))
238 (jmp-inst (find-inst #b11101001 inst-space))
239 (pop-inst (find-inst #x5d inst-space)))))
240 (code-space-start 0 :type fixnum :read-only t)
241 (code-space-end 0 :type fixnum :read-only t)
242 (fixedobj-space-start 0 :type fixnum :read-only t)
243 (fixedobj-space-end 0 :type fixnum :read-only t)
244 (dstate (make-dstate nil) :read-only t)
245 (seg (%make-segment :sap-maker (lambda () (error "Bad sap maker"))
246 :virtual-location 0) :read-only t)
247 (fixup-addrs nil)
248 (call-inst nil :read-only t)
249 (jmp-inst nil :read-only t)
250 (pop-inst nil :read-only t))
252 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
253 ;;; to STREAM. SIZE specifies which direcive to emit.
254 ;;; EXCEPTIONS specify offsets at which a specific string should be
255 ;;; written to the file in lieu of memory contents, useful for emitting
256 ;;; expressions involving the assembler '.' symbol (the current PC).
257 (defun emit-asm-directives (size sap count stream &optional exceptions)
258 (declare (optimize speed))
259 (declare (stream stream))
260 (let ((*print-base* 16)
261 (string-buffer (make-array 18 :element-type 'base-char))
262 (fmt #.(coerce "0x%lx" 'base-string))
263 (per-line 0))
264 (declare ((integer 0 32) per-line)
265 (fixnum count))
266 string-buffer fmt
267 (ecase size
268 (:qword
269 (format stream " .quad")
270 (dotimes (i count)
271 (declare ((unsigned-byte 20) i))
272 (declare (simple-vector exceptions))
273 (write-char (if (> per-line 0) #\, #\space) stream)
274 (acond ((and (< i (length exceptions)) (aref exceptions i))
275 (write-string it stream))
277 #+nil
278 (let ((len
279 ;; output-reasonable-integer-in-base is so slow comparated
280 ;; to printf() that the second-most amount of time spent
281 ;; writing the asm file occurs in that function.
282 ;; Unbelievable that we can't do better than that.
283 (with-pinned-objects (string-buffer fmt)
284 (alien-funcall
285 (extern-alien "snprintf"
286 (function int system-area-pointer unsigned system-area-pointer unsigned))
287 (vector-sap string-buffer)
288 (length string-buffer)
289 (vector-sap fmt)
290 (sap-ref-word sap (* i n-word-bytes))))))
291 (write-string string-buffer stream :end len))
292 (write-string "0x" stream)
293 (write (sap-ref-word sap (* i n-word-bytes)) :stream stream)))
294 (when (and (= (incf per-line) 16) (< (1+ i) count))
295 (format stream "~% .quad")
296 (setq per-line 0))))
297 (:byte
298 (aver (not exceptions))
299 (format stream " .byte")
300 (dotimes (i count)
301 (write-char (if (> per-line 0) #\, #\space) stream)
302 (write-string "0x" stream)
303 (write (sap-ref-8 sap i) :stream stream)
304 (when (and (= (incf per-line) 32) (< (1+ i) count))
305 (format stream "~% .byte")
306 (setq per-line 0))))))
307 (terpri stream))
309 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count)
310 (emit-asm-directives :qword
311 (sap+ (code-instructions code-component)
312 (- (* sb-vm:code-constants-offset sb-vm:n-word-bytes)))
313 sb-vm:code-constants-offset
314 output #())
315 (let ((list (loop for i from 2 by 2 repeat count
316 collect
317 (let* ((location (translate (svref vector (1+ i)) spaces))
318 (offset (car location))
319 (nbytes (- (1+ (cdr location)) offset))
320 (name (translate
321 (symbol-name (translate (svref vector i) spaces))
322 spaces)))
323 (list* offset name nbytes)))))
324 (loop for (offset name . nbytes) in (sort list #'< :key #'car)
325 do (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
326 (format nil "~(\"~a\"~)" name) (if emit-sizes nbytes))
327 (emit-asm-directives
328 :qword
329 (sap+ (code-instructions code-component) offset)
330 (ceiling nbytes sb-vm:n-word-bytes)
331 output #()))))
333 (defun code-fixup-locs (code spaces)
334 (let ((locs (sb-vm::%code-fixups code)))
335 (unless (eql locs 0)
336 (sb-c::unpack-code-fixup-locs
337 (if (fixnump locs) locs (translate locs spaces))))))
339 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
340 ;;; all instructions that should be emitted using assembly language
341 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
342 ;;; - function prologue instructions that setup the call frame
343 ;;; - jmp/call instructions that transfer control to the fixedoj space
344 ;;; delimited by bounds in STATE.
345 ;;; At execution time the function will have virtual address LOAD-ADDR.
346 (defun list-annotated-instructions (sap length state load-addr emit-cfi)
347 (let ((dstate (cs-dstate state))
348 (seg (cs-seg state))
349 (call-inst (cs-call-inst state))
350 (jmp-inst (cs-jmp-inst state))
351 (pop-inst (cs-pop-inst state))
352 (next-fixup-addr
353 (or (car (cs-fixup-addrs state)) most-positive-word))
354 (list))
355 (setf (seg-virtual-location seg) load-addr
356 (seg-length seg) length
357 (seg-sap-maker seg) (lambda () sap))
358 ;; KLUDGE: "8f 45 08" is the standard prologue
359 (when (and emit-cfi (= (logand (sap-ref-32 sap 0) #xFFFFFF) #x08458f))
360 (push (list* 0 3 "pop" "8(%rbp)") list))
361 (map-segment-instructions
362 (lambda (dchunk inst)
363 (cond
364 ((< next-fixup-addr (dstate-next-addr dstate))
365 (let ((operand (sap-ref-32 sap (- next-fixup-addr load-addr))))
366 (when (<= (cs-code-space-start state) operand (cs-code-space-end state))
367 (aver (eql (sap-ref-8 sap (- next-fixup-addr load-addr 1)) #xB8)) ; mov rax, imm32
368 (push (list* (dstate-cur-offs dstate) 5 "mov" operand) list)))
369 (pop (cs-fixup-addrs state))
370 (setq next-fixup-addr (or (car (cs-fixup-addrs state)) most-positive-word)))
371 ((or (eq inst jmp-inst) (eq inst call-inst))
372 (let ((target-addr (+ (near-jump-displacement dchunk dstate)
373 (dstate-next-addr dstate))))
374 (when (<= (cs-fixedobj-space-start state)
375 target-addr
376 (cs-fixedobj-space-end state))
377 (push (list* (dstate-cur-offs dstate)
378 5 ; length
379 (if (eq inst call-inst) "call" "jmp")
380 target-addr)
381 list))))
382 ((and (eq inst pop-inst) (eq (logand dchunk #xFF) #x5D))
383 (push (list* (dstate-cur-offs dstate) 1 "pop" "%rbp") list))))
385 dstate
386 nil)
387 (nreverse list)))
389 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
390 ;;; of memory at PADDR (physical addr) to STREAM.
391 ;;; The function's address as per the core file is VADDR.
392 ;;; (Its eventual address is indeterminate)
393 ;;; If EMIT-CFI is true, then also emit cfi directives.
395 ;;; Notice that we can use one fewer cfi directive than usual because
396 ;;; Lisp always carries a frame pointer as set up by the caller.
398 ;;; C convention
399 ;;; ============
400 ;;; pushq %rbp
401 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
402 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
403 ;;; movq %rsp, %rbp
404 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
406 ;;; Lisp convention
407 ;;; ===============
408 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
409 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
410 ;;; .cfi_def_cfa 6, 16
411 ;;; .cfi_offset 6, -16
413 ;;; Of course there is a flip-side to this: unwinders think that the new frame
414 ;;; is already begun in the caller. Interruption between these two instructions:
415 ;;; MOV RBP, RSP / CALL #xzzzzz
416 ;;; will show the backtrace as if two invocations of the caller are on stack.
417 ;;; This is tricky to fix because while we can relativize the CFA to the
418 ;;; known frame size, we can't do that based only on a disassembly.
420 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state)
421 (when emit-cfi
422 (format stream " .cfi_startproc~%"))
423 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
424 ;; bytes to be written as an assembly language instruction rather than opaquely,
425 ;; thereby affecting the ELF data (cfi or relocs) produced.
426 (let ((instructions
427 (list-annotated-instructions (int-sap paddr) count core-state vaddr emit-cfi))
428 (ptr paddr))
429 (symbol-macrolet ((cur-offset (- ptr paddr)))
430 (loop
431 (let ((until (if instructions (caar instructions) count)))
432 ;; if we're not aligned, then write some number of bytes
433 ;; to cause alignment. But do not write past the next offset
434 ;; that needs to be written as an instruction.
435 (when (logtest ptr #x7) ; unaligned
436 (let ((n (min (- (nth-value 1 (ceiling ptr 8)))
437 (- until cur-offset))))
438 (aver (<= 0 n 7))
439 (emit-asm-directives :byte (int-sap ptr) n stream)
440 (incf ptr n)))
441 ;; Now we're either aligned to a multiple of 8, or the current
442 ;; offset needs to be written as a textual instruction.
443 (let ((n (- until cur-offset)))
444 (aver (>= n 0))
445 (multiple-value-bind (qwords remainder) (floor n 8)
446 (when (plusp qwords)
447 (emit-asm-directives :qword (int-sap ptr) qwords stream #())
448 (incf ptr (* qwords 8)))
449 (when (plusp remainder)
450 (emit-asm-directives :byte (int-sap ptr) remainder stream)
451 (incf ptr remainder))))
452 ;; If the current offset is COUNT, we're done.
453 (when (= cur-offset count) (return))
454 (aver (= cur-offset until))
455 (destructuring-bind (length opcode . operand) (cdr (pop instructions))
456 (when (cond ((member opcode '("jmp" "call") :test #'string=)
457 (format stream " ~A 0x~X~%" opcode operand))
458 ((string= opcode "pop")
459 (format stream " ~A ~A~%" opcode operand)
460 (cond ((string= operand "8(%rbp)")
461 (format stream " .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
462 ((string= operand "%rbp")
463 ;(format stream " .cfi_def_cfa 7, 8~%")
464 nil)
465 (t)))
466 ((string= opcode "mov")
467 (format stream " mov $(__lisp_code_start+0x~x),%eax~%"
468 (- operand (cs-code-space-start core-state))))
469 (t))
470 (bug "Random annotated opcode ~S" opcode))
471 (incf ptr length))
472 (when (= cur-offset count) (return))))))
473 (when emit-cfi
474 (format stream " .cfi_endproc~%")))
476 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
477 ;;; TODO: relocate fdefns and instances of standard-generic-function
478 ;;; into the space that is dumped into an ELF section.
479 (defun write-assembler-text
480 (spaces fixedobj-range output
481 &optional emit-sizes (emit-cfi t)
482 &aux (code-space (get-space immobile-varyobj-core-space-id spaces))
483 (code-space-start (space-addr code-space)) ; target virtual address
484 (code-space-end (+ code-space-start (space-size code-space)))
485 (code-addr code-space-start)
486 (core-state
487 (make-core-state code-space-start code-space-end
488 (car fixedobj-range)
489 (+ (car fixedobj-range) (cdr fixedobj-range))))
490 (total-code-size 0)
491 (pp-state (cons (make-hash-table :test 'equal)
492 ;; copy no entries for macros/special-operators (flet, etc)
493 (sb-pretty::make-pprint-dispatch-table)))
494 (packages (make-hash-table :test 'equal))
495 (core-nil (compute-nil-object spaces))
496 (prev-namestring "")
497 (n-linker-relocs 0)
498 end-loc)
499 (set-pprint-dispatch 'string
500 ;; Write strings without string quotes
501 (lambda (stream string) (write-string string stream))
503 (cdr pp-state))
504 (labels ((ldsym-quote (name)
505 (concatenate 'string '(#\") name '(#\")))
506 (dumpwords (addr count stream &optional (exceptions #()) logical-addr)
507 (let ((sap (int-sap addr)))
508 (aver (sap>= sap (car spaces)))
509 ;; Make intra-code-space pointers computed at link time
510 (dotimes (i (if logical-addr count 0))
511 (unless (and (< i (length exceptions)) (svref exceptions i))
512 (let ((word (sap-ref-word sap (* i n-word-bytes))))
513 (when (and (= (logand word 3) 3) ; is a pointer
514 (<= code-space-start word (1- code-space-end))) ; to code space
515 #+nil
516 (format t "~&~(~x: ~x~)~%" (+ logical-addr (* i n-word-bytes))
517 word)
518 (incf n-linker-relocs)
519 (setf exceptions (adjust-array exceptions (max (length exceptions) (1+ i))
520 :initial-element nil)
521 (svref exceptions i)
522 (format nil "__lisp_code_start+0x~x"
523 (- word code-space-start)))))))
524 (emit-asm-directives :qword sap count stream exceptions)))
525 (make-code-obj (addr)
526 (let ((translation (translate-ptr addr spaces)))
527 (aver (= (%widetag-of (sap-ref-word (int-sap translation) 0))
528 code-header-widetag))
529 (%make-lisp-obj (logior translation other-pointer-lowtag))))
530 (calc-obj-size (code)
531 ;; No need to pin - it's not managed by GC
532 (nth-value 2
533 (reconstitute-object
534 (ash (logandc2 (get-lisp-obj-address code) lowtag-mask)
535 (- n-fixnum-tag-bits)))))
536 (%widetag-of (word)
537 (logand word widetag-mask)))
538 (format output " .text~% .file \"sbcl.core\"
539 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
541 ;; Scan the assembly routines.
542 (let* ((code-component (make-code-obj code-addr))
543 (size (calc-obj-size code-component))
544 (hashtable
545 (truly-the hash-table
546 (translate (car (translate (%code-debug-info code-component)
547 spaces))
548 spaces)))
549 (cells (translate (hash-table-table hashtable) spaces))
550 (count (hash-table-number-entries hashtable)))
551 (incf code-addr size)
552 (setf total-code-size size)
553 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count))
555 (loop
556 (when (>= code-addr code-space-end) (return))
557 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
558 (let* ((code (make-code-obj code-addr))
559 (objsize (calc-obj-size code)))
560 (setq end-loc (+ code-addr objsize))
561 (incf total-code-size objsize)
562 (cond
563 ((< (code-header-words code) 4) ; filler object
564 ;; Shouldn't occur unless defrag was not performed
565 (format output "#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
566 code-addr
567 simple-array-unsigned-byte-8-widetag
568 (ash (- objsize (* 2 n-word-bytes))
569 n-fixnum-tag-bits)
570 (- objsize (* 2 n-word-bytes))))
571 ((%instancep (%code-debug-info code)) ; assume it's a COMPILED-DEBUG-INFO
572 (aver (plusp (code-n-entries code)))
573 (let* ((source
574 (sb-c::compiled-debug-info-source
575 (truly-the sb-c::compiled-debug-info
576 (translate (%code-debug-info code) spaces))))
577 (namestring
578 (sb-c::debug-source-namestring
579 (truly-the sb-c::debug-source (translate source spaces)))))
580 (setq namestring (if (eq namestring core-nil)
581 "sbcl.core"
582 (translate namestring spaces)))
583 (unless (string= namestring prev-namestring)
584 (format output " .file \"~a\"~%" namestring)
585 (setq prev-namestring namestring)))
586 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code) lowtag-mask))
587 (boxed-end (+ code-physaddr
588 (ash (code-header-words code) word-shift)))
589 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code 0))
590 lowtag-mask)))
591 (format output "#x~x:~%" code-addr)
592 (dumpwords code-physaddr (code-header-words code) output #() code-addr)
593 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
594 (when (> first-fun boxed-end)
595 (dumpwords boxed-end (floor (- first-fun boxed-end) n-word-bytes)
596 output)))
597 (setf (cs-fixup-addrs core-state)
598 (mapcar (lambda (x)
599 (+ code-addr (ash (code-header-words code) word-shift) x))
600 (code-fixup-locs code spaces)))
601 ;; Loop over all embedded functions.
602 ;; Because simple-fun offsets are relative to the code start
603 ;; (and not in a linked list as they were in the past),
604 ;; iteratation in a "foreign" code object works just fine,
605 ;; subject to the caution about reading boxed words.
606 (dotimes (j (code-n-entries code))
607 (let* ((fun (%code-entry-point code j))
608 (fun-addr (logandc2 (get-lisp-obj-address fun) lowtag-mask))
609 (end (if (< (1+ j) (code-n-entries code))
610 (logandc2 (get-lisp-obj-address (%code-entry-point code (1+ j)))
611 lowtag-mask)
612 (+ (translate-ptr code-addr spaces) objsize)))
613 (entrypoint
614 (+ fun-addr (* simple-fun-code-offset n-word-bytes)))
615 (size (- end entrypoint))
616 (lispname (fun-name-from-core fun spaces core-nil packages))
617 (quotname (ldsym-quote (c-name lispname pp-state))))
618 ;; Globalize the C symbol only if the name is a legal function designator
619 ;; per the standard definition.
620 ;; This is a technique to try to avoid appending a uniquifying suffix
621 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
622 (format output "~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
623 (typep lispname '(or symbol core-sym (cons (eql setf))))
624 quotname emit-sizes)
625 (dumpwords fun-addr
626 simple-fun-code-offset output
627 (load-time-value
628 `#(nil ,(format nil ".+~D"
629 (* (1- simple-fun-code-offset)
630 n-word-bytes)))
632 nil)
633 (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
634 quotname (if emit-sizes size))
635 ;; entrypoint is the current physical address.
636 ;; Also pass in the virtual address in the core
637 ;; (which will differ from the actual load-time address)
638 (emit-lisp-function entrypoint
639 (+ code-addr (- entrypoint
640 (logandc2 (get-lisp-obj-address code)
641 lowtag-mask)))
642 size output emit-cfi core-state)))
643 (terpri output)
644 ;; All fixups should have been consumed by writing the code out
645 (aver (null (cs-fixup-addrs core-state))))
647 (error "Strange code component: ~S" code)))
648 (incf code-addr objsize))))
650 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
651 (format output "__lisp_code_end:~%")
653 ;; Pad so that non-lisp code can't be colocated on a GC page.
654 ;; (Lack of Lisp object headers in C code is the issue)
655 (let ((aligned-end (logandc2 (+ end-loc 4095) 4095)))
656 (when (> aligned-end end-loc)
657 (multiple-value-bind (nwords remainder)
658 (floor (- aligned-end end-loc) n-word-bytes)
659 (aver (>= nwords 2))
660 (aver (zerop remainder))
661 (decf nwords 2)
662 (format output " .quad ~d, ~d # (simple-array fixnum (~d))~%"
663 simple-array-fixnum-widetag
664 (ash nwords n-fixnum-tag-bits)
665 nwords)
666 (when (plusp nwords)
667 (format output " .fill ~d~%" (* nwords n-word-bytes))))))
668 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
669 (values total-code-size n-linker-relocs))
671 ;;; Return either the physical or logical address of the specified symbol.
672 (defun find-target-symbol (package-name symbol-name spaces
673 &optional (address-mode :physical))
674 (let* ((space (find immobile-fixedobj-core-space-id (cdr spaces) :key #'space-id))
675 (start (translate-ptr (space-addr space) spaces))
676 (end (+ start (space-size space)))
677 (physaddr start))
678 (loop
679 (when (>= physaddr end) (bug "Can't find symbol"))
680 (multiple-value-bind (obj tag size)
681 (reconstitute-object (ash physaddr (- n-fixnum-tag-bits)))
682 (when (and (= tag symbol-widetag)
683 (string= symbol-name (translate (symbol-name obj) spaces))
684 (%instancep (symbol-package obj))
685 (string= package-name
686 (translate
687 (package-%name
688 (truly-the package (translate (symbol-package obj) spaces)))
689 spaces)))
690 (return (%make-lisp-obj
691 (logior (ecase address-mode
692 (:physical physaddr)
693 (:logical (+ (space-addr space) (- physaddr start))))
694 other-pointer-lowtag))))
695 (incf physaddr size)))))
697 (defun extract-required-c-symbols (spaces asm-file &optional (verbose nil))
698 (flet ((symbol-fdefn-fun (symbol)
699 (let ((vector (translate (symbol-info-vector symbol) spaces)))
700 ;; TODO: allow for (plist . info-vector) in the info slot
701 (aver (simple-vector-p vector))
702 (translate (fdefn-fun (translate (info-vector-fdefn vector) spaces))
703 spaces))))
704 (let ((linkage-info
705 (translate (symbol-global-value
706 (find-target-symbol "SB-SYS" "*LINKAGE-INFO*" spaces))
707 spaces))
708 (dyn-syminfo
709 (symbol-fdefn-fun
710 (find-target-symbol "SB-SYS"
711 "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"
712 spaces))))
713 (aver (= (get-closure-length dyn-syminfo) 3))
714 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo 1) spaces))
715 (ht2 (translate (%closure-index-ref dyn-syminfo 0) spaces))
716 (table0 (translate (hash-table-table (truly-the hash-table linkage-info))
717 spaces))
718 (table1 (translate (hash-table-table (truly-the hash-table ht1)) spaces))
719 (table2 (translate (hash-table-table (truly-the hash-table ht2)) spaces))
720 (linkage)
721 (foreign))
722 (declare (simple-vector table0 table1 table2))
723 (flet ((show (x)
724 (push x foreign)
725 (when verbose
726 (format t "~A~%" x)))
727 (scan-table (table name fun &aux (n 0) (end (length table)))
728 (when verbose
729 (format t "~&~A:~%~A~%"
730 name (make-string (1+ (length name)) :initial-element #\-)))
731 (do ((i 2 (+ i 2)))
732 ((= i end))
733 (let ((val (svref table i)))
734 (unless (unbound-marker-p val)
735 (funcall fun (translate val spaces))
736 (incf n))))
737 (when verbose
738 (format t "TOTAL: ~D entries~2%" n))))
739 (scan-table table0 "linkage info"
740 (lambda (x &aux (type #\T))
741 (when (consp x)
742 (setq x (translate (car x) spaces) type #\D))
743 (format asm-file " .long ~A~%" x)
744 (when verbose
745 (format t "~A ~A~%" type x))
746 (push x linkage)))
747 (scan-table table1 "defined" #'show)
748 (scan-table table2 "undefined" #'show)
749 (let ((diff1 ; linkage not in foreign
750 (remove-if (lambda (x) (member x foreign :test #'string=)) linkage))
751 (diff2 ; foreign not in linkage
752 (remove-if (lambda (x) (member x linkage :test #'string=)) foreign)))
753 (when verbose
754 (format t "~&Linkage not in foreign:~%~S~%" diff1)
755 (format t "~&Foreign not in linkage:~%~S~%" diff2))
756 )))))
757 (terpri asm-file))
759 ;;;; ELF file I/O
761 (defconstant +sht-null+ 0)
762 (defconstant +sht-progbits+ 1)
763 (defconstant +sht-symtab+ 2)
764 (defconstant +sht-strtab+ 3)
765 (defconstant +sht-rela+ 4)
766 (defconstant +sht-rel+ 9)
768 (define-alien-type elf64-ehdr
769 (struct elf64-edhr
770 (ident (array unsigned-char 16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
771 (type (unsigned 16)) ; 1 0
772 (machine (unsigned 16)) ; 3E 0
773 (version (unsigned 32)) ; 1 0 0 0
774 (entry unsigned) ; 0 0 0 0 0 0 0 0
775 (phoff unsigned) ; 0 0 0 0 0 0 0 0
776 (shoff unsigned) ;
777 (flags (unsigned 32)) ; 0 0 0 0
778 (ehsize (unsigned 16)) ; 40 0
779 (phentsize (unsigned 16)) ; 0 0
780 (phnum (unsigned 16)) ; 0 0
781 (shentsize (unsigned 16)) ; 40 0
782 (shnum (unsigned 16)) ; n 0
783 (shstrndx (unsigned 16)))) ; n 0
784 (define-alien-type elf64-shdr
785 (struct elf64-shdr
786 (name (unsigned 32))
787 (type (unsigned 32))
788 (flags (unsigned 64))
789 (addr (unsigned 64))
790 (off (unsigned 64))
791 (size (unsigned 64))
792 (link (unsigned 32))
793 (info (unsigned 32))
794 (addralign (unsigned 64))
795 (entsize (unsigned 64))))
796 (define-alien-type elf64-sym
797 (struct elf64-sym
798 (name (unsigned 32))
799 (info (unsigned 8))
800 (other (unsigned 8))
801 (shndx (unsigned 16))
802 (value unsigned)
803 (size unsigned)))
804 (define-alien-type elf64-rela
805 (struct elf64-rela
806 (offset (unsigned 64))
807 (info (unsigned 64))
808 (addend (signed 64))))
810 (defun make-elf64-sym (name info)
811 (let ((a (make-array 24 :element-type '(unsigned-byte 8))))
812 (with-pinned-objects (a)
813 (setf (sap-ref-32 (vector-sap a) 0) name
814 (sap-ref-8 (vector-sap a) 4) info))
817 ;;; Return two values: an octet vector comprising a string table
818 ;;; and an alist which maps string to offset in the table.
819 (defun string-table (strings)
820 (let* ((length (+ (1+ (length strings)) ; one more null than there are strings
821 (reduce #'+ strings :key #'length))) ; data length
822 (bytes (make-array length :element-type '(unsigned-byte 8)
823 :initial-element 0))
824 (index 1)
825 (alist))
826 (dolist (string strings)
827 (push (cons string index) alist)
828 (replace bytes (map 'vector #'char-code string) :start1 index)
829 (incf index (1+ (length string))))
830 (cons (nreverse alist) bytes)))
832 (defun write-alien (alien size stream)
833 (dotimes (i size)
834 (write-byte (sap-ref-8 (alien-value-sap alien) i) stream)))
836 (defun copy-bytes (in-stream out-stream nbytes
837 &optional (buffer
838 (make-array 1024 :element-type '(unsigned-byte 8))))
839 (loop (let ((chunksize (min (length buffer) nbytes)))
840 (aver (eql (read-sequence buffer in-stream :end chunksize) chunksize))
841 (write-sequence buffer out-stream :end chunksize)
842 (when (zerop (decf nbytes chunksize)) (return)))))
844 ;;; core header should be an array of words in '.rodata', not a 32K page
845 (defconstant core-header-size +backend-page-bytes+) ; stupidly large (FIXME)
847 ;;; Write everything except for the core file itself into OUTPUT-STREAM
848 ;;; and leave the stream padded to a 4K boundary ready to receive data.
849 (defun prepare-elf (core-size relocs output)
850 (let* ((sym-entry-size 24)
851 (reloc-entry-size 24)
852 (core-align 4096)
853 (sections
854 `#((:core "lisp.core" ,+sht-progbits+ 0 0 0 ,core-align 0)
855 (:sym ".symtab" ,+sht-symtab+ 0 3 1 8 ,sym-entry-size)
856 ; section with the strings -- ^ ^ -- 1+ highest local symbol
857 (:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
858 (:rel ".relalisp.core" ,+sht-rela+ 0 2 1 8 ,reloc-entry-size)
859 ; symbol table -- ^ ^ -- for which section
860 (:note ".note.GNU-stack" ,+sht-null+ 0 0 0 1 0)))
861 (string-table
862 (string-table (append '("__lisp_code_start") (map 'list #'second sections))))
863 (strings (cdr string-table))
864 (padded-strings-size (logandc2 (+ (length strings) 7) 7))
865 (ehdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil)) 8))
866 (shdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil)) 8))
867 (symbols-size (* 2 sym-entry-size))
868 (shdrs-start (+ ehdr-size symbols-size padded-strings-size))
869 (shdrs-end (+ shdrs-start (* (1+ (length sections)) shdr-size)))
870 (relocs-size (* (length relocs) reloc-entry-size))
871 (relocs-end (+ shdrs-end relocs-size))
872 (core-start (logandc2 (+ relocs-end (1- core-align)) (1- core-align)))
873 (ident #.(coerce '(#x7F #x45 #x4C #x46 2 1 1 0 0 0 0 0 0 0 0 0)
874 '(array (unsigned-byte 8) 1))))
876 (with-alien ((ehdr elf64-ehdr))
877 (dotimes (i (ceiling ehdr-size n-word-bytes))
878 (setf (sap-ref-word (alien-value-sap ehdr) (* i n-word-bytes)) 0))
879 (with-pinned-objects (ident)
880 (%byte-blt (vector-sap ident) 0 (alien-value-sap ehdr) 0 16))
881 (setf (slot ehdr 'type) 1
882 (slot ehdr 'machine) #x3E
883 (slot ehdr 'version) 1
884 (slot ehdr 'shoff) shdrs-start
885 (slot ehdr 'ehsize) ehdr-size
886 (slot ehdr 'shentsize) shdr-size
887 (slot ehdr 'shnum) (1+ (length sections)) ; section 0 is implied
888 (slot ehdr 'shstrndx) (1+ (position :str sections :key #'car)))
889 (write-alien ehdr ehdr-size output))
891 ;; Write symbol table
892 (aver (eql (file-position output) ehdr-size))
893 (write-sequence (make-elf64-sym 0 0) output)
894 ;; The symbol name index is always 1 by construction. The type is #x10
895 ;; given: #define STB_GLOBAL 1
896 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
897 ;; which places the binding in the high 4 bits of the low byte.
898 (write-sequence (make-elf64-sym 1 #x10) output)
900 ;; Write string table
901 (aver (eql (file-position output) (+ ehdr-size symbols-size)))
902 (write-sequence strings output) ; an octet vector at this point
903 (dotimes (i (- padded-strings-size (length strings)))
904 (write-byte 0 output))
906 ;; Write section headers
907 (aver (eql (file-position output) shdrs-start))
908 (with-alien ((shdr elf64-shdr))
909 (dotimes (i (ceiling shdr-size n-word-bytes)) ; Zero-fill
910 (setf (sap-ref-word (alien-value-sap shdr) (* i n-word-bytes)) 0))
911 (dotimes (i (1+ (length sections)))
912 (when (plusp i) ; Write the zero-filled header as section 0
913 (destructuring-bind (key name type flags link info alignment entsize)
914 (aref sections (1- i))
915 (multiple-value-bind (offset size)
916 (ecase key
917 (:sym (values ehdr-size symbols-size))
918 (:str (values (+ ehdr-size symbols-size) (length strings)))
919 (:rel (values shdrs-end relocs-size))
920 (:core (values core-start core-size))
921 (:note (values 0 0)))
922 (let ((name (cdr (assoc name (car string-table) :test #'string=))))
923 (setf (slot shdr 'name) name
924 (slot shdr 'type) type
925 (slot shdr 'flags) flags
926 (slot shdr 'off) offset
927 (slot shdr 'size) size
928 (slot shdr 'link) link
929 (slot shdr 'info) info
930 (slot shdr 'addralign) alignment
931 (slot shdr 'entsize) entsize)))))
932 (write-alien shdr shdr-size output)))
934 ;; Write relocations
935 (aver (eql (file-position output) shdrs-end))
936 (let ((buf (make-array relocs-size :element-type '(unsigned-byte 8)))
937 (ptr 0))
938 (with-alien ((rela elf64-rela))
939 (dovector (reloc relocs)
940 (destructuring-bind (place addend . kind) reloc
941 (setf (slot rela 'offset) place
942 (slot rela 'info) (logior (ash 1 32) kind) ; 1 = symbol index
943 (slot rela 'addend) addend))
944 (setf (%vector-raw-bits buf (+ ptr 0)) (sap-ref-word (alien-value-sap rela) 0)
945 (%vector-raw-bits buf (+ ptr 1)) (sap-ref-word (alien-value-sap rela) 8)
946 (%vector-raw-bits buf (+ ptr 2)) (sap-ref-word (alien-value-sap rela) 16))
947 (incf ptr 3)))
948 (write-sequence buf output))
950 ;; Write padding
951 (dotimes (i (- core-start (file-position output)))
952 (write-byte 0 output))
953 (aver (eq (file-position output) core-start))))
955 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
956 ;;; whose code space is subject to link-time relocation.
957 (defconstant R_X86_64_64 1) ; /* Direct 64 bit */
958 (defconstant R_X86_64_PC32 2) ; /* PC relative 32 bit signed */
959 (defconstant R_X86_64_32 10) ; /* Direct 32 bit zero extended */
961 (defun collect-relocations (spaces fixups &aux (print nil))
962 (binding* (((static-start static-end)
963 (let ((space (get-space static-core-space-id spaces)))
964 (values (space-addr space) (space-end space))))
965 ((code-start code-end)
966 (let ((space (get-space immobile-varyobj-core-space-id spaces)))
967 (values (space-addr space) (space-end space))))
968 ;; the distance between fixedobj space address (i.e following the pages of
969 ;; dynamic space) in the ELF section which has a presumptive address of 0
970 ;; due to being non-loaded, to where it will be later mapped by coreparse
971 (fixedobj-space-displacement
972 (let ((space (get-space immobile-fixedobj-core-space-id spaces)))
973 (- (* (1+ (space-data-page space)) +backend-page-bytes+) ; 1+ = core header
974 (space-addr space))))
975 (n-abs 0)
976 (n-rel 0))
977 (labels
978 ((abs-fixup (core-offs referent)
979 (incf n-abs)
980 (when print
981 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
982 (setf (sap-ref-word (car spaces) core-offs) 0)
983 (vector-push-extend `(,(+ core-header-size core-offs)
984 ,(- referent code-start) . ,R_X86_64_64)
985 fixups))
986 (abs32-fixup (core-offs referent)
987 (incf n-abs)
988 (when print
989 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
990 (setf (sap-ref-32 (car spaces) core-offs) 0)
991 (vector-push-extend `(,(+ core-header-size core-offs)
992 ,(- referent code-start) . ,R_X86_64_32)
993 fixups))
994 (rel-fixup (core-offs referent)
995 (incf n-rel)
996 (when print
997 (format t "~x = 0x~(~x~): (r)~%" core-offs (core-to-logical core-offs) #+nil referent))
998 (setf (sap-ref-32 (car spaces) core-offs) 0)
999 (vector-push-extend `(,(+ core-header-size core-offs)
1000 ,(- referent code-start) . ,R_X86_64_PC32)
1001 fixups))
1002 (in-code-space-p (ptr)
1003 (and (<= code-start ptr) (< ptr code-end)))
1004 ;; Given a address which is an offset into the data pages of the target core,
1005 ;; compute the logical address which that offset would be mapped to.
1006 ;; For example core address 0 is the virtual address of static space.
1007 (core-to-logical (core-offs &aux (page (floor core-offs +backend-page-bytes+)))
1008 (dolist (space (cdr spaces)
1009 (bug "Can't translate core offset ~x using ~x"
1010 core-offs spaces))
1011 (let* ((page0 (space-data-page space))
1012 (nwords (space-nwords space))
1013 (id (space-id space))
1014 (npages (ceiling nwords (/ +backend-page-bytes+ n-word-bytes))))
1015 (when (and (<= page0 page (+ page0 (1- npages)))
1016 (/= id immobile-varyobj-core-space-id))
1017 (return (+ (space-addr space)
1018 (* (- page page0) +backend-page-bytes+)
1019 (logand core-offs (1- +backend-page-bytes+))))))))
1020 (scanptrs (obj wordindex-min wordindex-max &aux (n-fixups 0))
1021 (do* ((base-addr (logandc2 (get-lisp-obj-address obj) lowtag-mask))
1022 (sap (int-sap base-addr))
1023 ;; core-offs is the offset in the lisp.core ELF section.
1024 (core-offs (- base-addr (sap-int (car spaces))))
1025 (i wordindex-min (1+ i)))
1026 ((> i wordindex-max) n-fixups)
1027 (let ((ptr (sap-ref-word sap (ash i word-shift))))
1028 (when (and (= (logand ptr 3) 3) (in-code-space-p ptr))
1029 (abs-fixup (+ core-offs (ash i word-shift)) ptr)
1030 (incf n-fixups)))))
1031 (scanptr (obj wordindex)
1032 (plusp (scanptrs obj wordindex wordindex))) ; trivial wrapper
1033 (scan-obj (obj widetag size vaddr
1034 &aux (core-offs (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1035 (sap-int (car spaces))))
1036 (nwords (ceiling size n-word-bytes)))
1037 (when (listp obj)
1038 (scanptrs obj 0 1)
1039 (return-from scan-obj))
1040 (case widetag
1041 (#.instance-widetag
1042 (let ((layout (truly-the layout
1043 (translate (%instance-layout obj) spaces))))
1044 ;; FIXME: even though the layout is supplied, it's not good enough,
1045 ;; because the macro references the layout-bitmap which might
1046 ;; be a bignum which is a pointer into the logical core address.
1047 (unless (fixnump (layout-bitmap layout))
1048 (error "Can't process bignum bitmap"))
1049 (do-instance-tagged-slot (i obj :layout layout)
1050 (scanptr obj (1+ i))))
1051 (return-from scan-obj))
1052 (#.simple-vector-widetag
1053 (let ((len (length (the simple-vector obj))))
1054 (when (eql (logand (get-header-data obj) #xFF) vector-valid-hashing-subtype)
1055 (do ((i 2 (+ i 2)) (needs-rehash))
1056 ((= i len)
1057 (when needs-rehash
1058 (setf (svref obj 1) 1)))
1059 (when (scanptr obj (+ vector-data-offset i))
1060 (format t "~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj))
1061 (setq needs-rehash t))
1062 (scanptr obj (+ vector-data-offset i 1)))
1063 (return-from scan-obj))
1064 (setq nwords (+ len 2))))
1065 (#.fdefn-widetag
1066 (scanptrs obj 1 2)
1067 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1068 (int-sap (+ (- (get-lisp-obj-address obj) other-pointer-lowtag)
1069 (ash fdefn-raw-addr-slot word-shift))))
1070 ;; what the fdefn's logical PC will be
1071 (fdefn-logical-pc (+ vaddr (ash fdefn-raw-addr-slot word-shift)))
1072 (rel32off (signed-sap-ref-32 fdefn-pc-sap 1))
1073 (target (+ fdefn-logical-pc 1 rel32off))
1074 (space-displacement
1075 (if (<= static-start vaddr static-end)
1076 (+ (- sb-vm:static-space-start) +backend-page-bytes+)
1077 fixedobj-space-displacement)))
1078 (when (in-code-space-p target)
1079 ;; This addend needs to account for the fact that the location
1080 ;; where fixup occurs is not where the fdefn will actually exist.
1081 (rel-fixup (+ core-offs (ash 3 word-shift) 1)
1082 (+ target space-displacement))))
1083 (return-from scan-obj))
1084 ((#.closure-widetag #.funcallable-instance-widetag)
1085 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj))
1086 (- n-word-bytes fun-pointer-lowtag))))
1087 (when (in-code-space-p word)
1088 (abs-fixup (+ core-offs (ash 1 word-shift)) word)))
1089 (when (eq widetag funcallable-instance-widetag)
1090 (let ((layout (truly-the layout
1091 (translate (%funcallable-instance-layout obj) spaces))))
1092 (unless (fixnump (layout-bitmap layout))
1093 (error "Can't process bignum bitmap"))
1094 (let ((bitmap (layout-bitmap layout)))
1095 (unless (eql bitmap -1)
1096 ;; tagged slots precede untagged slots,
1097 ;; so integer-length is the count of tagged slots.
1098 (setq nwords (1+ (integer-length bitmap))))))))
1099 ;; mixed boxed/unboxed objects
1100 (#.code-header-widetag
1101 (dolist (loc (code-fixup-locs obj spaces))
1102 (let ((val (sap-ref-32 (code-instructions obj) loc)))
1103 (when (in-code-space-p val)
1104 (abs32-fixup (sap- (sap+ (code-instructions obj) loc) (car spaces))
1105 val))))
1106 (dotimes (i (code-n-entries obj))
1107 (scanptrs (%code-entry-point obj i) 2 5))
1108 (setq nwords (code-header-words obj)))
1109 ;; boxed objects that can reference code/simple-funs
1110 ((#.value-cell-widetag #.symbol-widetag #.weak-pointer-widetag))
1112 (return-from scan-obj)))
1113 (scanptrs obj 1 (1- nwords))))
1114 (dolist (space (reverse (cdr spaces)))
1115 (let* ((logical-addr (space-addr space))
1116 (size (space-size space))
1117 (physical-addr (space-physaddr space spaces))
1118 (physical-end (sap+ physical-addr size))
1119 (vaddr-translation (+ (- (sap-int physical-addr)) logical-addr)))
1120 (unless (= (space-id space) immobile-varyobj-core-space-id)
1121 (dx-flet ((visit (obj widetag size)
1122 ;; Compute the object's intended virtual address
1123 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1124 vaddr-translation)))
1125 (scan-obj obj widetag size vaddr))))
1126 (map-objects-in-range
1127 #'visit
1128 (ash (sap-int physical-addr) (- n-fixnum-tag-bits))
1129 (ash (sap-int physical-end) (- n-fixnum-tag-bits))))
1130 (when (and (plusp (logior n-abs n-rel)) print)
1131 (format t "space @ ~x: ~d absolute + ~d relative fixups~%"
1132 logical-addr n-abs n-rel))
1133 (setq n-abs 0 n-rel 0))))))
1134 (when print
1135 (format t "total of ~D linker fixups~%" (length fixups)))
1136 fixups)
1138 ;;;;
1140 (macrolet ((do-core-header-entry (((id-var len-var ptr-var) buffer) &body body)
1141 `(let ((,ptr-var 1))
1142 (loop
1143 (let ((,id-var (%vector-raw-bits ,buffer ,ptr-var))
1144 (,len-var (%vector-raw-bits ,buffer (1+ ,ptr-var))))
1145 (incf ,ptr-var 2)
1146 (decf ,len-var 2)
1147 (when (= ,id-var end-core-entry-type-code) (return))
1148 ,@body
1149 (incf ,ptr-var ,len-var)))))
1150 (do-directory-entry (((index-var start-index input-nbytes) buffer) &body body)
1151 `(let ((words-per-dirent 5))
1152 (multiple-value-bind (n-entries remainder)
1153 (floor ,input-nbytes words-per-dirent)
1154 (aver (zerop remainder))
1155 (symbol-macrolet ((id (%vector-raw-bits ,buffer index))
1156 (nwords (%vector-raw-bits ,buffer (+ index 1)))
1157 (data-page (%vector-raw-bits ,buffer (+ index 2)))
1158 (addr (%vector-raw-bits ,buffer (+ index 3)))
1159 (npages (%vector-raw-bits ,buffer (+ index 4))))
1160 (do ((,index-var ,start-index (+ ,index-var words-per-dirent)))
1161 ((= ,index-var (+ ,start-index (* n-entries words-per-dirent))))
1162 ,@body)))))
1163 (with-mapped-core ((sap-var start npages stream) &body body)
1164 `(let (,sap-var)
1165 (unwind-protect
1166 (progn
1167 (setq ,sap-var
1168 (sb-posix:mmap nil
1169 (* ,npages +backend-page-bytes+)
1170 (logior sb-posix:prot-read sb-posix:prot-write)
1171 sb-posix:map-private
1172 (sb-sys:fd-stream-fd ,stream)
1173 ;; Skip the core header
1174 (+ ,start +backend-page-bytes+)))
1175 ,@body)
1176 (when ,sap-var
1177 (sb-posix:munmap ,sap-var (* ,npages +backend-page-bytes+)))))))
1179 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1180 ;;; separate it into pieces.
1181 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1182 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1183 ;;; The ".core" file is a native core file used for starting a binary that
1184 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1185 ;;; is for linking in to a binary that needs no "--core" argument.
1186 (defun split-core
1187 (input-pathname asm-pathname
1188 &key emit-sizes (verbose nil)
1189 &aux (split-core-pathname
1190 (merge-pathnames (make-pathname :type "core") asm-pathname))
1191 (elf-core-pathname
1192 (merge-pathnames
1193 (make-pathname :name (concatenate 'string (pathname-name asm-pathname) "-core")
1194 :type "o")
1195 asm-pathname))
1196 (core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
1197 (original-total-npages 0)
1198 (core-offset 0)
1199 (page-adjust 0)
1200 (code-start-fixup-ofs 0) ; where to fixup the core header
1201 (spaces)
1202 (copy-actions)
1203 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1204 (relocs (make-array 100000 :adjustable t :fill-pointer 0)))
1206 ;; Remove old files
1207 (ignore-errors (delete-file asm-pathname))
1208 (ignore-errors (delete-file split-core-pathname))
1209 (ignore-errors (delete-file elf-core-pathname))
1210 ;; Ensure that all files can be opened
1211 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
1212 (with-open-file (asm-file asm-pathname :direction :output :if-exists :supersede)
1213 (with-open-file (split-core split-core-pathname :direction :output
1214 :element-type '(unsigned-byte 8) :if-exists :supersede)
1215 (read-sequence core-header input)
1216 (cond ((= (%vector-raw-bits core-header 0) core-magic))
1217 (t ; possible embedded core
1218 (file-position input (- (file-length input)
1219 (* 2 n-word-bytes)))
1220 (aver (eql (read-sequence core-header input) (* 2 n-word-bytes)))
1221 (aver (= (%vector-raw-bits core-header 1) core-magic))
1222 (setq core-offset (%vector-raw-bits core-header 0))
1223 (when verbose
1224 (format t "~&embedded core starts at #x~x into input~%" core-offset))
1225 (file-position input core-offset)
1226 (read-sequence core-header input)
1227 (aver (= (%vector-raw-bits core-header 0) core-magic))))
1228 (do-core-header-entry ((id len ptr) core-header)
1229 (case id
1230 (#.build-id-core-entry-type-code
1231 (when verbose
1232 (let ((string (make-string (%vector-raw-bits core-header ptr)
1233 :element-type 'base-char)))
1234 (%byte-blt core-header (* (1+ ptr) n-word-bytes) string 0 (length string))
1235 (format t "Build ID [~a]~%" string))))
1236 (#.directory-core-entry-type-code
1237 (do-directory-entry ((index ptr len) core-header)
1238 (incf original-total-npages npages)
1239 (push (make-space id addr data-page page-adjust nwords) spaces)
1240 (when verbose
1241 (format t "id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1242 id data-page npages addr nwords
1243 (= id immobile-varyobj-core-space-id)))
1244 (cond ((= id immobile-varyobj-core-space-id)
1245 (setq code-start-fixup-ofs (+ index 3))
1246 ;; Keep this entry but delete the page count. We need to know
1247 ;; where the space was supposed to be mapped and at what size.
1248 ;; Subsequent core entries will need to adjust their start page
1249 ;; downward (just the PTEs's start page now).
1250 (setq page-adjust npages data-page 0 npages 0))
1252 ;; Keep track of where the fixedobj space wants to be.
1253 (when (= id immobile-fixedobj-core-space-id)
1254 (setq fixedobj-range (cons addr (ash nwords word-shift))))
1255 (when (plusp npages) ; enqueue
1256 (push (cons data-page (* npages +backend-page-bytes+))
1257 copy-actions))
1258 ;; adjust this entry's start page in the new core
1259 (decf data-page page-adjust)))))
1260 (#.page-table-core-entry-type-code
1261 (aver (= len 3))
1262 (symbol-macrolet ((nbytes (%vector-raw-bits core-header (1+ ptr)))
1263 (data-page (%vector-raw-bits core-header (+ ptr 2))))
1264 (aver (= data-page original-total-npages))
1265 (aver (= (ceiling (space-nwords
1266 (find dynamic-core-space-id spaces :key #'space-id))
1267 (/ +backend-page-bytes+ n-word-bytes))
1268 (%vector-raw-bits core-header ptr))) ; number of PTEs
1269 (when verbose
1270 (format t "PTE: page=~5x~40tbytes=~8x~%" data-page nbytes))
1271 (push (cons data-page nbytes) copy-actions)
1272 (decf data-page page-adjust)))))
1273 (let ((buffer (make-array +backend-page-bytes+
1274 :element-type '(unsigned-byte 8)))
1275 (filepos))
1276 ;; Write the new core file
1277 (write-sequence core-header split-core)
1278 (dolist (action (reverse copy-actions)) ; nondestructive
1279 ;; page index convention assumes absence of core header.
1280 ;; i.e. data page 0 is the file page immediately following the core header
1281 (let ((offset (* (1+ (car action)) +backend-page-bytes+))
1282 (nbytes (cdr action)))
1283 (when verbose
1284 (format t "File offset ~10x: ~10x bytes~%" offset nbytes))
1285 (setq filepos (+ core-offset offset))
1286 (file-position input filepos)
1287 (copy-bytes input split-core nbytes buffer)))
1288 ;; Trailer (runtime options and magic number)
1289 (let ((nbytes (read-sequence buffer input)))
1290 ;; expect trailing magic number
1291 (let ((ptr (floor (- nbytes n-word-bytes) n-word-bytes)))
1292 (aver (= (%vector-raw-bits buffer ptr) core-magic)))
1293 ;; File position of the core header needs to be set to 0
1294 ;; regardless of what it was
1295 (setf (%vector-raw-bits buffer 4) 0)
1296 (when verbose
1297 (format t "Trailer words:(~{~X~^ ~})~%"
1298 (loop for i below (floor nbytes n-word-bytes)
1299 collect (%vector-raw-bits buffer i))))
1300 (write-sequence buffer split-core :end nbytes)
1301 (finish-output split-core))
1302 ;; Sanity test
1303 (aver (= (+ core-offset
1304 (* page-adjust +backend-page-bytes+)
1305 (file-length split-core))
1306 (file-length input)))
1307 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1308 (file-position input filepos)))
1310 ;; Map the original core file to memory
1311 (with-mapped-core (sap core-offset original-total-npages input)
1312 (let* ((data-spaces
1313 (delete immobile-varyobj-core-space-id (reverse spaces)
1314 :key #'space-id))
1315 (map (cons sap (sort (copy-list spaces) #'> :key #'space-addr)))
1316 (pte-nbytes (cdar copy-actions)))
1317 (collect-relocations map relocs)
1318 (with-open-file (output elf-core-pathname
1319 :direction :output :if-exists :supersede
1320 :element-type '(unsigned-byte 8))
1321 (vector-push-extend
1322 `(,(ash code-start-fixup-ofs word-shift) 0 . ,R_X86_64_64) relocs)
1323 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces))
1324 +backend-page-bytes+ ; core header
1325 pte-nbytes)
1326 relocs output)
1327 (setf (%vector-raw-bits core-header code-start-fixup-ofs) 0)
1328 (write-sequence core-header output) ; Copy prepared header
1329 (force-output output)
1330 ;; Change SB-C::*COMPILE[-FILE]-TO-MEMORY-SPACE* to :DYNAMIC
1331 ;; in case the resulting executable needs to compile anything.
1332 ;; (Call frame info will be missing, but at least it's something.)
1333 (dolist (name '("*COMPILE-FILE-TO-MEMORY-SPACE*"
1334 "*COMPILE-TO-MEMORY-SPACE*"))
1335 (%set-symbol-global-value
1336 (find-target-symbol "SB-C" name map)
1337 (find-target-symbol "KEYWORD" "DYNAMIC" map :logical)))
1339 (dolist (space data-spaces) ; Copy pages from memory
1340 (let ((start (space-physaddr space map))
1341 (size (space-nbytes-aligned space)))
1342 (aver (eql (sb-unix:unix-write (sb-sys:fd-stream-fd output)
1343 start 0 size)
1344 size))))
1345 (when verbose
1346 (format t "Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1347 pte-nbytes pte-nbytes (floor pte-nbytes 10)))
1348 (copy-bytes input output pte-nbytes)) ; Copy PTEs from input
1350 ;; There's no relation between emit-sizes and which section to put
1351 ;; C symbol references in, however it's a safe bet that if sizes
1352 ;; are supported then so is the .rodata directive.
1353 (format asm-file (if emit-sizes " .rodata~%" " .data~%"))
1354 (extract-required-c-symbols map asm-file)
1355 (write-assembler-text map fixedobj-range asm-file emit-sizes)))
1357 (format asm-file "~% ~A~%" +noexec-stack-note+))))
1359 ) ; end MACROLET
1361 ;;;;
1363 (defun cl-user::elfinate (&optional (args (cdr sb-ext:*posix-argv*)))
1364 (cond ((string= (car args) "split")
1365 (pop args)
1366 (let ((sizes (string= (car args) "--sizes")))
1367 (when sizes
1368 (pop args))
1369 (destructuring-bind (input asm) args
1370 (split-core input asm :emit-sizes sizes))))
1371 #+nil
1372 ((string= (car args) "relocate")
1373 (destructuring-bind (input output binary start-sym) (cdr args)
1374 (relocate-core
1375 input output binary (parse-integer start-sym :radix 16))))
1377 (error "Unknown command: ~S" args))))
1379 ;; If loaded as a script, do this
1380 (eval-when (:execute)
1381 (let ((args (cdr sb-ext:*posix-argv*)))
1382 (when args
1383 (let ((*print-pretty* nil))
1384 (format t "Args: ~S~%" args)
1385 (cl-user::elfinate args)))))