Don't fail when transforming (coerce x nil).
[sbcl.git] / tools-for-build / editcore.lisp
blob0ae4ccea057f1032b7f8bc8a8c9a3e4e58a70dd5
1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
11 ;;;;
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel :load-toplevel :execute)
19 (require :sb-posix)) ; for mmap
21 (defpackage "SB-EDITCORE"
22 (:use "CL" "SB-VM" "SB-INT" "SB-EXT" "SB-KERNEL" "SB-SYS"
23 "SB-ALIEN")
24 (:import-from "SB-ALIEN-INTERNALS"
25 #:alien-type-bits #:parse-alien-type
26 #:alien-value-sap #:alien-value-type)
27 (:import-from "SB-C" #:+backend-page-bytes+)
28 (:import-from "SB-VM" #:map-objects-in-range #:reconstitute-object
29 #:%closure-callee)
30 (:import-from "SB-DISASSEM" #:get-inst-space #:find-inst
31 #:make-dstate #:%make-segment
32 #:seg-virtual-location #:seg-length #:seg-sap-maker
33 #:map-segment-instructions
34 #:dstate-next-addr #:dstate-cur-offs)
35 (:import-from "SB-X86-64-ASM" #:near-jump-displacement)
36 (:import-from "SB-IMPL" #:package-hashtable #:package-%name
37 #:package-hashtable-cells
38 #:hash-table-table #:hash-table-number-entries))
40 (in-package "SB-EDITCORE")
42 (declaim (muffle-conditions compiler-note))
44 (eval-when (:execute)
45 (setq *evaluator-mode* :compile))
47 (defconstant core-magic
48 (logior (ash (char-code #\S) 24)
49 (ash (char-code #\B) 16)
50 (ash (char-code #\C) 8)
51 (char-code #\L)))
53 (defconstant build-id-core-entry-type-code 3860)
54 (defconstant new-directory-core-entry-type-code 3861)
55 (defconstant initial-fun-core-entry-type-code 3863)
56 (defconstant page-table-core-entry-type-code 3880)
57 (defconstant end-core-entry-type-code 3840)
59 (defconstant dynamic-core-space-id 1)
60 (defconstant static-core-space-id 2)
61 (defconstant immobile-fixedobj-core-space-id 4)
62 (defconstant immobile-varyobj-core-space-id 5)
64 (defglobal +noexec-stack-note+ ".section .note.GNU-stack, \"\", @progbits")
66 (defstruct (core-space ; "space" is a CL symbol
67 (:conc-name space-)
68 (:constructor make-space (id addr data-page page-adjust nwords)))
69 id addr data-page page-adjust nwords)
70 (defun space-size (space) (* (space-nwords space) n-word-bytes))
71 (defun space-end (space) (+ (space-addr space) (space-size space)))
72 (defun space-nbytes-aligned (space)
73 (logandc2 (+ (space-size space) (1- +backend-page-bytes+))
74 (1- +backend-page-bytes+)))
75 (defun space-physaddr (space spaces)
76 (sap+ (car spaces) (* (space-data-page space) +backend-page-bytes+)))
78 ;;; Given ADDR which is an address in the target core, return the address at which
79 ;;; ADDR is currently mapped while performing the split.
80 ;;; SPACES is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
81 (defun translate-ptr (addr spaces)
82 (let ((space (find addr (cdr spaces) :key #'space-addr :test #'>=)))
83 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
84 ;; macroize or something.
85 (+ (sap-int (car spaces)) (* (space-data-page space) +backend-page-bytes+)
86 (- addr (space-addr space)))))
88 ;;;
89 (defun get-space (id spaces)
90 (find id (cdr spaces) :key #'space-id))
91 (defun compute-nil-object (spaces)
92 (let ((space (get-space static-core-space-id spaces)))
93 (%make-lisp-obj (logior (space-addr space) #x17))))
95 ;;; Given OBJ which is tagged pointer into the target core, translate it into
96 ;;; the range at which the core is now mapped during execution of this tool,
97 ;;; so that host accessors can dereference its slots.
98 ;;; Use extreme care: while it works to use host accessors on the target core,
99 ;;; we must avoid type checks on instances because LAYOUTs need translation.
100 ;;; Printing boxed objects from the target core will almost always crash.
101 (defun translate (obj spaces)
102 (%make-lisp-obj (translate-ptr (get-lisp-obj-address obj) spaces)))
104 (defstruct (core-sym (:copier nil) (:predicate nil)
105 (:constructor make-core-sym (package name external)))
106 (package nil)
107 (name nil :read-only t)
108 (external nil :read-only t))
110 (defun c-name (lispname pp-state)
111 (when (and (symbolp lispname)
112 (eq (symbol-package lispname) *cl-package*))
113 (return-from c-name (concatenate 'string "cl:" (string-downcase lispname))))
114 ;; Get rid of junk from LAMBDAs
115 (setq lispname
116 (named-let recurse ((x lispname))
117 (cond ((typep x '(cons (eql lambda)))
118 (let ((args (second x)))
119 `(lambda ,(if args sb-c::*debug-name-sharp* "()")
120 ,@(recurse (cddr x)))))
121 ((eq x :in) "in")
122 ((consp x)
123 (recons x (recurse (car x)) (recurse (cdr x))))
124 (t x))))
126 ;; Shorten obnoxiously long printed representations of methods
127 ;; by changing FAST-METHOD to METHOD (because who cares?)
128 ;; and shorten
129 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
130 ;; to
131 ;; (method my-method-name (type-name))
132 ;; I suspect that can use DWARF info to provide even more description,
133 ;; though I also suspect it's relatively unambiguous anyway
134 ;; especially given that file information is available separately.
135 (flet ((unpackageize (thing)
136 (when (typep thing 'core-sym)
137 (setf (core-sym-package thing) nil))
138 thing))
139 (when (typep lispname '(cons (eql sb-pcl::fast-method)))
140 (setq lispname `(method ,@(cdr lispname)))
141 (setf (second lispname) (unpackageize (second lispname)))
142 (dolist (qual (car (last lispname)))
143 (unpackageize qual))))
145 ;; Perform backslash escaping on the exploded string
146 ;; Strings were stringified without surrounding quotes,
147 ;; but there might be quotes embedded anywhere, so escape them,
148 ;; and also remove newlines and non-ASCII.
149 (let ((characters
150 (mapcan (lambda (char)
151 (cond ((not (base-char-p char)) (list #\?))
152 ((member char '(#\\ #\")) (list #\\ char))
153 ((eql char #\newline) (list #\_))
154 (t (list char))))
155 (coerce (if (and (stringp lispname)
156 ;; L denotes a symbol which can not be global on macOS.
157 (char= (char lispname 0) #\L))
158 (concatenate 'string "_" lispname)
159 (write-to-string lispname
160 :pretty t :pprint-dispatch (cdr pp-state)
161 ;; FIXME: should be :level 1, however see
162 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
163 :escape t :level 2 :length 5
164 :case :downcase :gensym nil
165 :right-margin 10000))
166 'list))))
167 (let* ((string (coerce characters 'string))
168 (occurs (incf (gethash string (car pp-state) 0))))
169 (if (> occurs 1)
170 (concatenate 'string string "_" (write-to-string occurs))
171 string))))
173 (defmethod print-object ((sym core-sym) stream)
174 (format stream "~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
175 (core-sym-package sym)
176 (core-sym-external sym)
177 (core-sym-name sym)))
179 (defun fun-name-from-core (fun spaces core-nil packages
180 &aux (name (%simple-fun-name fun)))
181 (named-let recurse ((depth 0) (x name))
182 (unless (= (logand (get-lisp-obj-address x) 3) 3)
183 (return-from recurse x)) ; immediate object
184 (when (eq x core-nil)
185 (return-from recurse nil))
186 (setq x (translate x spaces))
187 (ecase (lowtag-of x)
188 (#.list-pointer-lowtag
189 (cons (recurse (1+ depth) (car x))
190 (recurse (1+ depth) (cdr x))))
191 ((#.instance-pointer-lowtag #.fun-pointer-lowtag) "?")
192 (#.other-pointer-lowtag
193 (cond
194 ((symbolp x)
195 (let ((name (translate (symbol-name x) spaces)))
196 (if (eq (symbol-package x) core-nil) ; uninterned
197 (string-downcase name)
198 (let* ((package (truly-the package
199 (translate (symbol-package x) spaces)))
200 (package-name (translate (package-%name package) spaces))
201 (compute-externals
202 (not (or (string= package-name "KEYWORD")
203 (string= package-name "COMMON-LISP"))))
204 (externals (if compute-externals
205 (gethash package-name packages)
206 t)))
207 (unless externals
208 (dovector (x (translate
209 (package-hashtable-cells
210 (truly-the package-hashtable
211 (translate (package-external-symbols package)
212 spaces)))
213 spaces))
214 (unless (fixnump x)
215 (push (if (eq x core-nil) ; random packages can export NIL. wow.
216 "NIL"
217 (translate (symbol-name (translate x spaces)) spaces))
218 externals)))
219 (setf externals (coerce externals 'vector)
220 (gethash package-name packages) externals))
221 ;; The name-cleaning code wants to compare against symbols
222 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
223 ;; Other than that, we avoid finding host symbols
224 ;; because the externalness could be wrong and misleading.
225 ;; It's a very subtle point, but best to get it right.
226 (if (member package-name '("COMMON-LISP" "KEYWORD" "SB-PCL")
227 :test #'string=)
228 ; NIL can't occur, because it has list-pointer-lowtag
229 (find-symbol name package-name) ; if existing symbol, use it
230 (make-core-sym (if (string= package-name "KEYWORD") nil package-name)
231 name
232 (if compute-externals
233 (find name externals :test 'string=)
234 t)))))))
235 ((stringp x) x)
236 (t "?"))))))
238 (defstruct (core-state
239 (:conc-name "CS-")
240 (:predicate nil)
241 (:copier nil)
242 (:constructor make-core-state
243 (code-space-start code-space-end
244 fixedobj-space-start fixedobj-space-end
245 &aux (inst-space (get-inst-space))
246 (call-inst (find-inst #b11101000 inst-space))
247 (jmp-inst (find-inst #b11101001 inst-space))
248 (pop-inst (find-inst #x5d inst-space)))))
249 (code-space-start 0 :type fixnum :read-only t)
250 (code-space-end 0 :type fixnum :read-only t)
251 (fixedobj-space-start 0 :type fixnum :read-only t)
252 (fixedobj-space-end 0 :type fixnum :read-only t)
253 (dstate (make-dstate nil) :read-only t)
254 (seg (%make-segment :sap-maker (lambda () (error "Bad sap maker"))
255 :virtual-location 0) :read-only t)
256 (fixup-addrs nil)
257 (call-inst nil :read-only t)
258 (jmp-inst nil :read-only t)
259 (pop-inst nil :read-only t))
261 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
262 ;;; to STREAM. SIZE specifies which direcive to emit.
263 ;;; EXCEPTIONS specify offsets at which a specific string should be
264 ;;; written to the file in lieu of memory contents, useful for emitting
265 ;;; expressions involving the assembler '.' symbol (the current PC).
266 (defun emit-asm-directives (size sap count stream &optional exceptions)
267 (declare (optimize speed))
268 (declare (stream stream))
269 (let ((*print-base* 16)
270 (string-buffer (make-array 18 :element-type 'base-char))
271 (fmt #.(coerce "0x%lx" 'base-string))
272 (per-line 0))
273 (declare ((integer 0 32) per-line)
274 (fixnum count))
275 string-buffer fmt
276 (ecase size
277 (:qword
278 (format stream " .quad")
279 (dotimes (i count)
280 (declare ((unsigned-byte 20) i))
281 (declare (simple-vector exceptions))
282 (write-char (if (> per-line 0) #\, #\space) stream)
283 (acond ((and (< i (length exceptions)) (aref exceptions i))
284 (write-string it stream))
286 #+nil
287 (let ((len
288 ;; output-reasonable-integer-in-base is so slow comparated
289 ;; to printf() that the second-most amount of time spent
290 ;; writing the asm file occurs in that function.
291 ;; Unbelievable that we can't do better than that.
292 (with-pinned-objects (string-buffer fmt)
293 (alien-funcall
294 (extern-alien "snprintf"
295 (function int system-area-pointer unsigned system-area-pointer unsigned))
296 (vector-sap string-buffer)
297 (length string-buffer)
298 (vector-sap fmt)
299 (sap-ref-word sap (* i n-word-bytes))))))
300 (write-string string-buffer stream :end len))
301 (write-string "0x" stream)
302 (write (sap-ref-word sap (* i n-word-bytes)) :stream stream)))
303 (when (and (= (incf per-line) 16) (< (1+ i) count))
304 (format stream "~% .quad")
305 (setq per-line 0))))
306 (:byte
307 (aver (not exceptions))
308 (format stream " .byte")
309 (dotimes (i count)
310 (write-char (if (> per-line 0) #\, #\space) stream)
311 (write-string "0x" stream)
312 (write (sap-ref-8 sap i) :stream stream)
313 (when (and (= (incf per-line) 32) (< (1+ i) count))
314 (format stream "~% .byte")
315 (setq per-line 0))))))
316 (terpri stream))
318 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count)
319 (emit-asm-directives :qword
320 (sap+ (code-instructions code-component)
321 (- (* sb-vm:code-constants-offset sb-vm:n-word-bytes)))
322 sb-vm:code-constants-offset
323 output #())
324 (let ((list (loop for i from 2 by 2 repeat count
325 collect
326 (let* ((location (translate (svref vector (1+ i)) spaces))
327 (offset (car location))
328 (nbytes (- (1+ (cdr location)) offset))
329 (name (translate
330 (symbol-name (translate (svref vector i) spaces))
331 spaces)))
332 (list* offset name nbytes)))))
333 (loop for (offset name . nbytes) in (sort list #'< :key #'car)
334 do (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
335 (format nil "~(\"~a\"~)" name) (if emit-sizes nbytes))
336 (emit-asm-directives
337 :qword
338 (sap+ (code-instructions code-component) offset)
339 (ceiling nbytes sb-vm:n-word-bytes)
340 output #()))))
342 (defun code-fixup-locs (code spaces)
343 (let ((locs (sb-vm::%code-fixups code)))
344 (unless (eql locs 0)
345 (sb-c::unpack-code-fixup-locs
346 (if (fixnump locs) locs (translate locs spaces))))))
348 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
349 ;;; all instructions that should be emitted using assembly language
350 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
351 ;;; - function prologue instructions that setup the call frame
352 ;;; - jmp/call instructions that transfer control to the fixedoj space
353 ;;; delimited by bounds in STATE.
354 ;;; At execution time the function will have virtual address LOAD-ADDR.
355 (defun list-annotated-instructions (sap length state load-addr emit-cfi)
356 (let ((dstate (cs-dstate state))
357 (seg (cs-seg state))
358 (call-inst (cs-call-inst state))
359 (jmp-inst (cs-jmp-inst state))
360 (pop-inst (cs-pop-inst state))
361 (next-fixup-addr
362 (or (car (cs-fixup-addrs state)) most-positive-word))
363 (list))
364 (setf (seg-virtual-location seg) load-addr
365 (seg-length seg) length
366 (seg-sap-maker seg) (lambda () sap))
367 ;; KLUDGE: "8f 45 08" is the standard prologue
368 (when (and emit-cfi (= (logand (sap-ref-32 sap 0) #xFFFFFF) #x08458f))
369 (push (list* 0 3 "pop" "8(%rbp)") list))
370 (map-segment-instructions
371 (lambda (dchunk inst)
372 (cond
373 ((< next-fixup-addr (dstate-next-addr dstate))
374 (let ((operand (sap-ref-32 sap (- next-fixup-addr load-addr))))
375 (when (<= (cs-code-space-start state) operand (cs-code-space-end state))
376 (aver (eql (sap-ref-8 sap (- next-fixup-addr load-addr 1)) #xB8)) ; mov rax, imm32
377 (push (list* (dstate-cur-offs dstate) 5 "mov" operand) list)))
378 (pop (cs-fixup-addrs state))
379 (setq next-fixup-addr (or (car (cs-fixup-addrs state)) most-positive-word)))
380 ((or (eq inst jmp-inst) (eq inst call-inst))
381 (let ((target-addr (+ (near-jump-displacement dchunk dstate)
382 (dstate-next-addr dstate))))
383 (when (<= (cs-fixedobj-space-start state)
384 target-addr
385 (cs-fixedobj-space-end state))
386 (push (list* (dstate-cur-offs dstate)
387 5 ; length
388 (if (eq inst call-inst) "call" "jmp")
389 target-addr)
390 list))))
391 ((and (eq inst pop-inst) (eq (logand dchunk #xFF) #x5D))
392 (push (list* (dstate-cur-offs dstate) 1 "pop" "%rbp") list))))
394 dstate
395 nil)
396 (nreverse list)))
398 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
399 ;;; of memory at PADDR (physical addr) to STREAM.
400 ;;; The function's address as per the core file is VADDR.
401 ;;; (Its eventual address is indeterminate)
402 ;;; If EMIT-CFI is true, then also emit cfi directives.
404 ;;; Notice that we can use one fewer cfi directive than usual because
405 ;;; Lisp always carries a frame pointer as set up by the caller.
407 ;;; C convention
408 ;;; ============
409 ;;; pushq %rbp
410 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
411 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
412 ;;; movq %rsp, %rbp
413 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
415 ;;; Lisp convention
416 ;;; ===============
417 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
418 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
419 ;;; .cfi_def_cfa 6, 16
420 ;;; .cfi_offset 6, -16
422 ;;; Of course there is a flip-side to this: unwinders think that the new frame
423 ;;; is already begun in the caller. Interruption between these two instructions:
424 ;;; MOV RBP, RSP / CALL #xzzzzz
425 ;;; will show the backtrace as if two invocations of the caller are on stack.
426 ;;; This is tricky to fix because while we can relativize the CFA to the
427 ;;; known frame size, we can't do that based only on a disassembly.
429 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state)
430 (when emit-cfi
431 (format stream " .cfi_startproc~%"))
432 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
433 ;; bytes to be written as an assembly language instruction rather than opaquely,
434 ;; thereby affecting the ELF data (cfi or relocs) produced.
435 (let ((instructions
436 (list-annotated-instructions (int-sap paddr) count core-state vaddr emit-cfi))
437 (ptr paddr))
438 (symbol-macrolet ((cur-offset (- ptr paddr)))
439 (loop
440 (let ((until (if instructions (caar instructions) count)))
441 ;; if we're not aligned, then write some number of bytes
442 ;; to cause alignment. But do not write past the next offset
443 ;; that needs to be written as an instruction.
444 (when (logtest ptr #x7) ; unaligned
445 (let ((n (min (- (nth-value 1 (ceiling ptr 8)))
446 (- until cur-offset))))
447 (aver (<= 0 n 7))
448 (emit-asm-directives :byte (int-sap ptr) n stream)
449 (incf ptr n)))
450 ;; Now we're either aligned to a multiple of 8, or the current
451 ;; offset needs to be written as a textual instruction.
452 (let ((n (- until cur-offset)))
453 (aver (>= n 0))
454 (multiple-value-bind (qwords remainder) (floor n 8)
455 (when (plusp qwords)
456 (emit-asm-directives :qword (int-sap ptr) qwords stream #())
457 (incf ptr (* qwords 8)))
458 (when (plusp remainder)
459 (emit-asm-directives :byte (int-sap ptr) remainder stream)
460 (incf ptr remainder))))
461 ;; If the current offset is COUNT, we're done.
462 (when (= cur-offset count) (return))
463 (aver (= cur-offset until))
464 (destructuring-bind (length opcode . operand) (cdr (pop instructions))
465 (when (cond ((member opcode '("jmp" "call") :test #'string=)
466 (format stream " ~A 0x~X~%" opcode operand))
467 ((string= opcode "pop")
468 (format stream " ~A ~A~%" opcode operand)
469 (cond ((string= operand "8(%rbp)")
470 (format stream " .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
471 ((string= operand "%rbp")
472 ;(format stream " .cfi_def_cfa 7, 8~%")
473 nil)
474 (t)))
475 ((string= opcode "mov")
476 (format stream " mov $(__lisp_code_start+0x~x),%eax~%"
477 (- operand (cs-code-space-start core-state))))
478 (t))
479 (bug "Random annotated opcode ~S" opcode))
480 (incf ptr length))
481 (when (= cur-offset count) (return))))))
482 (when emit-cfi
483 (format stream " .cfi_endproc~%")))
485 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
486 ;;; TODO: relocate fdefns and instances of standard-generic-function
487 ;;; into the space that is dumped into an ELF section.
488 (defun write-assembler-text
489 (spaces fixedobj-range output
490 &optional emit-sizes (emit-cfi t)
491 &aux (code-space (get-space immobile-varyobj-core-space-id spaces))
492 (code-space-start (space-addr code-space)) ; target virtual address
493 (code-space-end (+ code-space-start (space-size code-space)))
494 (code-addr code-space-start)
495 (core-state
496 (make-core-state code-space-start code-space-end
497 (car fixedobj-range)
498 (+ (car fixedobj-range) (cdr fixedobj-range))))
499 (total-code-size 0)
500 (pp-state (cons (make-hash-table :test 'equal)
501 ;; copy no entries for macros/special-operators (flet, etc)
502 (sb-pretty::make-pprint-dispatch-table)))
503 (packages (make-hash-table :test 'equal))
504 (core-nil (compute-nil-object spaces))
505 (prev-namestring "")
506 (n-linker-relocs 0)
507 end-loc)
508 (set-pprint-dispatch 'string
509 ;; Write strings without string quotes
510 (lambda (stream string) (write-string string stream))
512 (cdr pp-state))
513 (labels ((ldsym-quote (name)
514 (concatenate 'string '(#\") name '(#\")))
515 (dumpwords (addr count stream &optional (exceptions #()) logical-addr)
516 (let ((sap (int-sap addr)))
517 (aver (sap>= sap (car spaces)))
518 ;; Make intra-code-space pointers computed at link time
519 (dotimes (i (if logical-addr count 0))
520 (unless (and (< i (length exceptions)) (svref exceptions i))
521 (let ((word (sap-ref-word sap (* i n-word-bytes))))
522 (when (and (= (logand word 3) 3) ; is a pointer
523 (<= code-space-start word (1- code-space-end))) ; to code space
524 #+nil
525 (format t "~&~(~x: ~x~)~%" (+ logical-addr (* i n-word-bytes))
526 word)
527 (incf n-linker-relocs)
528 (setf exceptions (adjust-array exceptions (max (length exceptions) (1+ i))
529 :initial-element nil)
530 (svref exceptions i)
531 (format nil "__lisp_code_start+0x~x"
532 (- word code-space-start)))))))
533 (emit-asm-directives :qword sap count stream exceptions)))
534 (make-code-obj (addr)
535 (let ((translation (translate-ptr addr spaces)))
536 (aver (= (%widetag-of (sap-ref-word (int-sap translation) 0))
537 code-header-widetag))
538 (%make-lisp-obj (logior translation other-pointer-lowtag))))
539 (calc-obj-size (code)
540 ;; No need to pin - it's not managed by GC
541 (nth-value 2
542 (reconstitute-object
543 (ash (logandc2 (get-lisp-obj-address code) lowtag-mask)
544 (- n-fixnum-tag-bits)))))
545 (%widetag-of (word)
546 (logand word widetag-mask)))
547 (format output " .text~% .file \"sbcl.core\"
548 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
550 ;; Scan the assembly routines.
551 (let* ((code-component (make-code-obj code-addr))
552 (size (calc-obj-size code-component))
553 (hashtable
554 (truly-the hash-table
555 (translate (car (translate (%code-debug-info code-component)
556 spaces))
557 spaces)))
558 (cells (translate (hash-table-table hashtable) spaces))
559 (count (hash-table-number-entries hashtable)))
560 (incf code-addr size)
561 (setf total-code-size size)
562 (emit-lisp-asm-routines spaces code-component output emit-sizes cells count))
564 (loop
565 (when (>= code-addr code-space-end) (return))
566 ;(format t "~&vaddr ~x paddr ~x~%" code-addr (get-lisp-obj-address (make-code-obj code-addr)))
567 (let* ((code (make-code-obj code-addr))
568 (objsize (calc-obj-size code)))
569 (setq end-loc (+ code-addr objsize))
570 (incf total-code-size objsize)
571 (cond
572 ((< (code-header-words code) 4) ; filler object
573 ;; Shouldn't occur unless defrag was not performed
574 (format output "#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
575 code-addr
576 simple-array-unsigned-byte-8-widetag
577 (ash (- objsize (* 2 n-word-bytes))
578 n-fixnum-tag-bits)
579 (- objsize (* 2 n-word-bytes))))
580 ((%instancep (%code-debug-info code)) ; assume it's a COMPILED-DEBUG-INFO
581 (aver (plusp (code-n-entries code)))
582 (let* ((source
583 (sb-c::compiled-debug-info-source
584 (truly-the sb-c::compiled-debug-info
585 (translate (%code-debug-info code) spaces))))
586 (namestring
587 (sb-c::debug-source-namestring
588 (truly-the sb-c::debug-source (translate source spaces)))))
589 (setq namestring (if (eq namestring core-nil)
590 "sbcl.core"
591 (translate namestring spaces)))
592 (unless (string= namestring prev-namestring)
593 (format output " .file \"~a\"~%" namestring)
594 (setq prev-namestring namestring)))
595 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code) lowtag-mask))
596 (boxed-end (+ code-physaddr
597 (ash (code-header-words code) word-shift)))
598 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code 0))
599 lowtag-mask)))
600 (format output "#x~x:~%" code-addr)
601 (dumpwords code-physaddr (code-header-words code) output #() code-addr)
602 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
603 (when (> first-fun boxed-end)
604 (dumpwords boxed-end (floor (- first-fun boxed-end) n-word-bytes)
605 output)))
606 (setf (cs-fixup-addrs core-state)
607 (mapcar (lambda (x)
608 (+ code-addr (ash (code-header-words code) word-shift) x))
609 (code-fixup-locs code spaces)))
610 ;; Loop over all embedded functions.
611 ;; Because simple-fun offsets are relative to the code start
612 ;; (and not in a linked list as they were in the past),
613 ;; iteratation in a "foreign" code object works just fine,
614 ;; subject to the caution about reading boxed words.
615 (dotimes (j (code-n-entries code))
616 (let* ((fun (%code-entry-point code j))
617 (fun-addr (logandc2 (get-lisp-obj-address fun) lowtag-mask))
618 (end (if (< (1+ j) (code-n-entries code))
619 (logandc2 (get-lisp-obj-address (%code-entry-point code (1+ j)))
620 lowtag-mask)
621 (+ (translate-ptr code-addr spaces) objsize)))
622 (entrypoint
623 (+ fun-addr (* simple-fun-code-offset n-word-bytes)))
624 (size (- end entrypoint))
625 (lispname (fun-name-from-core fun spaces core-nil packages))
626 (quotname (ldsym-quote (c-name lispname pp-state))))
627 ;; Globalize the C symbol only if the name is a legal function designator
628 ;; per the standard definition.
629 ;; This is a technique to try to avoid appending a uniquifying suffix
630 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
631 (format output "~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
632 (typep lispname '(or symbol core-sym (cons (eql setf))))
633 quotname emit-sizes)
634 (dumpwords fun-addr
635 simple-fun-code-offset output
636 (load-time-value
637 `#(nil ,(format nil ".+~D"
638 (* (1- simple-fun-code-offset)
639 n-word-bytes)))
641 nil)
642 (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
643 quotname (if emit-sizes size))
644 ;; entrypoint is the current physical address.
645 ;; Also pass in the virtual address in the core
646 ;; (which will differ from the actual load-time address)
647 (emit-lisp-function entrypoint
648 (+ code-addr (- entrypoint
649 (logandc2 (get-lisp-obj-address code)
650 lowtag-mask)))
651 size output emit-cfi core-state)))
652 (terpri output)
653 ;; All fixups should have been consumed by writing the code out
654 (aver (null (cs-fixup-addrs core-state))))
656 (error "Strange code component: ~S" code)))
657 (incf code-addr objsize))))
659 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
660 (format output "__lisp_code_end:~%")
662 ;; Pad so that non-lisp code can't be colocated on a GC page.
663 ;; (Lack of Lisp object headers in C code is the issue)
664 (let ((aligned-end (logandc2 (+ end-loc 4095) 4095)))
665 (when (> aligned-end end-loc)
666 (multiple-value-bind (nwords remainder)
667 (floor (- aligned-end end-loc) n-word-bytes)
668 (aver (>= nwords 2))
669 (aver (zerop remainder))
670 (decf nwords 2)
671 (format output " .quad ~d, ~d # (simple-array fixnum (~d))~%"
672 simple-array-fixnum-widetag
673 (ash nwords n-fixnum-tag-bits)
674 nwords)
675 (when (plusp nwords)
676 (format output " .fill ~d~%" (* nwords n-word-bytes))))))
677 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
678 (values total-code-size n-linker-relocs))
680 ;;; Return either the physical or logical address of the specified symbol.
681 (defun find-target-symbol (package-name symbol-name spaces
682 &optional (address-mode :physical))
683 (let* ((space (find immobile-fixedobj-core-space-id (cdr spaces) :key #'space-id))
684 (start (translate-ptr (space-addr space) spaces))
685 (end (+ start (space-size space)))
686 (physaddr start))
687 (loop
688 (when (>= physaddr end) (bug "Can't find symbol"))
689 (multiple-value-bind (obj tag size)
690 (reconstitute-object (ash physaddr (- n-fixnum-tag-bits)))
691 (when (and (= tag symbol-widetag)
692 (string= symbol-name (translate (symbol-name obj) spaces))
693 (%instancep (symbol-package obj))
694 (string= package-name
695 (translate
696 (package-%name
697 (truly-the package (translate (symbol-package obj) spaces)))
698 spaces)))
699 (return (%make-lisp-obj
700 (logior (ecase address-mode
701 (:physical physaddr)
702 (:logical (+ (space-addr space) (- physaddr start))))
703 other-pointer-lowtag))))
704 (incf physaddr size)))))
706 (defun extract-required-c-symbols (spaces asm-file &optional (verbose nil))
707 (flet ((symbol-fdefn-fun (symbol)
708 (let ((vector (translate (symbol-info-vector symbol) spaces)))
709 ;; TODO: allow for (plist . info-vector) in the info slot
710 (aver (simple-vector-p vector))
711 (translate (fdefn-fun (translate (info-vector-fdefn vector) spaces))
712 spaces))))
713 (let ((linkage-info
714 (translate (symbol-global-value
715 (find-target-symbol "SB-SYS" "*LINKAGE-INFO*" spaces))
716 spaces))
717 (dyn-syminfo
718 (symbol-fdefn-fun
719 (find-target-symbol "SB-SYS"
720 "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"
721 spaces))))
722 (aver (= (get-closure-length dyn-syminfo) 3))
723 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo 1) spaces))
724 (ht2 (translate (%closure-index-ref dyn-syminfo 0) spaces))
725 (table0 (translate (hash-table-table (truly-the hash-table linkage-info))
726 spaces))
727 (table1 (translate (hash-table-table (truly-the hash-table ht1)) spaces))
728 (table2 (translate (hash-table-table (truly-the hash-table ht2)) spaces))
729 (linkage)
730 (foreign))
731 (declare (simple-vector table0 table1 table2))
732 (flet ((show (x)
733 (push x foreign)
734 (when verbose
735 (format t "~A~%" x)))
736 (scan-table (table name fun &aux (n 0) (end (length table)))
737 (when verbose
738 (format t "~&~A:~%~A~%"
739 name (make-string (1+ (length name)) :initial-element #\-)))
740 (do ((i 2 (+ i 2)))
741 ((= i end))
742 (let ((val (svref table i)))
743 (unless (unbound-marker-p val)
744 (funcall fun (translate val spaces))
745 (incf n))))
746 (when verbose
747 (format t "TOTAL: ~D entries~2%" n))))
748 (scan-table table0 "linkage info"
749 (lambda (x &aux (type #\T))
750 (when (consp x)
751 (setq x (translate (car x) spaces) type #\D))
752 (format asm-file " .long ~A~%" x)
753 (when verbose
754 (format t "~A ~A~%" type x))
755 (push x linkage)))
756 (scan-table table1 "defined" #'show)
757 (scan-table table2 "undefined" #'show)
758 (let ((diff1 ; linkage not in foreign
759 (remove-if (lambda (x) (member x foreign :test #'string=)) linkage))
760 (diff2 ; foreign not in linkage
761 (remove-if (lambda (x) (member x linkage :test #'string=)) foreign)))
762 (when verbose
763 (format t "~&Linkage not in foreign:~%~S~%" diff1)
764 (format t "~&Foreign not in linkage:~%~S~%" diff2))
765 )))))
766 (terpri asm-file))
768 ;;;; ELF file I/O
770 (defconstant +sht-null+ 0)
771 (defconstant +sht-progbits+ 1)
772 (defconstant +sht-symtab+ 2)
773 (defconstant +sht-strtab+ 3)
774 (defconstant +sht-rela+ 4)
775 (defconstant +sht-rel+ 9)
777 (define-alien-type elf64-ehdr
778 (struct elf64-edhr
779 (ident (array unsigned-char 16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
780 (type (unsigned 16)) ; 1 0
781 (machine (unsigned 16)) ; 3E 0
782 (version (unsigned 32)) ; 1 0 0 0
783 (entry unsigned) ; 0 0 0 0 0 0 0 0
784 (phoff unsigned) ; 0 0 0 0 0 0 0 0
785 (shoff unsigned) ;
786 (flags (unsigned 32)) ; 0 0 0 0
787 (ehsize (unsigned 16)) ; 40 0
788 (phentsize (unsigned 16)) ; 0 0
789 (phnum (unsigned 16)) ; 0 0
790 (shentsize (unsigned 16)) ; 40 0
791 (shnum (unsigned 16)) ; n 0
792 (shstrndx (unsigned 16)))) ; n 0
793 (define-alien-type elf64-shdr
794 (struct elf64-shdr
795 (name (unsigned 32))
796 (type (unsigned 32))
797 (flags (unsigned 64))
798 (addr (unsigned 64))
799 (off (unsigned 64))
800 (size (unsigned 64))
801 (link (unsigned 32))
802 (info (unsigned 32))
803 (addralign (unsigned 64))
804 (entsize (unsigned 64))))
805 (define-alien-type elf64-sym
806 (struct elf64-sym
807 (name (unsigned 32))
808 (info (unsigned 8))
809 (other (unsigned 8))
810 (shndx (unsigned 16))
811 (value unsigned)
812 (size unsigned)))
813 (define-alien-type elf64-rela
814 (struct elf64-rela
815 (offset (unsigned 64))
816 (info (unsigned 64))
817 (addend (signed 64))))
819 (defun make-elf64-sym (name info)
820 (let ((a (make-array 24 :element-type '(unsigned-byte 8))))
821 (with-pinned-objects (a)
822 (setf (sap-ref-32 (vector-sap a) 0) name
823 (sap-ref-8 (vector-sap a) 4) info))
826 ;;; Return two values: an octet vector comprising a string table
827 ;;; and an alist which maps string to offset in the table.
828 (defun string-table (strings)
829 (let* ((length (+ (1+ (length strings)) ; one more null than there are strings
830 (reduce #'+ strings :key #'length))) ; data length
831 (bytes (make-array length :element-type '(unsigned-byte 8)
832 :initial-element 0))
833 (index 1)
834 (alist))
835 (dolist (string strings)
836 (push (cons string index) alist)
837 (replace bytes (map 'vector #'char-code string) :start1 index)
838 (incf index (1+ (length string))))
839 (cons (nreverse alist) bytes)))
841 (defun write-alien (alien size stream)
842 (dotimes (i size)
843 (write-byte (sap-ref-8 (alien-value-sap alien) i) stream)))
845 (defun copy-bytes (in-stream out-stream nbytes
846 &optional (buffer
847 (make-array 1024 :element-type '(unsigned-byte 8))))
848 (loop (let ((chunksize (min (length buffer) nbytes)))
849 (aver (eql (read-sequence buffer in-stream :end chunksize) chunksize))
850 (write-sequence buffer out-stream :end chunksize)
851 (when (zerop (decf nbytes chunksize)) (return)))))
853 ;;; core header should be an array of words in '.rodata', not a 32K page
854 (defconstant core-header-size +backend-page-bytes+) ; stupidly large (FIXME)
856 ;;; Write everything except for the core file itself into OUTPUT-STREAM
857 ;;; and leave the stream padded to a 4K boundary ready to receive data.
858 (defun prepare-elf (core-size relocs output)
859 (let* ((sym-entry-size 24)
860 (reloc-entry-size 24)
861 (core-align 4096)
862 (sections
863 `#((:core "lisp.core" ,+sht-progbits+ 0 0 0 ,core-align 0)
864 (:sym ".symtab" ,+sht-symtab+ 0 3 1 8 ,sym-entry-size)
865 ; section with the strings -- ^ ^ -- 1+ highest local symbol
866 (:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
867 (:rel ".relalisp.core" ,+sht-rela+ 0 2 1 8 ,reloc-entry-size)
868 ; symbol table -- ^ ^ -- for which section
869 (:note ".note.GNU-stack" ,+sht-null+ 0 0 0 1 0)))
870 (string-table
871 (string-table (append '("__lisp_code_start") (map 'list #'second sections))))
872 (strings (cdr string-table))
873 (padded-strings-size (logandc2 (+ (length strings) 7) 7))
874 (ehdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil)) 8))
875 (shdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil)) 8))
876 (symbols-size (* 2 sym-entry-size))
877 (shdrs-start (+ ehdr-size symbols-size padded-strings-size))
878 (shdrs-end (+ shdrs-start (* (1+ (length sections)) shdr-size)))
879 (relocs-size (* (length relocs) reloc-entry-size))
880 (relocs-end (+ shdrs-end relocs-size))
881 (core-start (logandc2 (+ relocs-end (1- core-align)) (1- core-align)))
882 (ident #.(coerce '(#x7F #x45 #x4C #x46 2 1 1 0 0 0 0 0 0 0 0 0)
883 '(array (unsigned-byte 8) 1))))
885 (with-alien ((ehdr elf64-ehdr))
886 (dotimes (i (ceiling ehdr-size n-word-bytes))
887 (setf (sap-ref-word (alien-value-sap ehdr) (* i n-word-bytes)) 0))
888 (with-pinned-objects (ident)
889 (%byte-blt (vector-sap ident) 0 (alien-value-sap ehdr) 0 16))
890 (setf (slot ehdr 'type) 1
891 (slot ehdr 'machine) #x3E
892 (slot ehdr 'version) 1
893 (slot ehdr 'shoff) shdrs-start
894 (slot ehdr 'ehsize) ehdr-size
895 (slot ehdr 'shentsize) shdr-size
896 (slot ehdr 'shnum) (1+ (length sections)) ; section 0 is implied
897 (slot ehdr 'shstrndx) (1+ (position :str sections :key #'car)))
898 (write-alien ehdr ehdr-size output))
900 ;; Write symbol table
901 (aver (eql (file-position output) ehdr-size))
902 (write-sequence (make-elf64-sym 0 0) output)
903 ;; The symbol name index is always 1 by construction. The type is #x10
904 ;; given: #define STB_GLOBAL 1
905 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
906 ;; which places the binding in the high 4 bits of the low byte.
907 (write-sequence (make-elf64-sym 1 #x10) output)
909 ;; Write string table
910 (aver (eql (file-position output) (+ ehdr-size symbols-size)))
911 (write-sequence strings output) ; an octet vector at this point
912 (dotimes (i (- padded-strings-size (length strings)))
913 (write-byte 0 output))
915 ;; Write section headers
916 (aver (eql (file-position output) shdrs-start))
917 (with-alien ((shdr elf64-shdr))
918 (dotimes (i (ceiling shdr-size n-word-bytes)) ; Zero-fill
919 (setf (sap-ref-word (alien-value-sap shdr) (* i n-word-bytes)) 0))
920 (dotimes (i (1+ (length sections)))
921 (when (plusp i) ; Write the zero-filled header as section 0
922 (destructuring-bind (key name type flags link info alignment entsize)
923 (aref sections (1- i))
924 (multiple-value-bind (offset size)
925 (ecase key
926 (:sym (values ehdr-size symbols-size))
927 (:str (values (+ ehdr-size symbols-size) (length strings)))
928 (:rel (values shdrs-end relocs-size))
929 (:core (values core-start core-size))
930 (:note (values 0 0)))
931 (let ((name (cdr (assoc name (car string-table) :test #'string=))))
932 (setf (slot shdr 'name) name
933 (slot shdr 'type) type
934 (slot shdr 'flags) flags
935 (slot shdr 'off) offset
936 (slot shdr 'size) size
937 (slot shdr 'link) link
938 (slot shdr 'info) info
939 (slot shdr 'addralign) alignment
940 (slot shdr 'entsize) entsize)))))
941 (write-alien shdr shdr-size output)))
943 ;; Write relocations
944 (aver (eql (file-position output) shdrs-end))
945 (let ((buf (make-array relocs-size :element-type '(unsigned-byte 8)))
946 (ptr 0))
947 (with-alien ((rela elf64-rela))
948 (dovector (reloc relocs)
949 (destructuring-bind (place addend . kind) reloc
950 (setf (slot rela 'offset) place
951 (slot rela 'info) (logior (ash 1 32) kind) ; 1 = symbol index
952 (slot rela 'addend) addend))
953 (setf (%vector-raw-bits buf (+ ptr 0)) (sap-ref-word (alien-value-sap rela) 0)
954 (%vector-raw-bits buf (+ ptr 1)) (sap-ref-word (alien-value-sap rela) 8)
955 (%vector-raw-bits buf (+ ptr 2)) (sap-ref-word (alien-value-sap rela) 16))
956 (incf ptr 3)))
957 (write-sequence buf output))
959 ;; Write padding
960 (dotimes (i (- core-start (file-position output)))
961 (write-byte 0 output))
962 (aver (eq (file-position output) core-start))))
964 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
965 ;;; whose code space is subject to link-time relocation.
966 (defconstant R_X86_64_64 1) ; /* Direct 64 bit */
967 (defconstant R_X86_64_PC32 2) ; /* PC relative 32 bit signed */
968 (defconstant R_X86_64_32 10) ; /* Direct 32 bit zero extended */
970 (defun collect-relocations (spaces fixups &aux (print nil))
971 (binding* (((static-start static-end)
972 (let ((space (get-space static-core-space-id spaces)))
973 (values (space-addr space) (space-end space))))
974 ((code-start code-end)
975 (let ((space (get-space immobile-varyobj-core-space-id spaces)))
976 (values (space-addr space) (space-end space))))
977 ;; the distance between fixedobj space address (i.e following the pages of
978 ;; dynamic space) in the ELF section which has a presumptive address of 0
979 ;; due to being non-loaded, to where it will be later mapped by coreparse
980 (fixedobj-space-displacement
981 (let ((space (get-space immobile-fixedobj-core-space-id spaces)))
982 (- (* (1+ (space-data-page space)) +backend-page-bytes+) ; 1+ = core header
983 (space-addr space))))
984 (n-abs 0)
985 (n-rel 0))
986 (labels
987 ((abs-fixup (core-offs referent)
988 (incf n-abs)
989 (when print
990 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
991 (setf (sap-ref-word (car spaces) core-offs) 0)
992 (vector-push-extend `(,(+ core-header-size core-offs)
993 ,(- referent code-start) . ,R_X86_64_64)
994 fixups))
995 (abs32-fixup (core-offs referent)
996 (incf n-abs)
997 (when print
998 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
999 (setf (sap-ref-32 (car spaces) core-offs) 0)
1000 (vector-push-extend `(,(+ core-header-size core-offs)
1001 ,(- referent code-start) . ,R_X86_64_32)
1002 fixups))
1003 (rel-fixup (core-offs referent)
1004 (incf n-rel)
1005 (when print
1006 (format t "~x = 0x~(~x~): (r)~%" core-offs (core-to-logical core-offs) #+nil referent))
1007 (setf (sap-ref-32 (car spaces) core-offs) 0)
1008 (vector-push-extend `(,(+ core-header-size core-offs)
1009 ,(- referent code-start) . ,R_X86_64_PC32)
1010 fixups))
1011 (in-code-space-p (ptr)
1012 (and (<= code-start ptr) (< ptr code-end)))
1013 ;; Given a address which is an offset into the data pages of the target core,
1014 ;; compute the logical address which that offset would be mapped to.
1015 ;; For example core address 0 is the virtual address of static space.
1016 (core-to-logical (core-offs &aux (page (floor core-offs +backend-page-bytes+)))
1017 (dolist (space (cdr spaces)
1018 (bug "Can't translate core offset ~x using ~x"
1019 core-offs spaces))
1020 (let* ((page0 (space-data-page space))
1021 (nwords (space-nwords space))
1022 (id (space-id space))
1023 (npages (ceiling nwords (/ +backend-page-bytes+ n-word-bytes))))
1024 (when (and (<= page0 page (+ page0 (1- npages)))
1025 (/= id immobile-varyobj-core-space-id))
1026 (return (+ (space-addr space)
1027 (* (- page page0) +backend-page-bytes+)
1028 (logand core-offs (1- +backend-page-bytes+))))))))
1029 (scanptrs (obj wordindex-min wordindex-max &aux (n-fixups 0))
1030 (do* ((base-addr (logandc2 (get-lisp-obj-address obj) lowtag-mask))
1031 (sap (int-sap base-addr))
1032 ;; core-offs is the offset in the lisp.core ELF section.
1033 (core-offs (- base-addr (sap-int (car spaces))))
1034 (i wordindex-min (1+ i)))
1035 ((> i wordindex-max) n-fixups)
1036 (let ((ptr (sap-ref-word sap (ash i word-shift))))
1037 (when (and (= (logand ptr 3) 3) (in-code-space-p ptr))
1038 (abs-fixup (+ core-offs (ash i word-shift)) ptr)
1039 (incf n-fixups)))))
1040 (scanptr (obj wordindex)
1041 (plusp (scanptrs obj wordindex wordindex))) ; trivial wrapper
1042 (scan-obj (obj widetag size vaddr
1043 &aux (core-offs (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1044 (sap-int (car spaces))))
1045 (nwords (ceiling size n-word-bytes)))
1046 (when (listp obj)
1047 (scanptrs obj 0 1)
1048 (return-from scan-obj))
1049 (case widetag
1050 (#.instance-widetag
1051 (let ((layout (truly-the layout
1052 (translate (%instance-layout obj) spaces))))
1053 ;; FIXME: even though the layout is supplied, it's not good enough,
1054 ;; because the macro references the layout-bitmap which might
1055 ;; be a bignum which is a pointer into the logical core address.
1056 (unless (fixnump (layout-bitmap layout))
1057 (error "Can't process bignum bitmap"))
1058 (do-instance-tagged-slot (i obj :layout layout)
1059 (scanptr obj (1+ i))))
1060 (return-from scan-obj))
1061 (#.simple-vector-widetag
1062 (let ((len (length (the simple-vector obj))))
1063 (when (eql (logand (get-header-data obj) #xFF) vector-valid-hashing-subtype)
1064 (do ((i 2 (+ i 2)) (needs-rehash))
1065 ((= i len)
1066 (when needs-rehash
1067 (setf (svref obj 1) 1)))
1068 (when (scanptr obj (+ vector-data-offset i))
1069 (format t "~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj))
1070 (setq needs-rehash t))
1071 (scanptr obj (+ vector-data-offset i 1)))
1072 (return-from scan-obj))
1073 (setq nwords (+ len 2))))
1074 (#.fdefn-widetag
1075 (scanptrs obj 1 2)
1076 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
1077 (int-sap (+ (- (get-lisp-obj-address obj) other-pointer-lowtag)
1078 (ash fdefn-raw-addr-slot word-shift))))
1079 ;; what the fdefn's logical PC will be
1080 (fdefn-logical-pc (+ vaddr (ash fdefn-raw-addr-slot word-shift)))
1081 (rel32off (signed-sap-ref-32 fdefn-pc-sap 1))
1082 (target (+ fdefn-logical-pc 1 rel32off))
1083 (space-displacement
1084 (if (<= static-start vaddr static-end)
1085 (+ (- sb-vm:static-space-start) +backend-page-bytes+)
1086 fixedobj-space-displacement)))
1087 (when (in-code-space-p target)
1088 ;; This addend needs to account for the fact that the location
1089 ;; where fixup occurs is not where the fdefn will actually exist.
1090 (rel-fixup (+ core-offs (ash 3 word-shift) 1)
1091 (+ target space-displacement))))
1092 (return-from scan-obj))
1093 ((#.closure-widetag #.funcallable-instance-widetag)
1094 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj))
1095 (- n-word-bytes fun-pointer-lowtag))))
1096 (when (in-code-space-p word)
1097 (abs-fixup (+ core-offs (ash 1 word-shift)) word)))
1098 (when (eq widetag funcallable-instance-widetag)
1099 (let ((layout (truly-the layout
1100 (translate (%funcallable-instance-layout obj) spaces))))
1101 (unless (fixnump (layout-bitmap layout))
1102 (error "Can't process bignum bitmap"))
1103 (let ((bitmap (layout-bitmap layout)))
1104 (unless (eql bitmap -1)
1105 ;; tagged slots precede untagged slots,
1106 ;; so integer-length is the count of tagged slots.
1107 (setq nwords (1+ (integer-length bitmap))))))))
1108 ;; mixed boxed/unboxed objects
1109 (#.code-header-widetag
1110 (dolist (loc (code-fixup-locs obj spaces))
1111 (let ((val (sap-ref-32 (code-instructions obj) loc)))
1112 (when (in-code-space-p val)
1113 (abs32-fixup (sap- (sap+ (code-instructions obj) loc) (car spaces))
1114 val))))
1115 (dotimes (i (code-n-entries obj))
1116 (scanptrs (%code-entry-point obj i) 2 5))
1117 (setq nwords (code-header-words obj)))
1118 ;; boxed objects that can reference code/simple-funs
1119 ((#.value-cell-widetag #.symbol-widetag #.weak-pointer-widetag))
1121 (return-from scan-obj)))
1122 (scanptrs obj 1 (1- nwords))))
1123 (dolist (space (reverse (cdr spaces)))
1124 (let* ((logical-addr (space-addr space))
1125 (size (space-size space))
1126 (physical-addr (space-physaddr space spaces))
1127 (physical-end (sap+ physical-addr size))
1128 (vaddr-translation (+ (- (sap-int physical-addr)) logical-addr)))
1129 (unless (= (space-id space) immobile-varyobj-core-space-id)
1130 (dx-flet ((visit (obj widetag size)
1131 ;; Compute the object's intended virtual address
1132 (let ((vaddr (+ (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1133 vaddr-translation)))
1134 (scan-obj obj widetag size vaddr))))
1135 (map-objects-in-range
1136 #'visit
1137 (ash (sap-int physical-addr) (- n-fixnum-tag-bits))
1138 (ash (sap-int physical-end) (- n-fixnum-tag-bits))))
1139 (when (and (plusp (logior n-abs n-rel)) print)
1140 (format t "space @ ~x: ~d absolute + ~d relative fixups~%"
1141 logical-addr n-abs n-rel))
1142 (setq n-abs 0 n-rel 0))))))
1143 (when print
1144 (format t "total of ~D linker fixups~%" (length fixups)))
1145 fixups)
1147 ;;;;
1149 (macrolet ((do-core-header-entry (((id-var len-var ptr-var) buffer) &body body)
1150 `(let ((,ptr-var 1))
1151 (loop
1152 (let ((,id-var (%vector-raw-bits ,buffer ,ptr-var))
1153 (,len-var (%vector-raw-bits ,buffer (1+ ,ptr-var))))
1154 (incf ,ptr-var 2)
1155 (decf ,len-var 2)
1156 (when (= ,id-var end-core-entry-type-code) (return))
1157 ,@body
1158 (incf ,ptr-var ,len-var)))))
1159 (do-directory-entry (((index-var start-index input-nbytes) buffer) &body body)
1160 `(let ((words-per-dirent 5))
1161 (multiple-value-bind (n-entries remainder)
1162 (floor ,input-nbytes words-per-dirent)
1163 (aver (zerop remainder))
1164 (symbol-macrolet ((id (%vector-raw-bits ,buffer index))
1165 (nwords (%vector-raw-bits ,buffer (+ index 1)))
1166 (data-page (%vector-raw-bits ,buffer (+ index 2)))
1167 (addr (%vector-raw-bits ,buffer (+ index 3)))
1168 (npages (%vector-raw-bits ,buffer (+ index 4))))
1169 (do ((,index-var ,start-index (+ ,index-var words-per-dirent)))
1170 ((= ,index-var (+ ,start-index (* n-entries words-per-dirent))))
1171 ,@body)))))
1172 (with-mapped-core ((sap-var start npages stream) &body body)
1173 `(let (,sap-var)
1174 (unwind-protect
1175 (progn
1176 (setq ,sap-var
1177 (sb-posix:mmap nil
1178 (* ,npages +backend-page-bytes+)
1179 (logior sb-posix:prot-read sb-posix:prot-write)
1180 sb-posix:map-private
1181 (sb-sys:fd-stream-fd ,stream)
1182 ;; Skip the core header
1183 (+ ,start +backend-page-bytes+)))
1184 ,@body)
1185 (when ,sap-var
1186 (sb-posix:munmap ,sap-var (* ,npages +backend-page-bytes+)))))))
1188 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1189 ;;; separate it into pieces.
1190 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1191 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1192 ;;; The ".core" file is a native core file used for starting a binary that
1193 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1194 ;;; is for linking in to a binary that needs no "--core" argument.
1195 (defun split-core
1196 (input-pathname asm-pathname
1197 &key emit-sizes (verbose nil)
1198 &aux (split-core-pathname
1199 (merge-pathnames (make-pathname :type "core") asm-pathname))
1200 (elf-core-pathname
1201 (merge-pathnames
1202 (make-pathname :name (concatenate 'string (pathname-name asm-pathname) "-core")
1203 :type "o")
1204 asm-pathname))
1205 (core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
1206 (original-total-npages 0)
1207 (core-offset 0)
1208 (page-adjust 0)
1209 (code-start-fixup-ofs 0) ; where to fixup the core header
1210 (spaces)
1211 (copy-actions)
1212 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1213 (relocs (make-array 100000 :adjustable t :fill-pointer 0)))
1215 ;; Remove old files
1216 (ignore-errors (delete-file asm-pathname))
1217 (ignore-errors (delete-file split-core-pathname))
1218 (ignore-errors (delete-file elf-core-pathname))
1219 ;; Ensure that all files can be opened
1220 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
1221 (with-open-file (asm-file asm-pathname :direction :output :if-exists :supersede)
1222 (with-open-file (split-core split-core-pathname :direction :output
1223 :element-type '(unsigned-byte 8) :if-exists :supersede)
1224 (read-sequence core-header input)
1225 (cond ((= (%vector-raw-bits core-header 0) core-magic))
1226 (t ; possible embedded core
1227 (file-position input (- (file-length input)
1228 (* 2 n-word-bytes)))
1229 (aver (eql (read-sequence core-header input) (* 2 n-word-bytes)))
1230 (aver (= (%vector-raw-bits core-header 1) core-magic))
1231 (setq core-offset (%vector-raw-bits core-header 0))
1232 (when verbose
1233 (format t "~&embedded core starts at #x~x into input~%" core-offset))
1234 (file-position input core-offset)
1235 (read-sequence core-header input)
1236 (aver (= (%vector-raw-bits core-header 0) core-magic))))
1237 (do-core-header-entry ((id len ptr) core-header)
1238 (case id
1239 (#.build-id-core-entry-type-code
1240 (when verbose
1241 (let ((string (make-string (%vector-raw-bits core-header ptr)
1242 :element-type 'base-char)))
1243 (%byte-blt core-header (* (1+ ptr) n-word-bytes) string 0 (length string))
1244 (format t "Build ID [~a]~%" string))))
1245 (#.new-directory-core-entry-type-code
1246 (do-directory-entry ((index ptr len) core-header)
1247 (incf original-total-npages npages)
1248 (push (make-space id addr data-page page-adjust nwords) spaces)
1249 (when verbose
1250 (format t "id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1251 id data-page npages addr nwords
1252 (= id immobile-varyobj-core-space-id)))
1253 (cond ((= id immobile-varyobj-core-space-id)
1254 (setq code-start-fixup-ofs (+ index 3))
1255 ;; Keep this entry but delete the page count. We need to know
1256 ;; where the space was supposed to be mapped and at what size.
1257 ;; Subsequent core entries will need to adjust their start page
1258 ;; downward (just the PTEs's start page now).
1259 (setq page-adjust npages data-page 0 npages 0))
1261 ;; Keep track of where the fixedobj space wants to be.
1262 (when (= id immobile-fixedobj-core-space-id)
1263 (setq fixedobj-range (cons addr (ash nwords word-shift))))
1264 (when (plusp npages) ; enqueue
1265 (push (cons data-page (* npages +backend-page-bytes+))
1266 copy-actions))
1267 ;; adjust this entry's start page in the new core
1268 (decf data-page page-adjust)))))
1269 (#.page-table-core-entry-type-code
1270 (aver (= len 3))
1271 (symbol-macrolet ((nbytes (%vector-raw-bits core-header (1+ ptr)))
1272 (data-page (%vector-raw-bits core-header (+ ptr 2))))
1273 (aver (= data-page original-total-npages))
1274 (aver (= (ceiling (space-nwords
1275 (find dynamic-core-space-id spaces :key #'space-id))
1276 (/ +backend-page-bytes+ n-word-bytes))
1277 (%vector-raw-bits core-header ptr))) ; number of PTEs
1278 (when verbose
1279 (format t "PTE: page=~5x~40tbytes=~8x~%" data-page nbytes))
1280 (push (cons data-page nbytes) copy-actions)
1281 (decf data-page page-adjust)))))
1282 (let ((buffer (make-array +backend-page-bytes+
1283 :element-type '(unsigned-byte 8)))
1284 (filepos))
1285 ;; Write the new core file
1286 (write-sequence core-header split-core)
1287 (dolist (action (reverse copy-actions)) ; nondestructive
1288 ;; page index convention assumes absence of core header.
1289 ;; i.e. data page 0 is the file page immediately following the core header
1290 (let ((offset (* (1+ (car action)) +backend-page-bytes+))
1291 (nbytes (cdr action)))
1292 (when verbose
1293 (format t "File offset ~10x: ~10x bytes~%" offset nbytes))
1294 (setq filepos (+ core-offset offset))
1295 (file-position input filepos)
1296 (copy-bytes input split-core nbytes buffer)))
1297 ;; Trailer (runtime options and magic number)
1298 (let ((nbytes (read-sequence buffer input)))
1299 ;; expect trailing magic number
1300 (let ((ptr (floor (- nbytes n-word-bytes) n-word-bytes)))
1301 (aver (= (%vector-raw-bits buffer ptr) core-magic)))
1302 ;; File position of the core header needs to be set to 0
1303 ;; regardless of what it was
1304 (setf (%vector-raw-bits buffer 4) 0)
1305 (when verbose
1306 (format t "Trailer words:(~{~X~^ ~})~%"
1307 (loop for i below (floor nbytes n-word-bytes)
1308 collect (%vector-raw-bits buffer i))))
1309 (write-sequence buffer split-core :end nbytes)
1310 (finish-output split-core))
1311 ;; Sanity test
1312 (aver (= (+ core-offset
1313 (* page-adjust +backend-page-bytes+)
1314 (file-length split-core))
1315 (file-length input)))
1316 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1317 (file-position input filepos)))
1319 ;; Map the original core file to memory
1320 (with-mapped-core (sap core-offset original-total-npages input)
1321 (let* ((data-spaces
1322 (delete immobile-varyobj-core-space-id (reverse spaces)
1323 :key #'space-id))
1324 (map (cons sap (sort (copy-list spaces) #'> :key #'space-addr)))
1325 (pte-nbytes (cdar copy-actions)))
1326 (collect-relocations map relocs)
1327 (with-open-file (output elf-core-pathname
1328 :direction :output :if-exists :supersede
1329 :element-type '(unsigned-byte 8))
1330 (vector-push-extend
1331 `(,(ash code-start-fixup-ofs word-shift) 0 . ,R_X86_64_64) relocs)
1332 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces))
1333 +backend-page-bytes+ ; core header
1334 pte-nbytes)
1335 relocs output)
1336 (setf (%vector-raw-bits core-header code-start-fixup-ofs) 0)
1337 (write-sequence core-header output) ; Copy prepared header
1338 (force-output output)
1339 ;; Change SB-C::*COMPILE[-FILE]-TO-MEMORY-SPACE* to :DYNAMIC
1340 ;; in case the resulting executable needs to compile anything.
1341 ;; (Call frame info will be missing, but at least it's something.)
1342 (dolist (name '("*COMPILE-FILE-TO-MEMORY-SPACE*"
1343 "*COMPILE-TO-MEMORY-SPACE*"))
1344 (%set-symbol-global-value
1345 (find-target-symbol "SB-C" name map)
1346 (find-target-symbol "KEYWORD" "DYNAMIC" map :logical)))
1348 (dolist (space data-spaces) ; Copy pages from memory
1349 (let ((start (space-physaddr space map))
1350 (size (space-nbytes-aligned space)))
1351 (aver (eql (sb-unix:unix-write (sb-sys:fd-stream-fd output)
1352 start 0 size)
1353 size))))
1354 (when verbose
1355 (format t "Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
1356 pte-nbytes pte-nbytes (floor pte-nbytes 10)))
1357 (copy-bytes input output pte-nbytes)) ; Copy PTEs from input
1359 ;; There's no relation between emit-sizes and which section to put
1360 ;; C symbol references in, however it's a safe bet that if sizes
1361 ;; are supported then so is the .rodata directive.
1362 (format asm-file (if emit-sizes " .rodata~%" " .data~%"))
1363 (extract-required-c-symbols map asm-file)
1364 (write-assembler-text map fixedobj-range asm-file emit-sizes)))
1366 (format asm-file "~% ~A~%" +noexec-stack-note+))))
1368 ) ; end MACROLET
1370 ;;;;
1372 (defun cl-user::elfinate (&optional (args (cdr sb-ext:*posix-argv*)))
1373 (cond ((string= (car args) "split")
1374 (pop args)
1375 (let ((sizes (string= (car args) "--sizes")))
1376 (when sizes
1377 (pop args))
1378 (destructuring-bind (input asm) args
1379 (split-core input asm :emit-sizes sizes))))
1380 #+nil
1381 ((string= (car args) "relocate")
1382 (destructuring-bind (input output binary start-sym) (cdr args)
1383 (relocate-core
1384 input output binary (parse-integer start-sym :radix 16))))
1386 (error "Unknown command: ~S" args))))
1388 ;; If loaded as a script, do this
1389 (eval-when (:execute)
1390 (let ((args (cdr sb-ext:*posix-argv*)))
1391 (when args
1392 (let ((*print-pretty* nil))
1393 (format t "Args: ~S~%" args)
1394 (cl-user::elfinate args)))))