1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (load (merge-pathnames "corefile.lisp" *load-pathname
*))
20 (defpackage "SB-EDITCORE"
21 (:use
"CL" "SB-ALIEN" "SB-COREFILE" "SB-INT" "SB-EXT"
22 "SB-KERNEL" "SB-SYS" "SB-VM")
23 (:export
#:move-dynamic-code-to-text-space
#:redirect-text-space-calls
24 #:split-core
#:copy-to-elf-obj
)
25 (:import-from
"SB-ALIEN-INTERNALS"
26 #:alien-type-bits
#:parse-alien-type
27 #:alien-value-sap
#:alien-value-type
)
28 (:import-from
"SB-C" #:+backend-page-bytes
+)
29 (:import-from
"SB-VM" #:map-objects-in-range
#:reconstitute-object
30 #:%closure-callee
#:code-object-size
)
31 (:import-from
"SB-DISASSEM" #:get-inst-space
#:find-inst
32 #:make-dstate
#:%make-segment
#:make-code-segment
33 #:seg-virtual-location
#:seg-length
#:seg-sap-maker
34 #:map-segment-instructions
#:inst-name
35 #:dstate-next-addr
#:dstate-cur-offs
36 #:dstate-cur-addr
#:sign-extend
)
38 (:import-from
"SB-X86-64-ASM" #:near-jump-displacement
39 #:near-cond-jump-displacement
#:mov
#:call
#:jmp
41 #:machine-ea
#:machine-ea-base
#:machine-ea-index
#:machine-ea-disp
)
42 (:import-from
"SB-IMPL" #:symbol-hashset
#:package-%name
44 #:hash-table-pairs
#:hash-table-%count
))
46 (in-package "SB-EDITCORE")
48 (declaim (muffle-conditions compiler-note
))
51 (setq *evaluator-mode
* :compile
))
53 ;;; Some high address that won't conflict with any of the ordinary spaces
54 ;;; It's more-or-less arbitrary, but we must be able to discern whether a
55 ;;; pointer looks like it points to code in case coreparse has to walk the heap.
56 (defconstant +code-space-nominal-address
+ #x550000000000
)
58 (defglobal +noexec-stack-note
+ ".section .note.GNU-stack, \"\", @progbits")
60 (defstruct (core-space ; "space" is a CL symbol
62 (:constructor make-space
(id addr data-page page-adjust nwords
)))
63 (page-table nil
:type
(or null simple-vector
))
64 id addr data-page page-adjust nwords
)
65 (defmethod print-object ((self core-space
) stream
)
66 (print-unreadable-object (self stream
:type t
)
67 (format stream
"~d" (space-id self
))))
68 (defun space-size (space) (* (space-nwords space
) n-word-bytes
))
69 (defun space-end (space) (+ (space-addr space
) (space-size space
)))
70 (defun space-nbytes-aligned (space)
71 (align-up (space-size space
) +backend-page-bytes
+))
72 (defun space-physaddr (space spacemap
)
73 (sap+ (car spacemap
) (* (space-data-page space
) +backend-page-bytes
+)))
75 ;;; Given VADDR which is an address in the target core, return the address at which
76 ;;; VADDR is currently mapped while performing the split.
77 ;;; SPACEMAP is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
78 (defun translate-ptr (vaddr spacemap
)
79 (let ((space (find vaddr
(cdr spacemap
) :key
#'space-addr
:test
#'>=)))
80 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
81 ;; macroize or something.
82 (+ (sap-int (car spacemap
)) (* (space-data-page space
) +backend-page-bytes
+)
83 (- vaddr
(space-addr space
)))))
86 (defun get-space (id spacemap
)
87 (find id
(cdr spacemap
) :key
#'space-id
))
88 (defun compute-nil-object (spacemap)
89 (let ((space (get-space static-core-space-id spacemap
)))
90 ;; TODO: The core should store its address of NIL in the initial function entry
91 ;; so this kludge can be removed.
92 (%make-lisp-obj
(logior (space-addr space
) #x117
)))) ; SUPER KLUDGE
94 ;;; Given OBJ which is tagged pointer into the target core, translate it into
95 ;;; the range at which the core is now mapped during execution of this tool,
96 ;;; so that host accessors can dereference its slots.
97 ;;; Use extreme care: while it works to use host accessors on the target core,
98 ;;; we must avoid type checks on instances because LAYOUTs need translation.
99 ;;; Printing boxed objects from the target core will almost always crash.
100 (defun translate (obj spacemap
)
101 (%make-lisp-obj
(translate-ptr (get-lisp-obj-address obj
) spacemap
)))
103 (defstruct (core-sym (:copier nil
) (:predicate nil
)
104 (:constructor make-core-sym
(package name external
)))
106 (name nil
:read-only t
)
107 (external nil
:read-only t
))
109 (defstruct (bounds (:constructor make-bounds
(low high
)))
110 (low 0 :type word
) (high 0 :type word
))
112 (defstruct (core (:predicate nil
)
114 (:constructor %make-core
))
117 ;; mapping from small integer ID to package
119 ;; mapping from string naming a package to list of symbol names (strings)
120 ;; that are external in the package.
121 (packages (make-hash-table :test
'equal
))
122 ;; hashset of symbol names (as strings) that should be package-qualified.
123 ;; (Prefer not to package-qualify unambiguous names)
124 (nonunique-symbol-names)
125 (code-bounds nil
:type bounds
:read-only t
)
126 (fixedobj-bounds nil
:type bounds
:read-only t
)
127 (linkage-bounds nil
:type bounds
:read-only t
)
128 (linkage-symbols nil
)
129 (linkage-symbol-usedp nil
)
130 (linkage-entry-size nil
)
131 (new-fixups (make-hash-table))
132 (new-fixup-words-used 0)
133 ;; For assembler labels that we want to invent at random
136 (dstate (make-dstate nil
) :read-only t
)
137 (seg (%make-segment
:sap-maker
(lambda () (error "Bad sap maker"))
138 :virtual-location
0) :read-only t
)
140 (call-inst nil
:read-only t
)
141 (jmp-inst nil
:read-only t
)
142 (pop-inst nil
:read-only t
))
144 (defglobal *editcore-ppd
*
145 ;; copy no entries for macros/special-operators (flet, etc)
146 (let ((ppd (sb-pretty::make-pprint-dispatch-table
#() nil nil
)))
147 (set-pprint-dispatch 'string
148 ;; Write strings without string quotes
149 (lambda (stream string
) (write-string string stream
))
154 (defun c-name (lispname core pp-state
&optional
(prefix ""))
155 (when (typep lispname
'(string 0))
156 (setq lispname
"anonymous"))
157 ;; Perform backslash escaping on the exploded string
158 ;; Strings were stringified without surrounding quotes,
159 ;; but there might be quotes embedded anywhere, so escape them,
160 ;; and also remove newlines and non-ASCII.
162 (mapcan (lambda (char)
163 (cond ((not (typep char
'base-char
)) (list #\?))
164 ((member char
'(#\\ #\")) (list #\\ char
))
165 ((eql char
#\newline
) (list #\_
))
169 ((and (stringp lispname
)
170 ;; L denotes a symbol which can not be global on macOS.
171 (char= (char lispname
0) #\L
))
172 (concatenate 'string
"_" lispname
))
174 (write-to-string lispname
175 ;; Printing is a tad faster without a pretty stream
176 :pretty
(not (typep lispname
'core-sym
))
177 :pprint-dispatch
*editcore-ppd
*
178 ;; FIXME: should be :level 1, however see
179 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
180 :escape t
:level
2 :length
5
181 :case
:downcase
:gensym nil
182 :right-margin
10000)))
184 (let ((string (concatenate 'string prefix characters
)))
185 ;; If the string appears in the linker symbols, then string-upcase it
186 ;; so that it looks like a conventional Lisp symbol.
187 (cond ((find-if (lambda (x) (string= string
(if (consp x
) (car x
) x
)))
188 (core-linkage-symbols core
))
189 (setq string
(string-upcase string
)))
190 ((string= string
".") ; can't use the program counter symbol either
191 (setq string
"|.|")))
192 ;; If the symbol is still nonunique, add a random suffix.
193 ;; The secondary value is whether the symbol should be a linker global.
194 ;; For now, make nothing global, thereby avoiding potential conflicts.
195 (let ((occurs (incf (gethash string
(car pp-state
) 0))))
197 (values (concatenate 'string string
"_" (write-to-string occurs
))
202 (defmethod print-object ((sym core-sym
) stream
)
203 (format stream
"~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
204 (core-sym-package sym
)
205 (core-sym-external sym
)
206 (core-sym-name sym
)))
208 (defun space-bounds (id spacemap
)
209 (let ((space (get-space id spacemap
)))
211 (make-bounds (space-addr space
) (space-end space
))
213 (defun in-bounds-p (addr bounds
)
214 (and (>= addr
(bounds-low bounds
)) (< addr
(bounds-high bounds
))))
216 (defun make-string-hashset (contents count
)
217 (let ((hs (sb-int:make-hashset count
#'string
= #'sxhash
)))
218 (dolist (string contents hs
)
219 (sb-int:hashset-insert hs string
))))
221 (defun scan-symbol-hashset (function table core
)
222 (let* ((spacemap (core-spacemap core
))
223 (nil-object (core-nil-object core
))
224 (cells (translate (symtbl-%cells
(truly-the symbol-hashset
225 (translate table spacemap
)))
227 (dovector (x (translate (cdr cells
) spacemap
))
230 (if (eq x nil-object
) ; any random package can export NIL. wow.
232 (translate (symbol-name (translate x spacemap
)) spacemap
))
235 (defun %fun-name-from-core
(name core
&aux
(spacemap (core-spacemap core
))
236 (packages (core-packages core
))
237 (core-nil (core-nil-object core
)))
238 (named-let recurse
((depth 0) (x name
))
239 (unless (is-lisp-pointer (get-lisp-obj-address x
))
240 (return-from recurse x
)) ; immediate object
241 (when (eq x core-nil
)
242 (return-from recurse nil
))
243 (setq x
(translate x spacemap
))
245 (#.list-pointer-lowtag
246 (cons (recurse (1+ depth
) (car x
))
247 (recurse (1+ depth
) (cdr x
))))
248 ((#.instance-pointer-lowtag
#.fun-pointer-lowtag
) "?")
249 (#.other-pointer-lowtag
252 (let ((p (position #\
/ x
:from-end t
)))
253 (if p
(subseq x
(1+ p
)) x
)))
255 (let ((package-id (symbol-package-id x
))
256 (name (translate (symbol-name x
) spacemap
)))
257 (when (eq package-id
0) ; uninterned
258 (return-from recurse
(string-downcase name
)))
259 (let* ((package (truly-the package
260 (aref (core-pkg-id->package core
) package-id
)))
261 (package-name (translate (package-%name package
) spacemap
)))
262 ;; The name-cleaning code wants to compare against symbols
263 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
264 ;; Other than that, we avoid finding host symbols
265 ;; because the externalness could be wrong and misleading.
266 ;; It's a very subtle point, but best to get it right.
267 (when (member package-name
'("COMMON-LISP" "KEYWORD" "SB-PCL")
269 ;; NIL can't occur. It was picked off above.
270 (awhen (find-symbol name package-name
) ; if existing symbol, use it
271 (return-from recurse it
)))
272 (unless (gethash name
(core-nonunique-symbol-names core
))
273 ;; Don't care about package
274 (return-from recurse
(make-core-sym nil name nil
)))
275 (when (string= package-name
"KEYWORD") ; make an external core-symbol
276 (return-from recurse
(make-core-sym nil name t
)))
277 (let ((externals (gethash package-name packages
))
281 (lambda (string symbol
)
282 (declare (ignore symbol
))
284 (push string externals
))
285 (package-external-symbols package
)
287 (setf externals
(make-string-hashset externals n
)
288 (gethash package-name packages
) externals
))
289 (make-core-sym package-name
291 (sb-int:hashset-find externals name
))))))
294 (defun remove-name-junk (name)
296 (named-let recurse
((x name
))
297 (cond ((typep x
'(cons (eql lambda
)))
298 (let ((args (second x
)))
299 `(lambda ,(if args sb-c
::*debug-name-sharp
* "()")
300 ,@(recurse (cddr x
)))))
302 ((and (typep x
'(or string symbol
))
303 (= (mismatch (string x
) "CLEANUP-FUN-")
304 (length "CLEANUP-FUN-")))
306 ((consp x
) (recons x
(recurse (car x
)) (recurse (cdr x
))))
308 ;; Shorten obnoxiously long printed representations of methods.
309 (flet ((unpackageize (thing)
310 (when (typep thing
'core-sym
)
311 (setf (core-sym-package thing
) nil
))
313 (when (typep name
'(cons (member sb-pcl
::slow-method sb-pcl
::fast-method
314 sb-pcl
::slot-accessor
)))
315 (setq name
`(,(case (car name
)
316 (sb-pcl::fast-method
"method")
317 (sb-pcl::slow-method
"Method") ; something visually distinct
318 (sb-pcl::slot-accessor
"accessor"))
320 (setf (second name
) (unpackageize (second name
)))
321 (let ((last (car (last name
))))
324 (unpackageize qual
))))))
327 (defun fun-name-from-core (name core
)
328 (remove-name-junk (%fun-name-from-core name core
)))
330 ;;; A problem: COMPILED-DEBUG-FUN-ENCODED-LOCS (a packed integer) might be a
331 ;;; bignum - in fact probably is. If so, it points into the target core.
332 ;;; So we have to produce a new instance with an ENCODED-LOCS that
333 ;;; is the translation of the bignum, and call the accessor on that.
334 ;;; The accessors for its sub-fields are abstract - we don't know where the
335 ;;; fields are so we can't otherwise unpack them. (See CDF-DECODE-LOCS if
336 ;;; you really need to know)
337 (defun cdf-offset (compiled-debug-fun spacemap
)
338 ;; (Note that on precisely GC'd platforms, this operation is dangerous,
339 ;; but no more so than everything else in this file)
340 (let ((locs (sb-c::compiled-debug-fun-encoded-locs
341 (truly-the sb-c
::compiled-debug-fun compiled-debug-fun
))))
343 (setq locs
(cdr (translate locs spacemap
))))
344 (sb-c::compiled-debug-fun-offset
345 (sb-c::make-compiled-debug-fun
347 :encoded-locs
(if (fixnump locs
) locs
(translate locs spacemap
))))))
349 ;;; Return a list of ((NAME START . END) ...)
350 ;;; for each C symbol that should be emitted for this code object.
351 ;;; Start and and are relative to the object's base address,
352 ;;; not the start of its instructions. Hence we add HEADER-BYTES
353 ;;; too all the PC offsets.
354 (defun code-symbols (code core
&aux
(spacemap (core-spacemap core
)))
355 (let ((cdf (translate
356 (sb-c::compiled-debug-info-fun-map
357 (truly-the sb-c
::compiled-debug-info
358 (translate (%code-debug-info code
) spacemap
)))
360 (header-bytes (* (code-header-words code
) n-word-bytes
))
364 (let* ((name (fun-name-from-core
365 (sb-c::compiled-debug-fun-name
366 (truly-the sb-c
::compiled-debug-fun cdf
))
368 (next (when (%instancep
(sb-c::compiled-debug-fun-next cdf
))
369 (translate (sb-c::compiled-debug-fun-next cdf
) spacemap
)))
371 (+ header-bytes
(cdf-offset next spacemap
))
372 (code-object-size code
))))
373 (unless (= end-pc start-pc
)
374 ;; Collapse adjacent address ranges named the same.
375 ;; Use EQUALP instead of EQUAL to compare names
376 ;; because instances of CORE-SYMBOL are not interned objects.
377 (if (and blobs
(equalp (caar blobs
) name
))
378 (setf (cddr (car blobs
)) end-pc
)
379 (push (list* name start-pc end-pc
) blobs
)))
381 (setq cdf next start-pc end-pc
)
385 (defstruct (descriptor (:constructor make-descriptor
(bits)))
387 (defmethod print-object ((self descriptor
) stream
)
388 (format stream
"#<ptr ~x>" (descriptor-bits self
)))
389 (defun descriptorize (obj)
390 (if (is-lisp-pointer (get-lisp-obj-address obj
))
391 (make-descriptor (get-lisp-obj-address obj
))
393 (defun undescriptorize (target-descriptor)
394 (%make-lisp-obj
(descriptor-bits target-descriptor
)))
396 (defun target-hash-table-alist (table spacemap
)
397 (let ((table (truly-the hash-table
(translate table spacemap
))))
398 (let ((cells (the simple-vector
(translate (hash-table-pairs table
) spacemap
))))
400 (do ((count (hash-table-%count table
) (1- count
))
404 (pairs (cons (descriptorize (svref cells i
))
405 (descriptorize (svref cells
(1+ i
))))))))))
407 (defmacro package-id
(name) (sb-impl::package-id
(find-package name
)))
409 ;;; Return either the physical or logical address of the specified symbol.
410 (defun %find-target-symbol
(package-id symbol-name spacemap
411 &optional
(address-mode :physical
))
412 (dolist (id `(,immobile-fixedobj-core-space-id
413 ,static-core-space-id
414 ,dynamic-core-space-id
))
415 (binding* ((space (get-space id spacemap
) :exit-if-null
)
416 (start (translate-ptr (space-addr space
) spacemap
))
417 (end (+ start
(space-size space
)))
420 (when (>= physaddr end
) (return))
421 (let* ((word (sap-ref-word (int-sap physaddr
) 0))
423 (if (= (logand word widetag-mask
) filler-widetag
)
424 (ash (ash word -
32) word-shift
)
425 (let ((obj (reconstitute-object (ash physaddr
(- n-fixnum-tag-bits
)))))
426 (when (and (symbolp obj
)
427 (string= symbol-name
(translate (symbol-name obj
) spacemap
))
428 (= (symbol-package-id obj
) package-id
))
429 (return-from %find-target-symbol
431 (logior (ecase address-mode
433 (:logical
(+ (space-addr space
) (- physaddr start
))))
434 other-pointer-lowtag
))))
435 (primitive-object-size obj
)))))
436 (incf physaddr size
))))))
437 (defun find-target-symbol (package-id symbol-name spacemap
&optional
(address-mode :physical
))
438 (or (%find-target-symbol package-id symbol-name spacemap address-mode
)
439 (bug "Can't find symbol ~A::~A" package-id symbol-name
)))
441 (defparameter label-prefix
(if (member :darwin
*features
*) "_" ""))
442 (defun labelize (x) (concatenate 'string label-prefix x
))
444 (defun compute-linkage-symbols (spacemap)
445 (let* ((linkage-info (symbol-global-value
446 (find-target-symbol (package-id "SB-SYS") "*LINKAGE-INFO*"
447 spacemap
:physical
)))
448 (hashtable (car (translate linkage-info spacemap
)))
449 (pairs (target-hash-table-alist hashtable spacemap
))
450 (min (reduce #'min pairs
:key
#'cdr
))
451 (max (reduce #'max pairs
:key
#'cdr
))
453 (vector (make-array n
)))
454 (dolist (entry pairs vector
)
455 (let* ((key (undescriptorize (car entry
)))
456 (entry-index (- (cdr entry
) min
))
457 (string (labelize (translate (if (consp key
) (car (translate key spacemap
)) key
)
459 (setf (aref vector entry-index
)
460 (if (consp key
) (list string
) string
))))))
462 (defun make-core (spacemap code-bounds fixedobj-bounds
&optional enable-pie
)
463 (let* ((linkage-bounds
464 (let ((text-space (get-space immobile-text-core-space-id spacemap
)))
466 (let ((text-addr (space-addr text-space
)))
467 (make-bounds (- text-addr alien-linkage-table-space-size
) text-addr
))
471 (find-target-symbol (package-id "SB-VM") "ALIEN-LINKAGE-TABLE-ENTRY-SIZE"
472 spacemap
:physical
)))
473 (linkage-symbols (compute-linkage-symbols spacemap
))
474 (nil-object (compute-nil-object spacemap
))
475 (ambiguous-symbols (make-hash-table :test
'equal
))
479 :nil-object nil-object
480 :nonunique-symbol-names ambiguous-symbols
481 :code-bounds code-bounds
482 :fixedobj-bounds fixedobj-bounds
483 :linkage-bounds linkage-bounds
484 :linkage-entry-size linkage-entry-size
485 :linkage-symbols linkage-symbols
486 :linkage-symbol-usedp
(make-array (length linkage-symbols
) :element-type
'bit
488 :enable-pie enable-pie
)))
491 (find-target-symbol (package-id "SB-IMPL") "*ALL-PACKAGES*" spacemap
:physical
)))
493 (symbols (make-hash-table :test
'equal
)))
494 (labels ((scan-symtbl (table)
497 (pushnew (get-lisp-obj-address sym
) (gethash str symbols
)))
500 (let ((package (truly-the package
(translate x spacemap
))))
501 ;; a package can appear in *ALL-PACKAGES* under each of its nicknames
502 (unless (assoc (sb-impl::package-id package
) package-alist
)
503 (push (cons (sb-impl::package-id package
) package
) package-alist
)
504 (scan-symtbl (package-external-symbols package
))
505 (scan-symtbl (package-internal-symbols package
))))))
506 (dovector (x (translate package-table spacemap
))
507 (cond ((%instancep x
) (scan-package x
))
508 ((listp x
) (loop (if (eq x nil-object
) (return))
509 (setq x
(translate x spacemap
))
510 (scan-package (car x
))
511 (setq x
(cdr x
)))))))
512 (let ((package-by-id (make-array (1+ (reduce #'max package-alist
:key
#'car
))
513 :initial-element nil
)))
514 (loop for
(id . package
) in package-alist
515 do
(setf (aref package-by-id id
) package
))
516 (setf (core-pkg-id->package core
) package-by-id
))
517 (dohash ((string symbols
) symbols
)
519 (setf (gethash string ambiguous-symbols
) t
))))
522 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT units
523 ;;; (bytes or qwords) to STREAM. SIZE specifies which direcive to emit.
524 ;;; EXCEPTIONS specify offsets at which a specific string should be
525 ;;; written to the file in lieu of memory contents, useful for emitting
526 ;;; expressions involving the assembler '.' symbol (the current PC).
527 (defun emit-asm-directives (size sap count stream
&optional exceptions
)
528 (declare (optimize speed
))
529 (declare (stream stream
))
530 (let ((*print-base
* 16)
531 (string-buffer (make-array 18 :element-type
'base-char
))
532 (fmt #.
(coerce "0x%lx" 'base-string
))
534 (declare ((integer 0 32) per-line
)
539 (format stream
" .quad")
541 (declare ((unsigned-byte 20) i
))
542 (declare (simple-vector exceptions
))
543 (write-char (if (> per-line
0) #\
, #\space
) stream
)
544 (acond ((and (< i
(length exceptions
)) (aref exceptions i
))
545 (write-string it stream
))
547 (write-string "0x" stream
)
548 (write (sap-ref-word sap
(* i n-word-bytes
)) :stream stream
)))
549 (when (and (= (incf per-line
) 16) (< (1+ i
) count
))
550 (format stream
"~% .quad")
553 (aver (not exceptions
))
554 (format stream
" .byte")
556 (write-char (if (> per-line
0) #\
, #\space
) stream
)
557 (write-string "0x" stream
)
558 (write (sap-ref-8 sap i
) :stream stream
)
559 (when (and (= (incf per-line
) 32) (< (1+ i
) count
))
560 (format stream
"~% .byte")
561 (setq per-line
0))))))
564 (defun code-fixup-locs (code spacemap
)
565 (let ((locs (sb-vm::%code-fixups code
)))
566 ;; Return only the absolute fixups
567 ;; Ensure that a bignum LOCS is translated before using it.
568 (values (sb-c::unpack-code-fixup-locs
569 (if (fixnump locs
) locs
(translate locs spacemap
))))))
572 (defun list-textual-instructions (sap length core load-addr emit-cfi
)
573 (declare (ignore emit-cfi
))
574 (let ((dstate (core-dstate core
))
575 (spaces (core-spacemap core
))
576 (seg (core-seg core
))
578 (inst-ldr-reg (load-time-value (find-inst #xF940002A
(get-inst-space))))
579 (inst-bl (load-time-value (find-inst #x97EC8AEB
(get-inst-space))))
580 (inst-b (load-time-value (find-inst #x17FFFFE4
(get-inst-space))))
581 (inst-adrp (load-time-value (find-inst #xB0FFA560
(get-inst-space))))
582 (inst-add (load-time-value (find-inst #x91003F7C
(get-inst-space))))
584 (setf (seg-virtual-location seg
) load-addr
585 (seg-length seg
) length
586 (seg-sap-maker seg
) (lambda () sap
))
587 (map-segment-instructions
588 (lambda (dchunk inst
)
591 (let ((reg (ldb (byte 5 0) dchunk
))
592 (next-dchunk (sb-arm64-asm::current-instruction dstate
4))
593 (current-page (ash (dstate-cur-addr dstate
) -
12))
595 (sign-extend (+ (ldb (byte 2 29) dchunk
)
596 (ash (ldb (byte 19 5) dchunk
) 2))
598 (cond ((and (eq inst-add
599 (find-inst next-dchunk
(load-time-value (get-inst-space))))
600 (= reg
(ldb (byte 5 0) next-dchunk
))
601 (= reg
(ldb (byte 5 5) next-dchunk
)))
602 ;; Rewrite any ADRP, ADD sequences which compute addresses
603 ;; into the linkage table into references into the GOT.
604 (let ((target-addr (+ (ash (+ current-page page-displacement
) 12)
605 (ldb (byte 12 10) next-dchunk
))))
606 (when (or (in-bounds-p target-addr
(core-fixedobj-bounds core
))
607 (in-bounds-p target-addr
(core-linkage-bounds core
)))
608 (push (list (dstate-cur-offs dstate
)
612 (format nil
"x~d" reg
))
614 (push (list (+ 4 (dstate-cur-offs dstate
))
618 (format nil
"x~d" reg
))
620 ((and (eq inst-ldr-reg
621 (find-inst next-dchunk
(load-time-value (get-inst-space))))
622 (= reg
(ldb (byte 5 0) next-dchunk
))
623 (= reg
(ldb (byte 5 5) next-dchunk
)))
624 ;; Rewrite any ADRP, LDR sequences which load
625 ;; foreign-dataref addresses into the linkage table
626 ;; into references into the GOT.
627 (let ((target-addr (+ (ash (+ current-page page-displacement
) 12)
628 (ash (ldb (byte 12 10) next-dchunk
) word-shift
))))
629 (when (or (in-bounds-p target-addr
(core-fixedobj-bounds core
))
630 (in-bounds-p target-addr
(core-linkage-bounds core
)))
631 (push (list (dstate-cur-offs dstate
)
635 (format nil
"x~d" reg
))
637 (push (list (+ 4 (dstate-cur-offs dstate
))
641 (format nil
"x~d" reg
))
643 ((or (eq inst inst-bl
) (eq inst inst-b
))
644 ;; Rewrite any BLs which jump to the trampoline in linkage
645 ;; space to instead jump directly to the alien function in
647 (let ((target-addr (+ (dstate-cur-addr dstate
)
648 (* 4 (sign-extend (ldb (byte 26 0) dchunk
) 26)))))
649 (when (or (in-bounds-p target-addr
(core-fixedobj-bounds core
))
650 (in-bounds-p target-addr
(core-linkage-bounds core
)))
651 (push (list* (dstate-cur-offs dstate
)
653 (if (eq inst inst-bl
) "bl" "b")
661 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
662 ;;; all instructions that should be emitted using assembly language
663 ;;; instead of .quad and/or .byte directives.
664 ;;; This includes (at least) two categories of instructions:
665 ;;; - function prologue instructions that setup the call frame
666 ;;; - jmp/call instructions that transfer control to the fixedoj space
667 ;;; delimited by bounds in STATE.
668 ;;; At execution time the function will have virtual address LOAD-ADDR.
670 (defun list-textual-instructions (sap length core load-addr emit-cfi
)
671 (let ((dstate (core-dstate core
))
672 (seg (core-seg core
))
674 (or (car (core-fixup-addrs core
)) most-positive-word
))
676 (inst-call (load-time-value (find-inst #b11101000
(get-inst-space))))
677 (inst-jmp (load-time-value (find-inst #b11101001
(get-inst-space))))
678 (inst-jmpz (load-time-value (find-inst #x840f
(get-inst-space))))
679 (inst-pop (load-time-value (find-inst #x5d
(get-inst-space))))
680 (inst-mov (load-time-value (find-inst #x8B
(get-inst-space))))
681 (inst-lea (load-time-value (find-inst #x8D
(get-inst-space)))))
682 (setf (seg-virtual-location seg
) load-addr
683 (seg-length seg
) length
684 (seg-sap-maker seg
) (lambda () sap
))
685 ;; KLUDGE: "8f 45 08" is the standard prologue
686 (when (and emit-cfi
(= (logand (sap-ref-32 sap
0) #xFFFFFF
) #x08458f
))
687 (push (list* 0 3 "pop" "8(%rbp)") list
))
688 (map-segment-instructions
689 (lambda (dchunk inst
)
691 ((< next-fixup-addr
(dstate-next-addr dstate
))
692 (let ((operand (sap-ref-32 sap
(- next-fixup-addr load-addr
)))
693 (offs (dstate-cur-offs dstate
)))
694 (when (in-bounds-p operand
(core-code-bounds core
))
696 ((and (eq (inst-name inst
) 'mov
) ; match "mov eax, imm32"
697 (eql (sap-ref-8 sap offs
) #xB8
))
698 (let ((text (format nil
"mov $(CS+0x~x),%eax"
699 (- operand
(bounds-low (core-code-bounds core
))))))
700 (push (list* (dstate-cur-offs dstate
) 5 "mov" text
) list
)))
701 ((and (eq (inst-name inst
) 'mov
) ; match "mov qword ptr [R+disp8], imm32"
702 (member (sap-ref-8 sap
(1- offs
)) '(#x48
#x49
)) ; REX.w and maybe REX.b
703 (eql (sap-ref-8 sap offs
) #xC7
)
704 ;; modRegRm = #b01 #b000 #b___
705 (eql (logand (sap-ref-8 sap
(1+ offs
)) #o370
) #o100
))
706 (let* ((reg (ldb (byte 3 0) (sap-ref-8 sap
(1+ offs
))))
707 (text (format nil
"movq $(CS+0x~x),~d(%~a)"
708 (- operand
(bounds-low (core-code-bounds core
)))
709 (signed-sap-ref-8 sap
(+ offs
2))
710 (reg-name (get-gpr :qword reg
)))))
711 (push (list* (1- (dstate-cur-offs dstate
)) 8 "mov" text
) list
)))
712 ((let ((bytes (ldb (byte 24 0) (sap-ref-32 sap offs
))))
713 (or (and (eq (inst-name inst
) 'call
) ; match "{call,jmp} qword ptr [addr]"
714 (eql bytes
#x2514FF
)) ; ModRM+SIB encodes disp32, no base, no index
715 (and (eq (inst-name inst
) 'jmp
)
716 (eql bytes
#x2524FF
))))
717 (let ((new-opcode (ecase (sap-ref-8 sap
(1+ offs
))
720 ;; This instruction form is employed for asm routines when
721 ;; compile-to-memory-space is :AUTO. If the code were to be loaded
722 ;; into dynamic space, the offset to the called routine isn't
723 ;; a (signed-byte 32), so we need the indirection.
724 (push (list* (dstate-cur-offs dstate
) 7 new-opcode operand
) list
)))
726 (bug "Can't reverse-engineer fixup: ~s ~x"
727 (inst-name inst
) (sap-ref-64 sap offs
))))))
728 (pop (core-fixup-addrs core
))
729 (setq next-fixup-addr
(or (car (core-fixup-addrs core
)) most-positive-word
)))
730 ((or (eq inst inst-jmp
) (eq inst inst-call
))
731 (let ((target-addr (+ (near-jump-displacement dchunk dstate
)
732 (dstate-next-addr dstate
))))
733 (when (or (in-bounds-p target-addr
(core-fixedobj-bounds core
))
734 (in-bounds-p target-addr
(core-linkage-bounds core
)))
735 (push (list* (dstate-cur-offs dstate
)
737 (if (eq inst inst-call
) "call" "jmp")
741 (let ((target-addr (+ (near-cond-jump-displacement dchunk dstate
)
742 (dstate-next-addr dstate
))))
743 (when (in-bounds-p target-addr
(core-linkage-bounds core
))
744 (push (list* (dstate-cur-offs dstate
) 6 "je" target-addr
)
746 ((and (or (and (eq inst inst-mov
)
747 (eql (sap-ref-8 sap
(dstate-cur-offs dstate
)) #x8B
))
749 (let ((modrm (sap-ref-8 sap
(1+ (dstate-cur-offs dstate
)))))
750 (= (logand modrm
#b11000111
) #b00000101
)) ; RIP-relative mode
751 (in-bounds-p (+ (signed-sap-ref-32 sap
(+ (dstate-cur-offs dstate
) 2))
752 (dstate-next-addr dstate
))
753 (core-linkage-bounds core
)))
754 (let* ((abs-addr (+ (signed-sap-ref-32 sap
(+ (dstate-cur-offs dstate
) 2))
755 (dstate-next-addr dstate
)))
756 (reg (logior (ldb (byte 3 3) (sap-ref-8 sap
(1+ (dstate-cur-offs dstate
))))
757 (if (logtest (sb-disassem::dstate-inst-properties dstate
)
760 (op (if (eq inst inst-lea
) "lea" "mov-gotpcrel"))
761 (args (list abs-addr
(reg-name (get-gpr :qword reg
)))))
762 (push (list* (1- (dstate-cur-offs dstate
)) 7 op args
) list
)))
763 ((and (eq inst inst-pop
) (eq (logand dchunk
#xFF
) #x5D
))
764 (push (list* (dstate-cur-offs dstate
) 1 "pop" "%rbp") list
))))
770 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
771 ;;; of memory at PADDR (physical addr) to STREAM.
772 ;;; The function's address as per the core file is VADDR.
773 ;;; (Its eventual address is indeterminate)
774 ;;; If EMIT-CFI is true, then also emit cfi directives.
776 ;;; Notice that we can use one fewer cfi directive than usual because
777 ;;; Lisp always carries a frame pointer as set up by the caller.
782 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
783 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
785 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
789 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
790 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
791 ;;; .cfi_def_cfa 6, 16
792 ;;; .cfi_offset 6, -16
794 ;;; Of course there is a flip-side to this: unwinders think that the new frame
795 ;;; is already begun in the caller. Interruption between these two instructions:
796 ;;; MOV RBP, RSP / CALL #xzzzzz
797 ;;; will show the backtrace as if two invocations of the caller are on stack.
798 ;;; This is tricky to fix because while we can relativize the CFA to the
799 ;;; known frame size, we can't do that based only on a disassembly.
801 ;;; Return the list of locations which must be added to code-fixups
802 ;;; in the event that heap relocation occurs on image restart.
803 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core
&optional labels
)
805 (format stream
" .cfi_startproc~%"))
806 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
807 ;; bytes to be written as an assembly language instruction rather than opaquely,
808 ;; thereby affecting the ELF data (cfi or relocs) produced.
811 (list-textual-instructions (int-sap paddr
) count core vaddr emit-cfi
)
815 (symbol-macrolet ((cur-offset (- ptr paddr
)))
817 (let ((until (if instructions
(caar instructions
) count
)))
818 ;; if we're not aligned, then write some number of bytes
819 ;; to cause alignment. But do not write past the next offset
820 ;; that needs to be written as an instruction.
821 (when (logtest ptr
#x7
) ; unaligned
822 (let ((n (min (- (nth-value 1 (ceiling ptr
8)))
823 (- until cur-offset
))))
825 (emit-asm-directives :byte
(int-sap ptr
) n stream
)
827 ;; Now we're either aligned to a multiple of 8, or the current
828 ;; offset needs to be written as a textual instruction.
829 (let ((n (- until cur-offset
)))
831 (multiple-value-bind (qwords remainder
) (floor n
8)
833 (emit-asm-directives :qword
(int-sap ptr
) qwords stream
#())
834 (incf ptr
(* qwords
8)))
835 (when (plusp remainder
)
836 (emit-asm-directives :byte
(int-sap ptr
) remainder stream
)
837 (incf ptr remainder
))))
838 ;; If the current offset is COUNT, we're done.
839 (when (= cur-offset count
) (return))
840 (aver (= cur-offset until
))
841 ;; A label and a textual instruction could co-occur.
842 ;; If so, the label must be emitted first.
843 (when (eq (cadar instructions
) :label
)
844 (destructuring-bind (c-symbol globalp
) (cddr (pop instructions
))
845 ;; The C symbol is global only if the Lisp name is a legal function
846 ;; designator and not random noise.
847 ;; This is a technique to try to avoid appending a uniquifying suffix
848 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
850 (format stream
"~:[~; .globl ~a~:*~%~] .type ~a, @function~%"
852 (format stream
"~a:~%" c-symbol
)))
853 ;; If both a label and textual instruction occur here, handle the latter.
854 ;; [This could could be simpler if all labels were emitted as
855 ;; '.set "thing", .+const' together in a single place, but it's more readable
856 ;; to see them where they belong in the instruction stream]
857 (when (and instructions
(= (caar instructions
) cur-offset
))
858 (destructuring-bind (length opcode . operand
) (cdr (pop instructions
))
859 (when (cond ((member opcode
#+arm64
'("bl" "b")
860 #+x86-64
'("jmp" "je" "call")
862 (when (in-bounds-p operand
(core-linkage-bounds core
))
864 (/ (- operand
(bounds-low (core-linkage-bounds core
)))
865 (core-linkage-entry-size core
))))
866 (setf (bit (core-linkage-symbol-usedp core
) entry-index
) 1
867 operand
(aref (core-linkage-symbols core
) entry-index
))))
868 (when (and (integerp operand
)
869 (in-bounds-p operand
(core-fixedobj-bounds core
)))
870 (push (+ vaddr cur-offset
) extra-fixup-locs
))
871 (format stream
" ~A ~:[0x~X~;~a~:[~;@PLT~]~]~%"
872 opcode
(stringp operand
) operand
874 (core-enable-pie core
)
875 #+arm64 nil
; arm64 doesn't need the extra @PLT
877 ((or #+x86-64
(string= opcode
"mov-gotpcrel")
878 #+arm64
(string= opcode
"ldr-gotpcrel")
879 #+arm64
(string= opcode
"adrp-gotpcrel"))
881 (/ (- (car operand
) (bounds-low (core-linkage-bounds core
)))
882 (core-linkage-entry-size core
)))
883 (c-symbol (let ((thing (aref (core-linkage-symbols core
) entry-index
)))
884 (if (consp thing
) (car thing
) thing
))))
885 (setf (bit (core-linkage-symbol-usedp core
) entry-index
) 1)
887 (format stream
" mov ~A@GOTPCREL(%rip), %~(~A~)~%" c-symbol
(cadr operand
))
889 (cond ((string= opcode
"adrp-gotpcrel")
890 (format stream
" adrp ~A,:got:~A~%" (cadr operand
) c-symbol
))
891 ((string= opcode
"ldr-gotpcrel")
892 (format stream
" ldr ~A, [~A, #:got_lo12:~A]~%"
896 (t (error "unreachable")))))
898 ((string= opcode
"lea") ; lea becomes "mov" with gotpcrel as src, which becomes lea
900 (/ (- (car operand
) (bounds-low (core-linkage-bounds core
)))
901 (core-linkage-entry-size core
)))
902 (c-symbol (aref (core-linkage-symbols core
) entry-index
)))
903 (setf (bit (core-linkage-symbol-usedp core
) entry-index
) 1)
904 (format stream
" mov ~A@GOTPCREL(%rip), %~(~A~)~%" c-symbol
(cadr operand
))))
906 ((string= opcode
"pop")
907 (format stream
" ~A ~A~%" opcode operand
)
908 (cond ((string= operand
"8(%rbp)")
909 (format stream
" .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
910 ((string= operand
"%rbp")
911 ;(format stream " .cfi_def_cfa 7, 8~%")
915 ((string= opcode
"mov")
916 ;; the so-called "operand" is the entire instruction
917 (write-string operand stream
)
919 ((or (string= opcode
"call *") (string= opcode
"jmp *"))
920 ;; Indirect call - since the code is in immobile space,
921 ;; we could render this as a 2-byte NOP followed by a direct
922 ;; call. For simplicity I'm leaving it exactly as it was.
923 (format stream
" ~A(CS+0x~x)~%"
924 opcode
; contains a "*" as needed for the syntax
925 (- operand
(bounds-low (core-code-bounds core
)))))
927 (bug "Random annotated opcode ~S" opcode
))
929 (when (= cur-offset count
) (return)))))
931 (format stream
" .cfi_endproc~%"))
934 ;;; Examine CODE, returning a list of lists describing how to emit
935 ;;; the contents into the assembly file.
936 ;;; ({:data | :padding} . N) | (start-pc . end-pc)
937 (defun get-text-ranges (code spacemap
)
938 (let ((cdf (translate (sb-c::compiled-debug-info-fun-map
939 (truly-the sb-c
::compiled-debug-info
940 (translate (%code-debug-info code
) spacemap
)))
942 (next-simple-fun-pc-offs (%code-fun-offset code
0))
943 (start-pc (code-n-unboxed-data-bytes code
))
944 (simple-fun-index -
1)
947 (when (plusp start-pc
)
948 (aver (zerop (rem start-pc n-word-bytes
)))
949 (push `(:data .
,(ash start-pc
(- word-shift
))) blobs
))
951 (let* ((next (when (%instancep
(sb-c::compiled-debug-fun-next
952 (truly-the sb-c
::compiled-debug-fun cdf
)))
953 (translate (sb-c::compiled-debug-fun-next
954 (truly-the sb-c
::compiled-debug-fun cdf
))
957 (cdf-offset next spacemap
)
958 (%code-text-size code
))))
960 ((= start-pc end-pc
)) ; crazy shiat. do not add to blobs
961 ((<= start-pc next-simple-fun-pc-offs
(1- end-pc
))
962 (incf simple-fun-index
)
963 (setq simple-fun
(%code-entry-point code simple-fun-index
))
964 (let ((padding (- next-simple-fun-pc-offs start-pc
)))
965 (when (plusp padding
)
966 ;; Assert that SIMPLE-FUN always begins at an entry
967 ;; in the fun-map, and not somewhere in the middle:
968 ;; |<-- fun -->|<-- fun -->|
969 ;; ^- start (GOOD) ^- alleged start (BAD)
970 (cond ((eq simple-fun
(%code-entry-point code
0))
971 (bug "Misaligned fun start"))
972 (t ; sanity-check the length of the filler
973 (aver (< padding
(* 2 n-word-bytes
)))))
974 (push `(:pad .
,padding
) blobs
)
975 (incf start-pc padding
)))
976 (push `(,start-pc .
,end-pc
) blobs
)
977 (setq next-simple-fun-pc-offs
978 (if (< (1+ simple-fun-index
) (code-n-entries code
))
979 (%code-fun-offset code
(1+ simple-fun-index
))
982 (let ((current-blob (car blobs
)))
983 (setf (cdr current-blob
) end-pc
)))) ; extend this blob
985 (return (nreverse blobs
)))
986 (setq cdf next start-pc end-pc
)))))
988 (defun c-symbol-quote (name)
989 (concatenate 'string
'(#\") name
'(#\")))
991 (defun emit-symbols (blobs core pp-state output
&aux base-symbol
)
992 (dolist (blob blobs base-symbol
)
993 (destructuring-bind (name start . end
) blob
994 (let ((c-name (c-name (or name
"anonymous") core pp-state
)))
996 (setq base-symbol c-name
))
997 (format output
" lsym \"~a\", 0x~x, 0x~x~%"
998 c-name start
(- end start
))))))
1000 (defun emit-funs (code vaddr core dumpwords output base-symbol emit-cfi
)
1001 (let* ((spacemap (core-spacemap core
))
1002 (ranges (get-text-ranges code spacemap
))
1003 (text-sap (code-instructions code
))
1004 (text (sap-int text-sap
))
1005 ;; Like CODE-INSTRUCTIONS, but where the text virtually was
1006 (text-vaddr (+ vaddr
(* (code-header-words code
) n-word-bytes
)))
1007 (additional-relative-fixups)
1009 ;; There is *always* at least 1 word of unboxed data now
1010 (aver (eq (caar ranges
) :data
))
1011 (let ((jump-table-size (code-jump-table-words code
))
1012 (total-nwords (cdr (pop ranges
))))
1013 (cond ((> jump-table-size
1)
1014 (format output
"# jump table~%")
1015 (format output
".quad ~d" (sap-ref-word text-sap
0))
1016 (dotimes (i (1- jump-table-size
))
1017 (format output
",\"~a\"+0x~x"
1019 (- (sap-ref-word text-sap
(ash (1+ i
) word-shift
))
1022 (let ((remaining (- total-nwords jump-table-size
)))
1023 (when (plusp remaining
)
1025 (sap+ text-sap
(ash jump-table-size word-shift
))
1026 remaining output
))))
1028 (funcall dumpwords text-sap total-nwords output
))))
1030 (destructuring-bind (start . end
) (pop ranges
)
1032 ;; FIXME: it seems like this should just be reduced to emitting 2 words
1033 ;; now that simple-fun headers don't hold any boxed words.
1034 ;; (generality here is without merit)
1035 (funcall dumpwords
(sap+ text-sap start
) simple-fun-insts-offset output
1036 #(nil #.
(format nil
".+~D" (* (1- simple-fun-insts-offset
)
1038 (incf start
(* simple-fun-insts-offset n-word-bytes
))
1039 ;; Pass the current physical address at which to disassemble,
1040 ;; the notional core address (which changes after linker relocation),
1042 (let ((new-relative-fixups
1043 (emit-lisp-function (+ text start
) (+ text-vaddr start
) (- end start
)
1044 output emit-cfi core
)))
1045 (setq additional-relative-fixups
1046 (nconc new-relative-fixups additional-relative-fixups
)))
1047 (cond ((not ranges
) (return))
1048 ((eq (caar ranges
) :pad
)
1049 (format output
" .byte ~{0x~x~^,~}~%"
1050 (loop for i from
0 below
(cdr (pop ranges
))
1051 collect
(sap-ref-8 text-sap
(+ end i
))))))))
1052 ;; All fixups should have been consumed by writing out the text.
1053 (aver (null (core-fixup-addrs core
)))
1054 ;; Emit bytes from the maximum function end to the object end.
1055 ;; We can't just round up %CODE-CODE-SIZE to a double-lispword
1056 ;; because the boxed header could end at an odd word, requiring that
1057 ;; the unboxed bytes have an odd size in words making the total even.
1058 (format output
" .byte ~{0x~x~^,~}~%"
1059 (loop for i from max-end
1060 below
(- (code-object-size code
)
1061 (* (code-header-words code
) n-word-bytes
))
1062 collect
(sap-ref-8 text-sap i
)))
1063 (when additional-relative-fixups
1064 (binding* ((existing-fixups (sb-vm::%code-fixups code
))
1065 ((absolute relative immediate
)
1066 (sb-c::unpack-code-fixup-locs
1067 (if (fixnump existing-fixups
)
1069 (translate existing-fixups spacemap
))))
1071 (sort (mapcar (lambda (x)
1072 ;; compute offset of the fixup from CODE-INSTRUCTIONS.
1073 ;; X is the location of the CALL instruction,
1074 ;; 1+ is the location of the fixup.
1076 (+ vaddr
(ash (code-header-words code
)
1078 additional-relative-fixups
)
1080 (sb-c:pack-code-fixup-locs
1081 absolute
(merge 'list relative new-sorted
#'<) immediate
)))))
1083 (defconstant +gf-name-slot
+ 5)
1085 (defun output-bignum (label bignum stream
)
1086 (let ((nwords (sb-bignum:%bignum-length bignum
)))
1087 (format stream
"~@[~a:~] .quad 0x~x"
1088 label
(logior (ash nwords
8) bignum-widetag
))
1090 (format stream
",0x~x" (sb-bignum:%bignum-ref bignum i
)))
1091 (when (evenp nwords
) ; pad
1092 (format stream
",0"))
1093 (format stream
"~%")))
1095 (defconstant core-align
#+x86-64
4096 #+arm64
65536)
1097 (defun write-preamble (output)
1098 (format output
" .text~% .file \"sbcl.core\"
1099 ~:[~; .macro .size sym size # ignore
1101 .macro .type sym type # ignore
1103 .macro lasmsym name size
1105 .size \"\\name\", \\size
1106 .type \"\\name\", function
1108 .macro lsym name start size
1109 .set \"\\name\", . + \\start
1110 .size \"\\name\", \\size
1111 .type \"\\name\", function
1113 .globl ~alisp_code_start, ~alisp_jit_code, ~alisp_code_end
1114 .balign ~a~%~alisp_code_start:~%CS: # code space~%"
1115 (member :darwin
*features
*)
1116 label-prefix label-prefix label-prefix
1120 (defun %widetag-of
(word) (logand word widetag-mask
))
1122 (defun make-code-obj (addr spacemap
)
1123 (let ((translation (translate-ptr addr spacemap
)))
1124 (aver (= (%widetag-of
(sap-ref-word (int-sap translation
) 0))
1125 code-header-widetag
))
1126 (%make-lisp-obj
(logior translation other-pointer-lowtag
))))
1128 (defun output-lisp-asm-routines (core spacemap code-addr output
&aux
(skip 0))
1129 (write-preamble output
)
1131 (let* ((paddr (int-sap (translate-ptr code-addr spacemap
)))
1132 (word (sap-ref-word paddr
0)))
1133 ;; After running the converter which moves dynamic-space code to text space,
1134 ;; the text space starts with an array of uint32 for the offsets to each object
1135 ;; and an array of uint64 containing some JMP instructions.
1136 (unless (or (= (%widetag-of word
) simple-array-unsigned-byte-32-widetag
)
1137 (= (%widetag-of word
) simple-array-unsigned-byte-64-widetag
))
1139 (let* ((array (%make-lisp-obj
(logior (sap-int paddr
) other-pointer-lowtag
)))
1140 (size (primitive-object-size array
))
1141 (nwords (ash size
(- word-shift
))))
1143 (format output
"~A 0x~x"
1145 (0 #.
(format nil
"~% .quad"))
1147 (sap-ref-word paddr
(ash i word-shift
))))
1150 (incf code-addr size
))))
1151 (let* ((code-component (make-code-obj code-addr spacemap
))
1152 (obj-sap (int-sap (- (get-lisp-obj-address code-component
)
1153 other-pointer-lowtag
)))
1154 (header-len (code-header-words code-component
))
1155 (jump-table-count (sap-ref-word (code-instructions code-component
) 0)))
1156 ;; Write the code component header
1157 (format output
"lar: # lisp assembly routines~%")
1158 (emit-asm-directives :qword obj-sap header-len output
#())
1159 ;; Write the jump table
1160 (format output
" .quad ~D" jump-table-count
)
1161 (dotimes (i (1- jump-table-count
))
1162 (format output
",lar+0x~x"
1163 (- (sap-ref-word (code-instructions code-component
)
1164 (ash (1+ i
) word-shift
))
1168 ;; the CDR of each alist item is a target cons (needing translation)
1170 (mapcar (lambda (entry &aux
(name (translate (undescriptorize (car entry
)) spacemap
)) ; symbol
1171 ;; VAL is (start end . index)
1172 (val (translate (undescriptorize (cdr entry
)) spacemap
))
1174 (end (car (translate (cdr val
) spacemap
))))
1175 (list* (translate (symbol-name name
) spacemap
) start end
))
1176 (target-hash-table-alist (%code-debug-info code-component
) spacemap
))
1178 ;; Possibly unboxed words and/or padding
1179 (let ((here (ash jump-table-count word-shift
))
1180 (first-entry-point (cadar name-
>addr
)))
1181 (when (> first-entry-point here
)
1182 (format output
" .quad ~{0x~x~^,~}~%"
1183 (loop for offs
= here then
(+ offs
8)
1184 while
(< offs first-entry-point
)
1185 collect
(sap-ref-word (code-instructions code-component
) offs
)))))
1186 ;; Loop over the embedded routines
1187 (let ((list name-
>addr
)
1188 (obj-size (code-object-size code-component
)))
1190 (destructuring-bind (name start-offs . end-offs
) (pop list
)
1191 (let ((nbytes (- (if (endp list
)
1192 (- obj-size
(* header-len n-word-bytes
))
1195 (format output
" lasmsym ~(\"~a\"~), ~d~%" name nbytes
)
1198 (+ (sap-int (code-instructions code-component
))
1201 (ash (code-header-words code-component
) word-shift
)
1203 nbytes output nil core
)))
1204 (aver (null fixups
)))))
1205 (when (endp list
) (return)))
1206 (format output
"~%# end of lisp asm routines~2%")
1207 (+ skip obj-size
)))))
1209 ;;; Convert immobile text space to an assembly file in OUTPUT.
1210 (defun write-assembler-text
1212 &optional enable-pie
(emit-cfi t
)
1213 &aux
(code-bounds (space-bounds immobile-text-core-space-id spacemap
))
1214 (fixedobj-bounds (space-bounds immobile-fixedobj-core-space-id spacemap
))
1215 (core (make-core spacemap code-bounds fixedobj-bounds enable-pie
))
1216 (code-addr (bounds-low code-bounds
))
1218 (pp-state (cons (make-hash-table :test
'equal
) nil
))
1219 (prev-namestring "")
1221 (temp-output (make-string-output-stream :element-type
'base-char
))
1223 (labels ((dumpwords (sap count stream
&optional
(exceptions #()) logical-addr
)
1224 (aver (sap>= sap
(car spacemap
)))
1225 ;; Add any new "header exceptions" that cause intra-code-space pointers
1226 ;; to be computed at link time
1227 (dotimes (i (if logical-addr count
0))
1228 (unless (and (< i
(length exceptions
)) (svref exceptions i
))
1229 (let ((word (sap-ref-word sap
(* i n-word-bytes
))))
1230 (when (and (= (logand word
3) 3) ; is a pointer
1231 (in-bounds-p word code-bounds
)) ; to code space
1233 (format t
"~&~(~x: ~x~)~%" (+ logical-addr
(* i n-word-bytes
))
1235 (incf n-linker-relocs
)
1236 (setf exceptions
(adjust-array exceptions
(max (length exceptions
) (1+ i
))
1237 :initial-element nil
)
1238 (svref exceptions i
)
1239 (format nil
"CS+0x~x"
1240 (- word
(bounds-low code-bounds
))))))))
1241 (emit-asm-directives :qword sap count stream exceptions
)))
1243 (let ((skip (output-lisp-asm-routines core spacemap code-addr output
)))
1244 (incf code-addr skip
)
1245 (incf total-code-size skip
))
1248 (when (>= code-addr
(bounds-high code-bounds
))
1249 (setq end-loc code-addr
)
1251 (ecase (%widetag-of
(sap-ref-word (int-sap (translate-ptr code-addr spacemap
)) 0))
1252 (#.code-header-widetag
1253 (let* ((code (make-code-obj code-addr spacemap
))
1254 (objsize (code-object-size code
)))
1255 (incf total-code-size objsize
)
1257 ((%instancep
(%code-debug-info code
)) ; assume it's a COMPILED-DEBUG-INFO
1258 (aver (plusp (code-n-entries code
)))
1260 (sb-c::compiled-debug-info-source
1261 (truly-the sb-c
::compiled-debug-info
1262 (translate (%code-debug-info code
) spacemap
))))
1264 (debug-source-namestring
1265 (truly-the sb-c
::debug-source
(translate source spacemap
)))))
1266 (setq namestring
(if (eq namestring
(core-nil-object core
))
1268 (translate namestring spacemap
)))
1269 (unless (string= namestring prev-namestring
)
1270 (format output
" .file \"~a\"~%" namestring
)
1271 (setq prev-namestring namestring
)))
1272 (setf (core-fixup-addrs core
)
1274 (+ code-addr
(ash (code-header-words code
) word-shift
) x
))
1275 (code-fixup-locs code spacemap
)))
1276 (let ((code-physaddr (logandc2 (get-lisp-obj-address code
) lowtag-mask
)))
1277 (format output
"#x~x:~%" code-addr
)
1278 ;; Emit symbols before the code header data, because the symbols
1279 ;; refer to "." (the current PC) which is the base of the object.
1280 (let* ((base (emit-symbols (code-symbols code core
) core pp-state output
))
1282 (emit-funs code code-addr core
#'dumpwords temp-output base emit-cfi
))
1283 (header-exceptions (vector nil nil nil nil
))
1285 (when altered-fixups
1286 (setf (aref header-exceptions sb-vm
:code-fixups-slot
)
1287 (cond ((fixnump altered-fixups
)
1288 (format nil
"0x~x" (ash altered-fixups n-fixnum-tag-bits
)))
1290 (let ((ht (core-new-fixups core
)))
1291 (setq fixups-ptr
(gethash altered-fixups ht
))
1293 (setq fixups-ptr
(ash (core-new-fixup-words-used core
)
1295 (setf (gethash altered-fixups ht
) fixups-ptr
)
1296 (incf (core-new-fixup-words-used core
)
1297 (align-up (1+ (sb-bignum:%bignum-length altered-fixups
)) 2))))
1298 ;; tag the pointer properly for a bignum
1299 (format nil
"lisp_fixups+0x~x"
1300 (logior fixups-ptr other-pointer-lowtag
))))))
1301 (dumpwords (int-sap code-physaddr
)
1302 (code-header-words code
) output header-exceptions code-addr
)
1303 (write-string (get-output-stream-string temp-output
) output
))))
1305 (error "Strange code component: ~S" code
)))
1306 (incf code-addr objsize
)))
1308 (let* ((word (sap-ref-word (int-sap (translate-ptr code-addr spacemap
)) 0))
1309 (nwords (ash word -
32))
1310 (nbytes (* nwords n-word-bytes
)))
1311 (format output
" .quad 0x~x~% .fill ~d~%" word
(- nbytes n-word-bytes
))
1312 (incf code-addr nbytes
)))
1313 ;; This is a trailing array which contains a jump instruction for each
1314 ;; element of *C-LINKAGE-REDIRECTS* (see "rewrite-asmcalls.lisp").
1315 (#.simple-array-unsigned-byte-64-widetag
1316 (let* ((paddr (translate-ptr code-addr spacemap
))
1317 (array (%make-lisp-obj
(logior paddr other-pointer-lowtag
)))
1318 (nwords (+ vector-data-offset
(align-up (length array
) 2))))
1319 (format output
"# alien linkage redirects:~% .quad")
1320 (dotimes (i nwords
(terpri output
))
1321 (format output
"~a0x~x" (if (= i
0) " " ",")
1322 (sap-ref-word (int-sap paddr
) (ash i word-shift
))))
1323 (incf code-addr
(ash nwords word-shift
))
1324 (setq end-loc code-addr
)
1327 ;; coreparse uses the 'lisp_jit_code' symbol to set text_space_highwatermark
1328 ;; The intent is that compilation to memory can use this reserved area
1329 ;; (if space remains) so that profilers can associate a C symbol with the
1330 ;; program counter range. It's better than nothing.
1331 (format output
"~a:~%" (labelize "lisp_jit_code"))
1333 ;; Pad so that non-lisp code can't be colocated on a GC page.
1334 ;; (Lack of Lisp object headers in C code is the issue)
1335 (let ((aligned-end (align-up end-loc core-align
)))
1336 (when (> aligned-end end-loc
)
1337 (multiple-value-bind (nwords remainder
)
1338 (floor (- aligned-end end-loc
) n-word-bytes
)
1339 (aver (>= nwords
2))
1340 (aver (zerop remainder
))
1342 (format output
" .quad 0x~x, ~d # (simple-array fixnum (~d))~%"
1343 simple-array-fixnum-widetag
1344 (ash nwords n-fixnum-tag-bits
)
1346 (when (plusp nwords
)
1347 (format output
" .fill ~d~%" (* nwords n-word-bytes
))))))
1348 ;; Extend with 1 MB of filler
1349 (format output
" .fill ~D~%~alisp_code_end:
1350 .size lisp_jit_code, .-lisp_jit_code~%"
1351 (* 1024 1024) label-prefix
)
1352 (values core total-code-size n-linker-relocs
))
1356 (defconstant +sht-null
+ 0)
1357 (defconstant +sht-progbits
+ 1)
1358 (defconstant +sht-symtab
+ 2)
1359 (defconstant +sht-strtab
+ 3)
1360 (defconstant +sht-rela
+ 4)
1361 (defconstant +sht-rel
+ 9)
1363 (define-alien-type elf64-ehdr
1365 (ident (array unsigned-char
16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
1366 (type (unsigned 16)) ; 1 0
1367 (machine (unsigned 16)) ; 3E 0
1368 (version (unsigned 32)) ; 1 0 0 0
1369 (entry unsigned
) ; 0 0 0 0 0 0 0 0
1370 (phoff unsigned
) ; 0 0 0 0 0 0 0 0
1372 (flags (unsigned 32)) ; 0 0 0 0
1373 (ehsize (unsigned 16)) ; 40 0
1374 (phentsize (unsigned 16)) ; 0 0
1375 (phnum (unsigned 16)) ; 0 0
1376 (shentsize (unsigned 16)) ; 40 0
1377 (shnum (unsigned 16)) ; n 0
1378 (shstrndx (unsigned 16)))) ; n 0
1379 (defconstant ehdr-size
(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil
)) 8))
1380 (define-alien-type elf64-shdr
1382 (name (unsigned 32))
1383 (type (unsigned 32))
1384 (flags (unsigned 64))
1385 (addr (unsigned 64))
1387 (size (unsigned 64))
1388 (link (unsigned 32))
1389 (info (unsigned 32))
1390 (addralign (unsigned 64))
1391 (entsize (unsigned 64))))
1392 (defconstant shdr-size
(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil
)) 8))
1393 (define-alien-type elf64-sym
1395 (name (unsigned 32))
1397 (other (unsigned 8))
1398 (shndx (unsigned 16))
1401 (define-alien-type elf64-rela
1403 (offset (unsigned 64))
1404 (info (unsigned 64))
1405 (addend (signed 64))))
1407 (defun make-elf64-sym (name info
)
1408 (let ((a (make-array 24 :element-type
'(unsigned-byte 8) :initial-element
0)))
1409 (with-pinned-objects (a)
1410 (setf (sap-ref-32 (vector-sap a
) 0) name
1411 (sap-ref-8 (vector-sap a
) 4) info
))
1414 ;;; Return two values: an octet vector comprising a string table
1415 ;;; and an alist which maps string to offset in the table.
1416 (defun string-table (strings)
1417 (let* ((length (+ (1+ (length strings
)) ; one more null than there are strings
1418 (reduce #'+ strings
:key
#'length
))) ; data length
1419 (bytes (make-array length
:element-type
'(unsigned-byte 8)
1420 :initial-element
0))
1423 (dolist (string strings
)
1424 (push (cons string index
) alist
)
1425 (replace bytes
(map 'vector
#'char-code string
) :start1 index
)
1426 (incf index
(1+ (length string
))))
1427 (cons (nreverse alist
) bytes
)))
1429 (defun write-alien (alien size stream
)
1431 (write-byte (sap-ref-8 (alien-value-sap alien
) i
) stream
)))
1433 (defun copy-bytes (in-stream out-stream nbytes
1435 (make-array 1024 :element-type
'(unsigned-byte 8))))
1436 (loop (let ((chunksize (min (length buffer
) nbytes
)))
1437 (aver (eql (read-sequence buffer in-stream
:end chunksize
) chunksize
))
1438 (write-sequence buffer out-stream
:end chunksize
)
1439 (when (zerop (decf nbytes chunksize
)) (return)))))
1441 ;;; core header should be an array of words in '.rodata', not a 32K page
1442 (defconstant core-header-size
+backend-page-bytes
+) ; stupidly large (FIXME)
1444 (defconstant e-machine
#+x86-64
#x3E
#+arm64
#xB7
)
1446 (defun write-elf-header (shdrs-start sections output
)
1447 (let ((shnum (1+ (length sections
))) ; section 0 is implied
1448 (shstrndx (1+ (position :str sections
:key
#'car
)))
1449 (ident #.
(coerce '(#x7F
#x45
#x4C
#x46
2 1 1 0 0 0 0 0 0 0 0 0)
1450 '(array (unsigned-byte 8) 1))))
1451 (with-alien ((ehdr elf64-ehdr
))
1452 (dotimes (i (ceiling ehdr-size n-word-bytes
))
1453 (setf (sap-ref-word (alien-value-sap ehdr
) (* i n-word-bytes
)) 0))
1454 (with-pinned-objects (ident)
1455 (%byte-blt
(vector-sap ident
) 0 (alien-value-sap ehdr
) 0 16))
1456 (setf (slot ehdr
'type
) 1
1457 (slot ehdr
'machine
) e-machine
1458 (slot ehdr
'version
) 1
1459 (slot ehdr
'shoff
) shdrs-start
1460 (slot ehdr
'ehsize
) ehdr-size
1461 (slot ehdr
'shentsize
) shdr-size
1462 (slot ehdr
'shnum
) shnum
1463 (slot ehdr
'shstrndx
) shstrndx
)
1464 (write-alien ehdr ehdr-size output
))))
1466 (defun write-section-headers (placements sections string-table output
)
1467 (with-alien ((shdr elf64-shdr
))
1468 (dotimes (i (ceiling shdr-size n-word-bytes
)) ; Zero-fill
1469 (setf (sap-ref-word (alien-value-sap shdr
) (* i n-word-bytes
)) 0))
1470 (dotimes (i (1+ (length sections
)))
1471 (when (plusp i
) ; Write the zero-filled header as section 0
1472 (destructuring-bind (name type flags link info alignment entsize
)
1473 (cdr (aref sections
(1- i
)))
1474 (destructuring-bind (offset . size
)
1476 (setf (slot shdr
'name
) (cdr (assoc name
(car string-table
)))
1477 (slot shdr
'type
) type
1478 (slot shdr
'flags
) flags
1479 (slot shdr
'off
) offset
1480 (slot shdr
'size
) size
1481 (slot shdr
'link
) link
1482 (slot shdr
'info
) info
1483 (slot shdr
'addralign
) alignment
1484 (slot shdr
'entsize
) entsize
))))
1485 (write-alien shdr shdr-size output
))))
1487 (defconstant sym-entry-size
24)
1489 ;;; Write everything except for the core file itself into OUTPUT-STREAM
1490 ;;; and leave the stream padded to a 4K boundary ready to receive data.
1491 (defun prepare-elf (core-size relocs output pie
)
1492 ;; PIE uses coreparse relocs which are 8 bytes each, and no linker relocs.
1493 ;; Otherwise, linker relocs are 24 bytes each.
1494 (let* ((reloc-entry-size (if pie
8 24))
1496 ;; name | type | flags | link | info | alignment | entry size
1497 `#((:core
"lisp.core" ,+sht-progbits
+ 0 0 0 ,core-align
0)
1498 (:sym
".symtab" ,+sht-symtab
+ 0 3 1 8 ,sym-entry-size
)
1499 ; section with the strings -- ^ ^ -- 1+ highest local symbol
1500 (:str
".strtab" ,+sht-strtab
+ 0 0 0 1 0)
1503 ;; Don't bother with an ELF reloc section; it won't do any good.
1504 ;; It would apply at executable link time, which is without purpose,
1505 ;; it just offsets the numbers based on however far the lisp.core
1506 ;; section is into the physical file. Non-loaded sections don't get
1507 ;; further relocated on execution, so 'coreparse' has to fix the
1508 ;; entire dynamic space at execution time anyway.
1509 `("lisp.rel" ,+sht-progbits
+ 0 0 0 8 8)
1510 `(".relalisp.core" ,+sht-rela
+ 0 2 1 8 ,reloc-entry-size
)))
1511 ; symbol table -- ^ ^ -- for which section
1512 (:note
".note.GNU-stack" ,+sht-progbits
+ 0 0 0 1 0)))
1514 (string-table (append '("lisp_code_start") (map 'list
#'second sections
))))
1515 (strings (cdr string-table
))
1516 (padded-strings-size (align-up (length strings
) 8))
1517 (symbols-size (* 2 sym-entry-size
))
1518 (shdrs-start (+ ehdr-size symbols-size padded-strings-size
))
1519 (shdrs-end (+ shdrs-start
(* (1+ (length sections
)) shdr-size
)))
1520 (relocs-size (* (length relocs
) reloc-entry-size
))
1521 (relocs-end (+ shdrs-end relocs-size
))
1522 (core-start (align-up relocs-end core-align
)))
1524 (write-elf-header shdrs-start sections output
)
1526 ;; Write symbol table
1527 (aver (eql (file-position output
) ehdr-size
))
1528 (write-sequence (make-elf64-sym 0 0) output
)
1529 ;; The symbol name index is always 1 by construction. The type is #x10
1530 ;; given: #define STB_GLOBAL 1
1531 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
1532 ;; which places the binding in the high 4 bits of the low byte.
1533 (write-sequence (make-elf64-sym 1 #x10
) output
)
1535 ;; Write string table
1536 (aver (eql (file-position output
) (+ ehdr-size symbols-size
)))
1537 (write-sequence strings output
) ; an octet vector at this point
1538 (dotimes (i (- padded-strings-size
(length strings
)))
1539 (write-byte 0 output
))
1541 ;; Write section headers
1542 (aver (eql (file-position output
) shdrs-start
))
1543 (write-section-headers
1548 (:sym
(cons ehdr-size symbols-size
))
1549 (:str
(cons (+ ehdr-size symbols-size
) (length strings
)))
1550 (:rel
(cons shdrs-end relocs-size
))
1551 (:core
(cons core-start core-size
))))
1553 sections string-table output
)
1555 ;; Write relocations
1556 (aver (eql (file-position output
) shdrs-end
))
1557 (let ((buf (make-array relocs-size
:element-type
'(unsigned-byte 8)))
1560 (dovector (reloc relocs
)
1561 (setf (%vector-raw-bits buf ptr
) reloc
)
1563 (with-alien ((rela elf64-rela
))
1564 (dovector (reloc relocs
)
1565 (destructuring-bind (place addend . kind
) reloc
1566 (setf (slot rela
'offset
) place
1567 (slot rela
'info
) (logior (ash 1 32) kind
) ; 1 = symbol index
1568 (slot rela
'addend
) addend
))
1569 (setf (%vector-raw-bits buf
(+ ptr
0)) (sap-ref-word (alien-value-sap rela
) 0)
1570 (%vector-raw-bits buf
(+ ptr
1)) (sap-ref-word (alien-value-sap rela
) 8)
1571 (%vector-raw-bits buf
(+ ptr
2)) (sap-ref-word (alien-value-sap rela
) 16))
1573 (write-sequence buf output
))
1576 (dotimes (i (- core-start
(file-position output
)))
1577 (write-byte 0 output
))
1578 (aver (eq (file-position output
) core-start
))))
1580 (defconstant R_ABS64
#+x86-64
1 #+arm64
257) ; /* Direct 64 bit */
1581 (defconstant R_ABS32
#+x86-64
10 #+arm64
258) ; /* Direct 32 bit zero extended */
1583 ;;; Fill in the FIXUPS vector with a list of places to fixup.
1584 ;;; For PIE-enabled cores, each place is just a virtual address.
1585 ;;; For non-PIE-enabled, the fixup corresponds to an ELF relocation which will be
1586 ;;; applied at link time of the excutable.
1587 ;;; Note that while this "works" for PIE, it is fairly inefficient because
1588 ;;; fundamentally Lisp objects contain absolute pointers, and there may be
1589 ;;; millions of words that need fixing at load (execution) time.
1590 ;;; Several techniques can mitigate this:
1591 ;;; * for funcallable-instances, put a second copy of funcallable-instance-tramp
1592 ;;; in dynamic space so that funcallable-instances can jump to a known address.
1593 ;;; * for each closure, create a one-instruction trampoline in dynamic space,
1594 ;;; - embedded in a (simple-array word) perhaps - which jumps to the correct
1595 ;;; place in the text section. Point all closures over the same function
1596 ;;; to the new closure stub. The trampoline, being pseudostatic, is effectively
1597 ;;; immovable. (And you can't re-save from an ELF core)
1598 ;;; * for arbitrary pointers to simple-funs, create a proxy simple-fun in dynamic
1599 ;;; space whose entry point is the real function in the ELF text section.
1600 ;;; The GC might have to learn how to handle simple-funs that point externally
1601 ;;; to themselves. Also there's a minor problem of hash-table test functions
1602 ;;; The above techniques will reduce by a huge factor the number of fixups
1603 ;;; that need to be applied on startup of a position-independent executable.
1605 (defun collect-relocations (spacemap fixups pie
&key
(verbose nil
) (print nil
))
1606 (let* ((code-bounds (space-bounds immobile-text-core-space-id spacemap
))
1607 (code-start (bounds-low code-bounds
))
1610 (affected-pages (make-hash-table)))
1612 ((abs-fixup (vaddr core-offs referent
)
1615 (format t
"~x = 0x~(~x~): (a)~%" core-offs vaddr
#+nil referent
))
1616 (touch-core-page core-offs
)
1617 ;; PIE relocations are output as a file section that is
1618 ;; interpreted by 'coreparse'. The addend is implicit.
1619 (setf (sap-ref-word (car spacemap
) core-offs
)
1621 (+ (- referent code-start
) +code-space-nominal-address
+)
1624 (vector-push-extend vaddr fixups
)
1625 (vector-push-extend `(,(+ core-header-size core-offs
)
1626 ,(- referent code-start
) .
,R_ABS64
)
1628 (abs32-fixup (core-offs referent
)
1632 (format t
"~x = 0x~(~x~): (a)~%" core-offs
(core-to-logical core-offs
) #+nil referent
))
1633 (touch-core-page core-offs
)
1634 (setf (sap-ref-32 (car spacemap
) core-offs
) 0)
1635 (vector-push-extend `(,(+ core-header-size core-offs
)
1636 ,(- referent code-start
) .
,R_ABS32
)
1638 (touch-core-page (core-offs)
1639 ;; use the OS page size, not +backend-page-bytes+
1640 (setf (gethash (floor core-offs core-align
) affected-pages
) t
))
1641 ;; Given a address which is an offset into the data pages of the target core,
1642 ;; compute the logical address which that offset would be mapped to.
1643 ;; For example core address 0 is the virtual address of static space.
1644 (core-to-logical (core-offs &aux
(page (floor core-offs
+backend-page-bytes
+)))
1645 (setf (gethash page affected-pages
) t
)
1646 (dolist (space (cdr spacemap
)
1647 (bug "Can't translate core offset ~x using ~x"
1648 core-offs spacemap
))
1649 (let* ((page0 (space-data-page space
))
1650 (nwords (space-nwords space
))
1651 (id (space-id space
))
1652 (npages (ceiling nwords
(/ +backend-page-bytes
+ n-word-bytes
))))
1653 (when (and (<= page0 page
(+ page0
(1- npages
)))
1654 (/= id immobile-text-core-space-id
))
1655 (return (+ (space-addr space
)
1656 (* (- page page0
) +backend-page-bytes
+)
1657 (logand core-offs
(1- +backend-page-bytes
+))))))))
1658 (scanptrs (vaddr obj wordindex-min wordindex-max
&optional force
&aux
(n-fixups 0))
1659 (do* ((base-addr (logandc2 (get-lisp-obj-address obj
) lowtag-mask
))
1660 (sap (int-sap base-addr
))
1661 ;; core-offs is the offset in the lisp.core ELF section.
1662 (core-offs (- base-addr
(sap-int (car spacemap
))))
1663 (i wordindex-min
(1+ i
)))
1664 ((> i wordindex-max
) n-fixups
)
1665 (let* ((byte-offs (ash i word-shift
))
1666 (ptr (sap-ref-word sap byte-offs
)))
1667 (when (and (or (is-lisp-pointer ptr
) force
) (in-bounds-p ptr code-bounds
))
1668 (abs-fixup (+ vaddr byte-offs
) (+ core-offs byte-offs
) ptr
)
1670 (scanptr (vaddr obj wordindex
)
1671 (plusp (scanptrs vaddr obj wordindex wordindex
))) ; trivial wrapper
1672 (scan-obj (vaddr obj widetag size
1673 &aux
(core-offs (- (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1674 (sap-int (car spacemap
))))
1675 (nwords (ceiling size n-word-bytes
)))
1677 (scanptrs vaddr obj
0 1)
1678 (return-from scan-obj
))
1681 (let ((type (truly-the layout
(translate (%instance-layout obj
) spacemap
))))
1682 (do-layout-bitmap (i taggedp type
(%instance-length obj
))
1684 (scanptr vaddr obj
(1+ i
))))))
1685 (#.simple-vector-widetag
1686 (let ((len (length (the simple-vector obj
))))
1687 ;; FIXME: VECTOR-ADDR-HASHING-FLAG must be left-shifted by ARRAY-FLAGS-DATA-POSITION
1688 ;; for this LOGTEST to be correct. I think it broke when array-rank was placed adjacent
1689 ;; to the widetag and the flags bits moved over. The fact that it seems to be useless
1690 ;; suggests that I need to come up with a way to assert that it does anything, which
1691 ;; is all but impossible. We'd need a correctly-hashed table containing a key in which
1692 ;; nothing moves in the final S-L-A-D, but does have a function move during elfination.
1693 ;; I'm leaving this line broken for now because frankly I think it might be safer to
1694 ;; assert that IF the table is address-sensitive THEN it has the rehash flag already set
1695 ;; in the k/v vector. That's not quite true either, because it only needs to rehash if
1696 ;; any key was actually hashed by address.
1697 (cond ((logtest (get-header-data obj
) vector-addr-hashing-flag
) ; BUG
1698 (do ((i 2 (+ i
2)) (needs-rehash))
1699 ;; Refer to the figure at the top of src/code/hash-table.lisp.
1700 ;; LEN is an odd number.
1703 (setf (svref obj
1) 1)))
1704 ;; A weak or EQ-based hash table any of whose keys is a function
1705 ;; or code-component might need the 'rehash' flag set.
1706 ;; In practice, it is likely already set, because any object that
1707 ;; could move in the final GC probably did move.
1708 (when (scanptr vaddr obj
(+ vector-data-offset i
))
1709 (setq needs-rehash t
))
1710 (scanptr vaddr obj
(+ vector-data-offset i
1))))
1712 (scanptrs vaddr obj
1 (+ len
1))))))
1714 (scanptrs vaddr obj
1 2)
1715 (scanptrs vaddr obj
3 3 t
))
1716 ((#.closure-widetag
#.funcallable-instance-widetag
)
1717 ;; read the trampoline slot
1718 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj
))
1719 (- n-word-bytes fun-pointer-lowtag
))))
1720 (when (in-bounds-p word code-bounds
)
1721 (abs-fixup (+ vaddr n-word-bytes
)
1722 (+ core-offs n-word-bytes
)
1724 ;; untaggged pointers are generally not supported in
1725 ;; funcallable instances, so scan everything.
1726 (scanptrs vaddr obj
1 (1- nwords
)))
1727 ;; mixed boxed/unboxed objects
1728 (#.code-header-widetag
1730 (dolist (loc (code-fixup-locs obj spacemap
))
1731 (let ((val (sap-ref-32 (code-instructions obj
) loc
)))
1732 (when (in-bounds-p val code-bounds
)
1733 (abs32-fixup (sap- (sap+ (code-instructions obj
) loc
) (car spacemap
))
1735 (dotimes (i (code-n-entries obj
))
1736 ;; I'm being lazy and not computing vaddr, which is wrong,
1737 ;; but does not matter if non-pie; and if PIE, we can't get here.
1738 ;; [PIE requires all code in immobile space, and this reloc
1739 ;; is for a dynamic space object]
1740 (scanptrs 0 (%code-entry-point obj i
) 2 5))
1741 (scanptrs vaddr obj
1 (1- (code-header-words obj
))))
1742 ;; boxed objects that can reference code/simple-funs
1743 ((#.value-cell-widetag
#.symbol-widetag
#.weak-pointer-widetag
)
1744 (scanptrs vaddr obj
1 (1- nwords
))))))
1745 (dolist (space (cdr spacemap
))
1746 (unless (= (space-id space
) immobile-text-core-space-id
)
1747 (let* ((logical-addr (space-addr space
))
1748 (size (space-size space
))
1749 (physical-addr (space-physaddr space spacemap
))
1750 (physical-end (sap+ physical-addr size
))
1751 (vaddr-translation (+ (- (sap-int physical-addr
)) logical-addr
)))
1752 (dx-flet ((visit (obj widetag size
)
1753 ;; Compute the object's intended virtual address
1754 (scan-obj (+ (logandc2 (get-lisp-obj-address obj
) lowtag-mask
)
1757 (map-objects-in-range
1759 (ash (sap-int physical-addr
) (- n-fixnum-tag-bits
))
1760 (ash (sap-int physical-end
) (- n-fixnum-tag-bits
))))
1761 (when (and (plusp (logior n-abs n-rel
)) verbose
)
1762 (format t
"space @ ~10x: ~6d absolute + ~4d relative fixups~%"
1763 logical-addr n-abs n-rel
))
1764 (setq n-abs
0 n-rel
0)))))
1766 (format t
"total of ~D linker fixups affecting ~D/~D pages~%"
1768 (hash-table-count affected-pages
)
1769 (/ (reduce #'+ (cdr spacemap
) :key
#'space-nbytes-aligned
)
1775 (defun read-core-header (input core-header verbose
&aux
(core-offset 0))
1776 (read-sequence core-header input
)
1777 (cond ((= (%vector-raw-bits core-header
0) core-magic
))
1778 (t ; possible embedded core
1779 (file-position input
(- (file-length input
)
1780 (* 2 n-word-bytes
)))
1781 (aver (eql (read-sequence core-header input
) (* 2 n-word-bytes
)))
1782 (aver (= (%vector-raw-bits core-header
1) core-magic
))
1783 (setq core-offset
(%vector-raw-bits core-header
0))
1785 (format t
"~&embedded core starts at #x~x into input~%" core-offset
))
1786 (file-position input core-offset
)
1787 (read-sequence core-header input
)
1788 (aver (= (%vector-raw-bits core-header
0) core-magic
))))
1791 (defmacro do-core-header-entry
(((id-var len-var ptr-var
) buffer
) &body body
)
1792 `(let ((,ptr-var
1))
1794 (let ((,id-var
(%vector-raw-bits
,buffer
,ptr-var
))
1795 (,len-var
(%vector-raw-bits
,buffer
(1+ ,ptr-var
))))
1796 ;; (format t "~&entry type ~D @ ~d len ~d words~%" id ptr len)
1799 (when (= ,id-var end-core-entry-type-code
)
1800 (aver (not (find 0 ,buffer
:start
(ash ,ptr-var word-shift
) :test
#'/=)))
1803 (incf ,ptr-var
,len-var
)))))
1805 (defmacro do-directory-entry
(((index-var start-index input-nbytes
) buffer
) &body body
)
1806 `(let ((words-per-dirent 5))
1807 (multiple-value-bind (n-entries remainder
)
1808 (floor ,input-nbytes words-per-dirent
)
1809 (aver (zerop remainder
))
1810 (symbol-macrolet ((id (%vector-raw-bits
,buffer index
))
1811 (nwords (%vector-raw-bits
,buffer
(+ index
1)))
1812 (data-page (%vector-raw-bits
,buffer
(+ index
2)))
1813 (addr (%vector-raw-bits
,buffer
(+ index
3)))
1814 (npages (%vector-raw-bits
,buffer
(+ index
4))))
1815 (do ((,index-var
,start-index
(+ ,index-var words-per-dirent
)))
1816 ((= ,index-var
(+ ,start-index
(* n-entries words-per-dirent
))))
1819 (defmacro with-mapped-core
((sap-var start npages stream
) &body body
)
1825 (extern-alien "load_core_bytes"
1826 (function system-area-pointer
1827 int int unsigned unsigned int
))
1828 (sb-sys:fd-stream-fd
,stream
)
1829 (+ ,start
+backend-page-bytes
+) ; Skip the core header
1830 0 ; place it anywhere
1831 (* ,npages
+backend-page-bytes
+) ; len
1836 (extern-alien "os_deallocate"
1837 (function void system-area-pointer unsigned
))
1838 ,sap-var
(* ,npages
+backend-page-bytes
+))))))
1840 (defun core-header-nwords (core-header &aux
(sum 2))
1841 ;; SUM starts as 2, as the core's magic number occupies 1 word
1842 ;; and the ending tag of END-CORE-ENTRY-TYPE-CODE counts as 1.
1843 (do-core-header-entry ((id len ptr
) core-header
)
1844 ;; LEN as bound by the macro does not count 1 for the
1845 ;; the entry identifier or LEN itself so add them in.
1846 (incf sum
(+ len
2)))
1849 (defun change-dynamic-space-size (core-header new-size
) ; expressed in MiB
1851 (return-from change-dynamic-space-size core-header
))
1852 (let ((new (copy-seq core-header
)))
1853 ;; memsize options if present must immediately follow the core magic number
1854 ;; so it might require a byte-blt to move other entries over.
1855 (unless (= (%vector-raw-bits new
1) runtime-options-magic
)
1856 ;; slide the header to right by 5 words
1857 (replace new core-header
:start1
(* 6 n-word-bytes
) :start2
(* 1 n-word-bytes
))
1858 ;; see write_memsize_options for the format of this entry
1859 ;; All words have to be stored since we're creating it from nothing.
1860 (setf (%vector-raw-bits new
1) runtime-options-magic
1861 (%vector-raw-bits new
2) 5 ; number of words in this entry
1862 (%vector-raw-bits new
4) (extern-alien "thread_control_stack_size" unsigned
)
1863 (%vector-raw-bits new
5) (extern-alien "dynamic_values_bytes" (unsigned 32))))
1864 (setf (%vector-raw-bits new
3) (* new-size
1024 1024))
1867 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1868 ;;; separate it into pieces.
1869 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1870 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1871 ;;; The ".core" file is a native core file used for starting a binary that
1872 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1873 ;;; is for linking in to a binary that needs no "--core" argument.
1875 (input-pathname asm-pathname
1876 &key enable-pie
(verbose nil
) dynamic-space-size
1877 &aux
(elf-core-pathname
1879 (make-pathname :name
(concatenate 'string
(pathname-name asm-pathname
) "-core")
1882 (core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
1883 (original-total-npages 0)
1886 (code-start-fixup-ofs 0) ; where to fixup the core header
1889 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1890 (relocs (make-array 100000 :adjustable t
:fill-pointer
1)))
1892 (declare (ignorable fixedobj-range
))
1894 (ignore-errors (delete-file asm-pathname
))
1895 (ignore-errors (delete-file elf-core-pathname
))
1896 ;; Ensure that all files can be opened
1897 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
1898 (with-open-file (asm-file asm-pathname
:direction
:output
:if-exists
:supersede
)
1899 ;;(with-open-file (split-core split-core-pathname :direction :output
1900 ;; :element-type '(unsigned-byte 8) :if-exists :supersede)
1901 (let ((split-core nil
))
1902 (setq core-offset
(read-core-header input core-header verbose
))
1903 (do-core-header-entry ((id len ptr
) core-header
)
1905 (#.build-id-core-entry-type-code
1907 (let ((string (make-string (%vector-raw-bits core-header ptr
)
1908 :element-type
'base-char
)))
1909 (%byte-blt core-header
(* (1+ ptr
) n-word-bytes
) string
0 (length string
))
1910 (format t
"Build ID [~a]~%" string
))))
1911 (#.directory-core-entry-type-code
1912 (do-directory-entry ((index ptr len
) core-header
)
1913 (incf original-total-npages npages
)
1914 (push (make-space id addr data-page page-adjust nwords
) space-list
)
1916 (format t
"id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1917 id data-page npages addr nwords
1918 (= id immobile-text-core-space-id
)))
1919 (cond ((= id immobile-text-core-space-id
)
1920 (setq code-start-fixup-ofs
(+ index
3))
1921 ;; Keep this entry but delete the page count. We need to know
1922 ;; where the space was supposed to be mapped and at what size.
1923 ;; Subsequent core entries will need to adjust their start page
1924 ;; downward (just the PTEs's start page now).
1925 (setq page-adjust npages data-page
0 npages
0))
1927 ;; Keep track of where the fixedobj space wants to be.
1928 (when (= id immobile-fixedobj-core-space-id
)
1929 (setq fixedobj-range
(cons addr
(ash nwords word-shift
))))
1930 (when (plusp npages
) ; enqueue
1931 (push (cons data-page
(* npages
+backend-page-bytes
+))
1933 ;; adjust this entry's start page in the new core
1934 (decf data-page page-adjust
)))))
1935 (#.page-table-core-entry-type-code
1937 (symbol-macrolet ((n-ptes (%vector-raw-bits core-header
(+ ptr
1)))
1938 (nbytes (%vector-raw-bits core-header
(+ ptr
2)))
1939 (data-page (%vector-raw-bits core-header
(+ ptr
3))))
1940 (aver (= data-page original-total-npages
))
1941 (aver (= (ceiling (space-nwords
1942 (find dynamic-core-space-id space-list
:key
#'space-id
))
1943 (/ gencgc-page-bytes n-word-bytes
))
1946 (format t
"PTE: page=~5x~40tbytes=~8x~%" data-page nbytes
))
1947 (push (cons data-page nbytes
) copy-actions
)
1948 (decf data-page page-adjust
)))))
1949 (let ((buffer (make-array +backend-page-bytes
+
1950 :element-type
'(unsigned-byte 8)))
1952 ;; Write the new core file
1954 (write-sequence core-header split-core
))
1955 (dolist (action (reverse copy-actions
)) ; nondestructive
1956 ;; page index convention assumes absence of core header.
1957 ;; i.e. data page 0 is the file page immediately following the core header
1958 (let ((offset (* (1+ (car action
)) +backend-page-bytes
+))
1959 (nbytes (cdr action
)))
1961 (format t
"File offset ~10x: ~10x bytes~%" offset nbytes
))
1962 (setq filepos
(+ core-offset offset
))
1964 (file-position input filepos
)
1965 (copy-bytes input split-core nbytes buffer
))
1967 (file-position input
(+ filepos nbytes
))))))
1968 ;; Trailer (runtime options and magic number)
1969 (let ((nbytes (read-sequence buffer input
)))
1970 ;; expect trailing magic number
1971 (let ((ptr (floor (- nbytes n-word-bytes
) n-word-bytes
)))
1972 (aver (= (%vector-raw-bits buffer ptr
) core-magic
)))
1973 ;; File position of the core header needs to be set to 0
1974 ;; regardless of what it was
1975 (setf (%vector-raw-bits buffer
4) 0)
1977 (format t
"Trailer words:(~{~X~^ ~})~%"
1978 (loop for i below
(floor nbytes n-word-bytes
)
1979 collect
(%vector-raw-bits buffer i
))))
1981 (write-sequence buffer split-core
:end nbytes
)
1982 (finish-output split-core
)))
1985 (aver (= (+ core-offset
1986 (* page-adjust
+backend-page-bytes
+)
1987 (file-length split-core
))
1988 (file-length input
))))
1989 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1990 (file-position input filepos
)))
1992 ;; Map the original core file to memory
1993 (with-mapped-core (sap core-offset original-total-npages input
)
1995 (delete immobile-text-core-space-id
(reverse space-list
)
1997 (spacemap (cons sap
(sort (copy-list space-list
) #'> :key
#'space-addr
)))
1998 (pte-nbytes (cdar copy-actions
)))
1999 (collect-relocations spacemap relocs enable-pie
)
2000 (with-open-file (output elf-core-pathname
2001 :direction
:output
:if-exists
:supersede
2002 :element-type
'(unsigned-byte 8))
2003 ;; If we're going to write memory size options and they weren't already
2004 ;; present, then it will be inserted after the core magic,
2005 ;; and the rest of the header moves over by 5 words.
2006 (when (and dynamic-space-size
2007 (/= (%vector-raw-bits core-header
1) runtime-options-magic
))
2008 (incf code-start-fixup-ofs
5))
2010 ;; This fixup sets the 'address' field of the core directory entry
2011 ;; for code space. If PIE-enabled, we'll figure it out in the C code
2012 ;; because space relocation is going to happen no matter what.
2013 (setf (aref relocs
0)
2014 `(,(ash code-start-fixup-ofs word-shift
) 0 .
,R_ABS64
)))
2015 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces
))
2016 +backend-page-bytes
+ ; core header
2018 relocs output enable-pie
)
2019 (let ((new-header (change-dynamic-space-size core-header dynamic-space-size
)))
2020 ;; This word will be fixed up by the system linker
2021 (setf (%vector-raw-bits new-header code-start-fixup-ofs
)
2022 (if enable-pie
+code-space-nominal-address
+ 0))
2023 (write-sequence new-header output
))
2024 (force-output output
)
2025 ;; ELF cores created from #-immobile-space cores use +required-foreign-symbols+.
2026 ;; But if #+immobile-space the alien-linkage-table values are computed
2027 ;; by 'ld' and we don't scan +required-foreign-symbols+.
2028 (when (get-space immobile-fixedobj-core-space-id spacemap
)
2029 (let* ((sym (find-target-symbol (package-id "SB-VM")
2030 "+REQUIRED-FOREIGN-SYMBOLS+" spacemap
:physical
))
2031 (vector (translate (symbol-global-value sym
) spacemap
)))
2033 (setf (%array-fill-pointer vector
) 0)))
2034 ;; Change SB-C::*COMPILE-FILE-TO-MEMORY-SPACE* to :DYNAMIC
2035 ;; and SB-C::*COMPILE-TO-MEMORY-SPACE* to :AUTO
2036 ;; in case the resulting executable needs to compile anything.
2037 ;; (Call frame info will be missing, but at least it's something.)
2038 (dolist (item '(("*COMPILE-FILE-TO-MEMORY-SPACE*" .
"DYNAMIC")
2039 ("*COMPILE-TO-MEMORY-SPACE*" .
"DYNAMIC")))
2040 (destructuring-bind (symbol . value
) item
2041 (awhen (%find-target-symbol
(package-id "SB-C") symbol spacemap
)
2042 (%set-symbol-global-value
2043 it
(find-target-symbol (package-id "KEYWORD") value spacemap
:logical
)))))
2045 (dolist (space data-spaces
) ; Copy pages from memory
2046 (let ((start (space-physaddr space spacemap
))
2047 (size (space-nbytes-aligned space
)))
2048 (aver (eql (sb-unix:unix-write
(sb-sys:fd-stream-fd output
)
2052 (format t
"Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
2053 pte-nbytes pte-nbytes
(floor pte-nbytes
10)))
2054 (copy-bytes input output pte-nbytes
)) ; Copy PTEs from input
2055 (let ((core (write-assembler-text spacemap asm-file enable-pie
)))
2056 (format asm-file
" .section .rodata~% .p2align 4~%lisp_fixups:~%")
2057 ;; Sort the hash-table in emit order.
2058 (dolist (x (sort (%hash-table-alist
(core-new-fixups core
)) #'< :key
#'cdr
))
2059 (output-bignum nil
(car x
) asm-file
))
2061 (t ; (get-space immobile-fixedobj-core-space-id spacemap)
2062 (format asm-file
(if (member :darwin
*features
*)
2064 "~% .section .rodata~%"))
2065 (format asm-file
" .globl ~A~%~:*~A:
2067 (labelize "alien_linkage_values")
2068 (length (core-linkage-symbols core
)))
2069 ;; -1 (not a plausible function address) signifies that word
2070 ;; following it is a data, not text, reference.
2071 (loop for s across
(core-linkage-symbols core
)
2072 do
(format asm-file
" .quad ~:[~;-1, ~]~a~%"
2074 (if (consp s
) (car s
) s
))))
2076 (format asm-file
"~% .section .rodata~%")
2077 (format asm-file
" .globl anchor_junk~%")
2078 (format asm-file
"anchor_junk: .quad lseek_largefile, get_timezone, compute_udiv_magic32~%"))))))
2079 (when (member :linux
*features
*)
2080 (format asm-file
"~% ~A~%" +noexec-stack-note
+)))))
2082 ;;; Copy the input core into an ELF section without splitting into code & data.
2083 ;;; Also force a linker reference to each C symbol that the Lisp core mentions.
2084 (defun copy-to-elf-obj (input-pathname output-pathname
)
2086 (ignore-errors (delete-file output-pathname
))
2087 ;; Ensure that all files can be opened
2088 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
2089 (with-open-file (output output-pathname
:direction
:output
2090 :element-type
'(unsigned-byte 8) :if-exists
:supersede
)
2091 (let* ((core-header (make-array +backend-page-bytes
+
2092 :element-type
'(unsigned-byte 8)))
2093 (core-offset (read-core-header input core-header nil
))
2095 (total-npages 0) ; excluding core header page
2097 (do-core-header-entry ((id len ptr
) core-header
)
2099 (#.directory-core-entry-type-code
2100 (do-directory-entry ((index ptr len
) core-header
)
2101 (incf total-npages npages
)
2102 (when (plusp nwords
)
2103 (push (make-space id addr data-page
0 nwords
) space-list
))))
2104 (#.page-table-core-entry-type-code
2106 (symbol-macrolet ((nbytes (%vector-raw-bits core-header
(+ ptr
2)))
2107 (data-page (%vector-raw-bits core-header
(+ ptr
3))))
2108 (aver (= data-page total-npages
))
2109 (setq core-size
(+ (* total-npages
+backend-page-bytes
+) nbytes
))))))
2110 (incf core-size
+backend-page-bytes
+) ; add in core header page
2111 ;; Map the core file to memory
2112 (with-mapped-core (sap core-offset total-npages input
)
2113 (let* ((spacemap (cons sap
(sort (copy-list space-list
) #'> :key
#'space-addr
)))
2114 (core (make-core spacemap
2115 (space-bounds immobile-text-core-space-id spacemap
)
2116 (space-bounds immobile-fixedobj-core-space-id spacemap
)))
2117 (c-symbols (map 'list
(lambda (x) (if (consp x
) (car x
) x
))
2118 (core-linkage-symbols core
)))
2119 (sections `#((:str
".strtab" ,+sht-strtab
+ 0 0 0 1 0)
2120 (:sym
".symtab" ,+sht-symtab
+ 0 1 1 8 ,sym-entry-size
)
2121 ;; section with the strings -- ^ ^ -- 1+ highest local symbol
2122 (:core
"lisp.core" ,+sht-progbits
+ 0 0 0 ,core-align
0)
2123 (:note
".note.GNU-stack" ,+sht-progbits
+ 0 0 0 1 0)))
2124 (string-table (string-table (append (map 'list
#'second sections
)
2126 (packed-strings (cdr string-table
))
2127 (strings-start (+ ehdr-size
(* (1+ (length sections
)) shdr-size
)))
2128 (strings-end (+ strings-start
(length packed-strings
)))
2129 (symbols-start (align-up strings-end
8))
2130 (symbols-size (* (1+ (length c-symbols
)) sym-entry-size
))
2131 (symbols-end (+ symbols-start symbols-size
))
2132 (core-start (align-up symbols-end core-align
)))
2133 (write-elf-header ehdr-size sections output
)
2134 (write-section-headers `((,strings-start .
,(length packed-strings
))
2135 (,symbols-start .
,symbols-size
)
2136 (,core-start .
,core-size
)
2138 sections string-table output
)
2139 (write-sequence packed-strings output
)
2140 ;; Write symbol table
2141 (file-position output symbols-start
)
2142 (write-sequence (make-elf64-sym 0 0) output
)
2143 (dolist (sym c-symbols
)
2144 (let ((name-ptr (cdr (assoc sym
(car string-table
)))))
2145 (write-sequence (make-elf64-sym name-ptr
#x10
) output
)))
2147 (file-position output core-start
)
2148 (file-position input core-offset
)
2149 (let ((remaining core-size
))
2150 (loop (let ((n (read-sequence core-header input
2151 :end
(min +backend-page-bytes
+ remaining
))))
2152 (write-sequence core-header output
:end n
)
2153 (unless (plusp (decf remaining n
)) (return))))
2154 (aver (zerop remaining
)))))))))
2156 ;; These will get set to 0 if the target is not using mark-region-gc
2157 (defglobal *bitmap-bits-per-page
* (/ gencgc-page-bytes
(* cons-size n-word-bytes
)))
2158 (defglobal *bitmap-bytes-per-page
* (/ *bitmap-bits-per-page
* n-byte-bits
))
2167 (defun read-page-table (stream n-ptes nbytes data-page
&optional
(print nil
))
2168 (declare (ignore nbytes
))
2169 (let ((table (make-array n-ptes
)))
2170 (file-position stream
(* (1+ data-page
) sb-c
:+backend-page-bytes
+))
2172 (let* ((bitmap (make-array *bitmap-bits-per-page
* :element-type
'bit
))
2173 (temp (make-array *bitmap-bytes-per-page
* :element-type
'(unsigned-byte 8))))
2174 (when (plusp *bitmap-bits-per-page
*)
2175 (read-sequence temp stream
))
2176 (dotimes (i (/ (length bitmap
) n-word-bits
))
2177 (setf (%vector-raw-bits bitmap i
) (%vector-raw-bits temp i
)))
2178 (setf (aref table i
) (make-page :bitmap bitmap
))))
2179 ;; a PTE is a lispword and a uint16_t
2180 (let ((buf (make-array 10 :element-type
'(unsigned-byte 8))))
2181 (with-pinned-objectS (buf)
2183 (read-sequence buf stream
)
2184 (let ((sso (sap-ref-word (vector-sap buf
) 0))
2185 (words-used (sap-ref-16 (vector-sap buf
) 8))
2187 (setf (page-words-used p
) (logandc2 words-used
1)
2188 (page-single-obj-p p
) (logand words-used
1)
2189 (page-scan-start p
) (logandc2 sso
7)
2190 (page-type p
) (logand sso
7))
2191 (when (and print
(plusp (page-words-used p
)))
2192 (format t
"~4d: ~4x ~2x~:[~; -~x~]~%"
2193 i
(ash (page-words-used p
) word-shift
)
2195 (if (= (page-single-obj-p p
) 0) nil
1)
2196 (page-scan-start p
)))))))
2199 (defun decode-page-type (type)
2209 (defun calc-page-index (vaddr space
)
2210 (let ((vaddr (if (system-area-pointer-p vaddr
) (sap-int vaddr
) vaddr
)))
2211 (floor (- vaddr
(space-addr space
)) gencgc-page-bytes
)))
2212 (defun calc-page-base (vaddr)
2213 (logandc2 vaddr
(1- gencgc-page-bytes
)))
2214 (defun calc-object-index (vaddr)
2215 (ash (- vaddr
(calc-page-base vaddr
)) (- n-lowtag-bits
)))
2217 (defun page-bytes-used (index ptes
)
2218 (ash (page-words-used (svref ptes index
)) word-shift
))
2220 (defun find-ending-page (index ptes
)
2221 ;; A page ends a contiguous block if it is not wholly used,
2222 ;; or if there is no next page,
2223 ;; or the next page starts its own contiguous block
2224 (if (or (< (page-bytes-used index ptes
) gencgc-page-bytes
)
2225 (= (1+ index
) (length ptes
))
2226 (zerop (page-scan-start (svref ptes
(1+ index
)))))
2228 (find-ending-page (1+ index
) ptes
)))
2230 (defun page-addr (index space
) (+ (space-addr space
) (* index gencgc-page-bytes
)))
2232 (defun walk-dynamic-space (page-type spacemap function
)
2233 (do* ((space (get-space dynamic-core-space-id spacemap
))
2234 (ptes (space-page-table space
))
2235 (nptes (length ptes
))
2238 ((>= first-page nptes
) (nreverse page-ranges
))
2240 (let* ((last-page (find-ending-page first-page ptes
))
2241 (pte (aref (space-page-table space
) first-page
))
2242 (start-vaddr (page-addr first-page space
))
2243 (end-vaddr (+ (page-addr last-page space
) (page-bytes-used last-page ptes
))))
2244 (when (and (plusp (page-type pte
))
2245 (or (null page-type
) (eq page-type
(decode-page-type (page-type pte
)))))
2246 ;; Because gencgc has page-spanning objects, it's easiest to zero-fill later
2247 ;; if we track the range boundaries now.
2248 (push (list nil first-page last-page
) page-ranges
) ; NIL = no funcallable-instance
2249 (do ((vaddr (int-sap start-vaddr
))
2250 (paddr (int-sap (translate-ptr start-vaddr spacemap
))))
2251 ((>= (sap-int vaddr
) end-vaddr
))
2252 (let* ((word (sap-ref-word paddr
0))
2253 (widetag (logand word widetag-mask
))
2254 (size (if (eq widetag filler-widetag
)
2255 (ash (ash word -
32) word-shift
) ; -> words -> bytes
2256 (let* ((obj (reconstitute-object (%make-lisp-obj
(sap-int paddr
))))
2257 (size (primitive-object-size obj
)))
2258 ;; page types codes are never defined for Lisp
2259 (when (eq page-type
7) ; KLUDGE: PAGE_TYPE_CODE
2260 (aver (or (= widetag code-header-widetag
)
2261 (= widetag funcallable-instance-widetag
))))
2262 (when (= widetag funcallable-instance-widetag
)
2263 (setf (caar page-ranges
) t
)) ; T = has funcallable-instance
2264 (funcall function obj vaddr size
:ignore
)
2266 (setq vaddr
(sap+ vaddr size
)
2267 paddr
(sap+ paddr size
)))))
2268 (setq first-page
(1+ last-page
)))
2270 (let* ((vaddr (int-sap (+ (space-addr space
) (* first-page gencgc-page-bytes
))))
2271 (paddr (int-sap (translate-ptr (sap-int vaddr
) spacemap
)))
2272 (pte (aref (space-page-table space
) first-page
))
2273 (bitmap (page-bitmap pte
)))
2274 (cond ((= (page-single-obj-p pte
) 1)
2275 ;; last page is located by doing some arithmetic
2276 (let* ((obj (reconstitute-object (%make-lisp-obj
(sap-int paddr
))))
2277 (size (primitive-object-size obj
))
2278 (last-page (calc-page-index (sap+ vaddr
(1- size
)) space
)))
2279 #+nil
(format t
"~&Page ~4d..~4d ~A LARGE~%" first-page last-page
(decode-page-type (page-type pte
)))
2280 (funcall function obj vaddr size t
)
2281 (setq first-page last-page
)))
2282 ((plusp (page-type pte
))
2283 #+nil
(format t
"~&Page ~4D : ~A~%" first-page
(decode-page-type (page-type pte
)))
2284 (when (or (null page-type
) (eq page-type
(decode-page-type (page-type pte
))))
2285 (do ((object-offset-in-dualwords 0))
2286 ((>= object-offset-in-dualwords
*bitmap-bits-per-page
*))
2288 (cond ((zerop (sbit bitmap object-offset-in-dualwords
))
2289 (unless (and (zerop (sap-ref-word paddr
0))
2290 (zerop (sap-ref-word paddr
8)))
2291 (error "Unallocated object @ ~X: ~X ~X"
2292 vaddr
(sap-ref-word paddr
0) (sap-ref-word paddr
8)))
2295 (let* ((obj (reconstitute-object (%make-lisp-obj
(sap-int paddr
))))
2296 (size (primitive-object-size obj
)))
2297 (funcall function obj vaddr size nil
)
2299 (setq vaddr
(sap+ vaddr size
)
2300 paddr
(sap+ paddr size
))
2301 (incf object-offset-in-dualwords
(ash size
(- (1+ word-shift
)))))))))
2302 (incf first-page
))))
2304 ;;; Unfortunately the idea of using target features to decide whether to
2305 ;;; read a bitmap from PAGE_TABLE_CORE_ENTRY_TYPE_CODE falls flat,
2306 ;;; because we can't scan for symbols until the core is read, but we can't
2307 ;;; read the core until we decide whether there is a bitmap, which needs the
2308 ;;; feature symbols. Some possible solutions (and there are others too):
2309 ;;; 1) make a separate core entry for the bitmap
2310 ;;; 2) add a word to that core entry indicating that it has a bitmap
2311 ;;; 3) make a different entry type code for PTES_WITH_BITMAP
2312 (defun detect-target-features (spacemap &aux result
)
2313 (flet ((scan (symbol)
2314 (let ((list (symbol-global-value symbol
))
2315 (target-nil (compute-nil-object spacemap
)))
2317 (when (eq list target-nil
) (return))
2318 (setq list
(translate list spacemap
))
2319 (let ((feature (translate (car list
) spacemap
)))
2320 (aver (symbolp feature
))
2321 ;; convert keywords and only keywords into host keywords
2322 (when (eq (symbol-package-id feature
) (symbol-package-id :sbcl
))
2323 (let ((string (translate (symbol-name feature
) spacemap
)))
2324 (push (intern string
"KEYWORD") result
))))
2325 (setq list
(cdr list
))))))
2329 (lambda (obj vaddr size large
)
2330 (declare (ignore vaddr size large
))
2332 (when (or (and (eq (symbol-package-id obj
) #.
(symbol-package-id 'sb-impl
:+internal-features
+))
2333 (string= (translate (symbol-name obj
) spacemap
) "+INTERNAL-FEATURES+"))
2334 (and (eq (symbol-package-id obj
) #.
(symbol-package-id '*features
*))
2335 (string= (translate (symbol-name obj
) spacemap
) "*FEATURES*")))
2337 ;;(format t "~&Target-features=~S~%" result)
2340 (defun transport-dynamic-space-code (codeblobs spacemap new-space free-ptr
)
2341 (do ((list codeblobs
(cdr list
))
2342 (offsets-vector-data (sap+ new-space
(* 2 n-word-bytes
)))
2343 (object-index 0 (1+ object-index
)))
2345 ;; FROM-VADDR is the original logical (virtual) address, and FROM-PADDR
2346 ;; is where the respective object is currently resident in memory now.
2347 ;; Similarly-named "TO-" values correspond to the location in new space.
2348 (destructuring-bind (from-vaddr . size
) (car list
)
2349 (let ((from-paddr (int-sap (translate-ptr (sap-int from-vaddr
) spacemap
)))
2350 (to-vaddr (+ +code-space-nominal-address
+ free-ptr
))
2351 (to-paddr (sap+ new-space free-ptr
)))
2352 (setf (sap-ref-32 offsets-vector-data
(ash object-index
2)) free-ptr
)
2353 ;; copy to code space
2354 (%byte-blt from-paddr
0 new-space free-ptr size
)
2356 (%make-lisp-obj
(logior (sap-int to-paddr
) other-pointer-lowtag
)))
2357 (header-bytes (ash (code-header-words new-physobj
) word-shift
))
2358 (new-insts (code-instructions new-physobj
)))
2359 ;; fix the jump table words which, if present, start at NEW-INSTS
2360 (let ((wordcount (code-jump-table-words new-physobj
))
2361 (disp (- to-vaddr
(sap-int from-vaddr
))))
2362 (loop for i from
1 below wordcount
2363 do
(let ((w (sap-ref-word new-insts
(ash i word-shift
))))
2365 (setf (sap-ref-word new-insts
(ash i word-shift
)) (+ w disp
))))))
2366 ;; fix the simple-fun pointers
2367 (dotimes (i (code-n-entries new-physobj
))
2368 (let ((fun-offs (%code-fun-offset new-physobj i
)))
2369 ;; Assign the address that each simple-fun will have assuming
2370 ;; the object will reside at its new logical address.
2371 (setf (sap-ref-word new-insts
(+ fun-offs n-word-bytes
))
2372 (+ to-vaddr header-bytes fun-offs
(* 2 n-word-bytes
))))))
2373 (incf free-ptr size
)))))
2375 (defun remap-to-quasi-static-code (val spacemap fwdmap
)
2376 (when (is-lisp-pointer (get-lisp-obj-address val
))
2377 (binding* ((translated (translate val spacemap
))
2378 (vaddr (get-lisp-obj-address val
))
2380 (cond ((simple-fun-p translated
)
2381 ;; the code component has to be computed "by hand" because FUN-CODE-HEADER
2382 ;; would return the physically mapped object, but we need
2383 ;; to get the logical address of the code.
2384 (- (- vaddr fun-pointer-lowtag
)
2385 (ash (ldb (byte 24 8)
2386 (sap-ref-word (int-sap (get-lisp-obj-address translated
))
2387 (- fun-pointer-lowtag
)))
2389 ((code-component-p translated
)
2390 (- vaddr other-pointer-lowtag
)))
2392 (new-code-offset (gethash code-base-addr fwdmap
) :exit-if-null
))
2393 (%make-lisp-obj
(+ (if (functionp translated
)
2394 (- vaddr code-base-addr
) ; function tag is in the difference
2395 other-pointer-lowtag
)
2396 +code-space-nominal-address
+
2397 new-code-offset
)))))
2399 ;;; It's not worth trying to use the host's DO-REFERENCED-OBJECT because it requires
2400 ;;; completely different behavior for INSTANCE and FUNCALLABLE-INSTANCE to avoid using
2401 ;;; the layout pointers as-is. And closures don't really work either. So unfortunately
2402 ;;; this is essentially a reimplementation. Thankfully we only have to deal with pointers
2403 ;;; that could possibly point to code.
2404 (defun update-quasi-static-code-ptrs
2405 (obj spacemap fwdmap displacement
&optional print
2406 &aux
(sap (int-sap (logandc2 (get-lisp-obj-address obj
) lowtag-mask
))))
2408 (format t
"paddr ~X vaddr ~X~%" (get-lisp-obj-address obj
)
2409 (+ (get-lisp-obj-address obj
) displacement
)))
2410 (macrolet ((visit (place)
2411 `(let* ((oldval ,place
) (newval (remap oldval
)))
2413 (setf ,place newval
)))))
2414 (flet ((fun-entrypoint (fun)
2415 (+ (get-lisp-obj-address fun
) (- fun-pointer-lowtag
) (ash 2 word-shift
)))
2417 (remap-to-quasi-static-code x spacemap fwdmap
)))
2419 ((listp obj
) (visit (car obj
)) (visit (cdr obj
)))
2420 ((simple-vector-p obj
)
2421 (dotimes (i (length obj
)) (visit (svref obj i
))))
2423 (let ((type (truly-the layout
(translate (%instance-layout obj
) spacemap
))))
2424 (do-layout-bitmap (i taggedp type
(%instance-length obj
))
2425 (when taggedp
(visit (%instance-ref obj i
))))))
2428 (cond ((funcallable-instance-p obj
)
2429 ;; The trampoline points to the function itself (so is ignorable)
2430 ;; and following that word are 2 words of machine code.
2433 (aver (closurep obj
))
2434 (let ((fun (remap (%closure-fun obj
))))
2435 ;; there is no setter for closure-fun
2436 (setf (sap-ref-word sap n-word-bytes
) (fun-entrypoint fun
)))
2438 (loop for i from start to
(logior (get-closure-length obj
) 1)
2439 do
(visit (sap-ref-lispobj sap
(ash i word-shift
))))))
2440 ((code-component-p obj
)
2441 (loop for i from
2 below
(code-header-words obj
)
2442 do
(visit (code-header-ref obj i
))))
2444 (visit (sap-ref-lispobj sap
(ash symbol-value-slot word-shift
))))
2445 ((weak-pointer-p obj
)
2446 (visit (sap-ref-lispobj sap
(ash weak-pointer-value-slot word-shift
))))
2448 (let ((raw (sap-ref-word sap
(ash fdefn-raw-addr-slot word-shift
))))
2449 (unless (in-bounds-p raw
(space-bounds static-core-space-id spacemap
))
2450 (awhen (remap (%make-lisp-obj
(+ raw
(ash -
2 word-shift
) fun-pointer-lowtag
)))
2451 (setf (sap-ref-word sap
(ash fdefn-raw-addr-slot word-shift
))
2452 (fun-entrypoint it
)))))
2453 (visit (sap-ref-lispobj sap
(ash fdefn-fun-slot word-shift
))))
2454 ((= (%other-pointer-widetag obj
) value-cell-widetag
)
2455 (visit (sap-ref-lispobj sap
(ash value-cell-value-slot word-shift
))))))))
2457 ;;; Clear all the old objects. Funcallable instances can be co-mingled with
2458 ;;; code, so a code page might not be empty but most will be. Free those pages.
2459 (defun zerofill-old-code (spacemap codeblobs page-ranges
)
2460 (declare (ignorable page-ranges
))
2461 (with-alien ((memset (function void unsigned int unsigned
) :extern
))
2462 (flet ((reset-pte (pte)
2463 (setf (page-words-used pte
) 0
2464 (page-single-obj-p pte
) 0
2466 (page-scan-start pte
) 0)))
2467 (let ((space (get-space dynamic-core-space-id spacemap
)))
2469 (dolist (range page-ranges
(aver (null codeblobs
)))
2470 (destructuring-bind (in-use first last
) range
2471 ;;(format t "~&Working on range ~D..~D~%" first last)
2472 (loop while codeblobs
2473 do
(destructuring-bind (vaddr . size
) (car codeblobs
)
2474 (let ((page (calc-page-index vaddr space
)))
2475 (cond ((> page last
) (return))
2476 ((< page first
) (bug "Incorrect sort"))
2478 (let ((paddr (translate-ptr (sap-int vaddr
) spacemap
)))
2479 (alien-funcall memset paddr
0 size
)
2480 (when in-use
; store a filler widetag
2481 (let* ((nwords (ash size
(- word-shift
)))
2482 (header (logior (ash nwords
32) filler-widetag
)))
2483 (setf (sap-ref-word (int-sap paddr
) 0) header
))))
2484 (pop codeblobs
))))))
2486 (loop for page-index from first to last
2487 do
(reset-pte (svref (space-page-table space
) page-index
))))))
2489 (dolist (code codeblobs
)
2490 (destructuring-bind (vaddr . size
) code
2491 (alien-funcall memset
(translate-ptr (sap-int vaddr
) spacemap
) 0 size
)
2492 (let* ((page-index (calc-page-index vaddr space
))
2493 (pte (aref (space-page-table space
) page-index
))
2494 (object-index (calc-object-index (sap-int vaddr
))))
2495 (setf (sbit (page-bitmap pte
) object-index
) 0)
2496 (cond ((= (page-single-obj-p pte
) 1)
2497 ;(format t "~&Cleared large-object pages @ ~x~%" (sap-int vaddr))
2498 (loop for p from page-index to
(calc-page-index (sap+ vaddr
(1- size
)) space
)
2499 do
(let ((pte (svref (space-page-table space
) p
)))
2500 (aver (not (find 1 (page-bitmap pte
))))
2502 ((not (find 1 (page-bitmap pte
)))
2503 ;; is the #+gencgc logic above actually more efficient?
2504 ;;(format t "~&Code page ~D is now empty~%" page-index)
2505 (reset-pte pte
))))))))))
2507 (defun parse-core-header (input core-header
)
2509 (total-npages 0) ; excluding core header page
2513 (do-core-header-entry ((id len ptr
) core-header
)
2515 (#.directory-core-entry-type-code
2516 (setq core-dir-start
(- ptr
2))
2517 (do-directory-entry ((index ptr len
) core-header
)
2518 (incf total-npages npages
)
2519 (push (make-space id addr data-page
0 nwords
) space-list
)))
2520 (#.page-table-core-entry-type-code
2522 (symbol-macrolet ((n-ptes (%vector-raw-bits core-header
(+ ptr
1)))
2523 (nbytes (%vector-raw-bits core-header
(+ ptr
2)))
2524 (data-page (%vector-raw-bits core-header
(+ ptr
3))))
2525 (aver (= data-page total-npages
))
2526 (setf card-mask-nbits
(%vector-raw-bits core-header ptr
))
2527 (format nil
"~&card-nbits = ~D~%" card-mask-nbits
)
2528 (let ((space (get-space dynamic-core-space-id
(cons nil space-list
))))
2529 (setf (space-page-table space
) (read-page-table input n-ptes nbytes data-page
)))))
2530 (#.build-id-core-entry-type-code
2531 (let ((string (make-string (%vector-raw-bits core-header ptr
)
2532 :element-type
'base-char
)))
2533 (%byte-blt core-header
(* (1+ ptr
) n-word-bytes
) string
0 (length string
))
2534 (format nil
"Build ID [~a] len=~D ptr=~D actual-len=~D~%" string len ptr
(length string
))))
2535 (#.runtime-options-magic
) ; ignore
2536 (#.initial-fun-core-entry-type-code
2537 (setq initfun
(%vector-raw-bits core-header ptr
)))))
2538 (values total-npages space-list card-mask-nbits core-dir-start initfun
)))
2540 (defconstant +lispwords-per-corefile-page
+ (/ sb-c
:+backend-page-bytes
+ n-word-bytes
))
2542 (defun rewrite-core (directory spacemap card-mask-nbits initfun core-header offset output
2543 &aux
(dynamic-space (get-space dynamic-core-space-id spacemap
)))
2544 (aver (= (%vector-raw-bits core-header offset
) directory-core-entry-type-code
))
2545 (let ((nwords (+ (* (length directory
) 5) 2)))
2546 (setf (%vector-raw-bits core-header
(incf offset
)) nwords
))
2547 (let ((page-count 0)
2548 (n-ptes (length (space-page-table dynamic-space
))))
2549 (dolist (dir-entry directory
)
2550 (setf (car dir-entry
) page-count
)
2551 (destructuring-bind (id paddr vaddr nwords
) (cdr dir-entry
)
2552 (declare (ignore paddr
))
2553 (let ((npages (ceiling nwords
+lispwords-per-corefile-page
+)))
2554 (when (= id dynamic-core-space-id
)
2555 (aver (= npages n-ptes
)))
2556 (dolist (word (list id nwords page-count vaddr npages
))
2557 (setf (%vector-raw-bits core-header
(incf offset
)) word
))
2558 (incf page-count npages
))))
2559 (let* ((sizeof-corefile-pte (+ n-word-bytes
2))
2560 (pte-bytes (align-up (* sizeof-corefile-pte n-ptes
) n-word-bytes
)))
2561 (dolist (word (list page-table-core-entry-type-code
2562 6 ; = number of words in this core header entry
2564 n-ptes
(+ (* n-ptes
*bitmap-bytes-per-page
*) pte-bytes
)
2566 (setf (%vector-raw-bits core-header
(incf offset
)) word
)))
2567 (dolist (word (list initial-fun-core-entry-type-code
3 initfun
2568 end-core-entry-type-code
2))
2569 (setf (%vector-raw-bits core-header
(incf offset
)) word
))
2570 (write-sequence core-header output
)
2571 ;; write out the data from each space
2572 (dolist (dir-entry directory
)
2573 (destructuring-bind (page id paddr vaddr nwords
) dir-entry
2574 (declare (ignore id vaddr
))
2575 (aver (= (file-position output
) (* sb-c
:+backend-page-bytes
+ (1+ page
))))
2576 (let* ((npages (ceiling nwords
+lispwords-per-corefile-page
+))
2577 (nbytes (* npages sb-c
:+backend-page-bytes
+))
2579 (sb-unix:unix-write
(sb-impl::fd-stream-fd output
) paddr
0 nbytes
)))
2580 (aver (= wrote nbytes
)))))
2581 (aver (= (file-position output
) (* sb-c
:+backend-page-bytes
+ (1+ page-count
))))
2582 #+mark-region-gc
; write the bitmap
2583 (dovector (pte (space-page-table dynamic-space
))
2584 (let ((bitmap (page-bitmap pte
)))
2585 (sb-sys:with-pinned-objects
(bitmap)
2586 ;; WRITE-SEQUENCE on a bit vector would write one octet per bit
2587 (sb-unix:unix-write
(sb-impl::fd-stream-fd output
) bitmap
0 (/ (length bitmap
) 8)))))
2589 (let ((buffer (make-array 10 :element-type
'(unsigned-byte 8))))
2590 (sb-sys:with-pinned-objects
(buffer)
2591 (let ((sap (vector-sap buffer
)))
2592 (dovector (pte (space-page-table dynamic-space
))
2593 (setf (sap-ref-64 sap
0) (logior (page-scan-start pte
) (page-type pte
))
2594 (sap-ref-16 sap
8) (logior (page-words-used pte
) (page-single-obj-p pte
)))
2595 (write-sequence buffer output
)))
2596 (let* ((bytes-written (* 10 (length (space-page-table dynamic-space
))))
2597 (diff (- (align-up bytes-written sb-vm
:n-word-bytes
)
2600 (write-sequence buffer output
:end diff
))))
2601 ;; write the trailer
2602 (let ((buffer (make-array 16 :element-type
'(unsigned-byte 8)
2603 :initial-element
0)))
2604 (sb-sys:with-pinned-objects
(buffer)
2605 (setf (%vector-raw-bits buffer
0) 0
2606 (%vector-raw-bits buffer
1) core-magic
)
2607 (write-sequence buffer output
)))
2608 (force-output output
)))
2610 (defun walk-target-space (function space-id spacemap
)
2611 (let* ((space (get-space space-id spacemap
))
2612 (paddr (space-physaddr space spacemap
)))
2613 (map-objects-in-range function
2615 (if (= space-id static-core-space-id
)
2616 ;; must not visit NIL, bad things happen
2617 (translate-ptr (+ static-space-start sb-vm
::static-space-objects-offset
)
2620 (%make-lisp-obj
(sap-int (sap+ paddr
(space-size space
)))))))
2622 (defun find-target-asm-code (spacemap)
2623 (walk-target-space (lambda (obj widetag size
)
2624 (declare (ignore size
))
2625 (when (= widetag code-header-widetag
)
2626 (return-from find-target-asm-code
2627 (let* ((space (get-space static-core-space-id spacemap
))
2628 (vaddr (space-addr space
))
2629 (paddr (space-physaddr space spacemap
)))
2631 (+ vaddr
(- (get-lisp-obj-address obj
)
2632 (sap-int paddr
))))))))
2633 static-core-space-id spacemap
))
2635 (defun move-dynamic-code-to-text-space (input-pathname output-pathname
)
2637 (ignore-errors (delete-file output-pathname
))
2638 ;; Ensure that all files can be opened
2639 (with-open-file (input input-pathname
:element-type
'(unsigned-byte 8))
2640 (with-open-file (output output-pathname
:direction
:output
2641 :element-type
'(unsigned-byte 8) :if-exists
:supersede
)
2642 ;; KLUDGE: see comment above DETECT-TARGET-FEATURES
2643 #+gencgc
(setq *bitmap-bits-per-page
* 0 *bitmap-bytes-per-page
* 0)
2644 (binding* ((core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
2645 (core-offset (read-core-header input core-header t
))
2646 ((npages space-list card-mask-nbits core-dir-start initfun
)
2647 (parse-core-header input core-header
)))
2648 ;; Map the core file to memory
2649 (with-mapped-core (sap core-offset npages input
)
2650 (let* ((spacemap (cons sap
(sort (copy-list space-list
) #'> :key
#'space-addr
)))
2651 (target-features (detect-target-features spacemap
))
2653 (fwdmap (make-hash-table))
2655 (offsets-vector-size)
2656 ;; We only need enough space to write C linkage call redirections from the
2657 ;; assembler routine codeblob, because those are the calls which assume that
2658 ;; asm code can directly call into the linkage space using "CALL rel32" form.
2659 ;; Dynamic-space calls do not assume that - they use "CALL [ea]" form.
2660 (c-linkage-reserved-words 12) ; arbitrary overestimate
2662 ;; text space will contain a copy of the asm code so it can use call rel32 form
2663 (asm-code (find-target-asm-code spacemap
))
2664 (asm-code-size (primitive-object-size (translate asm-code spacemap
)))
2665 (freeptr asm-code-size
)
2669 (lambda (obj vaddr size large
)
2670 (declare (ignore large
))
2671 (when (code-component-p obj
)
2672 (push (cons vaddr size
) codeblobs
)
2673 ;; new object will be at FREEPTR bytes from new space start
2674 (setf (gethash (sap-int vaddr
) fwdmap
) freeptr
)
2675 (incf freeptr size
))))))
2676 ;; FIXME: this _still_ doesn't work, because if the buid has :IMMOBILE-SPACE
2677 ;; then the symbols CL:*FEATURES* and SB-IMPL:+INTERNAL-FEATURES+
2678 ;; are not in dynamic space.
2679 (when (member :immobile-space target-features
)
2680 (error "Can't relocate code to text space since text space already exists"))
2682 (acons (int-sap (logandc2 (get-lisp-obj-address asm-code
) lowtag-mask
))
2684 (nreverse codeblobs
))
2685 n-objects
(length codeblobs
))
2686 ;; Preceding the code objects are two vectors:
2687 ;; (1) a vector of uint32_t indicating the starting offset (from the space start)
2688 ;; of each code object.
2689 ;; (2) a vector of uint64_t which embeds a JMP instruction to a C linkage table entry.
2690 ;; These instructions are near enough to be called via 'rel32' form. (The ordinary
2691 ;; alien linkage space is NOT near enough, after code is moved to text space)
2692 ;; The size of the new text space has to account for the sizes of the vectors.
2693 (let* ((n-vector1-data-words (ceiling n-objects
2)) ; two uint32s fit in a lispword
2694 (vector1-size (ash (+ (align-up n-vector1-data-words
2) ; round to even
2697 (n-vector2-data-words c-linkage-reserved-words
)
2698 (vector2-size (ash (+ n-vector2-data-words vector-data-offset
)
2700 (setf offsets-vector-size vector1-size
2701 reserved-amount
(+ vector1-size vector2-size
))
2702 ;; Adjust all code offsets upward to avoid doing more math later
2703 (maphash (lambda (k v
)
2704 (setf (gethash k fwdmap
) (+ v reserved-amount
)))
2706 (incf freeptr reserved-amount
)
2707 (format nil
"~&Code: ~D objects, ~D bytes~%" (length codeblobs
) freeptr
))
2708 (let* ((new-space-nbytes (align-up freeptr sb-c
:+backend-page-bytes
+))
2709 (new-space (sb-sys:allocate-system-memory new-space-nbytes
)))
2710 ;; Write header of "vector 1"
2711 (setf (sap-ref-word new-space
0) simple-array-unsigned-byte-32-widetag
2712 (sap-ref-word new-space n-word-bytes
) (fixnumize n-objects
))
2713 ;; write header of "vector 2"
2714 (setf (sap-ref-word new-space offsets-vector-size
) simple-array-unsigned-byte-64-widetag
2715 (sap-ref-word new-space
(+ offsets-vector-size n-word-bytes
))
2716 (fixnumize c-linkage-reserved-words
))
2717 ;; Transport code contiguously into new space
2718 (transport-dynamic-space-code codeblobs spacemap new-space reserved-amount
)
2719 ;; Walk static space and dynamic-space changing any pointers that
2720 ;; should point to new space.
2721 (dolist (space-id `(,dynamic-core-space-id
,static-core-space-id
))
2722 (let* ((space (get-space space-id spacemap
))
2723 (vaddr (space-addr space
))
2724 (paddr (space-physaddr space spacemap
))
2725 (diff (+ (- (sap-int paddr
)) vaddr
)))
2726 (format nil
"~&Fixing ~A~%" space
)
2728 (lambda (object widetag size
)
2729 (declare (ignore widetag size
))
2730 (unless (and (code-component-p object
) (= space-id dynamic-core-space-id
))
2731 (update-quasi-static-code-ptrs object spacemap fwdmap diff
)))
2732 space-id spacemap
)))
2733 ;; Walk new space and fix pointers into itself
2734 (format nil
"~&Fixing newspace~%")
2735 (map-objects-in-range
2736 (lambda (object widetag size
)
2737 (declare (ignore widetag size
))
2738 (update-quasi-static-code-ptrs object spacemap fwdmap
0))
2739 (%make-lisp-obj
(sap-int new-space
))
2740 (%make-lisp-obj
(sap-int (sap+ new-space freeptr
))))
2741 ;; don't zerofill asm code in static space
2742 (zerofill-old-code spacemap
(cdr codeblobs
) page-ranges
)
2743 ;; Update the core header to contain newspace
2744 (let ((spaces (nreconc
2745 (mapcar (lambda (space)
2746 (list 0 (space-id space
)
2747 (int-sap (translate-ptr (space-addr space
) spacemap
))
2749 (space-nwords space
)))
2751 `((0 ,immobile-text-core-space-id
,new-space
2752 ,+code-space-nominal-address
+
2753 ,(ash freeptr
(- word-shift
)))))))
2754 (rewrite-core spaces spacemap card-mask-nbits initfun
2755 core-header core-dir-start output
)
2760 (defun cl-user::elfinate
(&optional
(args (cdr *posix-argv
*)))
2761 (cond ((string= (car args
) "split")
2764 (loop (cond ((string= (car args
) "--pie")
2767 ((string= (car args
) "--dynamic-space-size")
2769 (setq dss
(parse-integer (pop args
))))
2772 (destructuring-bind (input asm
) args
2773 (split-core input asm
:enable-pie pie
2774 :dynamic-space-size dss
))))
2775 ((string= (car args
) "copy")
2776 (apply #'copy-to-elf-obj
(cdr args
)))
2777 ((string= (car args
) "extract")
2778 (apply #'move-dynamic-code-to-text-space
(cdr args
)))
2780 ((string= (car args
) "relocate")
2781 (destructuring-bind (input output binary start-sym
) (cdr args
)
2783 input output binary
(parse-integer start-sym
:radix
16))))
2785 (error "Unknown command: ~S" args
))))
2787 ;;; Processing a core without immobile-space
2789 ;;; This file provides a recipe which gets a little bit closer to being able to
2790 ;;; emulate #+immobile-space in so far as producing an ELF core is concerned.
2791 ;;; The recipe is a bit more complicated than I'd like, but it works.
2792 ;;; Let's say you want a core with contiguous text space containing the code
2793 ;;; of a quicklisp system.
2796 ;;; * (ql:quickload :one-more-re-nightmare-tests)
2797 ;;; * (save-lisp-and-die "step1.core")
2799 ;;; * (load "tools-for-build/editcore")
2800 ;;; * (sb-editcore:move-dynamic-code-to-text-space "step1.core" "step2.core")
2801 ;;; * (sb-editcore:redirect:text-space-calls "step2.core")
2802 ;;; Now "step2.core" has a text space, and all lisp-to-lisp calls bypass their FDEFN.
2803 ;;; At this point split-core on "step2.core" can run in the manner of elfcore.test.sh
2805 (defun get-code-segments (code vaddr spacemap
)
2806 (let ((di (%code-debug-info code
))
2807 (inst-base (+ vaddr
(ash (code-header-words code
) word-shift
)))
2809 (aver (%instancep di
))
2810 (if (zerop (code-n-entries code
)) ; assembler routines
2811 (dolist (entry (target-hash-table-alist di spacemap
))
2812 (let* ((val (translate (undescriptorize (cdr entry
)) spacemap
))
2813 ;; VAL is (start end . index)
2814 (start (the fixnum
(car val
)))
2815 (end (the fixnum
(car (translate (cdr val
) spacemap
)))))
2816 (push (make-code-segment code start
(- (1+ end
) start
)
2817 :virtual-location
(+ inst-base start
))
2819 (dolist (range (get-text-ranges code spacemap
))
2820 (let ((car (car range
)))
2821 (when (integerp car
)
2822 (push (make-code-segment code car
(- (cdr range
) car
)
2823 :virtual-location
(+ inst-base car
))
2825 (sort result
#'< :key
#'sb-disassem
:seg-virtual-location
)))
2827 (defstruct (range (:constructor make-range
(labeled vaddr bytecount
)))
2828 labeled vaddr bytecount
)
2830 (defun inst-vaddr (inst) (range-vaddr (car inst
)))
2831 (defun inst-length (inst) (range-bytecount (car inst
)))
2832 (defun inst-end (inst &aux
(range (car inst
)))
2833 (+ (range-vaddr range
) (range-bytecount range
)))
2835 (defmethod print-object ((self range
) stream
)
2836 (format stream
"~A~x,~x"
2837 (if (range-labeled self
) "L:" " ")
2839 (range-bytecount self
)))
2840 (defun get-code-instruction-model (code vaddr spacemap
)
2841 (let* ((segments (get-code-segments code vaddr spacemap
))
2842 (insts-vaddr (+ vaddr
(ash (code-header-words code
) word-shift
)))
2843 (dstate (sb-disassem:make-dstate
))
2845 (loop for i from
0 below
(code-n-entries code
)
2846 collect
(+ insts-vaddr
(%code-fun-offset code i
))))
2848 (sb-disassem:label-segments segments dstate
)
2849 ;; are labels not already sorted?
2850 (setq labels
(sort (sb-disassem::dstate-labels dstate
) #'< :key
#'car
))
2851 (sb-int:collect
((result))
2852 (dolist (seg segments
(coerce (result) 'vector
))
2853 (setf (sb-disassem:dstate-segment dstate
) seg
2854 (sb-disassem:dstate-segment-sap dstate
)
2855 (funcall (sb-disassem:seg-sap-maker seg
)))
2856 (setf (sb-disassem:dstate-cur-offs dstate
) 0)
2858 (when (eql (sb-disassem:dstate-cur-addr dstate
) (car fun-header-locs
))
2859 (incf (sb-disassem:dstate-cur-offs dstate
) (* simple-fun-insts-offset n-word-bytes
))
2860 (pop fun-header-locs
))
2861 (let* ((pc (sb-disassem:dstate-cur-addr dstate
))
2862 (labeled (when (and labels
(= pc
(caar labels
)))
2865 (inst (sb-disassem:disassemble-instruction dstate
))
2866 (nbytes (- (sb-disassem:dstate-cur-addr dstate
) pc
)))
2867 (result (cons (make-range labeled pc nbytes
) inst
)))
2868 (when (>= (sb-disassem:dstate-cur-offs dstate
) (sb-disassem:seg-length seg
))
2871 ;; The extra copy of ASM routines, particularly C-calling trampolines, that now reside in text
2872 ;; space have to be modified to correctly reference their C functions. They assume that static
2873 ;; space is near alien-linkage space, and so they use this form:
2874 ;; xxxx: E8A1F0EFFF CALL #x50000060 ; alloc
2875 ;; which unforuntately means that after relocating to text space, that instruction refers
2876 ;; to random garbage, and more unfortunately there is no room to squeeze in an instruction
2877 ;; that encodes to 7 bytes.
2878 ;; So we have to create an extra jump "somewhere" that indirects through the linkage table
2879 ;; but is callable from the text-space code.
2880 ;;; I don't feel like programmatically scanning the asm code to determine these.
2881 ;;; Hardcoded is good enough (until it isn't)
2882 (defparameter *c-linkage-redirects
*
2883 (mapcar (lambda (x) (cons x
(foreign-symbol-sap x
)))
2890 "allocation_tracker_counted"
2891 "allocation_tracker_sized")))
2893 (defun get-text-space-asm-code-replica (space spacemap
)
2894 (let* ((physaddr (sap-int (space-physaddr space spacemap
)))
2895 (offsets-vector (%make-lisp-obj
(logior physaddr other-pointer-lowtag
)))
2896 (offset (aref offsets-vector
0)))
2897 (values (+ (space-addr space
) offset
)
2898 (%make-lisp-obj
(+ physaddr offset other-pointer-lowtag
)))))
2900 (defun get-static-space-asm-code (space spacemap
)
2903 (sb-editcore::walk-target-space
2904 (lambda (x widetag size
)
2905 (declare (ignore widetag size
))
2906 (when (code-component-p x
)
2908 static-core-space-id spacemap
))))
2909 (values (+ (- (get-lisp-obj-address found
)
2910 (sap-int (space-physaddr space spacemap
))
2911 other-pointer-lowtag
)
2915 (defun patch-assembly-codeblob (spacemap)
2916 (binding* ((static-space (get-space static-core-space-id spacemap
))
2917 (text-space (get-space immobile-text-core-space-id spacemap
))
2918 ((new-code-vaddr new-code
) (get-text-space-asm-code-replica text-space spacemap
))
2919 ((old-code-vaddr old-code
) (get-static-space-asm-code static-space spacemap
))
2920 (code-offsets-vector
2921 (%make-lisp-obj
(logior (sap-int (space-physaddr text-space spacemap
))
2922 other-pointer-lowtag
)))
2923 (header-bytes (ash (code-header-words old-code
) word-shift
))
2924 (old-insts-vaddr (+ old-code-vaddr header-bytes
))
2925 (new-insts-vaddr (+ new-code-vaddr header-bytes
))
2926 (items *c-linkage-redirects
*)
2927 (inst-buffer (make-array 8 :element-type
'(unsigned-byte 8)))
2928 (code-offsets-vector-size (primitive-object-size code-offsets-vector
))
2929 (c-linkage-vector-vaddr (+ (space-addr text-space
) code-offsets-vector-size
))
2930 (c-linkage-vector ; physical
2931 (%make-lisp-obj
(logior (sap-int (sap+ (space-physaddr text-space spacemap
)
2932 code-offsets-vector-size
))
2933 other-pointer-lowtag
))))
2934 (aver (<= (length items
) (length c-linkage-vector
)))
2935 (with-pinned-objects (inst-buffer)
2936 (do ((sap (vector-sap inst-buffer
))
2937 (item-index 0 (1+ item-index
))
2938 (items items
(cdr items
)))
2940 ;; Each new quasi-linkage-table entry takes 8 bytes to encode.
2941 ;; The JMP is 7 bytes, followed by a nop.
2942 ;; FF2425nnnnnnnn = JMP [ea]
2943 (setf (sap-ref-8 sap
0) #xFF
2944 (sap-ref-8 sap
1) #x24
2945 (sap-ref-8 sap
2) #x25
2946 (sap-ref-32 sap
3) (sap-int (sap+ (cdar items
) 8))
2947 (sap-ref-8 sap
7) #x90
) ; nop
2948 (setf (aref c-linkage-vector item-index
) (%vector-raw-bits inst-buffer
0))))
2949 ;; Produce a model of the instructions. It doesn't really matter whether we scan
2950 ;; OLD-CODE or NEW-CODE since we're supplying the proper virtual address either way.
2951 (let ((insts (get-code-instruction-model old-code old-code-vaddr spacemap
)))
2952 ;; (dovector (inst insts) (write inst :base 16 :pretty nil :escape nil) (terpri))
2953 (dovector (inst insts
)
2954 ;; Look for any call to a linkage table entry.
2955 (when (eq (second inst
) 'call
)
2956 (let ((operand (third inst
)))
2957 (when (and (integerp operand
)
2958 (>= operand alien-linkage-table-space-start
)
2959 (< operand
(+ alien-linkage-table-space-start
2960 alien-linkage-table-space-size
)))
2961 (let* ((index (position (int-sap operand
) *c-linkage-redirects
*
2962 :key
#'cdr
:test
#'sap
=))
2963 (branch-target (+ c-linkage-vector-vaddr
2964 (ash vector-data-offset word-shift
)
2965 ;; each new linkage entry takes up exactly 1 word
2966 (* index n-word-bytes
)))
2967 (old-next-ip-abs (int-sap (inst-end inst
))) ; virtual
2968 (next-ip-rel (sap- old-next-ip-abs
(int-sap old-insts-vaddr
)))
2969 (new-next-ip (+ new-insts-vaddr next-ip-rel
)))
2970 (setf (signed-sap-ref-32 (code-instructions new-code
) (- next-ip-rel
4))
2971 (- branch-target new-next-ip
))))))))))
2973 (defun get-mov-src-constant (code code-vaddr inst ea spacemap
)
2974 (let* ((next-ip (inst-end inst
))
2975 ;; this is a virtual adrress
2976 (abs-addr (+ next-ip
(machine-ea-disp ea
))))
2977 (when (and (not (logtest abs-addr
#b111
)) ; lispword-aligned
2978 (>= abs-addr code-vaddr
)
2979 (< abs-addr
(+ code-vaddr
(ash (code-header-words code
) word-shift
))))
2980 (let ((paddr (translate-ptr abs-addr spacemap
)))
2981 (translate (sap-ref-lispobj (int-sap paddr
) 0) spacemap
)))))
2984 (defun locate-const-move-to-rax (code vaddr insts start spacemap fdefns
)
2985 ;; Look for a MOV to RAX from a code header constant
2986 ;; Technically this should fail if it finds _any_ instruction
2987 ;; that affects RAX before it finds the one we're looking for.
2988 (loop for i downfrom start to
1
2989 do
(let ((inst (svref insts i
)))
2990 (cond ((range-labeled (first inst
)) (return)) ; labeled statement - fail
2991 ((and (eq (second inst
) 'mov
)
2992 (eq (third inst
) (load-time-value (get-gpr :qword
0)))
2993 (typep (fourth inst
) '(cons machine-ea
(eql :qword
))))
2994 (let ((ea (car (fourth inst
))))
2995 (when (and (eq (machine-ea-base ea
) :rip
)
2996 (minusp (machine-ea-disp ea
)))
2998 (let ((fdefn (get-mov-src-constant code vaddr inst ea spacemap
)))
2999 (when (and (fdefn-p fdefn
) (memq fdefn fdefns
))
3000 (sb-vm::set-fdefn-has-static-callers fdefn
1)
3001 (values i
(fdefn-fun fdefn
))))))))))))
3004 (defun replacement-opcode (inst)
3005 (ecase (second inst
) ; opcode
3010 (defun patch-fdefn-call (code vaddr insts inst i spacemap fdefns
&optional print
)
3011 ;; START is the index into INSTS of the instructon that loads RAX
3012 (multiple-value-bind (start callee
)
3013 (locate-const-move-to-rax code vaddr insts
(1- i
) spacemap fdefns
)
3015 (let ((text-space (get-space immobile-text-core-space-id spacemap
)))
3016 (< (space-addr text-space
)
3017 ;; CALLEE is an untranslated address
3018 (get-lisp-obj-address callee
)
3019 (space-end text-space
))))
3021 (let ((addr (inst-vaddr (svref insts start
))) ; starting address
3022 (end (inst-end inst
)))
3023 (sb-c:dis
(translate-ptr addr spacemap
) (- end addr
))))
3024 ;; Several instructions have to be replaced to make room for the new CALL
3025 ;; which is a longer than the old, but it's ok since a MOV is eliminated.
3027 (loop for j from start to i sum
(inst-length (svref insts j
))))
3028 (new-bytes (make-array sum-lengths
:element-type
'(unsigned-byte 8)))
3030 (loop for j from
(1+ start
) below i
3031 do
(let* ((old-inst (svref insts j
))
3032 (ip (inst-vaddr old-inst
))
3033 (physaddr (int-sap (translate-ptr ip spacemap
)))
3034 (nbytes (inst-length old-inst
)))
3036 (setf (aref new-bytes new-index
) (sap-ref-8 physaddr k
))
3038 ;; insert padding given that the new call takes 5 bytes to encode
3039 (let* ((nop-len (- sum-lengths
(+ new-index
5)))
3040 (nop-pattern (ecase nop-len
3041 (5 '(#x0f
#x1f
#x44
#x00
#x00
)))))
3042 (dolist (byte nop-pattern
)
3043 (setf (aref new-bytes new-index
) byte
)
3046 (let* ((branch-target
3047 (simple-fun-entry-sap (translate callee spacemap
)))
3048 (next-pc (int-sap (inst-end inst
)))
3049 (rel32 (sap- branch-target next-pc
)))
3050 (setf (aref new-bytes new-index
) (replacement-opcode inst
))
3051 (with-pinned-objects (new-bytes)
3052 (setf (signed-sap-ref-32 (vector-sap new-bytes
) (1+ new-index
)) rel32
)
3054 (format t
"~&Replaced by:~%")
3055 (let ((s (sb-disassem::make-vector-segment new-bytes
0 sum-lengths
3056 :virtual-location vaddr
)))
3057 (sb-disassem::disassemble-segment
3058 s
*standard-output
* (sb-disassem:make-dstate
))))
3059 (let* ((vaddr (inst-vaddr (svref insts start
)))
3060 (paddr (translate-ptr vaddr spacemap
)))
3061 (%byte-blt new-bytes
0 (int-sap paddr
) 0 sum-lengths
))))))))
3063 (defun find-static-call-target-in-text-space (inst addr spacemap static-asm-code text-asm-code
)
3064 (declare (ignorable inst
))
3065 ;; this will (for better or for worse) find static fdefns as well as asm routines,
3066 ;; so we have to figure out which it is.
3067 (let ((asm-codeblob-size
3068 (primitive-object-size
3069 (%make-lisp-obj
(logior (translate-ptr static-asm-code spacemap
)
3070 other-pointer-lowtag
)))))
3071 (cond ((<= static-asm-code addr
(+ static-asm-code
(1- asm-codeblob-size
)))
3072 (let* ((offset-from-base (- addr static-asm-code
))
3073 (new-vaddr (+ text-asm-code offset-from-base
)))
3074 (sap-ref-word (int-sap (translate-ptr new-vaddr spacemap
)) 0)))
3076 (let* ((fdefn-vaddr (- addr
(ash fdefn-raw-addr-slot word-shift
)))
3077 (fdefn-paddr (int-sap (translate-ptr fdefn-vaddr spacemap
))))
3078 ;; Confirm it looks like a static fdefn
3079 (aver (= (logand (sap-ref-word fdefn-paddr
0) widetag-mask
) fdefn-widetag
))
3080 (let ((entrypoint (sap-ref-word fdefn-paddr
(ash fdefn-raw-addr-slot word-shift
))))
3081 ;; Confirm there is a simple-fun header where expected
3083 (sap-ref-word (int-sap (translate-ptr entrypoint spacemap
))
3084 (- (ash simple-fun-insts-offset word-shift
)))))
3085 (aver (= (logand header widetag-mask
) simple-fun-widetag
))
3086 ;; Return the entrypoint which already point to text space
3089 ;; Patch either a ca through a static-space fdefn or an asm routine indirect jump.
3090 (defun patch-static-space-call (inst spacemap static-asm-code text-asm-code
)
3091 (let* ((new-bytes (make-array 7 :element-type
'(unsigned-byte 8)))
3092 (addr (machine-ea-disp (car (third inst
))))
3094 (find-static-call-target-in-text-space
3095 inst addr spacemap static-asm-code text-asm-code
)))
3097 (setf (aref new-bytes
0) #x66
(aref new-bytes
1) #x90
) ; 2-byte NOP
3098 (setf (aref new-bytes
2) (replacement-opcode inst
))
3099 (let ((next-ip (inst-end inst
)))
3100 (with-pinned-objects (new-bytes)
3101 (setf (signed-sap-ref-32 (vector-sap new-bytes
) 3) (- branch-target next-ip
)))
3102 (%byte-blt new-bytes
0 (int-sap (translate-ptr (inst-vaddr inst
) spacemap
)) 0 7)))))
3104 ;;; Avoid splicing out any fdefn not uniquely identified by its function binding.
3105 (defun get-patchable-fdefns (code spacemap
&aux alist result
)
3106 (multiple-value-bind (start count
) (code-header-fdefn-range code
)
3107 (loop for i from start repeat count
3108 do
(let* ((fdefn (translate (code-header-ref code i
) spacemap
))
3109 (fun (translate (fdefn-fun fdefn
) spacemap
)))
3110 (when (simple-fun-p fun
)
3111 ;; It is dangerous to create heap cons cells holding pointers to
3112 ;; objects at their logical address in the target core.
3113 ;; TBH, all target objects should be wrapped in a DESCRIPTOR
3114 ;; structure defined at the top of this file.
3115 (push (cons fun fdefn
) alist
)))))
3116 (dolist (cell alist result
)
3117 (destructuring-bind (fun . fdefn
) cell
3118 (unless (find-if (lambda (other)
3119 (and (eq (car other
) fun
) (neq (cdr other
) fdefn
)))
3121 (push fdefn result
)))))
3123 ;;; Since dynamic-space code is pretty much relocatable,
3124 ;;; disassembling it at a random physical address is fine.
3126 (defun patch-lisp-codeblob
3127 (code vaddr spacemap static-asm-code text-asm-code
3128 &aux
(insts (get-code-instruction-model code vaddr spacemap
))
3129 (fdefns (get-patchable-fdefns code spacemap
)))
3130 (declare (simple-vector insts
))
3132 ((>= i
(length insts
)))
3133 (let* ((inst (svref insts i
))
3134 (this-op (second inst
)))
3135 (when (member this-op
'(call jmp
))
3136 ;; is it potentially a call via an fdefn or an asm code indirection?
3137 (let* ((operand (third inst
))
3138 (ea (if (listp operand
) (car operand
))))
3139 (when (and (typep operand
'(cons machine-ea
(eql :qword
)))
3140 (or (and (eql (machine-ea-base ea
) 0) ; [RAX-9]
3141 (eql (machine-ea-disp ea
) 9)
3142 (not (machine-ea-index ea
)))
3143 (and (not (machine-ea-base ea
))
3144 (not (machine-ea-index ea
))
3145 (<= static-space-start
(machine-ea-disp ea
)
3146 (sap-int *static-space-free-pointer
*)))))
3147 (if (eql (machine-ea-base ea
) 0) ; based on RAX
3148 (patch-fdefn-call code vaddr insts inst i spacemap fdefns
)
3149 (patch-static-space-call inst spacemap
3150 static-asm-code text-asm-code
))))))))
3152 (defun persist-to-file (spacemap core-offset stream
)
3153 (aver (zerop core-offset
))
3154 (dolist (space-id `(,static-core-space-id
3155 ,immobile-text-core-space-id
3156 ,dynamic-core-space-id
))
3157 (let ((space (get-space space-id spacemap
)))
3158 (file-position stream
(* (1+ (space-data-page space
)) +backend-page-bytes
+))
3159 (sb-unix:unix-write
(sb-impl::fd-stream-fd stream
)
3160 (space-physaddr space spacemap
)
3162 (align-up (* (space-nwords space
) n-word-bytes
)
3163 +backend-page-bytes
+)))))
3165 (defun redirect-text-space-calls (pathname)
3166 (with-open-file (stream pathname
:element-type
'(unsigned-byte 8)
3167 :direction
:io
:if-exists
:overwrite
)
3168 (binding* ((core-header (make-array +backend-page-bytes
+ :element-type
'(unsigned-byte 8)))
3169 (core-offset (read-core-header stream core-header t
))
3170 ((npages space-list card-mask-nbits core-dir-start initfun
)
3171 (parse-core-header stream core-header
)))
3172 (declare (ignore card-mask-nbits core-dir-start initfun
))
3173 (with-mapped-core (sap core-offset npages stream
)
3174 (let ((spacemap (cons sap
(sort (copy-list space-list
) #'> :key
#'space-addr
))))
3175 (patch-assembly-codeblob spacemap
)
3176 (let* ((text-space (get-space immobile-text-core-space-id spacemap
))
3177 (offsets-vector (%make-lisp-obj
(logior (sap-int (space-physaddr text-space spacemap
))
3179 (static-space-asm-code
3180 (get-static-space-asm-code (get-space static-core-space-id spacemap
) spacemap
))
3181 (text-space-asm-code
3182 (get-text-space-asm-code-replica text-space spacemap
)))
3184 ;; offset 0 is the offset of the ASM routine codeblob which was already processed.
3185 (loop for j from
1 below
(length offsets-vector
)
3186 do
(let ((vaddr (+ (space-addr text-space
) (aref offsets-vector j
)))
3187 (physobj (%make-lisp-obj
3188 (logior (sap-int (sap+ (space-physaddr text-space spacemap
)
3189 (aref offsets-vector j
)))
3190 other-pointer-lowtag
))))
3191 ;; Assert that there are no fixups other than GC card table mask fixups
3192 (let ((fixups (sb-vm::%code-fixups physobj
)))
3193 (unless (fixnump fixups
)
3194 (setq fixups
(translate fixups spacemap
))
3195 (aver (typep fixups
'bignum
)))
3196 (multiple-value-bind (list1 list2 list3
)
3197 (sb-c::unpack-code-fixup-locs fixups
)
3198 (declare (ignore list3
))
3200 (aver (null list2
))))
3201 (patch-lisp-codeblob physobj vaddr spacemap
3202 static-space-asm-code text-space-asm-code
))))
3203 (persist-to-file spacemap core-offset stream
))))))
3206 ;; If loaded as a script, do this
3207 (eval-when (:execute
)
3208 (let ((args (cdr *posix-argv
*)))
3210 (let ((*print-pretty
* nil
))
3211 (format t
"Args: ~S~%" args
)
3212 (cl-user::elfinate args
)))))