editcore: Add support for arm64 and enable PIE support.
[sbcl.git] / tools-for-build / editcore.lisp
blob9ec0ae74ad2a838ae174183b76af40b1a23863db
1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
11 ;;;;
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (load (merge-pathnames "corefile.lisp" *load-pathname*))
20 (defpackage "SB-EDITCORE"
21 (:use "CL" "SB-ALIEN" "SB-COREFILE" "SB-INT" "SB-EXT"
22 "SB-KERNEL" "SB-SYS" "SB-VM")
23 (:export #:move-dynamic-code-to-text-space #:redirect-text-space-calls
24 #:split-core #:copy-to-elf-obj)
25 (:import-from "SB-ALIEN-INTERNALS"
26 #:alien-type-bits #:parse-alien-type
27 #:alien-value-sap #:alien-value-type)
28 (:import-from "SB-C" #:+backend-page-bytes+)
29 (:import-from "SB-VM" #:map-objects-in-range #:reconstitute-object
30 #:%closure-callee #:code-object-size)
31 (:import-from "SB-DISASSEM" #:get-inst-space #:find-inst
32 #:make-dstate #:%make-segment #:make-code-segment
33 #:seg-virtual-location #:seg-length #:seg-sap-maker
34 #:map-segment-instructions #:inst-name
35 #:dstate-next-addr #:dstate-cur-offs
36 #:dstate-cur-addr #:sign-extend)
37 #+x86-64
38 (:import-from "SB-X86-64-ASM" #:near-jump-displacement
39 #:near-cond-jump-displacement #:mov #:call #:jmp
40 #:get-gpr #:reg-name
41 #:machine-ea #:machine-ea-base #:machine-ea-index #:machine-ea-disp)
42 (:import-from "SB-IMPL" #:symbol-hashset #:package-%name
43 #:symtbl-%cells
44 #:hash-table-pairs #:hash-table-%count))
46 (in-package "SB-EDITCORE")
48 (declaim (muffle-conditions compiler-note))
50 (eval-when (:execute)
51 (setq *evaluator-mode* :compile))
53 ;;; Some high address that won't conflict with any of the ordinary spaces
54 ;;; It's more-or-less arbitrary, but we must be able to discern whether a
55 ;;; pointer looks like it points to code in case coreparse has to walk the heap.
56 (defconstant +code-space-nominal-address+ #x550000000000)
58 (defglobal +noexec-stack-note+ ".section .note.GNU-stack, \"\", @progbits")
60 (defstruct (core-space ; "space" is a CL symbol
61 (:conc-name space-)
62 (:constructor make-space (id addr data-page page-adjust nwords)))
63 (page-table nil :type (or null simple-vector))
64 id addr data-page page-adjust nwords)
65 (defmethod print-object ((self core-space) stream)
66 (print-unreadable-object (self stream :type t)
67 (format stream "~d" (space-id self))))
68 (defun space-size (space) (* (space-nwords space) n-word-bytes))
69 (defun space-end (space) (+ (space-addr space) (space-size space)))
70 (defun space-nbytes-aligned (space)
71 (align-up (space-size space) +backend-page-bytes+))
72 (defun space-physaddr (space spacemap)
73 (sap+ (car spacemap) (* (space-data-page space) +backend-page-bytes+)))
75 ;;; Given VADDR which is an address in the target core, return the address at which
76 ;;; VADDR is currently mapped while performing the split.
77 ;;; SPACEMAP is a cons of a SAP and an alist whose elements are (ADDR . CORE-SPACE)
78 (defun translate-ptr (vaddr spacemap)
79 (let ((space (find vaddr (cdr spacemap) :key #'space-addr :test #'>=)))
80 ;; FIXME: duplicates SPACE-PHYSADDR to avoid consing a SAP.
81 ;; macroize or something.
82 (+ (sap-int (car spacemap)) (* (space-data-page space) +backend-page-bytes+)
83 (- vaddr (space-addr space)))))
85 ;;;
86 (defun get-space (id spacemap)
87 (find id (cdr spacemap) :key #'space-id))
88 (defun compute-nil-object (spacemap)
89 (let ((space (get-space static-core-space-id spacemap)))
90 ;; TODO: The core should store its address of NIL in the initial function entry
91 ;; so this kludge can be removed.
92 (%make-lisp-obj (logior (space-addr space) #x117)))) ; SUPER KLUDGE
94 ;;; Given OBJ which is tagged pointer into the target core, translate it into
95 ;;; the range at which the core is now mapped during execution of this tool,
96 ;;; so that host accessors can dereference its slots.
97 ;;; Use extreme care: while it works to use host accessors on the target core,
98 ;;; we must avoid type checks on instances because LAYOUTs need translation.
99 ;;; Printing boxed objects from the target core will almost always crash.
100 (defun translate (obj spacemap)
101 (%make-lisp-obj (translate-ptr (get-lisp-obj-address obj) spacemap)))
103 (defstruct (core-sym (:copier nil) (:predicate nil)
104 (:constructor make-core-sym (package name external)))
105 (package nil)
106 (name nil :read-only t)
107 (external nil :read-only t))
109 (defstruct (bounds (:constructor make-bounds (low high)))
110 (low 0 :type word) (high 0 :type word))
112 (defstruct (core (:predicate nil)
113 (:copier nil)
114 (:constructor %make-core))
115 (spacemap)
116 (nil-object)
117 ;; mapping from small integer ID to package
118 (pkg-id->package)
119 ;; mapping from string naming a package to list of symbol names (strings)
120 ;; that are external in the package.
121 (packages (make-hash-table :test 'equal))
122 ;; hashset of symbol names (as strings) that should be package-qualified.
123 ;; (Prefer not to package-qualify unambiguous names)
124 (nonunique-symbol-names)
125 (code-bounds nil :type bounds :read-only t)
126 (fixedobj-bounds nil :type bounds :read-only t)
127 (linkage-bounds nil :type bounds :read-only t)
128 (linkage-symbols nil)
129 (linkage-symbol-usedp nil)
130 (linkage-entry-size nil)
131 (new-fixups (make-hash-table))
132 (new-fixup-words-used 0)
133 ;; For assembler labels that we want to invent at random
134 (label-counter 0)
135 (enable-pie nil)
136 (dstate (make-dstate nil) :read-only t)
137 (seg (%make-segment :sap-maker (lambda () (error "Bad sap maker"))
138 :virtual-location 0) :read-only t)
139 (fixup-addrs nil)
140 (call-inst nil :read-only t)
141 (jmp-inst nil :read-only t)
142 (pop-inst nil :read-only t))
144 (defglobal *editcore-ppd*
145 ;; copy no entries for macros/special-operators (flet, etc)
146 (let ((ppd (sb-pretty::make-pprint-dispatch-table #() nil nil)))
147 (set-pprint-dispatch 'string
148 ;; Write strings without string quotes
149 (lambda (stream string) (write-string string stream))
151 ppd)
152 ppd))
154 (defun c-name (lispname core pp-state &optional (prefix ""))
155 (when (typep lispname '(string 0))
156 (setq lispname "anonymous"))
157 ;; Perform backslash escaping on the exploded string
158 ;; Strings were stringified without surrounding quotes,
159 ;; but there might be quotes embedded anywhere, so escape them,
160 ;; and also remove newlines and non-ASCII.
161 (let ((characters
162 (mapcan (lambda (char)
163 (cond ((not (typep char 'base-char)) (list #\?))
164 ((member char '(#\\ #\")) (list #\\ char))
165 ((eql char #\newline) (list #\_))
166 (t (list char))))
167 (coerce (cond
168 #+darwin
169 ((and (stringp lispname)
170 ;; L denotes a symbol which can not be global on macOS.
171 (char= (char lispname 0) #\L))
172 (concatenate 'string "_" lispname))
174 (write-to-string lispname
175 ;; Printing is a tad faster without a pretty stream
176 :pretty (not (typep lispname 'core-sym))
177 :pprint-dispatch *editcore-ppd*
178 ;; FIXME: should be :level 1, however see
179 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
180 :escape t :level 2 :length 5
181 :case :downcase :gensym nil
182 :right-margin 10000)))
183 'list))))
184 (let ((string (concatenate 'string prefix characters)))
185 ;; If the string appears in the linker symbols, then string-upcase it
186 ;; so that it looks like a conventional Lisp symbol.
187 (cond ((find-if (lambda (x) (string= string (if (consp x) (car x) x)))
188 (core-linkage-symbols core))
189 (setq string (string-upcase string)))
190 ((string= string ".") ; can't use the program counter symbol either
191 (setq string "|.|")))
192 ;; If the symbol is still nonunique, add a random suffix.
193 ;; The secondary value is whether the symbol should be a linker global.
194 ;; For now, make nothing global, thereby avoiding potential conflicts.
195 (let ((occurs (incf (gethash string (car pp-state) 0))))
196 (if (> occurs 1)
197 (values (concatenate 'string string "_" (write-to-string occurs))
198 nil)
199 (values string
200 nil))))))
202 (defmethod print-object ((sym core-sym) stream)
203 (format stream "~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
204 (core-sym-package sym)
205 (core-sym-external sym)
206 (core-sym-name sym)))
208 (defun space-bounds (id spacemap)
209 (let ((space (get-space id spacemap)))
210 (if space
211 (make-bounds (space-addr space) (space-end space))
212 (make-bounds 0 0))))
213 (defun in-bounds-p (addr bounds)
214 (and (>= addr (bounds-low bounds)) (< addr (bounds-high bounds))))
216 (defun make-string-hashset (contents count)
217 (let ((hs (sb-int:make-hashset count #'string= #'sxhash)))
218 (dolist (string contents hs)
219 (sb-int:hashset-insert hs string))))
221 (defun scan-symbol-hashset (function table core)
222 (let* ((spacemap (core-spacemap core))
223 (nil-object (core-nil-object core))
224 (cells (translate (symtbl-%cells (truly-the symbol-hashset
225 (translate table spacemap)))
226 spacemap)))
227 (dovector (x (translate (cdr cells) spacemap))
228 (unless (fixnump x)
229 (funcall function
230 (if (eq x nil-object) ; any random package can export NIL. wow.
231 "NIL"
232 (translate (symbol-name (translate x spacemap)) spacemap))
233 x)))))
235 (defun %fun-name-from-core (name core &aux (spacemap (core-spacemap core))
236 (packages (core-packages core))
237 (core-nil (core-nil-object core)))
238 (named-let recurse ((depth 0) (x name))
239 (unless (is-lisp-pointer (get-lisp-obj-address x))
240 (return-from recurse x)) ; immediate object
241 (when (eq x core-nil)
242 (return-from recurse nil))
243 (setq x (translate x spacemap))
244 (ecase (lowtag-of x)
245 (#.list-pointer-lowtag
246 (cons (recurse (1+ depth) (car x))
247 (recurse (1+ depth) (cdr x))))
248 ((#.instance-pointer-lowtag #.fun-pointer-lowtag) "?")
249 (#.other-pointer-lowtag
250 (cond
251 ((stringp x)
252 (let ((p (position #\/ x :from-end t)))
253 (if p (subseq x (1+ p)) x)))
254 ((symbolp x)
255 (let ((package-id (symbol-package-id x))
256 (name (translate (symbol-name x) spacemap)))
257 (when (eq package-id 0) ; uninterned
258 (return-from recurse (string-downcase name)))
259 (let* ((package (truly-the package
260 (aref (core-pkg-id->package core) package-id)))
261 (package-name (translate (package-%name package) spacemap)))
262 ;; The name-cleaning code wants to compare against symbols
263 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
264 ;; Other than that, we avoid finding host symbols
265 ;; because the externalness could be wrong and misleading.
266 ;; It's a very subtle point, but best to get it right.
267 (when (member package-name '("COMMON-LISP" "KEYWORD" "SB-PCL")
268 :test #'string=)
269 ;; NIL can't occur. It was picked off above.
270 (awhen (find-symbol name package-name) ; if existing symbol, use it
271 (return-from recurse it)))
272 (unless (gethash name (core-nonunique-symbol-names core))
273 ;; Don't care about package
274 (return-from recurse (make-core-sym nil name nil)))
275 (when (string= package-name "KEYWORD") ; make an external core-symbol
276 (return-from recurse (make-core-sym nil name t)))
277 (let ((externals (gethash package-name packages))
278 (n 0))
279 (unless externals
280 (scan-symbol-hashset
281 (lambda (string symbol)
282 (declare (ignore symbol))
283 (incf n)
284 (push string externals))
285 (package-external-symbols package)
286 core)
287 (setf externals (make-string-hashset externals n)
288 (gethash package-name packages) externals))
289 (make-core-sym package-name
290 name
291 (sb-int:hashset-find externals name))))))
292 (t "?"))))))
294 (defun remove-name-junk (name)
295 (setq name
296 (named-let recurse ((x name))
297 (cond ((typep x '(cons (eql lambda)))
298 (let ((args (second x)))
299 `(lambda ,(if args sb-c::*debug-name-sharp* "()")
300 ,@(recurse (cddr x)))))
301 ((eq x :in) "in")
302 ((and (typep x '(or string symbol))
303 (= (mismatch (string x) "CLEANUP-FUN-")
304 (length "CLEANUP-FUN-")))
305 '#:cleanup-fun)
306 ((consp x) (recons x (recurse (car x)) (recurse (cdr x))))
307 (t x))))
308 ;; Shorten obnoxiously long printed representations of methods.
309 (flet ((unpackageize (thing)
310 (when (typep thing 'core-sym)
311 (setf (core-sym-package thing) nil))
312 thing))
313 (when (typep name '(cons (member sb-pcl::slow-method sb-pcl::fast-method
314 sb-pcl::slot-accessor)))
315 (setq name `(,(case (car name)
316 (sb-pcl::fast-method "method")
317 (sb-pcl::slow-method "Method") ; something visually distinct
318 (sb-pcl::slot-accessor "accessor"))
319 ,@(cdr name)))
320 (setf (second name) (unpackageize (second name)))
321 (let ((last (car (last name))))
322 (when (listp last)
323 (dolist (qual last)
324 (unpackageize qual))))))
325 name)
327 (defun fun-name-from-core (name core)
328 (remove-name-junk (%fun-name-from-core name core)))
330 ;;; A problem: COMPILED-DEBUG-FUN-ENCODED-LOCS (a packed integer) might be a
331 ;;; bignum - in fact probably is. If so, it points into the target core.
332 ;;; So we have to produce a new instance with an ENCODED-LOCS that
333 ;;; is the translation of the bignum, and call the accessor on that.
334 ;;; The accessors for its sub-fields are abstract - we don't know where the
335 ;;; fields are so we can't otherwise unpack them. (See CDF-DECODE-LOCS if
336 ;;; you really need to know)
337 (defun cdf-offset (compiled-debug-fun spacemap)
338 ;; (Note that on precisely GC'd platforms, this operation is dangerous,
339 ;; but no more so than everything else in this file)
340 (let ((locs (sb-c::compiled-debug-fun-encoded-locs
341 (truly-the sb-c::compiled-debug-fun compiled-debug-fun))))
342 (when (consp locs)
343 (setq locs (cdr (translate locs spacemap))))
344 (sb-c::compiled-debug-fun-offset
345 (sb-c::make-compiled-debug-fun
346 :name nil
347 :encoded-locs (if (fixnump locs) locs (translate locs spacemap))))))
349 ;;; Return a list of ((NAME START . END) ...)
350 ;;; for each C symbol that should be emitted for this code object.
351 ;;; Start and and are relative to the object's base address,
352 ;;; not the start of its instructions. Hence we add HEADER-BYTES
353 ;;; too all the PC offsets.
354 (defun code-symbols (code core &aux (spacemap (core-spacemap core)))
355 (let ((cdf (translate
356 (sb-c::compiled-debug-info-fun-map
357 (truly-the sb-c::compiled-debug-info
358 (translate (%code-debug-info code) spacemap)))
359 spacemap))
360 (header-bytes (* (code-header-words code) n-word-bytes))
361 (start-pc 0)
362 (blobs))
363 (loop
364 (let* ((name (fun-name-from-core
365 (sb-c::compiled-debug-fun-name
366 (truly-the sb-c::compiled-debug-fun cdf))
367 core))
368 (next (when (%instancep (sb-c::compiled-debug-fun-next cdf))
369 (translate (sb-c::compiled-debug-fun-next cdf) spacemap)))
370 (end-pc (if next
371 (+ header-bytes (cdf-offset next spacemap))
372 (code-object-size code))))
373 (unless (= end-pc start-pc)
374 ;; Collapse adjacent address ranges named the same.
375 ;; Use EQUALP instead of EQUAL to compare names
376 ;; because instances of CORE-SYMBOL are not interned objects.
377 (if (and blobs (equalp (caar blobs) name))
378 (setf (cddr (car blobs)) end-pc)
379 (push (list* name start-pc end-pc) blobs)))
380 (if next
381 (setq cdf next start-pc end-pc)
382 (return))))
383 (nreverse blobs)))
385 (defstruct (descriptor (:constructor make-descriptor (bits)))
386 (bits 0 :type word))
387 (defmethod print-object ((self descriptor) stream)
388 (format stream "#<ptr ~x>" (descriptor-bits self)))
389 (defun descriptorize (obj)
390 (if (is-lisp-pointer (get-lisp-obj-address obj))
391 (make-descriptor (get-lisp-obj-address obj))
392 obj))
393 (defun undescriptorize (target-descriptor)
394 (%make-lisp-obj (descriptor-bits target-descriptor)))
396 (defun target-hash-table-alist (table spacemap)
397 (let ((table (truly-the hash-table (translate table spacemap))))
398 (let ((cells (the simple-vector (translate (hash-table-pairs table) spacemap))))
399 (collect ((pairs))
400 (do ((count (hash-table-%count table) (1- count))
401 (i 2 (+ i 2)))
402 ((zerop count)
403 (pairs))
404 (pairs (cons (descriptorize (svref cells i))
405 (descriptorize (svref cells (1+ i))))))))))
407 (defmacro package-id (name) (sb-impl::package-id (find-package name)))
409 ;;; Return either the physical or logical address of the specified symbol.
410 (defun %find-target-symbol (package-id symbol-name spacemap
411 &optional (address-mode :physical))
412 (dolist (id `(,immobile-fixedobj-core-space-id
413 ,static-core-space-id
414 ,dynamic-core-space-id))
415 (binding* ((space (get-space id spacemap) :exit-if-null)
416 (start (translate-ptr (space-addr space) spacemap))
417 (end (+ start (space-size space)))
418 (physaddr start))
419 (loop
420 (when (>= physaddr end) (return))
421 (let* ((word (sap-ref-word (int-sap physaddr) 0))
422 (size
423 (if (= (logand word widetag-mask) filler-widetag)
424 (ash (ash word -32) word-shift)
425 (let ((obj (reconstitute-object (ash physaddr (- n-fixnum-tag-bits)))))
426 (when (and (symbolp obj)
427 (string= symbol-name (translate (symbol-name obj) spacemap))
428 (= (symbol-package-id obj) package-id))
429 (return-from %find-target-symbol
430 (%make-lisp-obj
431 (logior (ecase address-mode
432 (:physical physaddr)
433 (:logical (+ (space-addr space) (- physaddr start))))
434 other-pointer-lowtag))))
435 (primitive-object-size obj)))))
436 (incf physaddr size))))))
437 (defun find-target-symbol (package-id symbol-name spacemap &optional (address-mode :physical))
438 (or (%find-target-symbol package-id symbol-name spacemap address-mode)
439 (bug "Can't find symbol ~A::~A" package-id symbol-name)))
441 (defparameter label-prefix (if (member :darwin *features*) "_" ""))
442 (defun labelize (x) (concatenate 'string label-prefix x))
444 (defun compute-linkage-symbols (spacemap)
445 (let* ((linkage-info (symbol-global-value
446 (find-target-symbol (package-id "SB-SYS") "*LINKAGE-INFO*"
447 spacemap :physical)))
448 (hashtable (car (translate linkage-info spacemap)))
449 (pairs (target-hash-table-alist hashtable spacemap))
450 (min (reduce #'min pairs :key #'cdr))
451 (max (reduce #'max pairs :key #'cdr))
452 (n (1+ (- max min)))
453 (vector (make-array n)))
454 (dolist (entry pairs vector)
455 (let* ((key (undescriptorize (car entry)))
456 (entry-index (- (cdr entry) min))
457 (string (labelize (translate (if (consp key) (car (translate key spacemap)) key)
458 spacemap))))
459 (setf (aref vector entry-index)
460 (if (consp key) (list string) string))))))
462 (defun make-core (spacemap code-bounds fixedobj-bounds &optional enable-pie)
463 (let* ((linkage-bounds
464 (let ((text-space (get-space immobile-text-core-space-id spacemap)))
465 (if text-space
466 (let ((text-addr (space-addr text-space)))
467 (make-bounds (- text-addr alien-linkage-table-space-size) text-addr))
468 (make-bounds 0 0))))
469 (linkage-entry-size
470 (symbol-global-value
471 (find-target-symbol (package-id "SB-VM") "ALIEN-LINKAGE-TABLE-ENTRY-SIZE"
472 spacemap :physical)))
473 (linkage-symbols (compute-linkage-symbols spacemap))
474 (nil-object (compute-nil-object spacemap))
475 (ambiguous-symbols (make-hash-table :test 'equal))
476 (core
477 (%make-core
478 :spacemap spacemap
479 :nil-object nil-object
480 :nonunique-symbol-names ambiguous-symbols
481 :code-bounds code-bounds
482 :fixedobj-bounds fixedobj-bounds
483 :linkage-bounds linkage-bounds
484 :linkage-entry-size linkage-entry-size
485 :linkage-symbols linkage-symbols
486 :linkage-symbol-usedp (make-array (length linkage-symbols) :element-type 'bit
487 :initial-element 0)
488 :enable-pie enable-pie)))
489 (let ((package-table
490 (symbol-global-value
491 (find-target-symbol (package-id "SB-IMPL") "*ALL-PACKAGES*" spacemap :physical)))
492 (package-alist)
493 (symbols (make-hash-table :test 'equal)))
494 (labels ((scan-symtbl (table)
495 (scan-symbol-hashset
496 (lambda (str sym)
497 (pushnew (get-lisp-obj-address sym) (gethash str symbols)))
498 table core))
499 (scan-package (x)
500 (let ((package (truly-the package (translate x spacemap))))
501 ;; a package can appear in *ALL-PACKAGES* under each of its nicknames
502 (unless (assoc (sb-impl::package-id package) package-alist)
503 (push (cons (sb-impl::package-id package) package) package-alist)
504 (scan-symtbl (package-external-symbols package))
505 (scan-symtbl (package-internal-symbols package))))))
506 (dovector (x (translate package-table spacemap))
507 (cond ((%instancep x) (scan-package x))
508 ((listp x) (loop (if (eq x nil-object) (return))
509 (setq x (translate x spacemap))
510 (scan-package (car x))
511 (setq x (cdr x)))))))
512 (let ((package-by-id (make-array (1+ (reduce #'max package-alist :key #'car))
513 :initial-element nil)))
514 (loop for (id . package) in package-alist
515 do (setf (aref package-by-id id) package))
516 (setf (core-pkg-id->package core) package-by-id))
517 (dohash ((string symbols) symbols)
518 (when (cdr symbols)
519 (setf (gethash string ambiguous-symbols) t))))
520 core))
522 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT units
523 ;;; (bytes or qwords) to STREAM. SIZE specifies which direcive to emit.
524 ;;; EXCEPTIONS specify offsets at which a specific string should be
525 ;;; written to the file in lieu of memory contents, useful for emitting
526 ;;; expressions involving the assembler '.' symbol (the current PC).
527 (defun emit-asm-directives (size sap count stream &optional exceptions)
528 (declare (optimize speed))
529 (declare (stream stream))
530 (let ((*print-base* 16)
531 (string-buffer (make-array 18 :element-type 'base-char))
532 (fmt #.(coerce "0x%lx" 'base-string))
533 (per-line 0))
534 (declare ((integer 0 32) per-line)
535 (fixnum count))
536 string-buffer fmt
537 (ecase size
538 (:qword
539 (format stream " .quad")
540 (dotimes (i count)
541 (declare ((unsigned-byte 20) i))
542 (declare (simple-vector exceptions))
543 (write-char (if (> per-line 0) #\, #\space) stream)
544 (acond ((and (< i (length exceptions)) (aref exceptions i))
545 (write-string it stream))
547 (write-string "0x" stream)
548 (write (sap-ref-word sap (* i n-word-bytes)) :stream stream)))
549 (when (and (= (incf per-line) 16) (< (1+ i) count))
550 (format stream "~% .quad")
551 (setq per-line 0))))
552 (:byte
553 (aver (not exceptions))
554 (format stream " .byte")
555 (dotimes (i count)
556 (write-char (if (> per-line 0) #\, #\space) stream)
557 (write-string "0x" stream)
558 (write (sap-ref-8 sap i) :stream stream)
559 (when (and (= (incf per-line) 32) (< (1+ i) count))
560 (format stream "~% .byte")
561 (setq per-line 0))))))
562 (terpri stream))
564 (defun code-fixup-locs (code spacemap)
565 (let ((locs (sb-vm::%code-fixups code)))
566 ;; Return only the absolute fixups
567 ;; Ensure that a bignum LOCS is translated before using it.
568 (values (sb-c::unpack-code-fixup-locs
569 (if (fixnump locs) locs (translate locs spacemap))))))
571 #+arm64
572 (defun list-textual-instructions (sap length core load-addr emit-cfi)
573 (declare (ignore emit-cfi))
574 (let ((dstate (core-dstate core))
575 (spaces (core-spacemap core))
576 (seg (core-seg core))
577 (list)
578 (inst-ldr-reg (load-time-value (find-inst #xF940002A (get-inst-space))))
579 (inst-bl (load-time-value (find-inst #x97EC8AEB (get-inst-space))))
580 (inst-b (load-time-value (find-inst #x17FFFFE4 (get-inst-space))))
581 (inst-adrp (load-time-value (find-inst #xB0FFA560 (get-inst-space))))
582 (inst-add (load-time-value (find-inst #x91003F7C (get-inst-space))))
583 (adrp-reg/addr nil))
584 (setf (seg-virtual-location seg) load-addr
585 (seg-length seg) length
586 (seg-sap-maker seg) (lambda () sap))
587 (map-segment-instructions
588 (lambda (dchunk inst)
589 (cond
590 ((eq inst inst-adrp)
591 (let ((reg (ldb (byte 5 0) dchunk))
592 (next-dchunk (sb-arm64-asm::current-instruction dstate 4))
593 (current-page (ash (dstate-cur-addr dstate) -12))
594 (page-displacement
595 (sign-extend (+ (ldb (byte 2 29) dchunk)
596 (ash (ldb (byte 19 5) dchunk) 2))
597 21)))
598 (cond ((and (eq inst-add
599 (find-inst next-dchunk (load-time-value (get-inst-space))))
600 (= reg (ldb (byte 5 0) next-dchunk))
601 (= reg (ldb (byte 5 5) next-dchunk)))
602 ;; Rewrite any ADRP, ADD sequences which compute addresses
603 ;; into the linkage table into references into the GOT.
604 (let ((target-addr (+ (ash (+ current-page page-displacement) 12)
605 (ldb (byte 12 10) next-dchunk))))
606 (when (or (in-bounds-p target-addr (core-fixedobj-bounds core))
607 (in-bounds-p target-addr (core-linkage-bounds core)))
608 (push (list (dstate-cur-offs dstate)
609 4 ; length
610 "adrp-gotpcrel"
611 target-addr
612 (format nil "x~d" reg))
613 list)
614 (push (list (+ 4 (dstate-cur-offs dstate))
615 4 ; length
616 "ldr-gotpcrel"
617 target-addr
618 (format nil "x~d" reg))
619 list))))
620 ((and (eq inst-ldr-reg
621 (find-inst next-dchunk (load-time-value (get-inst-space))))
622 (= reg (ldb (byte 5 0) next-dchunk))
623 (= reg (ldb (byte 5 5) next-dchunk)))
624 ;; Rewrite any ADRP, LDR sequences which load
625 ;; foreign-dataref addresses into the linkage table
626 ;; into references into the GOT.
627 (let ((target-addr (+ (ash (+ current-page page-displacement) 12)
628 (ash (ldb (byte 12 10) next-dchunk) word-shift))))
629 (when (or (in-bounds-p target-addr (core-fixedobj-bounds core))
630 (in-bounds-p target-addr (core-linkage-bounds core)))
631 (push (list (dstate-cur-offs dstate)
632 4 ; length
633 "adrp-gotpcrel"
634 target-addr
635 (format nil "x~d" reg))
636 list)
637 (push (list (+ 4 (dstate-cur-offs dstate))
638 4 ; length
639 "ldr-gotpcrel"
640 target-addr
641 (format nil "x~d" reg))
642 list)))))))
643 ((or (eq inst inst-bl) (eq inst inst-b))
644 ;; Rewrite any BLs which jump to the trampoline in linkage
645 ;; space to instead jump directly to the alien function in
646 ;; the text section.
647 (let ((target-addr (+ (dstate-cur-addr dstate)
648 (* 4 (sign-extend (ldb (byte 26 0) dchunk) 26)))))
649 (when (or (in-bounds-p target-addr (core-fixedobj-bounds core))
650 (in-bounds-p target-addr (core-linkage-bounds core)))
651 (push (list* (dstate-cur-offs dstate)
652 4 ; length
653 (if (eq inst inst-bl) "bl" "b")
654 target-addr)
655 list))))))
657 dstate
658 nil)
659 (nreverse list)))
661 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
662 ;;; all instructions that should be emitted using assembly language
663 ;;; instead of .quad and/or .byte directives.
664 ;;; This includes (at least) two categories of instructions:
665 ;;; - function prologue instructions that setup the call frame
666 ;;; - jmp/call instructions that transfer control to the fixedoj space
667 ;;; delimited by bounds in STATE.
668 ;;; At execution time the function will have virtual address LOAD-ADDR.
669 #+x86-64
670 (defun list-textual-instructions (sap length core load-addr emit-cfi)
671 (let ((dstate (core-dstate core))
672 (seg (core-seg core))
673 (next-fixup-addr
674 (or (car (core-fixup-addrs core)) most-positive-word))
675 (list)
676 (inst-call (load-time-value (find-inst #b11101000 (get-inst-space))))
677 (inst-jmp (load-time-value (find-inst #b11101001 (get-inst-space))))
678 (inst-jmpz (load-time-value (find-inst #x840f (get-inst-space))))
679 (inst-pop (load-time-value (find-inst #x5d (get-inst-space))))
680 (inst-mov (load-time-value (find-inst #x8B (get-inst-space))))
681 (inst-lea (load-time-value (find-inst #x8D (get-inst-space)))))
682 (setf (seg-virtual-location seg) load-addr
683 (seg-length seg) length
684 (seg-sap-maker seg) (lambda () sap))
685 ;; KLUDGE: "8f 45 08" is the standard prologue
686 (when (and emit-cfi (= (logand (sap-ref-32 sap 0) #xFFFFFF) #x08458f))
687 (push (list* 0 3 "pop" "8(%rbp)") list))
688 (map-segment-instructions
689 (lambda (dchunk inst)
690 (cond
691 ((< next-fixup-addr (dstate-next-addr dstate))
692 (let ((operand (sap-ref-32 sap (- next-fixup-addr load-addr)))
693 (offs (dstate-cur-offs dstate)))
694 (when (in-bounds-p operand (core-code-bounds core))
695 (cond
696 ((and (eq (inst-name inst) 'mov) ; match "mov eax, imm32"
697 (eql (sap-ref-8 sap offs) #xB8))
698 (let ((text (format nil "mov $(CS+0x~x),%eax"
699 (- operand (bounds-low (core-code-bounds core))))))
700 (push (list* (dstate-cur-offs dstate) 5 "mov" text) list)))
701 ((and (eq (inst-name inst) 'mov) ; match "mov qword ptr [R+disp8], imm32"
702 (member (sap-ref-8 sap (1- offs)) '(#x48 #x49)) ; REX.w and maybe REX.b
703 (eql (sap-ref-8 sap offs) #xC7)
704 ;; modRegRm = #b01 #b000 #b___
705 (eql (logand (sap-ref-8 sap (1+ offs)) #o370) #o100))
706 (let* ((reg (ldb (byte 3 0) (sap-ref-8 sap (1+ offs))))
707 (text (format nil "movq $(CS+0x~x),~d(%~a)"
708 (- operand (bounds-low (core-code-bounds core)))
709 (signed-sap-ref-8 sap (+ offs 2))
710 (reg-name (get-gpr :qword reg)))))
711 (push (list* (1- (dstate-cur-offs dstate)) 8 "mov" text) list)))
712 ((let ((bytes (ldb (byte 24 0) (sap-ref-32 sap offs))))
713 (or (and (eq (inst-name inst) 'call) ; match "{call,jmp} qword ptr [addr]"
714 (eql bytes #x2514FF)) ; ModRM+SIB encodes disp32, no base, no index
715 (and (eq (inst-name inst) 'jmp)
716 (eql bytes #x2524FF))))
717 (let ((new-opcode (ecase (sap-ref-8 sap (1+ offs))
718 (#x14 "call *")
719 (#x24 "jmp *"))))
720 ;; This instruction form is employed for asm routines when
721 ;; compile-to-memory-space is :AUTO. If the code were to be loaded
722 ;; into dynamic space, the offset to the called routine isn't
723 ;; a (signed-byte 32), so we need the indirection.
724 (push (list* (dstate-cur-offs dstate) 7 new-opcode operand) list)))
726 (bug "Can't reverse-engineer fixup: ~s ~x"
727 (inst-name inst) (sap-ref-64 sap offs))))))
728 (pop (core-fixup-addrs core))
729 (setq next-fixup-addr (or (car (core-fixup-addrs core)) most-positive-word)))
730 ((or (eq inst inst-jmp) (eq inst inst-call))
731 (let ((target-addr (+ (near-jump-displacement dchunk dstate)
732 (dstate-next-addr dstate))))
733 (when (or (in-bounds-p target-addr (core-fixedobj-bounds core))
734 (in-bounds-p target-addr (core-linkage-bounds core)))
735 (push (list* (dstate-cur-offs dstate)
736 5 ; length
737 (if (eq inst inst-call) "call" "jmp")
738 target-addr)
739 list))))
740 ((eq inst inst-jmpz)
741 (let ((target-addr (+ (near-cond-jump-displacement dchunk dstate)
742 (dstate-next-addr dstate))))
743 (when (in-bounds-p target-addr (core-linkage-bounds core))
744 (push (list* (dstate-cur-offs dstate) 6 "je" target-addr)
745 list))))
746 ((and (or (and (eq inst inst-mov)
747 (eql (sap-ref-8 sap (dstate-cur-offs dstate)) #x8B))
748 (eq inst inst-lea))
749 (let ((modrm (sap-ref-8 sap (1+ (dstate-cur-offs dstate)))))
750 (= (logand modrm #b11000111) #b00000101)) ; RIP-relative mode
751 (in-bounds-p (+ (signed-sap-ref-32 sap (+ (dstate-cur-offs dstate) 2))
752 (dstate-next-addr dstate))
753 (core-linkage-bounds core)))
754 (let* ((abs-addr (+ (signed-sap-ref-32 sap (+ (dstate-cur-offs dstate) 2))
755 (dstate-next-addr dstate)))
756 (reg (logior (ldb (byte 3 3) (sap-ref-8 sap (1+ (dstate-cur-offs dstate))))
757 (if (logtest (sb-disassem::dstate-inst-properties dstate)
758 #b0100) ; REX.r
759 8 0)))
760 (op (if (eq inst inst-lea) "lea" "mov-gotpcrel"))
761 (args (list abs-addr (reg-name (get-gpr :qword reg)))))
762 (push (list* (1- (dstate-cur-offs dstate)) 7 op args) list)))
763 ((and (eq inst inst-pop) (eq (logand dchunk #xFF) #x5D))
764 (push (list* (dstate-cur-offs dstate) 1 "pop" "%rbp") list))))
766 dstate
767 nil)
768 (nreverse list)))
770 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
771 ;;; of memory at PADDR (physical addr) to STREAM.
772 ;;; The function's address as per the core file is VADDR.
773 ;;; (Its eventual address is indeterminate)
774 ;;; If EMIT-CFI is true, then also emit cfi directives.
776 ;;; Notice that we can use one fewer cfi directive than usual because
777 ;;; Lisp always carries a frame pointer as set up by the caller.
779 ;;; C convention
780 ;;; ============
781 ;;; pushq %rbp
782 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
783 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
784 ;;; movq %rsp, %rbp
785 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
787 ;;; Lisp convention
788 ;;; ===============
789 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
790 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
791 ;;; .cfi_def_cfa 6, 16
792 ;;; .cfi_offset 6, -16
794 ;;; Of course there is a flip-side to this: unwinders think that the new frame
795 ;;; is already begun in the caller. Interruption between these two instructions:
796 ;;; MOV RBP, RSP / CALL #xzzzzz
797 ;;; will show the backtrace as if two invocations of the caller are on stack.
798 ;;; This is tricky to fix because while we can relativize the CFA to the
799 ;;; known frame size, we can't do that based only on a disassembly.
801 ;;; Return the list of locations which must be added to code-fixups
802 ;;; in the event that heap relocation occurs on image restart.
803 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core &optional labels)
804 (when emit-cfi
805 (format stream " .cfi_startproc~%"))
806 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
807 ;; bytes to be written as an assembly language instruction rather than opaquely,
808 ;; thereby affecting the ELF data (cfi or relocs) produced.
809 (let ((instructions
810 (merge 'list labels
811 (list-textual-instructions (int-sap paddr) count core vaddr emit-cfi)
812 #'< :key #'car))
813 (extra-fixup-locs)
814 (ptr paddr))
815 (symbol-macrolet ((cur-offset (- ptr paddr)))
816 (loop
817 (let ((until (if instructions (caar instructions) count)))
818 ;; if we're not aligned, then write some number of bytes
819 ;; to cause alignment. But do not write past the next offset
820 ;; that needs to be written as an instruction.
821 (when (logtest ptr #x7) ; unaligned
822 (let ((n (min (- (nth-value 1 (ceiling ptr 8)))
823 (- until cur-offset))))
824 (aver (<= 0 n 7))
825 (emit-asm-directives :byte (int-sap ptr) n stream)
826 (incf ptr n)))
827 ;; Now we're either aligned to a multiple of 8, or the current
828 ;; offset needs to be written as a textual instruction.
829 (let ((n (- until cur-offset)))
830 (aver (>= n 0))
831 (multiple-value-bind (qwords remainder) (floor n 8)
832 (when (plusp qwords)
833 (emit-asm-directives :qword (int-sap ptr) qwords stream #())
834 (incf ptr (* qwords 8)))
835 (when (plusp remainder)
836 (emit-asm-directives :byte (int-sap ptr) remainder stream)
837 (incf ptr remainder))))
838 ;; If the current offset is COUNT, we're done.
839 (when (= cur-offset count) (return))
840 (aver (= cur-offset until))
841 ;; A label and a textual instruction could co-occur.
842 ;; If so, the label must be emitted first.
843 (when (eq (cadar instructions) :label)
844 (destructuring-bind (c-symbol globalp) (cddr (pop instructions))
845 ;; The C symbol is global only if the Lisp name is a legal function
846 ;; designator and not random noise.
847 ;; This is a technique to try to avoid appending a uniquifying suffix
848 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
849 (when emit-cfi
850 (format stream "~:[~; .globl ~a~:*~%~] .type ~a, @function~%"
851 globalp c-symbol))
852 (format stream "~a:~%" c-symbol)))
853 ;; If both a label and textual instruction occur here, handle the latter.
854 ;; [This could could be simpler if all labels were emitted as
855 ;; '.set "thing", .+const' together in a single place, but it's more readable
856 ;; to see them where they belong in the instruction stream]
857 (when (and instructions (= (caar instructions) cur-offset))
858 (destructuring-bind (length opcode . operand) (cdr (pop instructions))
859 (when (cond ((member opcode #+arm64 '("bl" "b")
860 #+x86-64 '("jmp" "je" "call")
861 :test #'string=)
862 (when (in-bounds-p operand (core-linkage-bounds core))
863 (let ((entry-index
864 (/ (- operand (bounds-low (core-linkage-bounds core)))
865 (core-linkage-entry-size core))))
866 (setf (bit (core-linkage-symbol-usedp core) entry-index) 1
867 operand (aref (core-linkage-symbols core) entry-index))))
868 (when (and (integerp operand)
869 (in-bounds-p operand (core-fixedobj-bounds core)))
870 (push (+ vaddr cur-offset) extra-fixup-locs))
871 (format stream " ~A ~:[0x~X~;~a~:[~;@PLT~]~]~%"
872 opcode (stringp operand) operand
873 #+x86-64
874 (core-enable-pie core)
875 #+arm64 nil ; arm64 doesn't need the extra @PLT
877 ((or #+x86-64 (string= opcode "mov-gotpcrel")
878 #+arm64 (string= opcode "ldr-gotpcrel")
879 #+arm64 (string= opcode "adrp-gotpcrel"))
880 (let* ((entry-index
881 (/ (- (car operand) (bounds-low (core-linkage-bounds core)))
882 (core-linkage-entry-size core)))
883 (c-symbol (let ((thing (aref (core-linkage-symbols core) entry-index)))
884 (if (consp thing) (car thing) thing))))
885 (setf (bit (core-linkage-symbol-usedp core) entry-index) 1)
886 #+x86-64
887 (format stream " mov ~A@GOTPCREL(%rip), %~(~A~)~%" c-symbol (cadr operand))
888 #+arm64
889 (cond ((string= opcode "adrp-gotpcrel")
890 (format stream " adrp ~A,:got:~A~%" (cadr operand) c-symbol))
891 ((string= opcode "ldr-gotpcrel")
892 (format stream " ldr ~A, [~A, #:got_lo12:~A]~%"
893 (cadr operand)
894 (cadr operand)
895 c-symbol))
896 (t (error "unreachable")))))
897 #+x86-64
898 ((string= opcode "lea") ; lea becomes "mov" with gotpcrel as src, which becomes lea
899 (let* ((entry-index
900 (/ (- (car operand) (bounds-low (core-linkage-bounds core)))
901 (core-linkage-entry-size core)))
902 (c-symbol (aref (core-linkage-symbols core) entry-index)))
903 (setf (bit (core-linkage-symbol-usedp core) entry-index) 1)
904 (format stream " mov ~A@GOTPCREL(%rip), %~(~A~)~%" c-symbol (cadr operand))))
905 #+x86-64
906 ((string= opcode "pop")
907 (format stream " ~A ~A~%" opcode operand)
908 (cond ((string= operand "8(%rbp)")
909 (format stream " .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
910 ((string= operand "%rbp")
911 ;(format stream " .cfi_def_cfa 7, 8~%")
912 nil)
913 (t)))
914 #+x86-64
915 ((string= opcode "mov")
916 ;; the so-called "operand" is the entire instruction
917 (write-string operand stream)
918 (terpri stream))
919 ((or (string= opcode "call *") (string= opcode "jmp *"))
920 ;; Indirect call - since the code is in immobile space,
921 ;; we could render this as a 2-byte NOP followed by a direct
922 ;; call. For simplicity I'm leaving it exactly as it was.
923 (format stream " ~A(CS+0x~x)~%"
924 opcode ; contains a "*" as needed for the syntax
925 (- operand (bounds-low (core-code-bounds core)))))
926 (t))
927 (bug "Random annotated opcode ~S" opcode))
928 (incf ptr length)))
929 (when (= cur-offset count) (return)))))
930 (when emit-cfi
931 (format stream " .cfi_endproc~%"))
932 extra-fixup-locs))
934 ;;; Examine CODE, returning a list of lists describing how to emit
935 ;;; the contents into the assembly file.
936 ;;; ({:data | :padding} . N) | (start-pc . end-pc)
937 (defun get-text-ranges (code spacemap)
938 (let ((cdf (translate (sb-c::compiled-debug-info-fun-map
939 (truly-the sb-c::compiled-debug-info
940 (translate (%code-debug-info code) spacemap)))
941 spacemap))
942 (next-simple-fun-pc-offs (%code-fun-offset code 0))
943 (start-pc (code-n-unboxed-data-bytes code))
944 (simple-fun-index -1)
945 (simple-fun)
946 (blobs))
947 (when (plusp start-pc)
948 (aver (zerop (rem start-pc n-word-bytes)))
949 (push `(:data . ,(ash start-pc (- word-shift))) blobs))
950 (loop
951 (let* ((next (when (%instancep (sb-c::compiled-debug-fun-next
952 (truly-the sb-c::compiled-debug-fun cdf)))
953 (translate (sb-c::compiled-debug-fun-next
954 (truly-the sb-c::compiled-debug-fun cdf))
955 spacemap)))
956 (end-pc (if next
957 (cdf-offset next spacemap)
958 (%code-text-size code))))
959 (cond
960 ((= start-pc end-pc)) ; crazy shiat. do not add to blobs
961 ((<= start-pc next-simple-fun-pc-offs (1- end-pc))
962 (incf simple-fun-index)
963 (setq simple-fun (%code-entry-point code simple-fun-index))
964 (let ((padding (- next-simple-fun-pc-offs start-pc)))
965 (when (plusp padding)
966 ;; Assert that SIMPLE-FUN always begins at an entry
967 ;; in the fun-map, and not somewhere in the middle:
968 ;; |<-- fun -->|<-- fun -->|
969 ;; ^- start (GOOD) ^- alleged start (BAD)
970 (cond ((eq simple-fun (%code-entry-point code 0))
971 (bug "Misaligned fun start"))
972 (t ; sanity-check the length of the filler
973 (aver (< padding (* 2 n-word-bytes)))))
974 (push `(:pad . ,padding) blobs)
975 (incf start-pc padding)))
976 (push `(,start-pc . ,end-pc) blobs)
977 (setq next-simple-fun-pc-offs
978 (if (< (1+ simple-fun-index ) (code-n-entries code))
979 (%code-fun-offset code (1+ simple-fun-index))
980 -1)))
982 (let ((current-blob (car blobs)))
983 (setf (cdr current-blob) end-pc)))) ; extend this blob
984 (unless next
985 (return (nreverse blobs)))
986 (setq cdf next start-pc end-pc)))))
988 (defun c-symbol-quote (name)
989 (concatenate 'string '(#\") name '(#\")))
991 (defun emit-symbols (blobs core pp-state output &aux base-symbol)
992 (dolist (blob blobs base-symbol)
993 (destructuring-bind (name start . end) blob
994 (let ((c-name (c-name (or name "anonymous") core pp-state)))
995 (unless base-symbol
996 (setq base-symbol c-name))
997 (format output " lsym \"~a\", 0x~x, 0x~x~%"
998 c-name start (- end start))))))
1000 (defun emit-funs (code vaddr core dumpwords output base-symbol emit-cfi)
1001 (let* ((spacemap (core-spacemap core))
1002 (ranges (get-text-ranges code spacemap))
1003 (text-sap (code-instructions code))
1004 (text (sap-int text-sap))
1005 ;; Like CODE-INSTRUCTIONS, but where the text virtually was
1006 (text-vaddr (+ vaddr (* (code-header-words code) n-word-bytes)))
1007 (additional-relative-fixups)
1008 (max-end 0))
1009 ;; There is *always* at least 1 word of unboxed data now
1010 (aver (eq (caar ranges) :data))
1011 (let ((jump-table-size (code-jump-table-words code))
1012 (total-nwords (cdr (pop ranges))))
1013 (cond ((> jump-table-size 1)
1014 (format output "# jump table~%")
1015 (format output ".quad ~d" (sap-ref-word text-sap 0))
1016 (dotimes (i (1- jump-table-size))
1017 (format output ",\"~a\"+0x~x"
1018 base-symbol
1019 (- (sap-ref-word text-sap (ash (1+ i) word-shift))
1020 vaddr)))
1021 (terpri output)
1022 (let ((remaining (- total-nwords jump-table-size)))
1023 (when (plusp remaining)
1024 (funcall dumpwords
1025 (sap+ text-sap (ash jump-table-size word-shift))
1026 remaining output))))
1028 (funcall dumpwords text-sap total-nwords output))))
1029 (loop
1030 (destructuring-bind (start . end) (pop ranges)
1031 (setq max-end end)
1032 ;; FIXME: it seems like this should just be reduced to emitting 2 words
1033 ;; now that simple-fun headers don't hold any boxed words.
1034 ;; (generality here is without merit)
1035 (funcall dumpwords (sap+ text-sap start) simple-fun-insts-offset output
1036 #(nil #.(format nil ".+~D" (* (1- simple-fun-insts-offset)
1037 n-word-bytes))))
1038 (incf start (* simple-fun-insts-offset n-word-bytes))
1039 ;; Pass the current physical address at which to disassemble,
1040 ;; the notional core address (which changes after linker relocation),
1041 ;; and the length.
1042 (let ((new-relative-fixups
1043 (emit-lisp-function (+ text start) (+ text-vaddr start) (- end start)
1044 output emit-cfi core)))
1045 (setq additional-relative-fixups
1046 (nconc new-relative-fixups additional-relative-fixups)))
1047 (cond ((not ranges) (return))
1048 ((eq (caar ranges) :pad)
1049 (format output " .byte ~{0x~x~^,~}~%"
1050 (loop for i from 0 below (cdr (pop ranges))
1051 collect (sap-ref-8 text-sap (+ end i))))))))
1052 ;; All fixups should have been consumed by writing out the text.
1053 (aver (null (core-fixup-addrs core)))
1054 ;; Emit bytes from the maximum function end to the object end.
1055 ;; We can't just round up %CODE-CODE-SIZE to a double-lispword
1056 ;; because the boxed header could end at an odd word, requiring that
1057 ;; the unboxed bytes have an odd size in words making the total even.
1058 (format output " .byte ~{0x~x~^,~}~%"
1059 (loop for i from max-end
1060 below (- (code-object-size code)
1061 (* (code-header-words code) n-word-bytes))
1062 collect (sap-ref-8 text-sap i)))
1063 (when additional-relative-fixups
1064 (binding* ((existing-fixups (sb-vm::%code-fixups code))
1065 ((absolute relative immediate)
1066 (sb-c::unpack-code-fixup-locs
1067 (if (fixnump existing-fixups)
1068 existing-fixups
1069 (translate existing-fixups spacemap))))
1070 (new-sorted
1071 (sort (mapcar (lambda (x)
1072 ;; compute offset of the fixup from CODE-INSTRUCTIONS.
1073 ;; X is the location of the CALL instruction,
1074 ;; 1+ is the location of the fixup.
1075 (- (1+ x)
1076 (+ vaddr (ash (code-header-words code)
1077 word-shift))))
1078 additional-relative-fixups)
1079 #'<)))
1080 (sb-c:pack-code-fixup-locs
1081 absolute (merge 'list relative new-sorted #'<) immediate)))))
1083 (defconstant +gf-name-slot+ 5)
1085 (defun output-bignum (label bignum stream)
1086 (let ((nwords (sb-bignum:%bignum-length bignum)))
1087 (format stream "~@[~a:~] .quad 0x~x"
1088 label (logior (ash nwords 8) bignum-widetag))
1089 (dotimes (i nwords)
1090 (format stream ",0x~x" (sb-bignum:%bignum-ref bignum i)))
1091 (when (evenp nwords) ; pad
1092 (format stream ",0"))
1093 (format stream "~%")))
1095 (defconstant core-align #+x86-64 4096 #+arm64 65536)
1097 (defun write-preamble (output)
1098 (format output " .text~% .file \"sbcl.core\"
1099 ~:[~; .macro .size sym size # ignore
1100 .endm
1101 .macro .type sym type # ignore
1102 .endm~]
1103 .macro lasmsym name size
1104 .set \"\\name\", .
1105 .size \"\\name\", \\size
1106 .type \"\\name\", function
1107 .endm
1108 .macro lsym name start size
1109 .set \"\\name\", . + \\start
1110 .size \"\\name\", \\size
1111 .type \"\\name\", function
1112 .endm
1113 .globl ~alisp_code_start, ~alisp_jit_code, ~alisp_code_end
1114 .balign ~a~%~alisp_code_start:~%CS: # code space~%"
1115 (member :darwin *features*)
1116 label-prefix label-prefix label-prefix
1117 core-align
1118 label-prefix))
1120 (defun %widetag-of (word) (logand word widetag-mask))
1122 (defun make-code-obj (addr spacemap)
1123 (let ((translation (translate-ptr addr spacemap)))
1124 (aver (= (%widetag-of (sap-ref-word (int-sap translation) 0))
1125 code-header-widetag))
1126 (%make-lisp-obj (logior translation other-pointer-lowtag))))
1128 (defun output-lisp-asm-routines (core spacemap code-addr output &aux (skip 0))
1129 (write-preamble output)
1130 (dotimes (i 2)
1131 (let* ((paddr (int-sap (translate-ptr code-addr spacemap)))
1132 (word (sap-ref-word paddr 0)))
1133 ;; After running the converter which moves dynamic-space code to text space,
1134 ;; the text space starts with an array of uint32 for the offsets to each object
1135 ;; and an array of uint64 containing some JMP instructions.
1136 (unless (or (= (%widetag-of word) simple-array-unsigned-byte-32-widetag)
1137 (= (%widetag-of word) simple-array-unsigned-byte-64-widetag))
1138 (return))
1139 (let* ((array (%make-lisp-obj (logior (sap-int paddr) other-pointer-lowtag)))
1140 (size (primitive-object-size array))
1141 (nwords (ash size (- word-shift))))
1142 (dotimes (i nwords)
1143 (format output "~A 0x~x"
1144 (case (mod i 8)
1145 (0 #.(format nil "~% .quad"))
1146 (t ","))
1147 (sap-ref-word paddr (ash i word-shift))))
1148 (terpri output)
1149 (incf skip size)
1150 (incf code-addr size))))
1151 (let* ((code-component (make-code-obj code-addr spacemap))
1152 (obj-sap (int-sap (- (get-lisp-obj-address code-component)
1153 other-pointer-lowtag)))
1154 (header-len (code-header-words code-component))
1155 (jump-table-count (sap-ref-word (code-instructions code-component) 0)))
1156 ;; Write the code component header
1157 (format output "lar: # lisp assembly routines~%")
1158 (emit-asm-directives :qword obj-sap header-len output #())
1159 ;; Write the jump table
1160 (format output " .quad ~D" jump-table-count)
1161 (dotimes (i (1- jump-table-count))
1162 (format output ",lar+0x~x"
1163 (- (sap-ref-word (code-instructions code-component)
1164 (ash (1+ i) word-shift))
1165 code-addr)))
1166 (terpri output)
1167 (let ((name->addr
1168 ;; the CDR of each alist item is a target cons (needing translation)
1169 (sort
1170 (mapcar (lambda (entry &aux (name (translate (undescriptorize (car entry)) spacemap)) ; symbol
1171 ;; VAL is (start end . index)
1172 (val (translate (undescriptorize (cdr entry)) spacemap))
1173 (start (car val))
1174 (end (car (translate (cdr val) spacemap))))
1175 (list* (translate (symbol-name name) spacemap) start end))
1176 (target-hash-table-alist (%code-debug-info code-component) spacemap))
1177 #'< :key #'cadr)))
1178 ;; Possibly unboxed words and/or padding
1179 (let ((here (ash jump-table-count word-shift))
1180 (first-entry-point (cadar name->addr)))
1181 (when (> first-entry-point here)
1182 (format output " .quad ~{0x~x~^,~}~%"
1183 (loop for offs = here then (+ offs 8)
1184 while (< offs first-entry-point)
1185 collect (sap-ref-word (code-instructions code-component) offs)))))
1186 ;; Loop over the embedded routines
1187 (let ((list name->addr)
1188 (obj-size (code-object-size code-component)))
1189 (loop
1190 (destructuring-bind (name start-offs . end-offs) (pop list)
1191 (let ((nbytes (- (if (endp list)
1192 (- obj-size (* header-len n-word-bytes))
1193 (1+ end-offs))
1194 start-offs)))
1195 (format output " lasmsym ~(\"~a\"~), ~d~%" name nbytes)
1196 (let ((fixups
1197 (emit-lisp-function
1198 (+ (sap-int (code-instructions code-component))
1199 start-offs)
1200 (+ code-addr
1201 (ash (code-header-words code-component) word-shift)
1202 start-offs)
1203 nbytes output nil core)))
1204 (aver (null fixups)))))
1205 (when (endp list) (return)))
1206 (format output "~%# end of lisp asm routines~2%")
1207 (+ skip obj-size)))))
1209 ;;; Convert immobile text space to an assembly file in OUTPUT.
1210 (defun write-assembler-text
1211 (spacemap output
1212 &optional enable-pie (emit-cfi t)
1213 &aux (code-bounds (space-bounds immobile-text-core-space-id spacemap))
1214 (fixedobj-bounds (space-bounds immobile-fixedobj-core-space-id spacemap))
1215 (core (make-core spacemap code-bounds fixedobj-bounds enable-pie))
1216 (code-addr (bounds-low code-bounds))
1217 (total-code-size 0)
1218 (pp-state (cons (make-hash-table :test 'equal) nil))
1219 (prev-namestring "")
1220 (n-linker-relocs 0)
1221 (temp-output (make-string-output-stream :element-type 'base-char))
1222 end-loc)
1223 (labels ((dumpwords (sap count stream &optional (exceptions #()) logical-addr)
1224 (aver (sap>= sap (car spacemap)))
1225 ;; Add any new "header exceptions" that cause intra-code-space pointers
1226 ;; to be computed at link time
1227 (dotimes (i (if logical-addr count 0))
1228 (unless (and (< i (length exceptions)) (svref exceptions i))
1229 (let ((word (sap-ref-word sap (* i n-word-bytes))))
1230 (when (and (= (logand word 3) 3) ; is a pointer
1231 (in-bounds-p word code-bounds)) ; to code space
1232 #+nil
1233 (format t "~&~(~x: ~x~)~%" (+ logical-addr (* i n-word-bytes))
1234 word)
1235 (incf n-linker-relocs)
1236 (setf exceptions (adjust-array exceptions (max (length exceptions) (1+ i))
1237 :initial-element nil)
1238 (svref exceptions i)
1239 (format nil "CS+0x~x"
1240 (- word (bounds-low code-bounds))))))))
1241 (emit-asm-directives :qword sap count stream exceptions)))
1243 (let ((skip (output-lisp-asm-routines core spacemap code-addr output)))
1244 (incf code-addr skip)
1245 (incf total-code-size skip))
1247 (loop
1248 (when (>= code-addr (bounds-high code-bounds))
1249 (setq end-loc code-addr)
1250 (return))
1251 (ecase (%widetag-of (sap-ref-word (int-sap (translate-ptr code-addr spacemap)) 0))
1252 (#.code-header-widetag
1253 (let* ((code (make-code-obj code-addr spacemap))
1254 (objsize (code-object-size code)))
1255 (incf total-code-size objsize)
1256 (cond
1257 ((%instancep (%code-debug-info code)) ; assume it's a COMPILED-DEBUG-INFO
1258 (aver (plusp (code-n-entries code)))
1259 (let* ((source
1260 (sb-c::compiled-debug-info-source
1261 (truly-the sb-c::compiled-debug-info
1262 (translate (%code-debug-info code) spacemap))))
1263 (namestring
1264 (debug-source-namestring
1265 (truly-the sb-c::debug-source (translate source spacemap)))))
1266 (setq namestring (if (eq namestring (core-nil-object core))
1267 "sbcl.core"
1268 (translate namestring spacemap)))
1269 (unless (string= namestring prev-namestring)
1270 (format output " .file \"~a\"~%" namestring)
1271 (setq prev-namestring namestring)))
1272 (setf (core-fixup-addrs core)
1273 (mapcar (lambda (x)
1274 (+ code-addr (ash (code-header-words code) word-shift) x))
1275 (code-fixup-locs code spacemap)))
1276 (let ((code-physaddr (logandc2 (get-lisp-obj-address code) lowtag-mask)))
1277 (format output "#x~x:~%" code-addr)
1278 ;; Emit symbols before the code header data, because the symbols
1279 ;; refer to "." (the current PC) which is the base of the object.
1280 (let* ((base (emit-symbols (code-symbols code core) core pp-state output))
1281 (altered-fixups
1282 (emit-funs code code-addr core #'dumpwords temp-output base emit-cfi))
1283 (header-exceptions (vector nil nil nil nil))
1284 (fixups-ptr))
1285 (when altered-fixups
1286 (setf (aref header-exceptions sb-vm:code-fixups-slot)
1287 (cond ((fixnump altered-fixups)
1288 (format nil "0x~x" (ash altered-fixups n-fixnum-tag-bits)))
1290 (let ((ht (core-new-fixups core)))
1291 (setq fixups-ptr (gethash altered-fixups ht))
1292 (unless fixups-ptr
1293 (setq fixups-ptr (ash (core-new-fixup-words-used core)
1294 word-shift))
1295 (setf (gethash altered-fixups ht) fixups-ptr)
1296 (incf (core-new-fixup-words-used core)
1297 (align-up (1+ (sb-bignum:%bignum-length altered-fixups)) 2))))
1298 ;; tag the pointer properly for a bignum
1299 (format nil "lisp_fixups+0x~x"
1300 (logior fixups-ptr other-pointer-lowtag))))))
1301 (dumpwords (int-sap code-physaddr)
1302 (code-header-words code) output header-exceptions code-addr)
1303 (write-string (get-output-stream-string temp-output) output))))
1305 (error "Strange code component: ~S" code)))
1306 (incf code-addr objsize)))
1307 (#.filler-widetag
1308 (let* ((word (sap-ref-word (int-sap (translate-ptr code-addr spacemap)) 0))
1309 (nwords (ash word -32))
1310 (nbytes (* nwords n-word-bytes)))
1311 (format output " .quad 0x~x~% .fill ~d~%" word (- nbytes n-word-bytes))
1312 (incf code-addr nbytes)))
1313 ;; This is a trailing array which contains a jump instruction for each
1314 ;; element of *C-LINKAGE-REDIRECTS* (see "rewrite-asmcalls.lisp").
1315 (#.simple-array-unsigned-byte-64-widetag
1316 (let* ((paddr (translate-ptr code-addr spacemap))
1317 (array (%make-lisp-obj (logior paddr other-pointer-lowtag)))
1318 (nwords (+ vector-data-offset (align-up (length array) 2))))
1319 (format output "# alien linkage redirects:~% .quad")
1320 (dotimes (i nwords (terpri output))
1321 (format output "~a0x~x" (if (= i 0) " " ",")
1322 (sap-ref-word (int-sap paddr) (ash i word-shift))))
1323 (incf code-addr (ash nwords word-shift))
1324 (setq end-loc code-addr)
1325 (return))))))
1327 ;; coreparse uses the 'lisp_jit_code' symbol to set text_space_highwatermark
1328 ;; The intent is that compilation to memory can use this reserved area
1329 ;; (if space remains) so that profilers can associate a C symbol with the
1330 ;; program counter range. It's better than nothing.
1331 (format output "~a:~%" (labelize "lisp_jit_code"))
1333 ;; Pad so that non-lisp code can't be colocated on a GC page.
1334 ;; (Lack of Lisp object headers in C code is the issue)
1335 (let ((aligned-end (align-up end-loc core-align)))
1336 (when (> aligned-end end-loc)
1337 (multiple-value-bind (nwords remainder)
1338 (floor (- aligned-end end-loc) n-word-bytes)
1339 (aver (>= nwords 2))
1340 (aver (zerop remainder))
1341 (decf nwords 2)
1342 (format output " .quad 0x~x, ~d # (simple-array fixnum (~d))~%"
1343 simple-array-fixnum-widetag
1344 (ash nwords n-fixnum-tag-bits)
1345 nwords)
1346 (when (plusp nwords)
1347 (format output " .fill ~d~%" (* nwords n-word-bytes))))))
1348 ;; Extend with 1 MB of filler
1349 (format output " .fill ~D~%~alisp_code_end:
1350 .size lisp_jit_code, .-lisp_jit_code~%"
1351 (* 1024 1024) label-prefix)
1352 (values core total-code-size n-linker-relocs))
1354 ;;;; ELF file I/O
1356 (defconstant +sht-null+ 0)
1357 (defconstant +sht-progbits+ 1)
1358 (defconstant +sht-symtab+ 2)
1359 (defconstant +sht-strtab+ 3)
1360 (defconstant +sht-rela+ 4)
1361 (defconstant +sht-rel+ 9)
1363 (define-alien-type elf64-ehdr
1364 (struct elf64-edhr
1365 (ident (array unsigned-char 16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
1366 (type (unsigned 16)) ; 1 0
1367 (machine (unsigned 16)) ; 3E 0
1368 (version (unsigned 32)) ; 1 0 0 0
1369 (entry unsigned) ; 0 0 0 0 0 0 0 0
1370 (phoff unsigned) ; 0 0 0 0 0 0 0 0
1371 (shoff unsigned) ;
1372 (flags (unsigned 32)) ; 0 0 0 0
1373 (ehsize (unsigned 16)) ; 40 0
1374 (phentsize (unsigned 16)) ; 0 0
1375 (phnum (unsigned 16)) ; 0 0
1376 (shentsize (unsigned 16)) ; 40 0
1377 (shnum (unsigned 16)) ; n 0
1378 (shstrndx (unsigned 16)))) ; n 0
1379 (defconstant ehdr-size (ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil)) 8))
1380 (define-alien-type elf64-shdr
1381 (struct elf64-shdr
1382 (name (unsigned 32))
1383 (type (unsigned 32))
1384 (flags (unsigned 64))
1385 (addr (unsigned 64))
1386 (off (unsigned 64))
1387 (size (unsigned 64))
1388 (link (unsigned 32))
1389 (info (unsigned 32))
1390 (addralign (unsigned 64))
1391 (entsize (unsigned 64))))
1392 (defconstant shdr-size (ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil)) 8))
1393 (define-alien-type elf64-sym
1394 (struct elf64-sym
1395 (name (unsigned 32))
1396 (info (unsigned 8))
1397 (other (unsigned 8))
1398 (shndx (unsigned 16))
1399 (value unsigned)
1400 (size unsigned)))
1401 (define-alien-type elf64-rela
1402 (struct elf64-rela
1403 (offset (unsigned 64))
1404 (info (unsigned 64))
1405 (addend (signed 64))))
1407 (defun make-elf64-sym (name info)
1408 (let ((a (make-array 24 :element-type '(unsigned-byte 8) :initial-element 0)))
1409 (with-pinned-objects (a)
1410 (setf (sap-ref-32 (vector-sap a) 0) name
1411 (sap-ref-8 (vector-sap a) 4) info))
1414 ;;; Return two values: an octet vector comprising a string table
1415 ;;; and an alist which maps string to offset in the table.
1416 (defun string-table (strings)
1417 (let* ((length (+ (1+ (length strings)) ; one more null than there are strings
1418 (reduce #'+ strings :key #'length))) ; data length
1419 (bytes (make-array length :element-type '(unsigned-byte 8)
1420 :initial-element 0))
1421 (index 1)
1422 (alist))
1423 (dolist (string strings)
1424 (push (cons string index) alist)
1425 (replace bytes (map 'vector #'char-code string) :start1 index)
1426 (incf index (1+ (length string))))
1427 (cons (nreverse alist) bytes)))
1429 (defun write-alien (alien size stream)
1430 (dotimes (i size)
1431 (write-byte (sap-ref-8 (alien-value-sap alien) i) stream)))
1433 (defun copy-bytes (in-stream out-stream nbytes
1434 &optional (buffer
1435 (make-array 1024 :element-type '(unsigned-byte 8))))
1436 (loop (let ((chunksize (min (length buffer) nbytes)))
1437 (aver (eql (read-sequence buffer in-stream :end chunksize) chunksize))
1438 (write-sequence buffer out-stream :end chunksize)
1439 (when (zerop (decf nbytes chunksize)) (return)))))
1441 ;;; core header should be an array of words in '.rodata', not a 32K page
1442 (defconstant core-header-size +backend-page-bytes+) ; stupidly large (FIXME)
1444 (defconstant e-machine #+x86-64 #x3E #+arm64 #xB7)
1446 (defun write-elf-header (shdrs-start sections output)
1447 (let ((shnum (1+ (length sections))) ; section 0 is implied
1448 (shstrndx (1+ (position :str sections :key #'car)))
1449 (ident #.(coerce '(#x7F #x45 #x4C #x46 2 1 1 0 0 0 0 0 0 0 0 0)
1450 '(array (unsigned-byte 8) 1))))
1451 (with-alien ((ehdr elf64-ehdr))
1452 (dotimes (i (ceiling ehdr-size n-word-bytes))
1453 (setf (sap-ref-word (alien-value-sap ehdr) (* i n-word-bytes)) 0))
1454 (with-pinned-objects (ident)
1455 (%byte-blt (vector-sap ident) 0 (alien-value-sap ehdr) 0 16))
1456 (setf (slot ehdr 'type) 1
1457 (slot ehdr 'machine) e-machine
1458 (slot ehdr 'version) 1
1459 (slot ehdr 'shoff) shdrs-start
1460 (slot ehdr 'ehsize) ehdr-size
1461 (slot ehdr 'shentsize) shdr-size
1462 (slot ehdr 'shnum) shnum
1463 (slot ehdr 'shstrndx) shstrndx)
1464 (write-alien ehdr ehdr-size output))))
1466 (defun write-section-headers (placements sections string-table output)
1467 (with-alien ((shdr elf64-shdr))
1468 (dotimes (i (ceiling shdr-size n-word-bytes)) ; Zero-fill
1469 (setf (sap-ref-word (alien-value-sap shdr) (* i n-word-bytes)) 0))
1470 (dotimes (i (1+ (length sections)))
1471 (when (plusp i) ; Write the zero-filled header as section 0
1472 (destructuring-bind (name type flags link info alignment entsize)
1473 (cdr (aref sections (1- i)))
1474 (destructuring-bind (offset . size)
1475 (pop placements)
1476 (setf (slot shdr 'name) (cdr (assoc name (car string-table)))
1477 (slot shdr 'type) type
1478 (slot shdr 'flags) flags
1479 (slot shdr 'off) offset
1480 (slot shdr 'size) size
1481 (slot shdr 'link) link
1482 (slot shdr 'info) info
1483 (slot shdr 'addralign) alignment
1484 (slot shdr 'entsize) entsize))))
1485 (write-alien shdr shdr-size output))))
1487 (defconstant sym-entry-size 24)
1489 ;;; Write everything except for the core file itself into OUTPUT-STREAM
1490 ;;; and leave the stream padded to a 4K boundary ready to receive data.
1491 (defun prepare-elf (core-size relocs output pie)
1492 ;; PIE uses coreparse relocs which are 8 bytes each, and no linker relocs.
1493 ;; Otherwise, linker relocs are 24 bytes each.
1494 (let* ((reloc-entry-size (if pie 8 24))
1495 (sections
1496 ;; name | type | flags | link | info | alignment | entry size
1497 `#((:core "lisp.core" ,+sht-progbits+ 0 0 0 ,core-align 0)
1498 (:sym ".symtab" ,+sht-symtab+ 0 3 1 8 ,sym-entry-size)
1499 ; section with the strings -- ^ ^ -- 1+ highest local symbol
1500 (:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
1501 (:rel
1502 ,@(if pie
1503 ;; Don't bother with an ELF reloc section; it won't do any good.
1504 ;; It would apply at executable link time, which is without purpose,
1505 ;; it just offsets the numbers based on however far the lisp.core
1506 ;; section is into the physical file. Non-loaded sections don't get
1507 ;; further relocated on execution, so 'coreparse' has to fix the
1508 ;; entire dynamic space at execution time anyway.
1509 `("lisp.rel" ,+sht-progbits+ 0 0 0 8 8)
1510 `(".relalisp.core" ,+sht-rela+ 0 2 1 8 ,reloc-entry-size)))
1511 ; symbol table -- ^ ^ -- for which section
1512 (:note ".note.GNU-stack" ,+sht-progbits+ 0 0 0 1 0)))
1513 (string-table
1514 (string-table (append '("lisp_code_start") (map 'list #'second sections))))
1515 (strings (cdr string-table))
1516 (padded-strings-size (align-up (length strings) 8))
1517 (symbols-size (* 2 sym-entry-size))
1518 (shdrs-start (+ ehdr-size symbols-size padded-strings-size))
1519 (shdrs-end (+ shdrs-start (* (1+ (length sections)) shdr-size)))
1520 (relocs-size (* (length relocs) reloc-entry-size))
1521 (relocs-end (+ shdrs-end relocs-size))
1522 (core-start (align-up relocs-end core-align)))
1524 (write-elf-header shdrs-start sections output)
1526 ;; Write symbol table
1527 (aver (eql (file-position output) ehdr-size))
1528 (write-sequence (make-elf64-sym 0 0) output)
1529 ;; The symbol name index is always 1 by construction. The type is #x10
1530 ;; given: #define STB_GLOBAL 1
1531 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
1532 ;; which places the binding in the high 4 bits of the low byte.
1533 (write-sequence (make-elf64-sym 1 #x10) output)
1535 ;; Write string table
1536 (aver (eql (file-position output) (+ ehdr-size symbols-size)))
1537 (write-sequence strings output) ; an octet vector at this point
1538 (dotimes (i (- padded-strings-size (length strings)))
1539 (write-byte 0 output))
1541 ;; Write section headers
1542 (aver (eql (file-position output) shdrs-start))
1543 (write-section-headers
1544 (map 'list
1545 (lambda (x)
1546 (ecase (car x)
1547 (:note '(0 . 0))
1548 (:sym (cons ehdr-size symbols-size))
1549 (:str (cons (+ ehdr-size symbols-size) (length strings)))
1550 (:rel (cons shdrs-end relocs-size))
1551 (:core (cons core-start core-size))))
1552 sections)
1553 sections string-table output)
1555 ;; Write relocations
1556 (aver (eql (file-position output) shdrs-end))
1557 (let ((buf (make-array relocs-size :element-type '(unsigned-byte 8)))
1558 (ptr 0))
1559 (if pie
1560 (dovector (reloc relocs)
1561 (setf (%vector-raw-bits buf ptr) reloc)
1562 (incf ptr))
1563 (with-alien ((rela elf64-rela))
1564 (dovector (reloc relocs)
1565 (destructuring-bind (place addend . kind) reloc
1566 (setf (slot rela 'offset) place
1567 (slot rela 'info) (logior (ash 1 32) kind) ; 1 = symbol index
1568 (slot rela 'addend) addend))
1569 (setf (%vector-raw-bits buf (+ ptr 0)) (sap-ref-word (alien-value-sap rela) 0)
1570 (%vector-raw-bits buf (+ ptr 1)) (sap-ref-word (alien-value-sap rela) 8)
1571 (%vector-raw-bits buf (+ ptr 2)) (sap-ref-word (alien-value-sap rela) 16))
1572 (incf ptr 3))))
1573 (write-sequence buf output))
1575 ;; Write padding
1576 (dotimes (i (- core-start (file-position output)))
1577 (write-byte 0 output))
1578 (aver (eq (file-position output) core-start))))
1580 (defconstant R_ABS64 #+x86-64 1 #+arm64 257) ; /* Direct 64 bit */
1581 (defconstant R_ABS32 #+x86-64 10 #+arm64 258) ; /* Direct 32 bit zero extended */
1583 ;;; Fill in the FIXUPS vector with a list of places to fixup.
1584 ;;; For PIE-enabled cores, each place is just a virtual address.
1585 ;;; For non-PIE-enabled, the fixup corresponds to an ELF relocation which will be
1586 ;;; applied at link time of the excutable.
1587 ;;; Note that while this "works" for PIE, it is fairly inefficient because
1588 ;;; fundamentally Lisp objects contain absolute pointers, and there may be
1589 ;;; millions of words that need fixing at load (execution) time.
1590 ;;; Several techniques can mitigate this:
1591 ;;; * for funcallable-instances, put a second copy of funcallable-instance-tramp
1592 ;;; in dynamic space so that funcallable-instances can jump to a known address.
1593 ;;; * for each closure, create a one-instruction trampoline in dynamic space,
1594 ;;; - embedded in a (simple-array word) perhaps - which jumps to the correct
1595 ;;; place in the text section. Point all closures over the same function
1596 ;;; to the new closure stub. The trampoline, being pseudostatic, is effectively
1597 ;;; immovable. (And you can't re-save from an ELF core)
1598 ;;; * for arbitrary pointers to simple-funs, create a proxy simple-fun in dynamic
1599 ;;; space whose entry point is the real function in the ELF text section.
1600 ;;; The GC might have to learn how to handle simple-funs that point externally
1601 ;;; to themselves. Also there's a minor problem of hash-table test functions
1602 ;;; The above techniques will reduce by a huge factor the number of fixups
1603 ;;; that need to be applied on startup of a position-independent executable.
1605 (defun collect-relocations (spacemap fixups pie &key (verbose nil) (print nil))
1606 (let* ((code-bounds (space-bounds immobile-text-core-space-id spacemap))
1607 (code-start (bounds-low code-bounds))
1608 (n-abs 0)
1609 (n-rel 0)
1610 (affected-pages (make-hash-table)))
1611 (labels
1612 ((abs-fixup (vaddr core-offs referent)
1613 (incf n-abs)
1614 (when print
1615 (format t "~x = 0x~(~x~): (a)~%" core-offs vaddr #+nil referent))
1616 (touch-core-page core-offs)
1617 ;; PIE relocations are output as a file section that is
1618 ;; interpreted by 'coreparse'. The addend is implicit.
1619 (setf (sap-ref-word (car spacemap) core-offs)
1620 (if pie
1621 (+ (- referent code-start) +code-space-nominal-address+)
1623 (if pie
1624 (vector-push-extend vaddr fixups)
1625 (vector-push-extend `(,(+ core-header-size core-offs)
1626 ,(- referent code-start) . ,R_ABS64)
1627 fixups)))
1628 (abs32-fixup (core-offs referent)
1629 (aver (not pie))
1630 (incf n-abs)
1631 (when print
1632 (format t "~x = 0x~(~x~): (a)~%" core-offs (core-to-logical core-offs) #+nil referent))
1633 (touch-core-page core-offs)
1634 (setf (sap-ref-32 (car spacemap) core-offs) 0)
1635 (vector-push-extend `(,(+ core-header-size core-offs)
1636 ,(- referent code-start) . ,R_ABS32)
1637 fixups))
1638 (touch-core-page (core-offs)
1639 ;; use the OS page size, not +backend-page-bytes+
1640 (setf (gethash (floor core-offs core-align) affected-pages) t))
1641 ;; Given a address which is an offset into the data pages of the target core,
1642 ;; compute the logical address which that offset would be mapped to.
1643 ;; For example core address 0 is the virtual address of static space.
1644 (core-to-logical (core-offs &aux (page (floor core-offs +backend-page-bytes+)))
1645 (setf (gethash page affected-pages) t)
1646 (dolist (space (cdr spacemap)
1647 (bug "Can't translate core offset ~x using ~x"
1648 core-offs spacemap))
1649 (let* ((page0 (space-data-page space))
1650 (nwords (space-nwords space))
1651 (id (space-id space))
1652 (npages (ceiling nwords (/ +backend-page-bytes+ n-word-bytes))))
1653 (when (and (<= page0 page (+ page0 (1- npages)))
1654 (/= id immobile-text-core-space-id))
1655 (return (+ (space-addr space)
1656 (* (- page page0) +backend-page-bytes+)
1657 (logand core-offs (1- +backend-page-bytes+))))))))
1658 (scanptrs (vaddr obj wordindex-min wordindex-max &optional force &aux (n-fixups 0))
1659 (do* ((base-addr (logandc2 (get-lisp-obj-address obj) lowtag-mask))
1660 (sap (int-sap base-addr))
1661 ;; core-offs is the offset in the lisp.core ELF section.
1662 (core-offs (- base-addr (sap-int (car spacemap))))
1663 (i wordindex-min (1+ i)))
1664 ((> i wordindex-max) n-fixups)
1665 (let* ((byte-offs (ash i word-shift))
1666 (ptr (sap-ref-word sap byte-offs)))
1667 (when (and (or (is-lisp-pointer ptr) force) (in-bounds-p ptr code-bounds))
1668 (abs-fixup (+ vaddr byte-offs) (+ core-offs byte-offs) ptr)
1669 (incf n-fixups)))))
1670 (scanptr (vaddr obj wordindex)
1671 (plusp (scanptrs vaddr obj wordindex wordindex))) ; trivial wrapper
1672 (scan-obj (vaddr obj widetag size
1673 &aux (core-offs (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1674 (sap-int (car spacemap))))
1675 (nwords (ceiling size n-word-bytes)))
1676 (when (listp obj)
1677 (scanptrs vaddr obj 0 1)
1678 (return-from scan-obj))
1679 (case widetag
1680 (#.instance-widetag
1681 (let ((type (truly-the layout (translate (%instance-layout obj) spacemap))))
1682 (do-layout-bitmap (i taggedp type (%instance-length obj))
1683 (when taggedp
1684 (scanptr vaddr obj (1+ i))))))
1685 (#.simple-vector-widetag
1686 (let ((len (length (the simple-vector obj))))
1687 ;; FIXME: VECTOR-ADDR-HASHING-FLAG must be left-shifted by ARRAY-FLAGS-DATA-POSITION
1688 ;; for this LOGTEST to be correct. I think it broke when array-rank was placed adjacent
1689 ;; to the widetag and the flags bits moved over. The fact that it seems to be useless
1690 ;; suggests that I need to come up with a way to assert that it does anything, which
1691 ;; is all but impossible. We'd need a correctly-hashed table containing a key in which
1692 ;; nothing moves in the final S-L-A-D, but does have a function move during elfination.
1693 ;; I'm leaving this line broken for now because frankly I think it might be safer to
1694 ;; assert that IF the table is address-sensitive THEN it has the rehash flag already set
1695 ;; in the k/v vector. That's not quite true either, because it only needs to rehash if
1696 ;; any key was actually hashed by address.
1697 (cond ((logtest (get-header-data obj) vector-addr-hashing-flag) ; BUG
1698 (do ((i 2 (+ i 2)) (needs-rehash))
1699 ;; Refer to the figure at the top of src/code/hash-table.lisp.
1700 ;; LEN is an odd number.
1701 ((>= i (1- len))
1702 (when needs-rehash
1703 (setf (svref obj 1) 1)))
1704 ;; A weak or EQ-based hash table any of whose keys is a function
1705 ;; or code-component might need the 'rehash' flag set.
1706 ;; In practice, it is likely already set, because any object that
1707 ;; could move in the final GC probably did move.
1708 (when (scanptr vaddr obj (+ vector-data-offset i))
1709 (setq needs-rehash t))
1710 (scanptr vaddr obj (+ vector-data-offset i 1))))
1712 (scanptrs vaddr obj 1 (+ len 1))))))
1713 (#.fdefn-widetag
1714 (scanptrs vaddr obj 1 2)
1715 (scanptrs vaddr obj 3 3 t))
1716 ((#.closure-widetag #.funcallable-instance-widetag)
1717 ;; read the trampoline slot
1718 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj))
1719 (- n-word-bytes fun-pointer-lowtag))))
1720 (when (in-bounds-p word code-bounds)
1721 (abs-fixup (+ vaddr n-word-bytes)
1722 (+ core-offs n-word-bytes)
1723 word)))
1724 ;; untaggged pointers are generally not supported in
1725 ;; funcallable instances, so scan everything.
1726 (scanptrs vaddr obj 1 (1- nwords)))
1727 ;; mixed boxed/unboxed objects
1728 (#.code-header-widetag
1729 (aver (not pie))
1730 (dolist (loc (code-fixup-locs obj spacemap))
1731 (let ((val (sap-ref-32 (code-instructions obj) loc)))
1732 (when (in-bounds-p val code-bounds)
1733 (abs32-fixup (sap- (sap+ (code-instructions obj) loc) (car spacemap))
1734 val))))
1735 (dotimes (i (code-n-entries obj))
1736 ;; I'm being lazy and not computing vaddr, which is wrong,
1737 ;; but does not matter if non-pie; and if PIE, we can't get here.
1738 ;; [PIE requires all code in immobile space, and this reloc
1739 ;; is for a dynamic space object]
1740 (scanptrs 0 (%code-entry-point obj i) 2 5))
1741 (scanptrs vaddr obj 1 (1- (code-header-words obj))))
1742 ;; boxed objects that can reference code/simple-funs
1743 ((#.value-cell-widetag #.symbol-widetag #.weak-pointer-widetag)
1744 (scanptrs vaddr obj 1 (1- nwords))))))
1745 (dolist (space (cdr spacemap))
1746 (unless (= (space-id space) immobile-text-core-space-id)
1747 (let* ((logical-addr (space-addr space))
1748 (size (space-size space))
1749 (physical-addr (space-physaddr space spacemap))
1750 (physical-end (sap+ physical-addr size))
1751 (vaddr-translation (+ (- (sap-int physical-addr)) logical-addr)))
1752 (dx-flet ((visit (obj widetag size)
1753 ;; Compute the object's intended virtual address
1754 (scan-obj (+ (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1755 vaddr-translation)
1756 obj widetag size)))
1757 (map-objects-in-range
1758 #'visit
1759 (ash (sap-int physical-addr) (- n-fixnum-tag-bits))
1760 (ash (sap-int physical-end) (- n-fixnum-tag-bits))))
1761 (when (and (plusp (logior n-abs n-rel)) verbose)
1762 (format t "space @ ~10x: ~6d absolute + ~4d relative fixups~%"
1763 logical-addr n-abs n-rel))
1764 (setq n-abs 0 n-rel 0)))))
1765 (when verbose
1766 (format t "total of ~D linker fixups affecting ~D/~D pages~%"
1767 (length fixups)
1768 (hash-table-count affected-pages)
1769 (/ (reduce #'+ (cdr spacemap) :key #'space-nbytes-aligned)
1770 core-align))))
1771 fixups)
1773 ;;;;
1775 (defun read-core-header (input core-header verbose &aux (core-offset 0))
1776 (read-sequence core-header input)
1777 (cond ((= (%vector-raw-bits core-header 0) core-magic))
1778 (t ; possible embedded core
1779 (file-position input (- (file-length input)
1780 (* 2 n-word-bytes)))
1781 (aver (eql (read-sequence core-header input) (* 2 n-word-bytes)))
1782 (aver (= (%vector-raw-bits core-header 1) core-magic))
1783 (setq core-offset (%vector-raw-bits core-header 0))
1784 (when verbose
1785 (format t "~&embedded core starts at #x~x into input~%" core-offset))
1786 (file-position input core-offset)
1787 (read-sequence core-header input)
1788 (aver (= (%vector-raw-bits core-header 0) core-magic))))
1789 core-offset)
1791 (defmacro do-core-header-entry (((id-var len-var ptr-var) buffer) &body body)
1792 `(let ((,ptr-var 1))
1793 (loop
1794 (let ((,id-var (%vector-raw-bits ,buffer ,ptr-var))
1795 (,len-var (%vector-raw-bits ,buffer (1+ ,ptr-var))))
1796 ;; (format t "~&entry type ~D @ ~d len ~d words~%" id ptr len)
1797 (incf ,ptr-var 2)
1798 (decf ,len-var 2)
1799 (when (= ,id-var end-core-entry-type-code)
1800 (aver (not (find 0 ,buffer :start (ash ,ptr-var word-shift) :test #'/=)))
1801 (return ,ptr-var))
1802 ,@body
1803 (incf ,ptr-var ,len-var)))))
1805 (defmacro do-directory-entry (((index-var start-index input-nbytes) buffer) &body body)
1806 `(let ((words-per-dirent 5))
1807 (multiple-value-bind (n-entries remainder)
1808 (floor ,input-nbytes words-per-dirent)
1809 (aver (zerop remainder))
1810 (symbol-macrolet ((id (%vector-raw-bits ,buffer index))
1811 (nwords (%vector-raw-bits ,buffer (+ index 1)))
1812 (data-page (%vector-raw-bits ,buffer (+ index 2)))
1813 (addr (%vector-raw-bits ,buffer (+ index 3)))
1814 (npages (%vector-raw-bits ,buffer (+ index 4))))
1815 (do ((,index-var ,start-index (+ ,index-var words-per-dirent)))
1816 ((= ,index-var (+ ,start-index (* n-entries words-per-dirent))))
1817 ,@body)))))
1819 (defmacro with-mapped-core ((sap-var start npages stream) &body body)
1820 `(let (,sap-var)
1821 (unwind-protect
1822 (progn
1823 (setq ,sap-var
1824 (alien-funcall
1825 (extern-alien "load_core_bytes"
1826 (function system-area-pointer
1827 int int unsigned unsigned int))
1828 (sb-sys:fd-stream-fd ,stream)
1829 (+ ,start +backend-page-bytes+) ; Skip the core header
1830 0 ; place it anywhere
1831 (* ,npages +backend-page-bytes+) ; len
1833 ,@body)
1834 (when ,sap-var
1835 (alien-funcall
1836 (extern-alien "os_deallocate"
1837 (function void system-area-pointer unsigned))
1838 ,sap-var (* ,npages +backend-page-bytes+))))))
1840 (defun core-header-nwords (core-header &aux (sum 2))
1841 ;; SUM starts as 2, as the core's magic number occupies 1 word
1842 ;; and the ending tag of END-CORE-ENTRY-TYPE-CODE counts as 1.
1843 (do-core-header-entry ((id len ptr) core-header)
1844 ;; LEN as bound by the macro does not count 1 for the
1845 ;; the entry identifier or LEN itself so add them in.
1846 (incf sum (+ len 2)))
1847 sum)
1849 (defun change-dynamic-space-size (core-header new-size) ; expressed in MiB
1850 (unless new-size
1851 (return-from change-dynamic-space-size core-header))
1852 (let ((new (copy-seq core-header)))
1853 ;; memsize options if present must immediately follow the core magic number
1854 ;; so it might require a byte-blt to move other entries over.
1855 (unless (= (%vector-raw-bits new 1) runtime-options-magic)
1856 ;; slide the header to right by 5 words
1857 (replace new core-header :start1 (* 6 n-word-bytes) :start2 (* 1 n-word-bytes))
1858 ;; see write_memsize_options for the format of this entry
1859 ;; All words have to be stored since we're creating it from nothing.
1860 (setf (%vector-raw-bits new 1) runtime-options-magic
1861 (%vector-raw-bits new 2) 5 ; number of words in this entry
1862 (%vector-raw-bits new 4) (extern-alien "thread_control_stack_size" unsigned)
1863 (%vector-raw-bits new 5) (extern-alien "dynamic_values_bytes" (unsigned 32))))
1864 (setf (%vector-raw-bits new 3) (* new-size 1024 1024))
1865 new))
1867 ;;; Given a native SBCL '.core' file, or one attached to the end of an executable,
1868 ;;; separate it into pieces.
1869 ;;; ASM-PATHNAME is the name of the assembler file that will hold all the Lisp code.
1870 ;;; The other two output pathnames are implicit: "x.s" -> "x.core" and "x-core.o"
1871 ;;; The ".core" file is a native core file used for starting a binary that
1872 ;;; contains the asm code using the "--core" argument. The "-core.o" file
1873 ;;; is for linking in to a binary that needs no "--core" argument.
1874 (defun split-core
1875 (input-pathname asm-pathname
1876 &key enable-pie (verbose nil) dynamic-space-size
1877 &aux (elf-core-pathname
1878 (merge-pathnames
1879 (make-pathname :name (concatenate 'string (pathname-name asm-pathname) "-core")
1880 :type "o")
1881 asm-pathname))
1882 (core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
1883 (original-total-npages 0)
1884 (core-offset 0)
1885 (page-adjust 0)
1886 (code-start-fixup-ofs 0) ; where to fixup the core header
1887 (space-list)
1888 (copy-actions)
1889 (fixedobj-range) ; = (START . SIZE-IN-BYTES)
1890 (relocs (make-array 100000 :adjustable t :fill-pointer 1)))
1892 (declare (ignorable fixedobj-range))
1893 ;; Remove old files
1894 (ignore-errors (delete-file asm-pathname))
1895 (ignore-errors (delete-file elf-core-pathname))
1896 ;; Ensure that all files can be opened
1897 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
1898 (with-open-file (asm-file asm-pathname :direction :output :if-exists :supersede)
1899 ;;(with-open-file (split-core split-core-pathname :direction :output
1900 ;; :element-type '(unsigned-byte 8) :if-exists :supersede)
1901 (let ((split-core nil))
1902 (setq core-offset (read-core-header input core-header verbose))
1903 (do-core-header-entry ((id len ptr) core-header)
1904 (case id
1905 (#.build-id-core-entry-type-code
1906 (when verbose
1907 (let ((string (make-string (%vector-raw-bits core-header ptr)
1908 :element-type 'base-char)))
1909 (%byte-blt core-header (* (1+ ptr) n-word-bytes) string 0 (length string))
1910 (format t "Build ID [~a]~%" string))))
1911 (#.directory-core-entry-type-code
1912 (do-directory-entry ((index ptr len) core-header)
1913 (incf original-total-npages npages)
1914 (push (make-space id addr data-page page-adjust nwords) space-list)
1915 (when verbose
1916 (format t "id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1917 id data-page npages addr nwords
1918 (= id immobile-text-core-space-id)))
1919 (cond ((= id immobile-text-core-space-id)
1920 (setq code-start-fixup-ofs (+ index 3))
1921 ;; Keep this entry but delete the page count. We need to know
1922 ;; where the space was supposed to be mapped and at what size.
1923 ;; Subsequent core entries will need to adjust their start page
1924 ;; downward (just the PTEs's start page now).
1925 (setq page-adjust npages data-page 0 npages 0))
1927 ;; Keep track of where the fixedobj space wants to be.
1928 (when (= id immobile-fixedobj-core-space-id)
1929 (setq fixedobj-range (cons addr (ash nwords word-shift))))
1930 (when (plusp npages) ; enqueue
1931 (push (cons data-page (* npages +backend-page-bytes+))
1932 copy-actions))
1933 ;; adjust this entry's start page in the new core
1934 (decf data-page page-adjust)))))
1935 (#.page-table-core-entry-type-code
1936 (aver (= len 4))
1937 (symbol-macrolet ((n-ptes (%vector-raw-bits core-header (+ ptr 1)))
1938 (nbytes (%vector-raw-bits core-header (+ ptr 2)))
1939 (data-page (%vector-raw-bits core-header (+ ptr 3))))
1940 (aver (= data-page original-total-npages))
1941 (aver (= (ceiling (space-nwords
1942 (find dynamic-core-space-id space-list :key #'space-id))
1943 (/ gencgc-page-bytes n-word-bytes))
1944 n-ptes))
1945 (when verbose
1946 (format t "PTE: page=~5x~40tbytes=~8x~%" data-page nbytes))
1947 (push (cons data-page nbytes) copy-actions)
1948 (decf data-page page-adjust)))))
1949 (let ((buffer (make-array +backend-page-bytes+
1950 :element-type '(unsigned-byte 8)))
1951 (filepos))
1952 ;; Write the new core file
1953 (when split-core
1954 (write-sequence core-header split-core))
1955 (dolist (action (reverse copy-actions)) ; nondestructive
1956 ;; page index convention assumes absence of core header.
1957 ;; i.e. data page 0 is the file page immediately following the core header
1958 (let ((offset (* (1+ (car action)) +backend-page-bytes+))
1959 (nbytes (cdr action)))
1960 (when verbose
1961 (format t "File offset ~10x: ~10x bytes~%" offset nbytes))
1962 (setq filepos (+ core-offset offset))
1963 (cond (split-core
1964 (file-position input filepos)
1965 (copy-bytes input split-core nbytes buffer))
1967 (file-position input (+ filepos nbytes))))))
1968 ;; Trailer (runtime options and magic number)
1969 (let ((nbytes (read-sequence buffer input)))
1970 ;; expect trailing magic number
1971 (let ((ptr (floor (- nbytes n-word-bytes) n-word-bytes)))
1972 (aver (= (%vector-raw-bits buffer ptr) core-magic)))
1973 ;; File position of the core header needs to be set to 0
1974 ;; regardless of what it was
1975 (setf (%vector-raw-bits buffer 4) 0)
1976 (when verbose
1977 (format t "Trailer words:(~{~X~^ ~})~%"
1978 (loop for i below (floor nbytes n-word-bytes)
1979 collect (%vector-raw-bits buffer i))))
1980 (when split-core
1981 (write-sequence buffer split-core :end nbytes)
1982 (finish-output split-core)))
1983 ;; Sanity test
1984 (when split-core
1985 (aver (= (+ core-offset
1986 (* page-adjust +backend-page-bytes+)
1987 (file-length split-core))
1988 (file-length input))))
1989 ;; Seek back to the PTE pages so they can be copied to the '.o' file
1990 (file-position input filepos)))
1992 ;; Map the original core file to memory
1993 (with-mapped-core (sap core-offset original-total-npages input)
1994 (let* ((data-spaces
1995 (delete immobile-text-core-space-id (reverse space-list)
1996 :key #'space-id))
1997 (spacemap (cons sap (sort (copy-list space-list) #'> :key #'space-addr)))
1998 (pte-nbytes (cdar copy-actions)))
1999 (collect-relocations spacemap relocs enable-pie)
2000 (with-open-file (output elf-core-pathname
2001 :direction :output :if-exists :supersede
2002 :element-type '(unsigned-byte 8))
2003 ;; If we're going to write memory size options and they weren't already
2004 ;; present, then it will be inserted after the core magic,
2005 ;; and the rest of the header moves over by 5 words.
2006 (when (and dynamic-space-size
2007 (/= (%vector-raw-bits core-header 1) runtime-options-magic))
2008 (incf code-start-fixup-ofs 5))
2009 (unless enable-pie
2010 ;; This fixup sets the 'address' field of the core directory entry
2011 ;; for code space. If PIE-enabled, we'll figure it out in the C code
2012 ;; because space relocation is going to happen no matter what.
2013 (setf (aref relocs 0)
2014 `(,(ash code-start-fixup-ofs word-shift) 0 . ,R_ABS64)))
2015 (prepare-elf (+ (apply #'+ (mapcar #'space-nbytes-aligned data-spaces))
2016 +backend-page-bytes+ ; core header
2017 pte-nbytes)
2018 relocs output enable-pie)
2019 (let ((new-header (change-dynamic-space-size core-header dynamic-space-size)))
2020 ;; This word will be fixed up by the system linker
2021 (setf (%vector-raw-bits new-header code-start-fixup-ofs)
2022 (if enable-pie +code-space-nominal-address+ 0))
2023 (write-sequence new-header output))
2024 (force-output output)
2025 ;; ELF cores created from #-immobile-space cores use +required-foreign-symbols+.
2026 ;; But if #+immobile-space the alien-linkage-table values are computed
2027 ;; by 'ld' and we don't scan +required-foreign-symbols+.
2028 (when (get-space immobile-fixedobj-core-space-id spacemap)
2029 (let* ((sym (find-target-symbol (package-id "SB-VM")
2030 "+REQUIRED-FOREIGN-SYMBOLS+" spacemap :physical))
2031 (vector (translate (symbol-global-value sym) spacemap)))
2032 (fill vector 0)
2033 (setf (%array-fill-pointer vector) 0)))
2034 ;; Change SB-C::*COMPILE-FILE-TO-MEMORY-SPACE* to :DYNAMIC
2035 ;; and SB-C::*COMPILE-TO-MEMORY-SPACE* to :AUTO
2036 ;; in case the resulting executable needs to compile anything.
2037 ;; (Call frame info will be missing, but at least it's something.)
2038 (dolist (item '(("*COMPILE-FILE-TO-MEMORY-SPACE*" . "DYNAMIC")
2039 ("*COMPILE-TO-MEMORY-SPACE*" . "DYNAMIC")))
2040 (destructuring-bind (symbol . value) item
2041 (awhen (%find-target-symbol (package-id "SB-C") symbol spacemap)
2042 (%set-symbol-global-value
2043 it (find-target-symbol (package-id "KEYWORD") value spacemap :logical)))))
2045 (dolist (space data-spaces) ; Copy pages from memory
2046 (let ((start (space-physaddr space spacemap))
2047 (size (space-nbytes-aligned space)))
2048 (aver (eql (sb-unix:unix-write (sb-sys:fd-stream-fd output)
2049 start 0 size)
2050 size))))
2051 (when verbose
2052 (format t "Copying ~d bytes (#x~x) from ptes = ~d PTEs~%"
2053 pte-nbytes pte-nbytes (floor pte-nbytes 10)))
2054 (copy-bytes input output pte-nbytes)) ; Copy PTEs from input
2055 (let ((core (write-assembler-text spacemap asm-file enable-pie)))
2056 (format asm-file " .section .rodata~% .p2align 4~%lisp_fixups:~%")
2057 ;; Sort the hash-table in emit order.
2058 (dolist (x (sort (%hash-table-alist (core-new-fixups core)) #'< :key #'cdr))
2059 (output-bignum nil (car x) asm-file))
2060 (cond
2061 (t ; (get-space immobile-fixedobj-core-space-id spacemap)
2062 (format asm-file (if (member :darwin *features*)
2063 "~% .data~%"
2064 "~% .section .rodata~%"))
2065 (format asm-file " .globl ~A~%~:*~A:
2066 .quad ~d~%"
2067 (labelize "alien_linkage_values")
2068 (length (core-linkage-symbols core)))
2069 ;; -1 (not a plausible function address) signifies that word
2070 ;; following it is a data, not text, reference.
2071 (loop for s across (core-linkage-symbols core)
2072 do (format asm-file " .quad ~:[~;-1, ~]~a~%"
2073 (consp s)
2074 (if (consp s) (car s) s))))
2076 (format asm-file "~% .section .rodata~%")
2077 (format asm-file " .globl anchor_junk~%")
2078 (format asm-file "anchor_junk: .quad lseek_largefile, get_timezone, compute_udiv_magic32~%"))))))
2079 (when (member :linux *features*)
2080 (format asm-file "~% ~A~%" +noexec-stack-note+)))))
2082 ;;; Copy the input core into an ELF section without splitting into code & data.
2083 ;;; Also force a linker reference to each C symbol that the Lisp core mentions.
2084 (defun copy-to-elf-obj (input-pathname output-pathname)
2085 ;; Remove old files
2086 (ignore-errors (delete-file output-pathname))
2087 ;; Ensure that all files can be opened
2088 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
2089 (with-open-file (output output-pathname :direction :output
2090 :element-type '(unsigned-byte 8) :if-exists :supersede)
2091 (let* ((core-header (make-array +backend-page-bytes+
2092 :element-type '(unsigned-byte 8)))
2093 (core-offset (read-core-header input core-header nil))
2094 (space-list)
2095 (total-npages 0) ; excluding core header page
2096 (core-size 0))
2097 (do-core-header-entry ((id len ptr) core-header)
2098 (case id
2099 (#.directory-core-entry-type-code
2100 (do-directory-entry ((index ptr len) core-header)
2101 (incf total-npages npages)
2102 (when (plusp nwords)
2103 (push (make-space id addr data-page 0 nwords) space-list))))
2104 (#.page-table-core-entry-type-code
2105 (aver (= len 4))
2106 (symbol-macrolet ((nbytes (%vector-raw-bits core-header (+ ptr 2)))
2107 (data-page (%vector-raw-bits core-header (+ ptr 3))))
2108 (aver (= data-page total-npages))
2109 (setq core-size (+ (* total-npages +backend-page-bytes+) nbytes))))))
2110 (incf core-size +backend-page-bytes+) ; add in core header page
2111 ;; Map the core file to memory
2112 (with-mapped-core (sap core-offset total-npages input)
2113 (let* ((spacemap (cons sap (sort (copy-list space-list) #'> :key #'space-addr)))
2114 (core (make-core spacemap
2115 (space-bounds immobile-text-core-space-id spacemap)
2116 (space-bounds immobile-fixedobj-core-space-id spacemap)))
2117 (c-symbols (map 'list (lambda (x) (if (consp x) (car x) x))
2118 (core-linkage-symbols core)))
2119 (sections `#((:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
2120 (:sym ".symtab" ,+sht-symtab+ 0 1 1 8 ,sym-entry-size)
2121 ;; section with the strings -- ^ ^ -- 1+ highest local symbol
2122 (:core "lisp.core" ,+sht-progbits+ 0 0 0 ,core-align 0)
2123 (:note ".note.GNU-stack" ,+sht-progbits+ 0 0 0 1 0)))
2124 (string-table (string-table (append (map 'list #'second sections)
2125 c-symbols)))
2126 (packed-strings (cdr string-table))
2127 (strings-start (+ ehdr-size (* (1+ (length sections)) shdr-size)))
2128 (strings-end (+ strings-start (length packed-strings)))
2129 (symbols-start (align-up strings-end 8))
2130 (symbols-size (* (1+ (length c-symbols)) sym-entry-size))
2131 (symbols-end (+ symbols-start symbols-size))
2132 (core-start (align-up symbols-end core-align)))
2133 (write-elf-header ehdr-size sections output)
2134 (write-section-headers `((,strings-start . ,(length packed-strings))
2135 (,symbols-start . ,symbols-size)
2136 (,core-start . ,core-size)
2137 (0 . 0))
2138 sections string-table output)
2139 (write-sequence packed-strings output)
2140 ;; Write symbol table
2141 (file-position output symbols-start)
2142 (write-sequence (make-elf64-sym 0 0) output)
2143 (dolist (sym c-symbols)
2144 (let ((name-ptr (cdr (assoc sym (car string-table)))))
2145 (write-sequence (make-elf64-sym name-ptr #x10) output)))
2146 ;; Copy core
2147 (file-position output core-start)
2148 (file-position input core-offset)
2149 (let ((remaining core-size))
2150 (loop (let ((n (read-sequence core-header input
2151 :end (min +backend-page-bytes+ remaining))))
2152 (write-sequence core-header output :end n)
2153 (unless (plusp (decf remaining n)) (return))))
2154 (aver (zerop remaining)))))))))
2156 ;; These will get set to 0 if the target is not using mark-region-gc
2157 (defglobal *bitmap-bits-per-page* (/ gencgc-page-bytes (* cons-size n-word-bytes)))
2158 (defglobal *bitmap-bytes-per-page* (/ *bitmap-bits-per-page* n-byte-bits))
2160 (defstruct page
2161 words-used
2162 single-obj-p
2163 type
2164 scan-start
2165 bitmap)
2167 (defun read-page-table (stream n-ptes nbytes data-page &optional (print nil))
2168 (declare (ignore nbytes))
2169 (let ((table (make-array n-ptes)))
2170 (file-position stream (* (1+ data-page) sb-c:+backend-page-bytes+))
2171 (dotimes (i n-ptes)
2172 (let* ((bitmap (make-array *bitmap-bits-per-page* :element-type 'bit))
2173 (temp (make-array *bitmap-bytes-per-page* :element-type '(unsigned-byte 8))))
2174 (when (plusp *bitmap-bits-per-page*)
2175 (read-sequence temp stream))
2176 (dotimes (i (/ (length bitmap) n-word-bits))
2177 (setf (%vector-raw-bits bitmap i) (%vector-raw-bits temp i)))
2178 (setf (aref table i) (make-page :bitmap bitmap))))
2179 ;; a PTE is a lispword and a uint16_t
2180 (let ((buf (make-array 10 :element-type '(unsigned-byte 8))))
2181 (with-pinned-objectS (buf)
2182 (dotimes (i n-ptes)
2183 (read-sequence buf stream)
2184 (let ((sso (sap-ref-word (vector-sap buf) 0))
2185 (words-used (sap-ref-16 (vector-sap buf) 8))
2186 (p (aref table i)))
2187 (setf (page-words-used p) (logandc2 words-used 1)
2188 (page-single-obj-p p) (logand words-used 1)
2189 (page-scan-start p) (logandc2 sso 7)
2190 (page-type p) (logand sso 7))
2191 (when (and print (plusp (page-words-used p)))
2192 (format t "~4d: ~4x ~2x~:[~; -~x~]~%"
2193 i (ash (page-words-used p) word-shift)
2194 (page-type p)
2195 (if (= (page-single-obj-p p) 0) nil 1)
2196 (page-scan-start p)))))))
2197 table))
2199 (defun decode-page-type (type)
2200 (ecase type
2201 (0 :free)
2202 (1 :unboxed)
2203 (2 :boxed)
2204 (3 :mixed)
2205 (4 :small-mixed)
2206 (5 :cons)
2207 (7 :code)))
2209 (defun calc-page-index (vaddr space)
2210 (let ((vaddr (if (system-area-pointer-p vaddr) (sap-int vaddr) vaddr)))
2211 (floor (- vaddr (space-addr space)) gencgc-page-bytes)))
2212 (defun calc-page-base (vaddr)
2213 (logandc2 vaddr (1- gencgc-page-bytes)))
2214 (defun calc-object-index (vaddr)
2215 (ash (- vaddr (calc-page-base vaddr)) (- n-lowtag-bits)))
2217 (defun page-bytes-used (index ptes)
2218 (ash (page-words-used (svref ptes index)) word-shift))
2220 (defun find-ending-page (index ptes)
2221 ;; A page ends a contiguous block if it is not wholly used,
2222 ;; or if there is no next page,
2223 ;; or the next page starts its own contiguous block
2224 (if (or (< (page-bytes-used index ptes) gencgc-page-bytes)
2225 (= (1+ index) (length ptes))
2226 (zerop (page-scan-start (svref ptes (1+ index)))))
2227 index
2228 (find-ending-page (1+ index) ptes)))
2230 (defun page-addr (index space) (+ (space-addr space) (* index gencgc-page-bytes)))
2232 (defun walk-dynamic-space (page-type spacemap function)
2233 (do* ((space (get-space dynamic-core-space-id spacemap))
2234 (ptes (space-page-table space))
2235 (nptes (length ptes))
2236 (page-ranges)
2237 (first-page 0))
2238 ((>= first-page nptes) (nreverse page-ranges))
2239 #+gencgc
2240 (let* ((last-page (find-ending-page first-page ptes))
2241 (pte (aref (space-page-table space) first-page))
2242 (start-vaddr (page-addr first-page space))
2243 (end-vaddr (+ (page-addr last-page space) (page-bytes-used last-page ptes))))
2244 (when (and (plusp (page-type pte))
2245 (or (null page-type) (eq page-type (decode-page-type (page-type pte)))))
2246 ;; Because gencgc has page-spanning objects, it's easiest to zero-fill later
2247 ;; if we track the range boundaries now.
2248 (push (list nil first-page last-page) page-ranges) ; NIL = no funcallable-instance
2249 (do ((vaddr (int-sap start-vaddr))
2250 (paddr (int-sap (translate-ptr start-vaddr spacemap))))
2251 ((>= (sap-int vaddr) end-vaddr))
2252 (let* ((word (sap-ref-word paddr 0))
2253 (widetag (logand word widetag-mask))
2254 (size (if (eq widetag filler-widetag)
2255 (ash (ash word -32) word-shift) ; -> words -> bytes
2256 (let* ((obj (reconstitute-object (%make-lisp-obj (sap-int paddr))))
2257 (size (primitive-object-size obj)))
2258 ;; page types codes are never defined for Lisp
2259 (when (eq page-type 7) ; KLUDGE: PAGE_TYPE_CODE
2260 (aver (or (= widetag code-header-widetag)
2261 (= widetag funcallable-instance-widetag))))
2262 (when (= widetag funcallable-instance-widetag)
2263 (setf (caar page-ranges) t)) ; T = has funcallable-instance
2264 (funcall function obj vaddr size :ignore)
2265 size))))
2266 (setq vaddr (sap+ vaddr size)
2267 paddr (sap+ paddr size)))))
2268 (setq first-page (1+ last-page)))
2269 #+mark-region-gc
2270 (let* ((vaddr (int-sap (+ (space-addr space) (* first-page gencgc-page-bytes))))
2271 (paddr (int-sap (translate-ptr (sap-int vaddr) spacemap)))
2272 (pte (aref (space-page-table space) first-page))
2273 (bitmap (page-bitmap pte)))
2274 (cond ((= (page-single-obj-p pte) 1)
2275 ;; last page is located by doing some arithmetic
2276 (let* ((obj (reconstitute-object (%make-lisp-obj (sap-int paddr))))
2277 (size (primitive-object-size obj))
2278 (last-page (calc-page-index (sap+ vaddr (1- size)) space)))
2279 #+nil (format t "~&Page ~4d..~4d ~A LARGE~%" first-page last-page (decode-page-type (page-type pte)))
2280 (funcall function obj vaddr size t)
2281 (setq first-page last-page)))
2282 ((plusp (page-type pte))
2283 #+nil (format t "~&Page ~4D : ~A~%" first-page (decode-page-type (page-type pte)))
2284 (when (or (null page-type) (eq page-type (decode-page-type (page-type pte))))
2285 (do ((object-offset-in-dualwords 0))
2286 ((>= object-offset-in-dualwords *bitmap-bits-per-page*))
2287 (let ((size
2288 (cond ((zerop (sbit bitmap object-offset-in-dualwords))
2289 (unless (and (zerop (sap-ref-word paddr 0))
2290 (zerop (sap-ref-word paddr 8)))
2291 (error "Unallocated object @ ~X: ~X ~X"
2292 vaddr (sap-ref-word paddr 0) (sap-ref-word paddr 8)))
2293 (* 2 n-word-bytes))
2295 (let* ((obj (reconstitute-object (%make-lisp-obj (sap-int paddr))))
2296 (size (primitive-object-size obj)))
2297 (funcall function obj vaddr size nil)
2298 size)))))
2299 (setq vaddr (sap+ vaddr size)
2300 paddr (sap+ paddr size))
2301 (incf object-offset-in-dualwords (ash size (- (1+ word-shift)))))))))
2302 (incf first-page))))
2304 ;;; Unfortunately the idea of using target features to decide whether to
2305 ;;; read a bitmap from PAGE_TABLE_CORE_ENTRY_TYPE_CODE falls flat,
2306 ;;; because we can't scan for symbols until the core is read, but we can't
2307 ;;; read the core until we decide whether there is a bitmap, which needs the
2308 ;;; feature symbols. Some possible solutions (and there are others too):
2309 ;;; 1) make a separate core entry for the bitmap
2310 ;;; 2) add a word to that core entry indicating that it has a bitmap
2311 ;;; 3) make a different entry type code for PTES_WITH_BITMAP
2312 (defun detect-target-features (spacemap &aux result)
2313 (flet ((scan (symbol)
2314 (let ((list (symbol-global-value symbol))
2315 (target-nil (compute-nil-object spacemap)))
2316 (loop
2317 (when (eq list target-nil) (return))
2318 (setq list (translate list spacemap))
2319 (let ((feature (translate (car list) spacemap)))
2320 (aver (symbolp feature))
2321 ;; convert keywords and only keywords into host keywords
2322 (when (eq (symbol-package-id feature) (symbol-package-id :sbcl))
2323 (let ((string (translate (symbol-name feature) spacemap)))
2324 (push (intern string "KEYWORD") result))))
2325 (setq list (cdr list))))))
2326 (walk-dynamic-space
2328 spacemap
2329 (lambda (obj vaddr size large)
2330 (declare (ignore vaddr size large))
2331 (when (symbolp obj)
2332 (when (or (and (eq (symbol-package-id obj) #.(symbol-package-id 'sb-impl:+internal-features+))
2333 (string= (translate (symbol-name obj) spacemap) "+INTERNAL-FEATURES+"))
2334 (and (eq (symbol-package-id obj) #.(symbol-package-id '*features*))
2335 (string= (translate (symbol-name obj) spacemap) "*FEATURES*")))
2336 (scan obj))))))
2337 ;;(format t "~&Target-features=~S~%" result)
2338 result)
2340 (defun transport-dynamic-space-code (codeblobs spacemap new-space free-ptr)
2341 (do ((list codeblobs (cdr list))
2342 (offsets-vector-data (sap+ new-space (* 2 n-word-bytes)))
2343 (object-index 0 (1+ object-index)))
2344 ((null list))
2345 ;; FROM-VADDR is the original logical (virtual) address, and FROM-PADDR
2346 ;; is where the respective object is currently resident in memory now.
2347 ;; Similarly-named "TO-" values correspond to the location in new space.
2348 (destructuring-bind (from-vaddr . size) (car list)
2349 (let ((from-paddr (int-sap (translate-ptr (sap-int from-vaddr) spacemap)))
2350 (to-vaddr (+ +code-space-nominal-address+ free-ptr))
2351 (to-paddr (sap+ new-space free-ptr)))
2352 (setf (sap-ref-32 offsets-vector-data (ash object-index 2)) free-ptr)
2353 ;; copy to code space
2354 (%byte-blt from-paddr 0 new-space free-ptr size)
2355 (let* ((new-physobj
2356 (%make-lisp-obj (logior (sap-int to-paddr) other-pointer-lowtag)))
2357 (header-bytes (ash (code-header-words new-physobj) word-shift))
2358 (new-insts (code-instructions new-physobj)))
2359 ;; fix the jump table words which, if present, start at NEW-INSTS
2360 (let ((wordcount (code-jump-table-words new-physobj))
2361 (disp (- to-vaddr (sap-int from-vaddr))))
2362 (loop for i from 1 below wordcount
2363 do (let ((w (sap-ref-word new-insts (ash i word-shift))))
2364 (unless (zerop w)
2365 (setf (sap-ref-word new-insts (ash i word-shift)) (+ w disp))))))
2366 ;; fix the simple-fun pointers
2367 (dotimes (i (code-n-entries new-physobj))
2368 (let ((fun-offs (%code-fun-offset new-physobj i)))
2369 ;; Assign the address that each simple-fun will have assuming
2370 ;; the object will reside at its new logical address.
2371 (setf (sap-ref-word new-insts (+ fun-offs n-word-bytes))
2372 (+ to-vaddr header-bytes fun-offs (* 2 n-word-bytes))))))
2373 (incf free-ptr size)))))
2375 (defun remap-to-quasi-static-code (val spacemap fwdmap)
2376 (when (is-lisp-pointer (get-lisp-obj-address val))
2377 (binding* ((translated (translate val spacemap))
2378 (vaddr (get-lisp-obj-address val))
2379 (code-base-addr
2380 (cond ((simple-fun-p translated)
2381 ;; the code component has to be computed "by hand" because FUN-CODE-HEADER
2382 ;; would return the physically mapped object, but we need
2383 ;; to get the logical address of the code.
2384 (- (- vaddr fun-pointer-lowtag)
2385 (ash (ldb (byte 24 8)
2386 (sap-ref-word (int-sap (get-lisp-obj-address translated))
2387 (- fun-pointer-lowtag)))
2388 word-shift)))
2389 ((code-component-p translated)
2390 (- vaddr other-pointer-lowtag)))
2391 :exit-if-null)
2392 (new-code-offset (gethash code-base-addr fwdmap) :exit-if-null))
2393 (%make-lisp-obj (+ (if (functionp translated)
2394 (- vaddr code-base-addr) ; function tag is in the difference
2395 other-pointer-lowtag)
2396 +code-space-nominal-address+
2397 new-code-offset)))))
2399 ;;; It's not worth trying to use the host's DO-REFERENCED-OBJECT because it requires
2400 ;;; completely different behavior for INSTANCE and FUNCALLABLE-INSTANCE to avoid using
2401 ;;; the layout pointers as-is. And closures don't really work either. So unfortunately
2402 ;;; this is essentially a reimplementation. Thankfully we only have to deal with pointers
2403 ;;; that could possibly point to code.
2404 (defun update-quasi-static-code-ptrs
2405 (obj spacemap fwdmap displacement &optional print
2406 &aux (sap (int-sap (logandc2 (get-lisp-obj-address obj) lowtag-mask))))
2407 (when print
2408 (format t "paddr ~X vaddr ~X~%" (get-lisp-obj-address obj)
2409 (+ (get-lisp-obj-address obj) displacement)))
2410 (macrolet ((visit (place)
2411 `(let* ((oldval ,place) (newval (remap oldval)))
2412 (when newval
2413 (setf ,place newval)))))
2414 (flet ((fun-entrypoint (fun)
2415 (+ (get-lisp-obj-address fun) (- fun-pointer-lowtag) (ash 2 word-shift)))
2416 (remap (x)
2417 (remap-to-quasi-static-code x spacemap fwdmap)))
2418 (cond
2419 ((listp obj) (visit (car obj)) (visit (cdr obj)))
2420 ((simple-vector-p obj)
2421 (dotimes (i (length obj)) (visit (svref obj i))))
2422 ((%instancep obj)
2423 (let ((type (truly-the layout (translate (%instance-layout obj) spacemap))))
2424 (do-layout-bitmap (i taggedp type (%instance-length obj))
2425 (when taggedp (visit (%instance-ref obj i))))))
2426 ((functionp obj)
2427 (let ((start
2428 (cond ((funcallable-instance-p obj)
2429 ;; The trampoline points to the function itself (so is ignorable)
2430 ;; and following that word are 2 words of machine code.
2433 (aver (closurep obj))
2434 (let ((fun (remap (%closure-fun obj))))
2435 ;; there is no setter for closure-fun
2436 (setf (sap-ref-word sap n-word-bytes) (fun-entrypoint fun)))
2437 2))))
2438 (loop for i from start to (logior (get-closure-length obj) 1)
2439 do (visit (sap-ref-lispobj sap (ash i word-shift))))))
2440 ((code-component-p obj)
2441 (loop for i from 2 below (code-header-words obj)
2442 do (visit (code-header-ref obj i))))
2443 ((symbolp obj)
2444 (visit (sap-ref-lispobj sap (ash symbol-value-slot word-shift))))
2445 ((weak-pointer-p obj)
2446 (visit (sap-ref-lispobj sap (ash weak-pointer-value-slot word-shift))))
2447 ((fdefn-p obj)
2448 (let ((raw (sap-ref-word sap (ash fdefn-raw-addr-slot word-shift))))
2449 (unless (in-bounds-p raw (space-bounds static-core-space-id spacemap))
2450 (awhen (remap (%make-lisp-obj (+ raw (ash -2 word-shift) fun-pointer-lowtag)))
2451 (setf (sap-ref-word sap (ash fdefn-raw-addr-slot word-shift))
2452 (fun-entrypoint it)))))
2453 (visit (sap-ref-lispobj sap (ash fdefn-fun-slot word-shift))))
2454 ((= (%other-pointer-widetag obj) value-cell-widetag)
2455 (visit (sap-ref-lispobj sap (ash value-cell-value-slot word-shift))))))))
2457 ;;; Clear all the old objects. Funcallable instances can be co-mingled with
2458 ;;; code, so a code page might not be empty but most will be. Free those pages.
2459 (defun zerofill-old-code (spacemap codeblobs page-ranges)
2460 (declare (ignorable page-ranges))
2461 (with-alien ((memset (function void unsigned int unsigned) :extern))
2462 (flet ((reset-pte (pte)
2463 (setf (page-words-used pte) 0
2464 (page-single-obj-p pte) 0
2465 (page-type pte) 0
2466 (page-scan-start pte) 0)))
2467 (let ((space (get-space dynamic-core-space-id spacemap)))
2468 #+gencgc
2469 (dolist (range page-ranges (aver (null codeblobs)))
2470 (destructuring-bind (in-use first last) range
2471 ;;(format t "~&Working on range ~D..~D~%" first last)
2472 (loop while codeblobs
2473 do (destructuring-bind (vaddr . size) (car codeblobs)
2474 (let ((page (calc-page-index vaddr space)))
2475 (cond ((> page last) (return))
2476 ((< page first) (bug "Incorrect sort"))
2478 (let ((paddr (translate-ptr (sap-int vaddr) spacemap)))
2479 (alien-funcall memset paddr 0 size)
2480 (when in-use ; store a filler widetag
2481 (let* ((nwords (ash size (- word-shift)))
2482 (header (logior (ash nwords 32) filler-widetag)))
2483 (setf (sap-ref-word (int-sap paddr) 0) header))))
2484 (pop codeblobs))))))
2485 (unless in-use
2486 (loop for page-index from first to last
2487 do (reset-pte (svref (space-page-table space) page-index))))))
2488 #+mark-region-gc
2489 (dolist (code codeblobs)
2490 (destructuring-bind (vaddr . size) code
2491 (alien-funcall memset (translate-ptr (sap-int vaddr) spacemap) 0 size)
2492 (let* ((page-index (calc-page-index vaddr space))
2493 (pte (aref (space-page-table space) page-index))
2494 (object-index (calc-object-index (sap-int vaddr))))
2495 (setf (sbit (page-bitmap pte) object-index) 0)
2496 (cond ((= (page-single-obj-p pte) 1)
2497 ;(format t "~&Cleared large-object pages @ ~x~%" (sap-int vaddr))
2498 (loop for p from page-index to (calc-page-index (sap+ vaddr (1- size)) space)
2499 do (let ((pte (svref (space-page-table space) p)))
2500 (aver (not (find 1 (page-bitmap pte))))
2501 (reset-pte pte))))
2502 ((not (find 1 (page-bitmap pte)))
2503 ;; is the #+gencgc logic above actually more efficient?
2504 ;;(format t "~&Code page ~D is now empty~%" page-index)
2505 (reset-pte pte))))))))))
2507 (defun parse-core-header (input core-header)
2508 (let ((space-list)
2509 (total-npages 0) ; excluding core header page
2510 (card-mask-nbits)
2511 (core-dir-start)
2512 (initfun))
2513 (do-core-header-entry ((id len ptr) core-header)
2514 (ecase id
2515 (#.directory-core-entry-type-code
2516 (setq core-dir-start (- ptr 2))
2517 (do-directory-entry ((index ptr len) core-header)
2518 (incf total-npages npages)
2519 (push (make-space id addr data-page 0 nwords) space-list)))
2520 (#.page-table-core-entry-type-code
2521 (aver (= len 4))
2522 (symbol-macrolet ((n-ptes (%vector-raw-bits core-header (+ ptr 1)))
2523 (nbytes (%vector-raw-bits core-header (+ ptr 2)))
2524 (data-page (%vector-raw-bits core-header (+ ptr 3))))
2525 (aver (= data-page total-npages))
2526 (setf card-mask-nbits (%vector-raw-bits core-header ptr))
2527 (format nil "~&card-nbits = ~D~%" card-mask-nbits)
2528 (let ((space (get-space dynamic-core-space-id (cons nil space-list))))
2529 (setf (space-page-table space) (read-page-table input n-ptes nbytes data-page)))))
2530 (#.build-id-core-entry-type-code
2531 (let ((string (make-string (%vector-raw-bits core-header ptr)
2532 :element-type 'base-char)))
2533 (%byte-blt core-header (* (1+ ptr) n-word-bytes) string 0 (length string))
2534 (format nil "Build ID [~a] len=~D ptr=~D actual-len=~D~%" string len ptr (length string))))
2535 (#.runtime-options-magic) ; ignore
2536 (#.initial-fun-core-entry-type-code
2537 (setq initfun (%vector-raw-bits core-header ptr)))))
2538 (values total-npages space-list card-mask-nbits core-dir-start initfun)))
2540 (defconstant +lispwords-per-corefile-page+ (/ sb-c:+backend-page-bytes+ n-word-bytes))
2542 (defun rewrite-core (directory spacemap card-mask-nbits initfun core-header offset output
2543 &aux (dynamic-space (get-space dynamic-core-space-id spacemap)))
2544 (aver (= (%vector-raw-bits core-header offset) directory-core-entry-type-code))
2545 (let ((nwords (+ (* (length directory) 5) 2)))
2546 (setf (%vector-raw-bits core-header (incf offset)) nwords))
2547 (let ((page-count 0)
2548 (n-ptes (length (space-page-table dynamic-space))))
2549 (dolist (dir-entry directory)
2550 (setf (car dir-entry) page-count)
2551 (destructuring-bind (id paddr vaddr nwords) (cdr dir-entry)
2552 (declare (ignore paddr))
2553 (let ((npages (ceiling nwords +lispwords-per-corefile-page+)))
2554 (when (= id dynamic-core-space-id)
2555 (aver (= npages n-ptes)))
2556 (dolist (word (list id nwords page-count vaddr npages))
2557 (setf (%vector-raw-bits core-header (incf offset)) word))
2558 (incf page-count npages))))
2559 (let* ((sizeof-corefile-pte (+ n-word-bytes 2))
2560 (pte-bytes (align-up (* sizeof-corefile-pte n-ptes) n-word-bytes)))
2561 (dolist (word (list page-table-core-entry-type-code
2562 6 ; = number of words in this core header entry
2563 card-mask-nbits
2564 n-ptes (+ (* n-ptes *bitmap-bytes-per-page*) pte-bytes)
2565 page-count))
2566 (setf (%vector-raw-bits core-header (incf offset)) word)))
2567 (dolist (word (list initial-fun-core-entry-type-code 3 initfun
2568 end-core-entry-type-code 2))
2569 (setf (%vector-raw-bits core-header (incf offset)) word))
2570 (write-sequence core-header output)
2571 ;; write out the data from each space
2572 (dolist (dir-entry directory)
2573 (destructuring-bind (page id paddr vaddr nwords) dir-entry
2574 (declare (ignore id vaddr))
2575 (aver (= (file-position output) (* sb-c:+backend-page-bytes+ (1+ page))))
2576 (let* ((npages (ceiling nwords +lispwords-per-corefile-page+))
2577 (nbytes (* npages sb-c:+backend-page-bytes+))
2578 (wrote
2579 (sb-unix:unix-write (sb-impl::fd-stream-fd output) paddr 0 nbytes)))
2580 (aver (= wrote nbytes)))))
2581 (aver (= (file-position output) (* sb-c:+backend-page-bytes+ (1+ page-count))))
2582 #+mark-region-gc ; write the bitmap
2583 (dovector (pte (space-page-table dynamic-space))
2584 (let ((bitmap (page-bitmap pte)))
2585 (sb-sys:with-pinned-objects (bitmap)
2586 ;; WRITE-SEQUENCE on a bit vector would write one octet per bit
2587 (sb-unix:unix-write (sb-impl::fd-stream-fd output) bitmap 0 (/ (length bitmap) 8)))))
2588 ;; write the PTEs
2589 (let ((buffer (make-array 10 :element-type '(unsigned-byte 8))))
2590 (sb-sys:with-pinned-objects (buffer)
2591 (let ((sap (vector-sap buffer)))
2592 (dovector (pte (space-page-table dynamic-space))
2593 (setf (sap-ref-64 sap 0) (logior (page-scan-start pte) (page-type pte))
2594 (sap-ref-16 sap 8) (logior (page-words-used pte) (page-single-obj-p pte)))
2595 (write-sequence buffer output)))
2596 (let* ((bytes-written (* 10 (length (space-page-table dynamic-space))))
2597 (diff (- (align-up bytes-written sb-vm:n-word-bytes)
2598 bytes-written)))
2599 (fill buffer 0)
2600 (write-sequence buffer output :end diff))))
2601 ;; write the trailer
2602 (let ((buffer (make-array 16 :element-type '(unsigned-byte 8)
2603 :initial-element 0)))
2604 (sb-sys:with-pinned-objects (buffer)
2605 (setf (%vector-raw-bits buffer 0) 0
2606 (%vector-raw-bits buffer 1) core-magic)
2607 (write-sequence buffer output)))
2608 (force-output output)))
2610 (defun walk-target-space (function space-id spacemap)
2611 (let* ((space (get-space space-id spacemap))
2612 (paddr (space-physaddr space spacemap)))
2613 (map-objects-in-range function
2614 (%make-lisp-obj
2615 (if (= space-id static-core-space-id)
2616 ;; must not visit NIL, bad things happen
2617 (translate-ptr (+ static-space-start sb-vm::static-space-objects-offset)
2618 spacemap)
2619 (sap-int paddr)))
2620 (%make-lisp-obj (sap-int (sap+ paddr (space-size space)))))))
2622 (defun find-target-asm-code (spacemap)
2623 (walk-target-space (lambda (obj widetag size)
2624 (declare (ignore size))
2625 (when (= widetag code-header-widetag)
2626 (return-from find-target-asm-code
2627 (let* ((space (get-space static-core-space-id spacemap))
2628 (vaddr (space-addr space))
2629 (paddr (space-physaddr space spacemap)))
2630 (%make-lisp-obj
2631 (+ vaddr (- (get-lisp-obj-address obj)
2632 (sap-int paddr))))))))
2633 static-core-space-id spacemap))
2635 (defun move-dynamic-code-to-text-space (input-pathname output-pathname)
2636 ;; Remove old files
2637 (ignore-errors (delete-file output-pathname))
2638 ;; Ensure that all files can be opened
2639 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
2640 (with-open-file (output output-pathname :direction :output
2641 :element-type '(unsigned-byte 8) :if-exists :supersede)
2642 ;; KLUDGE: see comment above DETECT-TARGET-FEATURES
2643 #+gencgc (setq *bitmap-bits-per-page* 0 *bitmap-bytes-per-page* 0)
2644 (binding* ((core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
2645 (core-offset (read-core-header input core-header t))
2646 ((npages space-list card-mask-nbits core-dir-start initfun)
2647 (parse-core-header input core-header)))
2648 ;; Map the core file to memory
2649 (with-mapped-core (sap core-offset npages input)
2650 (let* ((spacemap (cons sap (sort (copy-list space-list) #'> :key #'space-addr)))
2651 (target-features (detect-target-features spacemap))
2652 (codeblobs nil)
2653 (fwdmap (make-hash-table))
2654 (n-objects)
2655 (offsets-vector-size)
2656 ;; We only need enough space to write C linkage call redirections from the
2657 ;; assembler routine codeblob, because those are the calls which assume that
2658 ;; asm code can directly call into the linkage space using "CALL rel32" form.
2659 ;; Dynamic-space calls do not assume that - they use "CALL [ea]" form.
2660 (c-linkage-reserved-words 12) ; arbitrary overestimate
2661 (reserved-amount)
2662 ;; text space will contain a copy of the asm code so it can use call rel32 form
2663 (asm-code (find-target-asm-code spacemap))
2664 (asm-code-size (primitive-object-size (translate asm-code spacemap)))
2665 (freeptr asm-code-size)
2666 (page-ranges
2667 (walk-dynamic-space
2668 :code spacemap
2669 (lambda (obj vaddr size large)
2670 (declare (ignore large))
2671 (when (code-component-p obj)
2672 (push (cons vaddr size) codeblobs)
2673 ;; new object will be at FREEPTR bytes from new space start
2674 (setf (gethash (sap-int vaddr) fwdmap) freeptr)
2675 (incf freeptr size))))))
2676 ;; FIXME: this _still_ doesn't work, because if the buid has :IMMOBILE-SPACE
2677 ;; then the symbols CL:*FEATURES* and SB-IMPL:+INTERNAL-FEATURES+
2678 ;; are not in dynamic space.
2679 (when (member :immobile-space target-features)
2680 (error "Can't relocate code to text space since text space already exists"))
2681 (setq codeblobs
2682 (acons (int-sap (logandc2 (get-lisp-obj-address asm-code) lowtag-mask))
2683 asm-code-size
2684 (nreverse codeblobs))
2685 n-objects (length codeblobs))
2686 ;; Preceding the code objects are two vectors:
2687 ;; (1) a vector of uint32_t indicating the starting offset (from the space start)
2688 ;; of each code object.
2689 ;; (2) a vector of uint64_t which embeds a JMP instruction to a C linkage table entry.
2690 ;; These instructions are near enough to be called via 'rel32' form. (The ordinary
2691 ;; alien linkage space is NOT near enough, after code is moved to text space)
2692 ;; The size of the new text space has to account for the sizes of the vectors.
2693 (let* ((n-vector1-data-words (ceiling n-objects 2)) ; two uint32s fit in a lispword
2694 (vector1-size (ash (+ (align-up n-vector1-data-words 2) ; round to even
2695 vector-data-offset)
2696 word-shift))
2697 (n-vector2-data-words c-linkage-reserved-words)
2698 (vector2-size (ash (+ n-vector2-data-words vector-data-offset)
2699 word-shift)))
2700 (setf offsets-vector-size vector1-size
2701 reserved-amount (+ vector1-size vector2-size))
2702 ;; Adjust all code offsets upward to avoid doing more math later
2703 (maphash (lambda (k v)
2704 (setf (gethash k fwdmap) (+ v reserved-amount)))
2705 fwdmap)
2706 (incf freeptr reserved-amount)
2707 (format nil "~&Code: ~D objects, ~D bytes~%" (length codeblobs) freeptr))
2708 (let* ((new-space-nbytes (align-up freeptr sb-c:+backend-page-bytes+))
2709 (new-space (sb-sys:allocate-system-memory new-space-nbytes)))
2710 ;; Write header of "vector 1"
2711 (setf (sap-ref-word new-space 0) simple-array-unsigned-byte-32-widetag
2712 (sap-ref-word new-space n-word-bytes) (fixnumize n-objects))
2713 ;; write header of "vector 2"
2714 (setf (sap-ref-word new-space offsets-vector-size) simple-array-unsigned-byte-64-widetag
2715 (sap-ref-word new-space (+ offsets-vector-size n-word-bytes))
2716 (fixnumize c-linkage-reserved-words))
2717 ;; Transport code contiguously into new space
2718 (transport-dynamic-space-code codeblobs spacemap new-space reserved-amount)
2719 ;; Walk static space and dynamic-space changing any pointers that
2720 ;; should point to new space.
2721 (dolist (space-id `(,dynamic-core-space-id ,static-core-space-id))
2722 (let* ((space (get-space space-id spacemap))
2723 (vaddr (space-addr space))
2724 (paddr (space-physaddr space spacemap))
2725 (diff (+ (- (sap-int paddr)) vaddr)))
2726 (format nil "~&Fixing ~A~%" space)
2727 (walk-target-space
2728 (lambda (object widetag size)
2729 (declare (ignore widetag size))
2730 (unless (and (code-component-p object) (= space-id dynamic-core-space-id))
2731 (update-quasi-static-code-ptrs object spacemap fwdmap diff)))
2732 space-id spacemap)))
2733 ;; Walk new space and fix pointers into itself
2734 (format nil "~&Fixing newspace~%")
2735 (map-objects-in-range
2736 (lambda (object widetag size)
2737 (declare (ignore widetag size))
2738 (update-quasi-static-code-ptrs object spacemap fwdmap 0))
2739 (%make-lisp-obj (sap-int new-space))
2740 (%make-lisp-obj (sap-int (sap+ new-space freeptr))))
2741 ;; don't zerofill asm code in static space
2742 (zerofill-old-code spacemap (cdr codeblobs) page-ranges)
2743 ;; Update the core header to contain newspace
2744 (let ((spaces (nreconc
2745 (mapcar (lambda (space)
2746 (list 0 (space-id space)
2747 (int-sap (translate-ptr (space-addr space) spacemap))
2748 (space-addr space)
2749 (space-nwords space)))
2750 space-list)
2751 `((0 ,immobile-text-core-space-id ,new-space
2752 ,+code-space-nominal-address+
2753 ,(ash freeptr (- word-shift)))))))
2754 (rewrite-core spaces spacemap card-mask-nbits initfun
2755 core-header core-dir-start output)
2756 ))))))))
2758 ;;;;
2760 (defun cl-user::elfinate (&optional (args (cdr *posix-argv*)))
2761 (cond ((string= (car args) "split")
2762 (pop args)
2763 (let (pie dss)
2764 (loop (cond ((string= (car args) "--pie")
2765 (setq pie t)
2766 (pop args))
2767 ((string= (car args) "--dynamic-space-size")
2768 (pop args)
2769 (setq dss (parse-integer (pop args))))
2771 (return))))
2772 (destructuring-bind (input asm) args
2773 (split-core input asm :enable-pie pie
2774 :dynamic-space-size dss))))
2775 ((string= (car args) "copy")
2776 (apply #'copy-to-elf-obj (cdr args)))
2777 ((string= (car args) "extract")
2778 (apply #'move-dynamic-code-to-text-space (cdr args)))
2779 #+nil
2780 ((string= (car args) "relocate")
2781 (destructuring-bind (input output binary start-sym) (cdr args)
2782 (relocate-core
2783 input output binary (parse-integer start-sym :radix 16))))
2785 (error "Unknown command: ~S" args))))
2787 ;;; Processing a core without immobile-space
2789 ;;; This file provides a recipe which gets a little bit closer to being able to
2790 ;;; emulate #+immobile-space in so far as producing an ELF core is concerned.
2791 ;;; The recipe is a bit more complicated than I'd like, but it works.
2792 ;;; Let's say you want a core with contiguous text space containing the code
2793 ;;; of a quicklisp system.
2795 ;;; $ run-sbcl.sh
2796 ;;; * (ql:quickload :one-more-re-nightmare-tests)
2797 ;;; * (save-lisp-and-die "step1.core")
2798 ;;; $ run-sbcl.sh
2799 ;;; * (load "tools-for-build/editcore")
2800 ;;; * (sb-editcore:move-dynamic-code-to-text-space "step1.core" "step2.core")
2801 ;;; * (sb-editcore:redirect:text-space-calls "step2.core")
2802 ;;; Now "step2.core" has a text space, and all lisp-to-lisp calls bypass their FDEFN.
2803 ;;; At this point split-core on "step2.core" can run in the manner of elfcore.test.sh
2805 (defun get-code-segments (code vaddr spacemap)
2806 (let ((di (%code-debug-info code))
2807 (inst-base (+ vaddr (ash (code-header-words code) word-shift)))
2808 (result))
2809 (aver (%instancep di))
2810 (if (zerop (code-n-entries code)) ; assembler routines
2811 (dolist (entry (target-hash-table-alist di spacemap))
2812 (let* ((val (translate (undescriptorize (cdr entry)) spacemap))
2813 ;; VAL is (start end . index)
2814 (start (the fixnum (car val)))
2815 (end (the fixnum (car (translate (cdr val) spacemap)))))
2816 (push (make-code-segment code start (- (1+ end) start)
2817 :virtual-location (+ inst-base start))
2818 result)))
2819 (dolist (range (get-text-ranges code spacemap))
2820 (let ((car (car range)))
2821 (when (integerp car)
2822 (push (make-code-segment code car (- (cdr range) car)
2823 :virtual-location (+ inst-base car))
2824 result)))))
2825 (sort result #'< :key #'sb-disassem:seg-virtual-location)))
2827 (defstruct (range (:constructor make-range (labeled vaddr bytecount)))
2828 labeled vaddr bytecount)
2830 (defun inst-vaddr (inst) (range-vaddr (car inst)))
2831 (defun inst-length (inst) (range-bytecount (car inst)))
2832 (defun inst-end (inst &aux (range (car inst)))
2833 (+ (range-vaddr range) (range-bytecount range)))
2835 (defmethod print-object ((self range) stream)
2836 (format stream "~A~x,~x"
2837 (if (range-labeled self) "L:" " ")
2838 (range-vaddr self)
2839 (range-bytecount self)))
2840 (defun get-code-instruction-model (code vaddr spacemap)
2841 (let* ((segments (get-code-segments code vaddr spacemap))
2842 (insts-vaddr (+ vaddr (ash (code-header-words code) word-shift)))
2843 (dstate (sb-disassem:make-dstate))
2844 (fun-header-locs
2845 (loop for i from 0 below (code-n-entries code)
2846 collect (+ insts-vaddr (%code-fun-offset code i))))
2847 (labels))
2848 (sb-disassem:label-segments segments dstate)
2849 ;; are labels not already sorted?
2850 (setq labels (sort (sb-disassem::dstate-labels dstate) #'< :key #'car))
2851 (sb-int:collect ((result))
2852 (dolist (seg segments (coerce (result) 'vector))
2853 (setf (sb-disassem:dstate-segment dstate) seg
2854 (sb-disassem:dstate-segment-sap dstate)
2855 (funcall (sb-disassem:seg-sap-maker seg)))
2856 (setf (sb-disassem:dstate-cur-offs dstate) 0)
2857 (loop
2858 (when (eql (sb-disassem:dstate-cur-addr dstate) (car fun-header-locs))
2859 (incf (sb-disassem:dstate-cur-offs dstate) (* simple-fun-insts-offset n-word-bytes))
2860 (pop fun-header-locs))
2861 (let* ((pc (sb-disassem:dstate-cur-addr dstate))
2862 (labeled (when (and labels (= pc (caar labels)))
2863 (pop labels)
2865 (inst (sb-disassem:disassemble-instruction dstate))
2866 (nbytes (- (sb-disassem:dstate-cur-addr dstate) pc)))
2867 (result (cons (make-range labeled pc nbytes) inst)))
2868 (when (>= (sb-disassem:dstate-cur-offs dstate) (sb-disassem:seg-length seg))
2869 (return)))))))
2871 ;; The extra copy of ASM routines, particularly C-calling trampolines, that now reside in text
2872 ;; space have to be modified to correctly reference their C functions. They assume that static
2873 ;; space is near alien-linkage space, and so they use this form:
2874 ;; xxxx: E8A1F0EFFF CALL #x50000060 ; alloc
2875 ;; which unforuntately means that after relocating to text space, that instruction refers
2876 ;; to random garbage, and more unfortunately there is no room to squeeze in an instruction
2877 ;; that encodes to 7 bytes.
2878 ;; So we have to create an extra jump "somewhere" that indirects through the linkage table
2879 ;; but is callable from the text-space code.
2880 ;;; I don't feel like programmatically scanning the asm code to determine these.
2881 ;;; Hardcoded is good enough (until it isn't)
2882 (defparameter *c-linkage-redirects*
2883 (mapcar (lambda (x) (cons x (foreign-symbol-sap x)))
2884 '("switch_to_arena"
2885 "alloc"
2886 "alloc_list"
2887 "listify_rest_arg"
2888 "make_list"
2889 "alloc_funinstance"
2890 "allocation_tracker_counted"
2891 "allocation_tracker_sized")))
2893 (defun get-text-space-asm-code-replica (space spacemap)
2894 (let* ((physaddr (sap-int (space-physaddr space spacemap)))
2895 (offsets-vector (%make-lisp-obj (logior physaddr other-pointer-lowtag)))
2896 (offset (aref offsets-vector 0)))
2897 (values (+ (space-addr space) offset)
2898 (%make-lisp-obj (+ physaddr offset other-pointer-lowtag)))))
2900 (defun get-static-space-asm-code (space spacemap)
2901 (let ((found
2902 (block nil
2903 (sb-editcore::walk-target-space
2904 (lambda (x widetag size)
2905 (declare (ignore widetag size))
2906 (when (code-component-p x)
2907 (return x)))
2908 static-core-space-id spacemap))))
2909 (values (+ (- (get-lisp-obj-address found)
2910 (sap-int (space-physaddr space spacemap))
2911 other-pointer-lowtag)
2912 (space-addr space))
2913 found)))
2915 (defun patch-assembly-codeblob (spacemap)
2916 (binding* ((static-space (get-space static-core-space-id spacemap))
2917 (text-space (get-space immobile-text-core-space-id spacemap))
2918 ((new-code-vaddr new-code) (get-text-space-asm-code-replica text-space spacemap))
2919 ((old-code-vaddr old-code) (get-static-space-asm-code static-space spacemap))
2920 (code-offsets-vector
2921 (%make-lisp-obj (logior (sap-int (space-physaddr text-space spacemap))
2922 other-pointer-lowtag)))
2923 (header-bytes (ash (code-header-words old-code) word-shift))
2924 (old-insts-vaddr (+ old-code-vaddr header-bytes))
2925 (new-insts-vaddr (+ new-code-vaddr header-bytes))
2926 (items *c-linkage-redirects*)
2927 (inst-buffer (make-array 8 :element-type '(unsigned-byte 8)))
2928 (code-offsets-vector-size (primitive-object-size code-offsets-vector))
2929 (c-linkage-vector-vaddr (+ (space-addr text-space) code-offsets-vector-size))
2930 (c-linkage-vector ; physical
2931 (%make-lisp-obj (logior (sap-int (sap+ (space-physaddr text-space spacemap)
2932 code-offsets-vector-size))
2933 other-pointer-lowtag))))
2934 (aver (<= (length items) (length c-linkage-vector)))
2935 (with-pinned-objects (inst-buffer)
2936 (do ((sap (vector-sap inst-buffer))
2937 (item-index 0 (1+ item-index))
2938 (items items (cdr items)))
2939 ((null items))
2940 ;; Each new quasi-linkage-table entry takes 8 bytes to encode.
2941 ;; The JMP is 7 bytes, followed by a nop.
2942 ;; FF2425nnnnnnnn = JMP [ea]
2943 (setf (sap-ref-8 sap 0) #xFF
2944 (sap-ref-8 sap 1) #x24
2945 (sap-ref-8 sap 2) #x25
2946 (sap-ref-32 sap 3) (sap-int (sap+ (cdar items) 8))
2947 (sap-ref-8 sap 7) #x90) ; nop
2948 (setf (aref c-linkage-vector item-index) (%vector-raw-bits inst-buffer 0))))
2949 ;; Produce a model of the instructions. It doesn't really matter whether we scan
2950 ;; OLD-CODE or NEW-CODE since we're supplying the proper virtual address either way.
2951 (let ((insts (get-code-instruction-model old-code old-code-vaddr spacemap)))
2952 ;; (dovector (inst insts) (write inst :base 16 :pretty nil :escape nil) (terpri))
2953 (dovector (inst insts)
2954 ;; Look for any call to a linkage table entry.
2955 (when (eq (second inst) 'call)
2956 (let ((operand (third inst)))
2957 (when (and (integerp operand)
2958 (>= operand alien-linkage-table-space-start)
2959 (< operand (+ alien-linkage-table-space-start
2960 alien-linkage-table-space-size)))
2961 (let* ((index (position (int-sap operand) *c-linkage-redirects*
2962 :key #'cdr :test #'sap=))
2963 (branch-target (+ c-linkage-vector-vaddr
2964 (ash vector-data-offset word-shift)
2965 ;; each new linkage entry takes up exactly 1 word
2966 (* index n-word-bytes)))
2967 (old-next-ip-abs (int-sap (inst-end inst))) ; virtual
2968 (next-ip-rel (sap- old-next-ip-abs (int-sap old-insts-vaddr)))
2969 (new-next-ip (+ new-insts-vaddr next-ip-rel)))
2970 (setf (signed-sap-ref-32 (code-instructions new-code) (- next-ip-rel 4))
2971 (- branch-target new-next-ip))))))))))
2973 (defun get-mov-src-constant (code code-vaddr inst ea spacemap)
2974 (let* ((next-ip (inst-end inst))
2975 ;; this is a virtual adrress
2976 (abs-addr (+ next-ip (machine-ea-disp ea))))
2977 (when (and (not (logtest abs-addr #b111)) ; lispword-aligned
2978 (>= abs-addr code-vaddr)
2979 (< abs-addr (+ code-vaddr (ash (code-header-words code) word-shift))))
2980 (let ((paddr (translate-ptr abs-addr spacemap)))
2981 (translate (sap-ref-lispobj (int-sap paddr) 0) spacemap)))))
2983 #+x86-64
2984 (defun locate-const-move-to-rax (code vaddr insts start spacemap fdefns)
2985 ;; Look for a MOV to RAX from a code header constant
2986 ;; Technically this should fail if it finds _any_ instruction
2987 ;; that affects RAX before it finds the one we're looking for.
2988 (loop for i downfrom start to 1
2989 do (let ((inst (svref insts i)))
2990 (cond ((range-labeled (first inst)) (return)) ; labeled statement - fail
2991 ((and (eq (second inst) 'mov)
2992 (eq (third inst) (load-time-value (get-gpr :qword 0)))
2993 (typep (fourth inst) '(cons machine-ea (eql :qword))))
2994 (let ((ea (car (fourth inst))))
2995 (when (and (eq (machine-ea-base ea) :rip)
2996 (minusp (machine-ea-disp ea)))
2997 (return
2998 (let ((fdefn (get-mov-src-constant code vaddr inst ea spacemap)))
2999 (when (and (fdefn-p fdefn) (memq fdefn fdefns))
3000 (sb-vm::set-fdefn-has-static-callers fdefn 1)
3001 (values i (fdefn-fun fdefn))))))))))))
3003 #+x86-64
3004 (defun replacement-opcode (inst)
3005 (ecase (second inst) ; opcode
3006 (jmp #xE9)
3007 (call #xE8)))
3009 #+x86-64
3010 (defun patch-fdefn-call (code vaddr insts inst i spacemap fdefns &optional print)
3011 ;; START is the index into INSTS of the instructon that loads RAX
3012 (multiple-value-bind (start callee)
3013 (locate-const-move-to-rax code vaddr insts (1- i) spacemap fdefns)
3014 (when (and start
3015 (let ((text-space (get-space immobile-text-core-space-id spacemap)))
3016 (< (space-addr text-space)
3017 ;; CALLEE is an untranslated address
3018 (get-lisp-obj-address callee)
3019 (space-end text-space))))
3020 (when print
3021 (let ((addr (inst-vaddr (svref insts start))) ; starting address
3022 (end (inst-end inst)))
3023 (sb-c:dis (translate-ptr addr spacemap) (- end addr))))
3024 ;; Several instructions have to be replaced to make room for the new CALL
3025 ;; which is a longer than the old, but it's ok since a MOV is eliminated.
3026 (let* ((sum-lengths
3027 (loop for j from start to i sum (inst-length (svref insts j))))
3028 (new-bytes (make-array sum-lengths :element-type '(unsigned-byte 8)))
3029 (new-index 0))
3030 (loop for j from (1+ start) below i
3031 do (let* ((old-inst (svref insts j))
3032 (ip (inst-vaddr old-inst))
3033 (physaddr (int-sap (translate-ptr ip spacemap)))
3034 (nbytes (inst-length old-inst)))
3035 (dotimes (k nbytes)
3036 (setf (aref new-bytes new-index) (sap-ref-8 physaddr k))
3037 (incf new-index))))
3038 ;; insert padding given that the new call takes 5 bytes to encode
3039 (let* ((nop-len (- sum-lengths (+ new-index 5)))
3040 (nop-pattern (ecase nop-len
3041 (5 '(#x0f #x1f #x44 #x00 #x00)))))
3042 (dolist (byte nop-pattern)
3043 (setf (aref new-bytes new-index) byte)
3044 (incf new-index)))
3045 ;; change the call
3046 (let* ((branch-target
3047 (simple-fun-entry-sap (translate callee spacemap)))
3048 (next-pc (int-sap (inst-end inst)))
3049 (rel32 (sap- branch-target next-pc)))
3050 (setf (aref new-bytes new-index) (replacement-opcode inst))
3051 (with-pinned-objects (new-bytes)
3052 (setf (signed-sap-ref-32 (vector-sap new-bytes) (1+ new-index)) rel32)
3053 (when print
3054 (format t "~&Replaced by:~%")
3055 (let ((s (sb-disassem::make-vector-segment new-bytes 0 sum-lengths
3056 :virtual-location vaddr)))
3057 (sb-disassem::disassemble-segment
3058 s *standard-output* (sb-disassem:make-dstate))))
3059 (let* ((vaddr (inst-vaddr (svref insts start)))
3060 (paddr (translate-ptr vaddr spacemap)))
3061 (%byte-blt new-bytes 0 (int-sap paddr) 0 sum-lengths))))))))
3063 (defun find-static-call-target-in-text-space (inst addr spacemap static-asm-code text-asm-code)
3064 (declare (ignorable inst))
3065 ;; this will (for better or for worse) find static fdefns as well as asm routines,
3066 ;; so we have to figure out which it is.
3067 (let ((asm-codeblob-size
3068 (primitive-object-size
3069 (%make-lisp-obj (logior (translate-ptr static-asm-code spacemap)
3070 other-pointer-lowtag)))))
3071 (cond ((<= static-asm-code addr (+ static-asm-code (1- asm-codeblob-size)))
3072 (let* ((offset-from-base (- addr static-asm-code))
3073 (new-vaddr (+ text-asm-code offset-from-base)))
3074 (sap-ref-word (int-sap (translate-ptr new-vaddr spacemap)) 0)))
3076 (let* ((fdefn-vaddr (- addr (ash fdefn-raw-addr-slot word-shift)))
3077 (fdefn-paddr (int-sap (translate-ptr fdefn-vaddr spacemap))))
3078 ;; Confirm it looks like a static fdefn
3079 (aver (= (logand (sap-ref-word fdefn-paddr 0) widetag-mask) fdefn-widetag))
3080 (let ((entrypoint (sap-ref-word fdefn-paddr (ash fdefn-raw-addr-slot word-shift))))
3081 ;; Confirm there is a simple-fun header where expected
3082 (let ((header
3083 (sap-ref-word (int-sap (translate-ptr entrypoint spacemap))
3084 (- (ash simple-fun-insts-offset word-shift)))))
3085 (aver (= (logand header widetag-mask) simple-fun-widetag))
3086 ;; Return the entrypoint which already point to text space
3087 entrypoint)))))))
3089 ;; Patch either a ca through a static-space fdefn or an asm routine indirect jump.
3090 (defun patch-static-space-call (inst spacemap static-asm-code text-asm-code)
3091 (let* ((new-bytes (make-array 7 :element-type '(unsigned-byte 8)))
3092 (addr (machine-ea-disp (car (third inst))))
3093 (branch-target
3094 (find-static-call-target-in-text-space
3095 inst addr spacemap static-asm-code text-asm-code)))
3096 (when branch-target
3097 (setf (aref new-bytes 0) #x66 (aref new-bytes 1) #x90) ; 2-byte NOP
3098 (setf (aref new-bytes 2) (replacement-opcode inst))
3099 (let ((next-ip (inst-end inst)))
3100 (with-pinned-objects (new-bytes)
3101 (setf (signed-sap-ref-32 (vector-sap new-bytes) 3) (- branch-target next-ip)))
3102 (%byte-blt new-bytes 0 (int-sap (translate-ptr (inst-vaddr inst) spacemap)) 0 7)))))
3104 ;;; Avoid splicing out any fdefn not uniquely identified by its function binding.
3105 (defun get-patchable-fdefns (code spacemap &aux alist result)
3106 (multiple-value-bind (start count) (code-header-fdefn-range code)
3107 (loop for i from start repeat count
3108 do (let* ((fdefn (translate (code-header-ref code i) spacemap))
3109 (fun (translate (fdefn-fun fdefn) spacemap)))
3110 (when (simple-fun-p fun)
3111 ;; It is dangerous to create heap cons cells holding pointers to
3112 ;; objects at their logical address in the target core.
3113 ;; TBH, all target objects should be wrapped in a DESCRIPTOR
3114 ;; structure defined at the top of this file.
3115 (push (cons fun fdefn) alist)))))
3116 (dolist (cell alist result)
3117 (destructuring-bind (fun . fdefn) cell
3118 (unless (find-if (lambda (other)
3119 (and (eq (car other) fun) (neq (cdr other) fdefn)))
3120 alist)
3121 (push fdefn result)))))
3123 ;;; Since dynamic-space code is pretty much relocatable,
3124 ;;; disassembling it at a random physical address is fine.
3125 #+x86-64
3126 (defun patch-lisp-codeblob
3127 (code vaddr spacemap static-asm-code text-asm-code
3128 &aux (insts (get-code-instruction-model code vaddr spacemap))
3129 (fdefns (get-patchable-fdefns code spacemap)))
3130 (declare (simple-vector insts))
3131 (do ((i 0 (1+ i)))
3132 ((>= i (length insts)))
3133 (let* ((inst (svref insts i))
3134 (this-op (second inst)))
3135 (when (member this-op '(call jmp))
3136 ;; is it potentially a call via an fdefn or an asm code indirection?
3137 (let* ((operand (third inst))
3138 (ea (if (listp operand) (car operand))))
3139 (when (and (typep operand '(cons machine-ea (eql :qword)))
3140 (or (and (eql (machine-ea-base ea) 0) ; [RAX-9]
3141 (eql (machine-ea-disp ea) 9)
3142 (not (machine-ea-index ea)))
3143 (and (not (machine-ea-base ea))
3144 (not (machine-ea-index ea))
3145 (<= static-space-start (machine-ea-disp ea)
3146 (sap-int *static-space-free-pointer*)))))
3147 (if (eql (machine-ea-base ea) 0) ; based on RAX
3148 (patch-fdefn-call code vaddr insts inst i spacemap fdefns)
3149 (patch-static-space-call inst spacemap
3150 static-asm-code text-asm-code))))))))
3152 (defun persist-to-file (spacemap core-offset stream)
3153 (aver (zerop core-offset))
3154 (dolist (space-id `(,static-core-space-id
3155 ,immobile-text-core-space-id
3156 ,dynamic-core-space-id))
3157 (let ((space (get-space space-id spacemap)))
3158 (file-position stream (* (1+ (space-data-page space)) +backend-page-bytes+))
3159 (sb-unix:unix-write (sb-impl::fd-stream-fd stream)
3160 (space-physaddr space spacemap)
3162 (align-up (* (space-nwords space) n-word-bytes)
3163 +backend-page-bytes+)))))
3165 (defun redirect-text-space-calls (pathname)
3166 (with-open-file (stream pathname :element-type '(unsigned-byte 8)
3167 :direction :io :if-exists :overwrite)
3168 (binding* ((core-header (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
3169 (core-offset (read-core-header stream core-header t))
3170 ((npages space-list card-mask-nbits core-dir-start initfun)
3171 (parse-core-header stream core-header)))
3172 (declare (ignore card-mask-nbits core-dir-start initfun))
3173 (with-mapped-core (sap core-offset npages stream)
3174 (let ((spacemap (cons sap (sort (copy-list space-list) #'> :key #'space-addr))))
3175 (patch-assembly-codeblob spacemap)
3176 (let* ((text-space (get-space immobile-text-core-space-id spacemap))
3177 (offsets-vector (%make-lisp-obj (logior (sap-int (space-physaddr text-space spacemap))
3178 lowtag-mask)))
3179 (static-space-asm-code
3180 (get-static-space-asm-code (get-space static-core-space-id spacemap) spacemap))
3181 (text-space-asm-code
3182 (get-text-space-asm-code-replica text-space spacemap)))
3183 (assert text-space)
3184 ;; offset 0 is the offset of the ASM routine codeblob which was already processed.
3185 (loop for j from 1 below (length offsets-vector)
3186 do (let ((vaddr (+ (space-addr text-space) (aref offsets-vector j)))
3187 (physobj (%make-lisp-obj
3188 (logior (sap-int (sap+ (space-physaddr text-space spacemap)
3189 (aref offsets-vector j)))
3190 other-pointer-lowtag))))
3191 ;; Assert that there are no fixups other than GC card table mask fixups
3192 (let ((fixups (sb-vm::%code-fixups physobj)))
3193 (unless (fixnump fixups)
3194 (setq fixups (translate fixups spacemap))
3195 (aver (typep fixups 'bignum)))
3196 (multiple-value-bind (list1 list2 list3)
3197 (sb-c::unpack-code-fixup-locs fixups)
3198 (declare (ignore list3))
3199 (aver (null list1))
3200 (aver (null list2))))
3201 (patch-lisp-codeblob physobj vaddr spacemap
3202 static-space-asm-code text-space-asm-code))))
3203 (persist-to-file spacemap core-offset stream))))))
3204 ;;;;
3206 ;; If loaded as a script, do this
3207 (eval-when (:execute)
3208 (let ((args (cdr *posix-argv*)))
3209 (when args
3210 (let ((*print-pretty* nil))
3211 (format t "Args: ~S~%" args)
3212 (cl-user::elfinate args)))))