Remove unnecessary constraint on space placement
[sbcl.git] / tools-for-build / editcore.lisp
blob3aa3ac7ef9eaa5e5b9e40093023ab6126d0e85d6
1 ;;;; Utilities for separating an SBCL core file into two pieces:
2 ;;;; 1. An assembly language file containing the immobile code space
3 ;;;; 2. A '.o' file wrapping a core file containing everything else
4 ;;;; We operate as a "tool" that processes external files rather than
5 ;;;; operating on the in-process data, but it is also possible to dump
6 ;;;; the current image by creating a straight-through translation
7 ;;;; of internal/external code addresses.
9 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; more information.
11 ;;;;
12 ;;;; This software is derived from the CMU CL system, which was
13 ;;;; written at Carnegie Mellon University and released into the
14 ;;;; public domain. The software is in the public domain and is
15 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
16 ;;;; files for more information.
18 (eval-when (:compile-toplevel :load-toplevel :execute)
19 (require :sb-posix)) ; for mmap
21 (defpackage "SB-EDITCORE"
22 (:use "CL" "SB-VM" "SB-INT" "SB-EXT" "SB-KERNEL" "SB-SYS"
23 "SB-ALIEN")
24 (:import-from "SB-ALIEN-INTERNALS"
25 #:alien-type-bits #:parse-alien-type
26 #:alien-value-sap #:alien-value-type)
27 (:import-from "SB-C" #:+backend-page-bytes+)
28 (:import-from "SB-VM" #:map-objects-in-range #:reconstitute-object
29 #:%closure-callee))
31 (in-package "SB-EDITCORE")
33 (declaim (muffle-conditions compiler-note))
35 (eval-when (:execute)
36 (setq sb-ext:*evaluator-mode* :compile))
38 (defconstant core-magic
39 (logior (ash (char-code #\S) 24)
40 (ash (char-code #\B) 16)
41 (ash (char-code #\C) 8)
42 (char-code #\L)))
44 (defconstant build-id-core-entry-type-code 3860)
45 (defconstant new-directory-core-entry-type-code 3861)
46 (defconstant initial-fun-core-entry-type-code 3863)
47 (defconstant page-table-core-entry-type-code 3880)
48 (defconstant end-core-entry-type-code 3840)
50 (defconstant static-core-space-id 2)
51 (defconstant immobile-fixedobj-core-space-id 4)
52 (defconstant immobile-varyobj-core-space-id 5)
54 (defglobal +noexec-stack-note+ ".section .note.GNU-stack, \"\", @progbits")
56 ;;; Given ADDR which is an address in the target core, return the address at which
57 ;;; ADDR is currently mapped while performing the split.
58 (defun translate-ptr (addr spaces &aux (space (assoc addr (cdr spaces) :test #'>=)))
59 (+ (sap-int (car spaces))
60 (* (cadr space) +backend-page-bytes+)
61 (- addr (car space))))
63 ;;; Use extreme care: It works to use host accessors on the target core,
64 ;;; but we must avoid type checks on instances because LAYOUTs need translation.
65 ;;; Printing boxed objects from the target core will almost always crash.
66 (defun translate (obj spaces)
67 (%make-lisp-obj (translate-ptr (get-lisp-obj-address obj) spaces)))
69 (defstruct (core-sym (:copier nil) (:predicate nil)
70 (:constructor make-core-sym (package name external)))
71 (package nil)
72 (name nil :read-only t)
73 (external nil :read-only t))
75 (defun c-name (lispname pp-state)
76 (when (and (symbolp lispname)
77 (eq (symbol-package lispname) *cl-package*))
78 (return-from c-name (concatenate 'string "cl:" (string-downcase lispname))))
79 ;; Get rid of junk from LAMBDAs
80 (setq lispname
81 (named-let recurse ((x lispname))
82 (cond ((typep x '(cons (eql lambda)))
83 (let ((args (second x)))
84 `(lambda ,(if args sb-c::*debug-name-sharp* "()")
85 ,@(recurse (cddr x)))))
86 ((eq x :in) "in")
87 ((consp x)
88 (recons x (recurse (car x)) (recurse (cdr x))))
89 (t x))))
91 ;; Shorten obnoxiously long printed representations of methods
92 ;; by changing FAST-METHOD to METHOD (because who cares?)
93 ;; and shorten
94 ;; (method my-long-package-name:my-method-name (my-long-package-name:type-name))
95 ;; to
96 ;; (method my-method-name (type-name))
97 ;; I suspect that can use DWARF info to provide even more description,
98 ;; though I also suspect it's relatively unambiguous anyway
99 ;; especially given that file information is available separately.
100 (flet ((unpackageize (thing)
101 (when (typep thing 'core-sym)
102 (setf (core-sym-package thing) nil))
103 thing))
104 (when (typep lispname '(cons (eql sb-pcl::fast-method)))
105 (setq lispname `(method ,@(cdr lispname)))
106 (setf (second lispname) (unpackageize (second lispname)))
107 (dolist (qual (car (last lispname)))
108 (unpackageize qual))))
110 ;; Perform backslash escaping on the exploded string
111 ;; Strings were stringified without surrounding quotes,
112 ;; but there might be quotes embedded anywhere, so escape them,
113 ;; and also remove newlines and non-ASCII.
114 (let ((characters
115 (mapcan (lambda (char)
116 (cond ((not (base-char-p char)) (list #\?))
117 ((member char '(#\\ #\")) (list #\\ char))
118 ((eql char #\newline) (list #\_))
119 (t (list char))))
120 (coerce (if (and (stringp lispname)
121 ;; L denotes a symbol which can not be global on macOS.
122 (char= (char lispname 0) #\L))
123 (concatenate 'string "_" lispname)
124 (write-to-string lispname
125 :pretty t :pprint-dispatch (cdr pp-state)
126 ;; FIXME: should be :level 1, however see
127 ;; https://bugs.launchpad.net/sbcl/+bug/1733222
128 :escape t :level 2 :length 5
129 :case :downcase :gensym nil
130 :right-margin 10000))
131 'list))))
132 (let* ((string (coerce characters 'string))
133 (occurs (incf (gethash string (car pp-state) 0))))
134 (if (> occurs 1)
135 (concatenate 'string string "_" (write-to-string occurs))
136 string))))
138 (defmethod print-object ((sym core-sym) stream)
139 (format stream "~(~:[~*~;~:*~A~:[:~;~]:~]~A~)"
140 (core-sym-package sym)
141 (core-sym-external sym)
142 (core-sym-name sym)))
144 (defun fun-name-from-core (fun spaces core-nil packages
145 &aux (name (%simple-fun-name fun)))
146 (named-let recurse ((depth 0) (x name))
147 (unless (= (logand (get-lisp-obj-address x) 3) 3)
148 (return-from recurse x)) ; immediate object
149 (when (eq x core-nil)
150 (return-from recurse nil))
151 (setq x (translate x spaces))
152 (ecase (lowtag-of x)
153 (#.list-pointer-lowtag
154 (cons (recurse (1+ depth) (car x))
155 (recurse (1+ depth) (cdr x))))
156 ((#.instance-pointer-lowtag #.fun-pointer-lowtag) "?")
157 (#.other-pointer-lowtag
158 (cond
159 ((symbolp x)
160 (let ((name (translate (symbol-name x) spaces)))
161 (if (eq (symbol-package x) core-nil) ; uninterned
162 (string-downcase name)
163 (let* ((package (truly-the package
164 (translate (symbol-package x) spaces)))
165 (package-name (translate (sb-impl::package-%name package)
166 spaces))
167 (compute-externals
168 (not (or (string= package-name "KEYWORD")
169 (string= package-name "COMMON-LISP"))))
170 (externals (if compute-externals
171 (gethash package-name packages)
172 t)))
173 (unless externals
174 (dovector (x (translate
175 (sb-impl::package-hashtable-cells
176 (truly-the sb-impl::package-hashtable
177 (translate (package-external-symbols package)
178 spaces)))
179 spaces))
180 (unless (fixnump x)
181 (push (if (eq x core-nil) ; random packages can export NIL. wow.
182 "NIL"
183 (translate (symbol-name (translate x spaces)) spaces))
184 externals)))
185 (setf externals (coerce externals 'vector)
186 (gethash package-name packages) externals))
187 ;; The name-cleaning code wants to compare against symbols
188 ;; in CL, PCL, and KEYWORD, so use real symbols for those.
189 ;; Other than that, we avoid finding host symbols
190 ;; because the externalness could be wrong and misleading.
191 ;; It's a very subtle point, but best to get it right.
192 (cond ((string= package-name "COMMON-LISP")
193 (find-symbol name *cl-package*))
194 ((and (string= package-name "KEYWORD")
195 (find-symbol name package-name))) ; if existing keyword, use it
196 ((string= package-name "SB-PCL")
197 (or (find-symbol name "SB-PCL")
198 (error "FIND-SYMBOL failed? ~S ~S" name package-name)))
200 (make-core-sym (if (string= package-name "KEYWORD") nil package-name)
201 name
202 (if compute-externals
203 (find name externals :test 'string=)
204 t))))))))
205 ((stringp x) x)
206 (t "?"))))))
208 (defstruct (core-state
209 (:conc-name "CS-")
210 (:predicate nil)
211 (:copier nil)
212 (:constructor make-core-state
213 (fixedobj-space-start fixedobj-space-end
214 &aux (inst-space (sb-disassem::get-inst-space))
215 (call-inst (sb-disassem::find-inst #b11101000 inst-space))
216 (jmp-inst (sb-disassem::find-inst #b11101001 inst-space))
217 (pop-inst (sb-disassem::find-inst #x5d inst-space)))))
218 (fixedobj-space-start 0 :type fixnum :read-only t)
219 (fixedobj-space-end 0 :type fixnum :read-only t)
220 (dstate (sb-disassem::make-dstate nil) :read-only t)
221 (seg (sb-disassem::%make-segment
222 :sap-maker (lambda () (error "Bad sap maker"))
223 :virtual-location 0) :read-only t)
224 (call-inst nil :read-only t)
225 (jmp-inst nil :read-only t)
226 (pop-inst nil :read-only t))
228 ;;; Emit .byte or .quad directives dumping memory from SAP for COUNT bytes
229 ;;; to STREAM. SIZE specifies which direcive to emit.
230 ;;; EXCEPTIONS specify offsets at which a specific string should be
231 ;;; written to the file in lieu of memory contents, useful for emitting
232 ;;; expressions involving the assembler '.' symbol (the current PC).
233 (defun emit-asm-directives (size sap count stream &optional exceptions)
234 (declare (optimize speed))
235 (declare (stream stream))
236 (let ((*print-base* 16)
237 (string-buffer (make-array 18 :element-type 'base-char))
238 (fmt #.(coerce "0x%lx" 'base-string))
239 (per-line 0))
240 (declare ((integer 0 32) per-line)
241 (fixnum count))
242 string-buffer fmt
243 (ecase size
244 (:qword
245 (format stream " .quad")
246 (dotimes (i count)
247 (declare ((unsigned-byte 20) i))
248 (declare (simple-vector exceptions))
249 (write-char (if (> per-line 0) #\, #\space) stream)
250 (acond ((and (< i (length exceptions)) (aref exceptions i))
251 (write-string it stream))
253 #+nil
254 (let ((len
255 ;; output-reasonable-integer-in-base is so slow comparated
256 ;; to printf() that the second-most amount of time spent
257 ;; writing the asm file occurs in that function.
258 ;; Unbelievable that we can't do better than that.
259 (with-pinned-objects (string-buffer fmt)
260 (alien-funcall
261 (extern-alien "snprintf"
262 (function int system-area-pointer unsigned system-area-pointer unsigned))
263 (vector-sap string-buffer)
264 (length string-buffer)
265 (vector-sap fmt)
266 (sap-ref-word sap (* i n-word-bytes))))))
267 (write-string string-buffer stream :end len))
268 (write-string "0x" stream)
269 (write (sap-ref-word sap (* i n-word-bytes)) :stream stream)))
270 (when (and (= (incf per-line) 16) (< (1+ i) count))
271 (format stream "~% .quad")
272 (setq per-line 0))))
273 (:byte
274 (aver (not exceptions))
275 (format stream " .byte")
276 (dotimes (i count)
277 (write-char (if (> per-line 0) #\, #\space) stream)
278 (write-string "0x" stream)
279 (write (sap-ref-8 sap i) :stream stream)
280 (when (and (= (incf per-line) 32) (< (1+ i) count))
281 (format stream "~% .byte")
282 (setq per-line 0))))))
283 (terpri stream))
285 (defun emit-lisp-asm-routines (spaces code-component output emit-sizes vector count)
286 (emit-asm-directives :qword
287 (sap+ (code-instructions code-component)
288 (- (* sb-vm:code-constants-offset sb-vm:n-word-bytes)))
289 sb-vm:code-constants-offset
290 output #())
291 (let ((list (loop for i from 2 by 2 repeat count
292 collect
293 (let* ((location (translate (svref vector (1+ i)) spaces))
294 (offset (car location))
295 (nbytes (- (1+ (cdr location)) offset))
296 (name (translate
297 (symbol-name (translate (svref vector i) spaces))
298 spaces)))
299 (list* offset name nbytes)))))
300 (loop for (offset name . nbytes) in (sort list #'< :key #'car)
301 do (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
302 (format nil "~(\"~a\"~)" name) (if emit-sizes nbytes))
303 (emit-asm-directives
304 :qword
305 (sap+ (code-instructions code-component) offset)
306 (ceiling nbytes sb-vm:n-word-bytes)
307 output #()))))
309 ;;; Disassemble the function pointed to by SAP for LENGTH bytes, returning
310 ;;; all instructions that should be emitted using assembly language
311 ;;; instead of assembler pseudo-ops. This includes two sets of instructions:
312 ;;; - function prologue instructions that setup the call frame
313 ;;; - jmp/call instructions that transfer control to the fixedoj space
314 ;;; delimited by bounds in STATE.
315 ;;; At execution time the function will have virtual address LOAD-ADDR.
316 (defun list-annotated-instructions (sap length state load-addr emit-cfi)
317 (let ((dstate (cs-dstate state))
318 (seg (cs-seg state))
319 (call-inst (cs-call-inst state))
320 (jmp-inst (cs-jmp-inst state))
321 (pop-inst (cs-pop-inst state))
322 (list))
323 (setf (sb-disassem::seg-virtual-location seg) load-addr
324 (sb-disassem::seg-length seg) length
325 (sb-disassem::seg-sap-maker seg) (lambda () sap))
326 ;; KLUDGE: "8f 45 08" is the standard prologue
327 (when (and emit-cfi (= (logand (sap-ref-32 sap 0) #xFFFFFF) #x08458f))
328 (push (list* 0 3 "pop" "8(%rbp)") list))
329 (sb-disassem::map-segment-instructions
330 (lambda (dchunk inst)
331 (cond
332 ((or (eq inst jmp-inst) (eq inst call-inst))
333 (let ((target-addr (+ (sb-x86-64-asm::near-jump-displacement dchunk dstate)
334 (sb-disassem::dstate-next-addr dstate))))
335 (when (<= (cs-fixedobj-space-start state)
336 target-addr
337 (cs-fixedobj-space-end state))
338 (push (list* (sb-disassem::dstate-cur-offs dstate)
339 5 ; length
340 (if (eq inst call-inst) "call" "jmp")
341 target-addr)
342 list))))
343 ((and (eq inst pop-inst) (eq (logand dchunk #xFF) #x5D))
344 (push (list* (sb-disassem::dstate-cur-offs dstate) 1 "pop" "%rbp") list))))
346 dstate
347 nil)
348 (nreverse list)))
350 ;;; Using assembler directives and/or real mnemonics, dump COUNT bytes
351 ;;; of memory at PADDR (physical addr) to STREAM.
352 ;;; The function's address as per the core file is VADDR.
353 ;;; (Its eventual address is indeterminate)
354 ;;; If EMIT-CFI is true, then also emit cfi directives.
356 ;;; Notice that we can use one fewer cfi directive than usual because
357 ;;; Lisp always carries a frame pointer as set up by the caller.
359 ;;; C convention
360 ;;; ============
361 ;;; pushq %rbp
362 ;;; .cfi_def_cfa_offset 16 # CFA offset from default register (rsp) is +16
363 ;;; .cfi_offset 6, -16 # old rbp was saved in -16(CFA)
364 ;;; movq %rsp, %rbp
365 ;;; .cfi_def_cfa_register 6 # use rbp as CFA register
367 ;;; Lisp convention
368 ;;; ===============
369 ;;; popq 8(%rbp) # place saved %rip in its ABI-compatible stack slot
370 ;;; # making RSP = RBP after the pop, and RBP = CFA - 16
371 ;;; .cfi_def_cfa 6, 16
372 ;;; .cfi_offset 6, -16
374 ;;; Of course there is a flip-side to this: unwinders think that the new frame
375 ;;; is already begun in the caller. Interruption between these two instructions:
376 ;;; MOV RBP, RSP / CALL #xzzzzz
377 ;;; will show the backtrace as if two invocations of the caller are on stack.
378 ;;; This is tricky to fix because while we can relativize the CFA to the
379 ;;; known frame size, we can't do that based only on a disassembly.
381 (defun emit-lisp-function (paddr vaddr count stream emit-cfi core-state)
382 (when emit-cfi
383 (format stream " .cfi_startproc~%"))
384 ;; Any byte offset that appears as a key in the INSTRUCTIONS causes the indicated
385 ;; bytes to be written as an assembly language instruction rather than opaquely,
386 ;; thereby affecting the ELF data (cfi or relocs) produced.
387 (let ((instructions
388 (list-annotated-instructions (int-sap paddr) count core-state vaddr emit-cfi))
389 (ptr paddr))
390 (symbol-macrolet ((cur-offset (- ptr paddr)))
391 (loop
392 (let ((until (if instructions (caar instructions) count)))
393 ;; if we're not aligned, then write some number of bytes
394 ;; to cause alignment. But do not write past the next offset
395 ;; that needs to be written as an instruction.
396 (when (logtest ptr #x7) ; unaligned
397 (let ((n (min (- (nth-value 1 (ceiling ptr 8)))
398 (- until cur-offset))))
399 (aver (<= 0 n 7))
400 (emit-asm-directives :byte (int-sap ptr) n stream)
401 (incf ptr n)))
402 ;; Now we're either aligned to a multiple of 8, or the current
403 ;; offset needs to be written as a textual instruction.
404 (let ((n (- until cur-offset)))
405 (aver (>= n 0))
406 (multiple-value-bind (qwords remainder) (floor n 8)
407 (when (plusp qwords)
408 (emit-asm-directives :qword (int-sap ptr) qwords stream #())
409 (incf ptr (* qwords 8)))
410 (when (plusp remainder)
411 (emit-asm-directives :byte (int-sap ptr) remainder stream)
412 (incf ptr remainder))))
413 ;; If the current offset is COUNT, we're done.
414 (when (= cur-offset count) (return))
415 (aver (= cur-offset until))
416 (destructuring-bind (length opcode . operand) (cdr (pop instructions))
417 (when (cond ((integerp operand) ; jmp or call
418 (format stream " ~A 0x~X~%" opcode operand))
419 ((string= opcode "pop")
420 (format stream " ~A ~A~%" opcode operand)
421 (cond ((string= operand "8(%rbp)")
422 (format stream " .cfi_def_cfa 6, 16~% .cfi_offset 6, -16~%"))
423 ((string= operand "%rbp")
424 ;(format stream " .cfi_def_cfa 7, 8~%")
425 nil)
426 (t)))
427 (t))
428 (bug "Random annotated opcode ~S" opcode))
429 (incf ptr length))
430 (when (= cur-offset count) (return))))))
431 (when emit-cfi
432 (format stream " .cfi_endproc~%")))
434 ;;; Convert immobile CODE-SPACE to an assembly file in OUTPUT.
435 ;;; TODO: relocate fdefns and instances of standard-generic-function
436 ;;; into the space that is dumped into an ELF section.
437 (defun write-assembler-text
438 (code-space static-space fixedobj-space spaces output
439 &optional emit-sizes (emit-cfi t)
440 &aux (code-addr (car code-space)) ; target logical address, not in-memory address now
441 (scan-limit (cdr code-space))
442 (core-state (make-core-state (car fixedobj-space)
443 (cdr fixedobj-space)))
444 (total-code-size 0)
445 (pp-state (cons (make-hash-table :test 'equal)
446 ;; copy no entries for macros/special-operators (flet, etc)
447 (sb-pretty::make-pprint-dispatch-table)))
448 (packages (make-hash-table :test 'equal))
449 (core-nil (sb-kernel:%make-lisp-obj (logior (car static-space) #x17)))
450 (prev-namestring "")
451 (n-linker-relocs 0)
452 end-loc)
453 (set-pprint-dispatch 'string
454 ;; Write strings without string quotes
455 (lambda (stream string) (write-string string stream))
457 (cdr pp-state))
458 (labels ((ldsym-quote (name)
459 (concatenate 'string '(#\") name '(#\")))
460 (dumpwords (addr count stream &optional (exceptions #()) logical-addr)
461 (let ((sap (int-sap addr)))
462 (aver (sap>= sap (car spaces)))
463 ;; Make intra-code-space pointers computed at link time
464 (dotimes (i (if logical-addr count 0))
465 (unless (and (< i (length exceptions)) (svref exceptions i))
466 (let ((word (sap-ref-word sap (* i n-word-bytes))))
467 (when (and (= (logand word 3) 3) ; is a pointer
468 (<= (car code-space) word (1- (cdr code-space)))) ; to code space
469 #+nil
470 (format t "~&~(~x: ~x~)~%" (+ logical-addr (* i n-word-bytes))
471 word)
472 (incf n-linker-relocs)
473 (setf exceptions (adjust-array exceptions (max (length exceptions) (1+ i))
474 :initial-element nil)
475 (svref exceptions i)
476 (format nil "__lisp_code_start+0x~x"
477 (- word (car code-space))))))))
478 (emit-asm-directives :qword sap count stream exceptions)))
479 (make-code-obj (addr)
480 (let ((translation (translate-ptr addr spaces)))
481 (aver (= (%widetag-of (sap-ref-word (int-sap translation) 0))
482 code-header-widetag))
483 (%make-lisp-obj (logior translation other-pointer-lowtag))))
484 (calc-obj-size (code)
485 ;; No need to pin - it's not managed by GC
486 (nth-value 2
487 (reconstitute-object
488 (ash (logandc2 (get-lisp-obj-address code) lowtag-mask)
489 (- n-fixnum-tag-bits)))))
490 (%widetag-of (word)
491 (logand word widetag-mask)))
492 (format output " .text~% .file \"sbcl.core\"
493 .globl __lisp_code_start, __lisp_code_end~% .balign 4096~%__lisp_code_start:~%")
495 ;; Scan the assembly routines.
496 (if (typep sb-fasl:*assembler-routines* 'sb-kernel:code-component)
497 (let* ((code-component (make-code-obj code-addr))
498 (size (calc-obj-size code-component))
499 (hashtable
500 (truly-the hash-table
501 (translate (car (translate (sb-kernel:%code-debug-info code-component)
502 spaces))
503 spaces)))
504 (cells (translate (sb-impl::hash-table-table hashtable) spaces))
505 (count (sb-impl::hash-table-number-entries hashtable)))
506 (incf code-addr size)
507 (setf total-code-size size)
508 (emit-lisp-asm-routines spaces code-component output emit-sizes
509 cells count))
510 #+nil
511 (collect ((code-components))
512 (loop (when (>= code-addr scan-limit) (return))
513 (let ((code (make-code-obj code-addr)))
514 (when (plusp (code-n-entries code)) (return))
515 (let ((size (calc-obj-size code)))
516 (code-components (cons code-addr size))
517 (incf code-addr size))))
518 (setf total-code-size (- code-addr (car code-space)))
519 (emit-lisp-asm-routines spaces (code-components) output emit-sizes)))
521 (loop
522 (when (>= code-addr scan-limit) (return))
523 (let* ((code (make-code-obj code-addr))
524 (objsize (calc-obj-size code)))
525 (setq end-loc (+ code-addr objsize))
526 (incf total-code-size objsize)
527 (cond
528 ((< (code-header-words code) 4) ; filler object
529 ;; Shouldn't occur unless defrag was not performed
530 (format output "#x~x:~% .quad 0x~X, 0x~X~% .fill ~D~%"
531 code-addr
532 simple-array-unsigned-byte-8-widetag
533 (ash (- objsize (* 2 n-word-bytes))
534 n-fixnum-tag-bits)
535 (- objsize (* 2 n-word-bytes))))
536 ((%instancep (%code-debug-info code)) ; assume it's a COMPILED-DEBUG-INFO
537 (aver (plusp (code-n-entries code)))
538 (let* ((source
539 (sb-c::compiled-debug-info-source
540 (truly-the sb-c::compiled-debug-info
541 (translate (%code-debug-info code) spaces))))
542 (namestring
543 (sb-c::debug-source-namestring
544 (truly-the sb-c::debug-source (translate source spaces)))))
545 (setq namestring (if (eq namestring core-nil)
546 "sbcl.core"
547 (translate namestring spaces)))
548 (unless (string= namestring prev-namestring)
549 (format output " .file \"~a\"~%" namestring)
550 (setq prev-namestring namestring)))
551 (let* ((code-physaddr (logandc2 (get-lisp-obj-address code) lowtag-mask))
552 (boxed-end (+ code-physaddr
553 (ash (code-header-words code) word-shift)))
554 (first-fun (logandc2 (get-lisp-obj-address (%code-entry-point code 0))
555 lowtag-mask)))
556 (format output "#x~x:~%" code-addr)
557 (dumpwords code-physaddr (code-header-words code) output #() code-addr)
558 ;; Any words after 'boxed' preceding 'first-fun' are unboxed
559 (when (> first-fun boxed-end)
560 (dumpwords boxed-end (floor (- first-fun boxed-end) n-word-bytes)
561 output)))
562 ;; Loop over all embedded functions.
563 ;; Because simple-fun offsets are relative to the code start
564 ;; (and not in a linked list as they were in the past),
565 ;; iteratation in a "foreign" code object works just fine,
566 ;; subject to the caution about reading boxed words.
567 (dotimes (j (code-n-entries code))
568 (let* ((fun (%code-entry-point code j))
569 (fun-addr (logandc2 (get-lisp-obj-address fun) lowtag-mask))
570 (end (if (< (1+ j) (code-n-entries code))
571 (logandc2 (get-lisp-obj-address (%code-entry-point code (1+ j)))
572 lowtag-mask)
573 (+ (translate-ptr code-addr spaces) objsize)))
574 (entrypoint
575 (+ fun-addr (* simple-fun-code-offset n-word-bytes)))
576 (size (- end entrypoint))
577 (lispname (fun-name-from-core fun spaces core-nil packages))
578 (quotname (ldsym-quote (c-name lispname pp-state))))
579 ;; Globalize the C symbol only if the name is a legal function designator
580 ;; per the standard definition.
581 ;; This is a technique to try to avoid appending a uniquifying suffix
582 ;; on all the junky internal things like "(lambda # in srcfile.lisp)"
583 (format output "~:[~*~; .globl ~a~%~]~@[ .type ~:*~a, @function~%~]"
584 (typep lispname '(or symbol core-sym (cons (eql setf))))
585 quotname emit-sizes)
586 (dumpwords fun-addr
587 simple-fun-code-offset output
588 (load-time-value
589 `#(nil ,(format nil ".+~D"
590 (* (1- simple-fun-code-offset)
591 n-word-bytes)))
593 nil)
594 (format output " .set ~a, .~%~@[ .size ~:*~a, ~d~%~]"
595 quotname (if emit-sizes size))
596 ;; entrypoint is the current physical address.
597 ;; Also pass in the virtual address in the core
598 ;; (which will differ from the actual load-time address)
599 (emit-lisp-function entrypoint
600 (+ code-addr (- entrypoint
601 (logandc2 (get-lisp-obj-address code)
602 lowtag-mask)))
603 size output emit-cfi core-state)))
604 (terpri output))
606 (error "Strange code component: ~S" code)))
607 (incf code-addr objsize))))
609 ;; coreparse uses unpadded __lisp_code_end to set varyobj_free_pointer
610 (format output "~:[~; .size __lisp_code_start, 0x~x~%~]__lisp_code_end:~%"
611 emit-sizes total-code-size)
613 ;; Pad so that non-lisp code can't be colocated on a GC page.
614 ;; (Lack of Lisp object headers in C code is the issue)
615 (let ((aligned-end (logandc2 (+ end-loc 4095) 4095)))
616 (when (> aligned-end end-loc)
617 (multiple-value-bind (nwords remainder)
618 (floor (- aligned-end end-loc) n-word-bytes)
619 (aver (>= nwords 2))
620 (aver (zerop remainder))
621 (decf nwords 2)
622 (format output " .quad ~d, ~d # (simple-array fixnum (~d))~%"
623 simple-array-fixnum-widetag
624 (ash nwords n-fixnum-tag-bits)
625 nwords)
626 (when (plusp nwords)
627 (format output " .fill ~d~%" (* nwords n-word-bytes))))))
628 ; (format t "~&linker-relocs=~D~%" n-linker-relocs)
629 (values total-code-size n-linker-relocs))
631 (defun extract-required-c-symbols (fixedobj-space spaces asm-file &optional (verbose nil))
632 (flet ((remote-find-symbol (package-name symbol-name)
633 (let ((physaddr (translate-ptr (car fixedobj-space) spaces))
634 ;; Make sure we remain in-bounds for the fixedobj space when translating.
635 (limit (1+ (translate-ptr (1- (cdr fixedobj-space)) spaces))))
636 (loop
637 (when (>= physaddr limit) (bug "Can't find symbol"))
638 (multiple-value-bind (obj tag size)
639 (reconstitute-object (ash physaddr (- n-fixnum-tag-bits)))
640 (when (and (= tag symbol-widetag)
641 (string= symbol-name (translate (symbol-name obj) spaces))
642 (%instancep (symbol-package obj))
643 (string= package-name
644 (translate
645 (sb-impl::package-%name
646 (truly-the package (translate (symbol-package obj) spaces)))
647 spaces)))
648 (return (%make-lisp-obj (logior physaddr other-pointer-lowtag))))
649 (incf physaddr size)))))
650 (symbol-fdefn-fun (symbol)
651 (let ((vector (translate (symbol-info-vector symbol) spaces)))
652 ;; TODO: allow for (plist . info-vector) in the info slot
653 (aver (simple-vector-p vector))
654 (translate (fdefn-fun (translate (info-vector-fdefn vector) spaces))
655 spaces))))
656 (let ((linkage-info
657 (translate (symbol-global-value (remote-find-symbol "SB-SYS" "*LINKAGE-INFO*"))
658 spaces))
659 (dyn-syminfo
660 (symbol-fdefn-fun
661 (remote-find-symbol "SB-SYS" "ENSURE-DYNAMIC-FOREIGN-SYMBOL-ADDRESS"))))
662 (aver (= (get-closure-length dyn-syminfo) 3))
663 (let* ((ht1 (translate (%closure-index-ref dyn-syminfo 1) spaces))
664 (ht2 (translate (%closure-index-ref dyn-syminfo 0) spaces))
665 (table0
666 (translate (sb-impl::hash-table-table (truly-the hash-table linkage-info))
667 spaces))
668 (table1
669 (translate (sb-impl::hash-table-table (truly-the hash-table ht1)) spaces))
670 (table2
671 (translate (sb-impl::hash-table-table (truly-the hash-table ht2)) spaces))
672 (linkage)
673 (foreign))
674 (declare (simple-vector table0 table1 table2))
675 (flet ((show (x)
676 (push x foreign)
677 (when verbose
678 (format t "~A~%" x)))
679 (scan-table (table name fun &aux (n 0) (end (length table)))
680 (when verbose
681 (format t "~&~A:~%~A~%"
682 name (make-string (1+ (length name)) :initial-element #\-)))
683 (do ((i 2 (+ i 2)))
684 ((= i end))
685 (let ((val (svref table i)))
686 (unless (unbound-marker-p val)
687 (funcall fun (translate val spaces))
688 (incf n))))
689 (when verbose
690 (format t "TOTAL: ~D entries~2%" n))))
691 (scan-table table0 "linkage info"
692 (lambda (x &aux (type #\T))
693 (when (consp x)
694 (setq x (translate (car x) spaces) type #\D))
695 (format asm-file " .long ~A~%" x)
696 (when verbose
697 (format t "~A ~A~%" type x))
698 (push x linkage)))
699 (scan-table table1 "defined" #'show)
700 (scan-table table2 "undefined" #'show)
701 (let ((diff1 ; linkage not in foreign
702 (remove-if (lambda (x) (member x foreign :test #'string=)) linkage))
703 (diff2 ; foreign not in linkage
704 (remove-if (lambda (x) (member x linkage :test #'string=)) foreign)))
705 (when verbose
706 (format t "~&Linkage not in foreign:~%~S~%" diff1)
707 (format t "~&Foreign not in linkage:~%~S~%" diff2))
708 )))))
709 (terpri asm-file))
711 ;;;; ELF file I/O
713 (defconstant +sht-null+ 0)
714 (defconstant +sht-progbits+ 1)
715 (defconstant +sht-symtab+ 2)
716 (defconstant +sht-strtab+ 3)
717 (defconstant +sht-rela+ 4)
718 (defconstant +sht-rel+ 9)
720 (define-alien-type elf64-ehdr
721 (struct elf64-edhr
722 (ident (array unsigned-char 16)) ; 7F 45 4C 46 2 1 1 0 0 0 0 0 0 0 0 0
723 (type (unsigned 16)) ; 1 0
724 (machine (unsigned 16)) ; 3E 0
725 (version (unsigned 32)) ; 1 0 0 0
726 (entry unsigned) ; 0 0 0 0 0 0 0 0
727 (phoff unsigned) ; 0 0 0 0 0 0 0 0
728 (shoff unsigned) ;
729 (flags (unsigned 32)) ; 0 0 0 0
730 (ehsize (unsigned 16)) ; 40 0
731 (phentsize (unsigned 16)) ; 0 0
732 (phnum (unsigned 16)) ; 0 0
733 (shentsize (unsigned 16)) ; 40 0
734 (shnum (unsigned 16)) ; n 0
735 (shstrndx (unsigned 16)))) ; n 0
736 (define-alien-type elf64-shdr
737 (struct elf64-shdr
738 (name (unsigned 32))
739 (type (unsigned 32))
740 (flags (unsigned 64))
741 (addr (unsigned 64))
742 (off (unsigned 64))
743 (size (unsigned 64))
744 (link (unsigned 32))
745 (info (unsigned 32))
746 (addralign (unsigned 64))
747 (entsize (unsigned 64))))
748 (define-alien-type elf64-sym
749 (struct elf64-sym
750 (name (unsigned 32))
751 (info (unsigned 8))
752 (other (unsigned 8))
753 (shndx (unsigned 16))
754 (value unsigned)
755 (size unsigned)))
756 (define-alien-type elf64-rela
757 (struct elf64-rela
758 (offset (unsigned 64))
759 (info (unsigned 64))
760 (addendr (signed 8))))
762 (defun make-elf64-sym (name info)
763 (let ((a (make-array 24 :element-type '(unsigned-byte 8))))
764 (with-pinned-objects (a)
765 (setf (sap-ref-32 (vector-sap a) 0) name
766 (sap-ref-8 (vector-sap a) 4) info))
769 ;;; Return two values: an octet vector comprising a string table
770 ;;; and an alist which maps string to offset in the table.
771 (defun string-table (strings)
772 (let* ((length (+ (1+ (length strings)) ; one more null than there are strings
773 (reduce #'+ strings :key #'length))) ; data length
774 (bytes (make-array length :element-type '(unsigned-byte 8)
775 :initial-element 0))
776 (index 1)
777 (alist))
778 (dolist (string strings)
779 (push (cons string index) alist)
780 (replace bytes (map 'vector #'char-code string) :start1 index)
781 (incf index (1+ (length string))))
782 (values bytes (nreverse alist))))
784 (defun write-alien (alien size stream)
785 (dotimes (i size)
786 (write-byte (sap-ref-8 (alien-value-sap alien) i) stream)))
788 ;;; Make a relocatable ELF file from an SBCL core file
789 (defun objcopy (input-path output-path relocs)
790 (with-open-file (input input-path :element-type '(unsigned-byte 8))
791 (binding* ((sym-entry-size 24)
792 (reloc-entry-size 24)
793 (sections
794 `#((:core "lisp.core" ,+sht-progbits+ 0 0 0 4096 0)
795 (:sym ".symtab" ,+sht-symtab+ 0 3 1 8 ,sym-entry-size)
796 ; section with the strings -- ^ ^ -- 1+ highest local symbol
797 (:str ".strtab" ,+sht-strtab+ 0 0 0 1 0)
798 (:rel ".relalisp.core" ,+sht-rela+ 0 2 1 8 ,reloc-entry-size)
799 ; symbol table -- ^ ^ -- for which section
800 (:note ".note.GNU-stack" ,+sht-null+ 0 0 0 1 0)))
801 ((string-table map)
802 (string-table (append '("__lisp_code_start")
803 (map 'list #'second sections))))
804 (padded-strings-size (logandc2 (+ (length string-table) 7) 7))
805 (ehdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-ehdr nil)) 8))
806 (shdr-size #.(ceiling (alien-type-bits (parse-alien-type 'elf64-shdr nil)) 8))
807 (symbols-size (* 2 sym-entry-size))
808 (shdrs-start (+ ehdr-size symbols-size padded-strings-size))
809 (shdrs-end (+ shdrs-start (* (1+ (length sections)) shdr-size)))
810 (core-size (file-length input))
811 (core-align 4096)
812 (core-start (logandc2 (+ shdrs-end (1- core-align)) (1- core-align)))
813 (ident #.(coerce '(#x7F #x45 #x4C #x46 2 1 1 0 0 0 0 0 0 0 0 0)
814 '(array (unsigned-byte 8) 1))))
815 reloc-entry-size
816 (with-open-file (output output-path :direction :output :if-exists :supersede
817 :element-type '(unsigned-byte 8))
818 (with-alien ((ehdr elf64-ehdr))
819 (dotimes (i (ceiling ehdr-size n-word-bytes))
820 (setf (sap-ref-word (alien-value-sap ehdr) (* i n-word-bytes)) 0))
821 (with-pinned-objects (ident)
822 (%byte-blt (vector-sap ident) 0 (alien-value-sap ehdr) 0 16))
823 (setf (slot ehdr 'type) 1
824 (slot ehdr 'machine) #x3E
825 (slot ehdr 'version) 1
826 (slot ehdr 'shoff) shdrs-start
827 (slot ehdr 'ehsize) ehdr-size
828 (slot ehdr 'shentsize) shdr-size
829 (slot ehdr 'shnum) (1+ (length sections)) ; section 0 is implied
830 (slot ehdr 'shstrndx) (1+ (position :str sections :key #'car)))
831 (write-alien ehdr ehdr-size output))
833 ;; Write symbol table
834 (aver (eql (file-position output) ehdr-size))
835 (write-sequence (make-elf64-sym 0 0) output)
836 ;; The symbol name index is always 1 by construction. The type is #x10
837 ;; given: #define STB_GLOBAL 1
838 ;; and: #define ELF32_ST_BIND(val) ((unsigned char) (val)) >> 4)
839 ;; which places the binding in the high 4 bits of the low byte.
840 (write-sequence (make-elf64-sym 1 #x10) output)
842 ;; Write string table
843 (aver (eql (file-position output) (+ ehdr-size symbols-size)))
844 (write-sequence string-table output)
845 (dotimes (i (- padded-strings-size (length string-table)))
846 (write-byte 0 output))
848 ;; Write section headers
849 (aver (eql (file-position output) shdrs-start))
850 (with-alien ((shdr elf64-shdr))
851 (dotimes (i (ceiling shdr-size n-word-bytes)) ; Zero-fill
852 (setf (sap-ref-word (alien-value-sap shdr) (* i n-word-bytes)) 0))
853 (dotimes (i (1+ (length sections)))
854 (when (plusp i) ; Write the zero-filled header as section 0
855 (destructuring-bind (key name type flags link info alignment entsize)
856 (aref sections (1- i))
857 (multiple-value-bind (offset size)
858 (ecase key
859 (:sym (values ehdr-size symbols-size))
860 (:str (values (+ ehdr-size symbols-size) (length string-table)))
861 (:core (values core-start core-size))
862 (:rel (values (+ core-start core-size)
863 (* (length relocs) reloc-entry-size)))
864 (:note (values 0 0)))
865 (setf (slot shdr 'name) (cdr (assoc name map :test #'string=))
866 (slot shdr 'type) type
867 (slot shdr 'flags) flags
868 (slot shdr 'off) offset
869 (slot shdr 'size) size
870 (slot shdr 'link) link
871 (slot shdr 'info) info
872 (slot shdr 'addralign) alignment
873 (slot shdr 'entsize) entsize))))
874 (write-alien shdr shdr-size output)))
875 ;; Write padding
876 (dotimes (i (- core-start (file-position output)))
877 (write-byte 0 output))
878 (aver (eq (file-position output) core-start))
879 ;; Copy the core file
880 (let ((buffer (make-array 65536 :element-type '(unsigned-byte 8))))
881 (loop (let ((n (read-sequence buffer input)))
882 (when (zerop n) (return))
883 (write-sequence buffer output :end n))))))))
885 ;;; Return a list of fixups (FIXUP-WHERE KIND ADDEND) to peform in a foreign core
886 ;;; whose code space is subject to link-time relocation.
887 ;;; #define R_X86_64_64 1 /* Direct 64 bit */
888 ;;; #define R_X86_64_PC32 2 /* PC relative 32 bit signed */
890 (defun collect-relocations (spaces)
891 (format t "mapped @ ~s~%~x~%" (car spaces) (cdr spaces))
892 (let* ((code-space (find immobile-varyobj-core-space-id (cdr spaces) :key #'fourth))
893 (code-start (car code-space))
894 (code-end (+ code-start (ash (third code-space) word-shift) -1))
895 (fixups)
896 (bias 0))
897 (declare (ignore bias))
898 (format t "~&code space is ~X:~X~%" code-start code-end)
899 (labels
900 ((abs-fixup (where referent)
901 (format t "0x~(~x~): (a)~%" (core-to-logical where) referent)
902 (push `(,where ,(- referent code-start) . :abs) fixups))
903 (rel-fixup (where referent)
904 (format t "0x~(~x~): (r)~%" (core-to-logical where) referent)
905 (push `(,where ,(- referent code-start) . :rel) fixups))
906 ;; Given a address which is an offset into the data pages of the target core,
907 ;; compute the logical address which that offset would be mapped to.
908 ;; For example core address 0 is the virtual address of static space.
909 (core-to-logical (core-offs &aux (page (floor core-offs +backend-page-bytes+)))
910 ; (format t "core-offs ~x, page=~d~%" addr page)
911 (loop for (vaddr page0 nwords id) in (cdr spaces)
912 for npages = (ceiling nwords (/ +backend-page-bytes+ n-word-bytes))
913 when (and (<= page0 page (+ page0 (1- npages)))
914 (/= id immobile-varyobj-core-space-id))
915 do (return (+ vaddr
916 (* (- page page0) +backend-page-bytes+)
917 (logand core-offs (1- +backend-page-bytes+))))
918 finally (bug "Can't translate core offset ~x using ~x"
919 core-offs spaces)))
920 (scanptrs (obj wordindex-min wordindex-max)
921 (do* ((base-addr (logandc2 (get-lisp-obj-address obj) lowtag-mask))
922 (sap (int-sap base-addr))
923 ;; core-offs is the offset in the lisp.core ELF section.
924 (core-offs (- base-addr (sap-int (car spaces))))
925 (i wordindex-min (1+ i)))
926 ((> i wordindex-max))
927 (let ((ptr (sap-ref-word sap (ash i word-shift))))
928 (when (and (= (logand ptr 3) 3) (<= code-start ptr code-end))
929 (abs-fixup (+ core-offs (ash i word-shift)) ptr)))))
930 (scanptr (obj wordindex)
931 (scanptrs obj wordindex wordindex)) ; trivial wrapper
932 (scan-obj (obj widetag vaddr size
933 &aux (core-offs (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
934 (sap-int (car spaces))))
935 (nwords (ceiling size n-word-bytes)))
936 ; (format t "~&obj @ physical ~x core addr ~x widetag ~x~%" (get-lisp-obj-address obj) core-offs widetag)
937 (when (listp obj)
938 (scanptrs obj 0 1)
939 (return-from scan-obj))
940 (case widetag
941 (#.instance-widetag
942 (let ((layout (truly-the layout
943 (translate (%instance-layout obj) spaces))))
944 ;; FIXME: even though the layout is supplied, it's not good enough,
945 ;; because the macro references the layout-bitmap which might
946 ;; be a bignum which is a pointer into the logical core address.
947 (unless (fixnump (layout-bitmap layout))
948 (error "Can't process bignum bitmap"))
949 (do-instance-tagged-slot (i obj :layout layout)
950 (scanptr obj (1+ i))))
951 (return-from scan-obj))
952 (#.simple-vector-widetag
953 (let ((len (length (the simple-vector obj))))
954 (when (eql (logand (get-header-data obj) #xFF) vector-valid-hashing-subtype)
955 (do ((i 2 (+ i 2)) (needs-rehash))
956 ((= i len)
957 (when needs-rehash
958 (setf (svref obj 1) 1)))
959 (when (scanptr obj (+ vector-data-offset i))
960 (format t "~&SET REHASH: vector=~X~%" (get-lisp-obj-address obj))
961 (setq needs-rehash t))
962 (scanptr obj (+ vector-data-offset i 1)))
963 (return-from scan-obj))
964 (setq nwords (+ len 2))))
965 (#.fdefn-widetag
966 (scanptrs obj 1 2)
967 (let* ((fdefn-pc-sap ; where to read to access the rel32 operand
968 (int-sap (+ (- (get-lisp-obj-address obj) other-pointer-lowtag)
969 (ash fdefn-raw-addr-slot word-shift))))
970 ;; what the fdefn's logical PC will be
971 (fdefn-logical-pc (+ vaddr (ash fdefn-raw-addr-slot word-shift)))
972 (rel32off (signed-sap-ref-32 fdefn-pc-sap 1))
973 (target (+ fdefn-logical-pc 5 rel32off)))
974 ;; 5 = length of jmp/call inst
975 (when (<= code-start target code-end)
976 ;; This addend needs to account for the fact that the location
977 ;; where fixup occurs is not where the fdefn will actually exist.
978 (rel-fixup (+ core-offs (ash 3 word-shift) 1) target)))
979 (return-from scan-obj))
980 ((#.closure-widetag #.funcallable-instance-widetag)
981 (let ((word (sap-ref-word (int-sap (get-lisp-obj-address obj))
982 (- n-word-bytes fun-pointer-lowtag))))
983 (when (<= code-start word code-end)
984 (abs-fixup (+ core-offs (ash 1 word-shift)) word)))
985 (when (eq widetag funcallable-instance-widetag)
986 (let ((layout (truly-the layout
987 (translate (%funcallable-instance-layout obj) spaces))))
988 (unless (fixnump (layout-bitmap layout))
989 (error "Can't process bignum bitmap"))
990 (let ((bitmap (layout-bitmap layout)))
991 (unless (eql bitmap -1)
992 ;; tagged slots precede untagged slots,
993 ;; so integer-length is the count of tagged slots.
994 (setq nwords (1+ (integer-length bitmap))))))))
995 ;; mixed boxed/unboxed objects
996 (#.code-header-widetag
997 (dotimes (i (code-n-entries obj))
998 (scanptrs (%code-entry-point obj i) 2 5))
999 (setq nwords (code-header-words obj)))
1000 ;; boxed objects that can reference code/simple-funs
1001 ((#.value-cell-widetag #.symbol-widetag #.weak-pointer-widetag))
1003 (return-from scan-obj)))
1004 (scanptrs obj 1 (1- nwords))))
1005 (dolist (space (reverse (cdr spaces)))
1006 (destructuring-bind (logical-addr data-page nwords id) space
1007 (unless (= id immobile-varyobj-core-space-id)
1008 (let* ((size (ash nwords word-shift))
1009 (physical-start (sap+ (car spaces) (* data-page +backend-page-bytes+)))
1010 (physical-end (sap+ physical-start size)))
1011 (format t "~&scan range vaddr=~12x:~12x paddr=~12x:~12x~%"
1012 logical-addr (+ logical-addr size)
1013 (sap-int physical-start) (sap-int physical-end))
1014 (dx-flet ((visit (obj widetag size)
1015 ;; Compute the object's intended virtual address
1016 (let ((vaddr (+ (- (logandc2 (get-lisp-obj-address obj) lowtag-mask)
1017 (sap-int physical-start))
1018 logical-addr)))
1019 (scan-obj obj widetag vaddr size))))
1020 (map-objects-in-range
1021 #'visit
1022 (ash (sap-int physical-start) (- n-fixnum-tag-bits))
1023 (ash (sap-int physical-end) (- n-fixnum-tag-bits)))))))))
1024 (format t "~D fixups~%" (length fixups))
1025 (bug "not done yet")))
1026 ;;;;
1028 (macrolet ((do-core-header-entry ((id-var len-var ptr-var) &body body)
1029 `(let ((,ptr-var 1))
1030 (loop
1031 (let ((,id-var (%vector-raw-bits buffer ,ptr-var))
1032 (,len-var (%vector-raw-bits buffer (1+ ,ptr-var))))
1033 (incf ,ptr-var 2)
1034 (decf ,len-var 2)
1035 (when (= ,id-var end-core-entry-type-code) (return))
1036 ,@body
1037 (incf ,ptr-var ,len-var)))))
1038 (do-directory-entry ((index-var start-index input-nbytes) &body body)
1039 `(let ((words-per-dirent 5))
1040 (multiple-value-bind (n-entries remainder)
1041 (floor ,input-nbytes words-per-dirent)
1042 (aver (zerop remainder))
1043 (symbol-macrolet ((id (%vector-raw-bits buffer index))
1044 (nwords (%vector-raw-bits buffer (+ index 1)))
1045 (data-page (%vector-raw-bits buffer (+ index 2)))
1046 (addr (%vector-raw-bits buffer (+ index 3)))
1047 (npages (%vector-raw-bits buffer (+ index 4))))
1048 (do ((,index-var ,start-index (+ ,index-var words-per-dirent)))
1049 ((= ,index-var (+ ,start-index (* n-entries words-per-dirent))))
1050 ,@body)))))
1051 (with-mapped-core ((sap-var start npages stream) &body body)
1052 `(let (,sap-var)
1053 (unwind-protect
1054 (progn
1055 (setq ,sap-var
1056 (sb-posix:mmap nil
1057 (* ,npages +backend-page-bytes+)
1058 (logior sb-posix:prot-read sb-posix:prot-write)
1059 sb-posix:map-private
1060 (sb-sys:fd-stream-fd ,stream)
1061 ;; Skip the core header
1062 (+ ,start +backend-page-bytes+)))
1063 ,@body)
1064 (when ,sap-var
1065 (sb-posix:munmap ,sap-var (* ,npages +backend-page-bytes+)))))))
1067 (defun split-core
1068 (input-pathname asm-pathname
1069 &key emit-sizes (verbose t)
1070 &aux (split-core-pathname
1071 (merge-pathnames
1072 (make-pathname :name (concatenate 'string (pathname-name asm-pathname) "-core")
1073 :type "tmp")
1074 asm-pathname))
1075 (elf-core-pathname (merge-pathnames (make-pathname :type "o") split-core-pathname))
1076 (buffer (make-array +backend-page-bytes+ :element-type '(unsigned-byte 8)))
1077 (original-total-npages 0)
1078 (core-offset 0)
1079 (static-space)
1080 (code-space)
1081 (fixedobj-space)
1082 (copy-actions)
1083 (page-adjust 0)
1084 (spaces))
1086 ;; Remove old files
1087 (ignore-errors (delete-file asm-pathname))
1088 (ignore-errors (delete-file elf-core-pathname))
1089 ;; Ensure that all files can be opened
1090 (with-open-file (input input-pathname :element-type '(unsigned-byte 8))
1091 (with-open-file (asm-file asm-pathname :direction :output :if-exists :supersede)
1092 (with-open-file (temp-core split-core-pathname :direction :output
1093 :element-type '(unsigned-byte 8) :if-exists :supersede)
1094 (read-sequence buffer input)
1095 (cond ((= (%vector-raw-bits buffer 0) core-magic))
1096 (t ; possible embedded core
1097 (file-position input (- (file-length input)
1098 (* 2 n-word-bytes)))
1099 (aver (eql (read-sequence buffer input) (* 2 n-word-bytes)))
1100 (aver (= (%vector-raw-bits buffer 1) core-magic))
1101 (setq core-offset (%vector-raw-bits buffer 0))
1102 (when verbose
1103 (format t "~&embedded core starts at #x~x into input~%" core-offset))
1104 (file-position input core-offset)
1105 (read-sequence buffer input)
1106 (aver (= (%vector-raw-bits buffer 0) core-magic))))
1107 (do-core-header-entry (id len ptr)
1108 (case id
1109 (#.build-id-core-entry-type-code
1110 (let ((string (make-string (%vector-raw-bits buffer ptr)
1111 :element-type 'base-char))
1112 (string-base (* (1+ ptr) n-word-bytes)))
1113 (loop for i below (length string)
1114 do (setf (char string i) (code-char (aref buffer (+ string-base i)))))
1115 (when verbose
1116 (format t "Build ID [~a]~%" string))))
1117 (#.new-directory-core-entry-type-code
1118 (do-directory-entry (index ptr len)
1119 (incf original-total-npages npages)
1120 (push (list addr data-page nwords id) spaces)
1121 (when verbose
1122 (format t "id=~d page=~5x + ~5x addr=~10x words=~8x~:[~; (drop)~]~%"
1123 id data-page npages addr nwords
1124 (= id immobile-varyobj-core-space-id)))
1125 (cond ((= id immobile-varyobj-core-space-id)
1126 ;; subsequent entries adjust their start page downward
1127 ;; (should be just the dynamic space entry)
1128 (setq code-space (cons addr (+ addr (ash nwords word-shift)))
1129 page-adjust npages)
1130 ;; Keep the entry but delete the page count. We need to know
1131 ;; where the space was supposed to be mapped and at what size.
1132 (setf data-page 0 npages 0))
1134 ;; Keep track of where the static and fixedobj spaces
1135 ;; were supposed to be mapped.
1136 (case id
1137 (#.static-core-space-id
1138 (setq static-space
1139 (cons addr (+ addr (ash nwords word-shift)))))
1140 (#.immobile-fixedobj-core-space-id
1141 (setq fixedobj-space
1142 (cons addr (+ addr (ash nwords word-shift))))))
1143 (when (plusp npages) ; enqueue
1144 (push (cons data-page (* npages +backend-page-bytes+))
1145 copy-actions))
1146 ;; adjust this entry's start page in the new core
1147 (decf data-page page-adjust)))))
1148 (#.page-table-core-entry-type-code
1149 (aver (= len 3))
1150 (symbol-macrolet ((nbytes (%vector-raw-bits buffer (1+ ptr)))
1151 (data-page (%vector-raw-bits buffer (+ ptr 2))))
1152 (aver (= data-page original-total-npages))
1153 (when verbose
1154 (format t "PTE: page=~5x~40tbytes=~8x~%" data-page nbytes))
1155 (push (cons data-page nbytes) copy-actions)
1156 (decf data-page page-adjust)))))
1157 (write-sequence buffer temp-core)
1158 (dolist (action (nreverse copy-actions))
1159 ;; page index convention assumes absence of core header.
1160 ;; i.e. data page 0 is the file page immediately following the core header
1161 (let ((offset (* (1+ (car action)) +backend-page-bytes+))
1162 (nbytes (cdr action)))
1163 (when verbose
1164 (format t "File offset ~10x: ~10x bytes~%" offset nbytes))
1165 (file-position input (+ core-offset offset))
1166 (loop (let ((chunksize (min (length buffer) nbytes)))
1167 (aver (eql (read-sequence buffer input :end chunksize) chunksize))
1168 (write-sequence buffer temp-core :end chunksize)
1169 (when (zerop (decf nbytes chunksize)) (return))))))
1170 ;; Trailer (runtime options and magic number)
1171 (let ((nbytes (read-sequence buffer input)))
1172 ;; expect trailing magic number
1173 (let ((ptr (floor (- nbytes n-word-bytes) n-word-bytes)))
1174 (aver (= (%vector-raw-bits buffer ptr) core-magic)))
1175 ;; File position of the core header needs to be set to 0
1176 ;; regardless of what it was
1177 (setf (%vector-raw-bits buffer 4) 0)
1178 (when verbose
1179 (format t "Trailer words:(~{~X~^ ~})~%"
1180 (loop for i below (floor nbytes n-word-bytes)
1181 collect (%vector-raw-bits buffer i))))
1182 (write-sequence buffer temp-core :end nbytes)
1183 (finish-output temp-core))
1184 ;; Sanity test
1185 (aver (= (+ core-offset
1186 (* page-adjust +backend-page-bytes+)
1187 (file-length temp-core))
1188 (file-length input))))
1189 (setq buffer nil) ; done with buffer now
1191 ;; Map the original core file to memory
1192 (with-mapped-core (sap core-offset original-total-npages input)
1193 (let* ((map (cons sap (sort spaces #'> :key 'car)))
1194 (relocs #+nil(collect-relocations map)))
1195 ;; Convert the partial core into a '.o' file
1196 (objcopy split-core-pathname elf-core-pathname relocs)
1197 (delete-file split-core-pathname)
1199 ;; There's no relation between emit-sizes and which section to put
1200 ;; C symbol references in, however it's a safe bet that if sizes
1201 ;; are supported then so is the .rodata directive.
1202 (format asm-file (if emit-sizes " .rodata~%" " .data~%"))
1203 (extract-required-c-symbols fixedobj-space map asm-file)
1204 (write-assembler-text code-space static-space fixedobj-space
1205 map asm-file emit-sizes)))
1207 (format asm-file "~% ~A~%" +noexec-stack-note+))))
1209 ) ; end MACROLET
1211 ;;;;
1213 (defun cl-user::elfinate (&optional (args (cdr sb-ext:*posix-argv*)))
1214 (cond ((string= (car args) "split")
1215 (pop args)
1216 (let ((sizes (string= (car args) "--sizes")))
1217 (when sizes
1218 (pop args))
1219 (destructuring-bind (input asm) args
1220 (split-core input asm :emit-sizes sizes))))
1221 #+nil
1222 ((string= (car args) "relocate")
1223 (destructuring-bind (input output binary start-sym) (cdr args)
1224 (relocate-core
1225 input output binary (parse-integer start-sym :radix 16))))
1227 (error "Unknown command: ~S" args))))
1229 ;; If loaded as a script, do this
1230 (eval-when (:execute)
1231 (let ((args (cdr sb-ext:*posix-argv*)))
1232 (when args
1233 (let ((*print-pretty* nil))
1234 (format t "Args: ~S~%" args)
1235 (cl-user::elfinate args)))))