From 10cc646b9e68604bdcee598c31a721905f55f333 Mon Sep 17 00:00:00 2001 From: Douglas Katzman Date: Tue, 6 Jan 2015 22:55:37 -0500 Subject: [PATCH] Plug up leaky abstraction that (%INSTANCE-REF struct 0) is a LAYOUT. This is part 1 of the compact-instance-feature change, and motivated by the design as sketched out in doc/internals-notes/compact-instance. %INSTANCE-REF 0 was generally assumed to access an object's layout, but this won't be true in general. Moreover, DO-INSTANCE-TAGGED-SLOT won't scan an instance's layout, so any code that wanted to iterate over all defined slots plus layout now needs to read the layout separately. Additionally, gencgc has been made to use instance_length() and instance_layout() accessors. --- contrib/sb-introspect/introspect.lisp | 8 +- doc/internals-notes/compact-instance | 165 ++++++++++++++++++++++++++++++++ doc/internals-notes/non-moving-gc | 95 ++++++++++++++++++ package-data-list.lisp-expr | 1 + src/code/defbangstruct.lisp | 37 ++++--- src/code/defstruct.lisp | 3 +- src/code/early-raw-slots.lisp | 68 ++++++------- src/code/fop.lisp | 9 +- src/code/pred.lisp | 9 +- src/code/room.lisp | 11 ++- src/code/sharpm.lisp | 4 +- src/code/target-defstruct.lisp | 21 ++-- src/code/target-sxhash.lisp | 5 +- src/compiler/dump.lisp | 31 +++--- src/compiler/fopcompile.lisp | 2 + src/compiler/generic/early-vm.lisp | 5 + src/compiler/generic/genesis.lisp | 15 ++- src/compiler/generic/vm-tran.lisp | 2 + src/compiler/ir1tran.lisp | 28 +++--- src/runtime/gc-common.c | 50 +++++++--- src/runtime/gencgc.c | 4 +- src/runtime/print.c | 12 +-- src/runtime/runtime.h | 9 ++ tests/raw-slots-interleaved.impure.lisp | 1 + 24 files changed, 457 insertions(+), 138 deletions(-) create mode 100644 doc/internals-notes/compact-instance create mode 100644 doc/internals-notes/non-moving-gc diff --git a/contrib/sb-introspect/introspect.lisp b/contrib/sb-introspect/introspect.lisp index 28212b764..4cf8750c9 100644 --- a/contrib/sb-introspect/introspect.lisp +++ b/contrib/sb-introspect/introspect.lisp @@ -907,11 +907,9 @@ Experimental: interface subject to change." (call (realpart object)) (call (realpart object))) (sb-vm::instance - (if (typep object 'structure-object) - (sb-kernel:do-instance-tagged-slot (i object) - (call (sb-kernel:%instance-ref object i))) - (dotimes (i (sb-kernel:%instance-length object)) - (call (sb-kernel:%instance-ref object i)))) + (call (sb-kernel:%instance-layout object)) + (sb-kernel:do-instance-tagged-slot (i object) + (call (sb-kernel:%instance-ref object i))) #+sb-thread (when (typep object 'sb-thread:thread) (cond ((eq object sb-thread:*current-thread*) diff --git a/doc/internals-notes/compact-instance b/doc/internals-notes/compact-instance new file mode 100644 index 000000000..a0bcbd13b --- /dev/null +++ b/doc/internals-notes/compact-instance @@ -0,0 +1,165 @@ +The following outlines an approach by which the core image size of +a particularly memory-hungry application can be reduced by about 100MB, +with a corresponding reduction in runtime object allocation of course. +This represents 10% of the space consumed by all structure-objects +within pseudo-static space of this core image. +The idea is to squash one word out of all structures +by storing their instance-layout in their header words. + +There are (at least) two plausible ways to do this as detailed below. + +Alternative A [as proposed on sbcl-devel] +============= +If all layouts are aligned on a 256-byte boundary, then the lowest byte +of a descriptor known to be a layout pointer is arbitrary, +in the same way that the low 3 or 4 bits of a descriptor can be recovered +from the object pointed to. + +Suppose than an instance's layout is at memory address #x1000CBAD00, +so that its tagged pointer is #x1000CBAD03. + +Existing in-memory representation: + word 0 : #x000000NN59 ; header. #x59 is the widetag, NN is the length + word 1 : #x1000CBAD03 ; pointer to LAYOUT + word 2 : first user-specified slot + word 3 : second user-specified slot + ... + +Alternative representation A: + word 0 : #x1000CBAD59 ; header + word 1 : first user-specified slot + word 2 : second user-specified slot + ... + +Word 0 is a "strange" interior pointer that has the correct widetag +for an INSTANCE, from which can be obtained a LAYOUT by subtracting #x56. +This approach is slightly inefficient in that %INSTANCE-LENGTH can only +be determined by following the LAYOUT and reading a slot from it. +It is therefore inappropriate for the legacy raw-slot implementation, +in which %INSTANCE-LENGTH plays an important role in slot access. + +Alternative B [applicable only to 64-bit architecture] +============= +If all layouts are located in low memory (< 4GB), then the upper 4 bytes +of an instance header can convey the tagged pointer, with room to spare. + +Suppose now that the layout has tagged pointer #x20CBAD03. +Alternative representation B: + word 0 : #x20CBAD03zzzzNN59 ; header + word 1 : first user-specified slot + word 2 : second user-specified slot + ... + +"zzzzNN" indicate the three bytes that may be used for length. +Two bytes are adequate for all practical purposes. + +Alternative B requires no funny pointer arithmetic, only a small +change when reading INSTANCE-LAYOUT to look in the high 4 bytes +(half-Lisp-word) when accessing the zeroth word. +And of course, it needs a segregated dynamic heap in low memory. + +While it is possible to implement a range of low addresses managed +by the existing generational GC, for various reasons it is attractive to +implement this in a non-moving GC. For one thing, type checks would be +able to reference a LAYOUT as a 32-bit immediate operand to CMP, +provided that the containing function also keeps the layout object +live by a reference from its code header, which is not a problem. + +Compatibility +============= +In either approach, there is a quesion of what to report for %INSTANCE-LENGTH. +Existing code assumes that (%INSTANCE-REF 0) is equivalent to %INSTANCE-LAYOUT. +It is possible to preserve that appearance, so that %INSTANCE-REF 1 returns +what is in "word 1" depicted above, and so on, but this consistency actually +imparts an unacceptable degree of inefficiency to the %INSTANCE-REF vop, +as well as making the meaning of %INSTANCE-LENGTH unusual for Lisp. +It is inefficient because the vop for non-constant index would have to test +for index 0 and access it differently. It is unconventional because it +allows access to one more index than implied by the length. +For example, suppose an %INSTANCE-LENGTH is 3 (4 physical words in the +object). This means that you should only be able to supply indices +of 0, 1, or 2; but in fact the last user-visible slot would be index 3 +in the above schemes because word 0 is stashed in a "hidden" place. +The three slots of data are indexed in Lisp as 1, 2, 3 instead of 0, 1, 2. +Hence, the non-adherence to a standard meaning of LENGTH. + +If the length in the header instead indicated 4, this would be strange +for GC, and would require change there. Or, it could indicate 3, and Lisp +could add 1 to it upon reading the value. This is all quite messy +and adds to the already-confusing bit of housekeeping necessary +with regard to this LENGTH field. +[Technically, the interleaved-raw-slot backend feature would be happier +if we never had to adhere to the round-to-odd convention either. +See the ample commentary at the 1 line DD-INSTANCE-LENGTH function] + +To best straighten out this conundrum, we shall: +(A) deem that %INSTANCE-LAYOUT be the only abstraction for getting + the layout, and +(B) define a new constant SB-VM:INSTANCE-DATA-START which is the + index of the first user-visible data slot. +The latter constant is either 1 or 0 depending on the presence +of the compact-header feature. Iteration over defined slots is performed +by scanning from INSTANCE-DATA-START to %INSTANCE-LENGTH. +And SB-VM:INSTANCE-SLOTS-OFFSET does not change. This is the constant +that when added to an index as specified by Lisp to the INSTANCE-REF +function, gets the proper physical index into the object. +So with or without the feature, INSTANCE-REF 0 gets physical word +index 1 relative to the object base. + +Compactifying STANDARD-OBJECT +============================= +The above shrinks every structure by 1 word, which in reality means that +structures which had one word of waste shrink by 2, and structures with +no wasted cells gain one padding word. +But a STANDARD-OBJECT has exactly 4 physical slots: + +suggesting that it would acquire one word of slack, and not shrink. + +We can do better than that: +1) suppose the clos-hash were stored in the 0th cell of the slot-vector. +There's nothing about the standard instance protocol that precludes this. +And it is a win: the "primitive" CLOS object would be + +and the slot-vector would in turn either gain 2 physical slots, +or gain no physical slots depending on whether it had a padding word. +So once again we have each object netting either -2 cells or -0 cells +of memory, considering the primitive object plus its data vector. +This is exactly the same advantage as for STRUCTURE-OBJECT. +Moreover, we gain a beautiful advantange: atomic update of +the LAYOUT and SLOTS with CMPXCHG16B (for CHANGE-CLASS etc) which +at present can not be done, as they do not satisfy that instruction's +strict memory alignment requirement. + +2) Supposing we don't store the clos-hash at all, but use a non-moving GC. +Then the clos-hash of a standard-object is just a mixing of its address. +Each CLOS primitive object will shrink by exactly 2 words. + +Unifying PCL instance access +============================ +STANDARD-INSTANCE-ACCESS and FUNCALLABLE-STANDARD-INSTANCE-ACCESS +can be made to emit identical assembly code, which simplifies +or eliminates some tests throughout the logic for PCL in deciding +what metaclass of instance is in hand (funcallable or not) +when getting the slot vector and layout. + +1) By placing the layout of a funcallable instance to its header word, +the assembly code for reading a layout of an object that is either a +standard-instance or funcallable-instance would mask off the lowtag +(essentially converting the descriptor to a native pointer), +and then read the layout from the high half of the header word. + +2) By placing the slot vector of a funcallable instance so that +it becomes the first slot after the trampoline slot, +access to the slot vector of either kind of instance can +be done with "mov result, [ptr+5]" from the tagged pointer. +This relies on the fact that the difference between the lowtags +is exactly 8 and that the instance-pointer-lowtag is 3. +Therefore, in the case of standard instance, [ptr+5] reads the +physical word which is one word after the header, +and in the case of funcallable-standard-instance, +[ptr+5] reads the physical word which is 2 words after the header, +skipping over the trampoline word. + +3) By placing a layout in every header of all 3 subtypes of FUNCTION, +then LAYOUT-OF can consistently access the layout in the same way +for any object that has either FUN-POINTER- or INSTANCE-POINTER-LOWTAG. diff --git a/doc/internals-notes/non-moving-gc b/doc/internals-notes/non-moving-gc new file mode 100644 index 000000000..699359cd5 --- /dev/null +++ b/doc/internals-notes/non-moving-gc @@ -0,0 +1,95 @@ +Non-moving GC +============= +For a few types of objects, evidence suggests that +either they are almost immediately known to be long-lived (a PACKAGE, e.g.), +of very fleeting in lifetime (gensyms during compilation, e.g). +At least four types of objects share this aspect: +- LAYOUT +- FDEFN +- PACKAGE +- SYMBOL, but only certain symbols, those which appear + (by their name) to be special variables + +In addition, these objects seem to have in common the property that speed +of allocation is relatively unimportant, and which therefore are indifferent +to an allocator which is somewhat more expensive than a "pointer bump". + +Taking each in turn: +A LAYOUT is created when augmenting the type system (via DEFSTRUCT,DEFCLASS) +which has so much overhead in and of itself, that speed of creation of +the LAYOUT is insignificant. We also expect changes to the type system +to be few and far between (in comparison to operations on the objects of that +type) such that reclamation of the metadata by GC is nearly irrelevant as +to whether or when it happens. + +An FDEFN is created for function linkage, which entails compilation, +which entails tons of overhead. (While it's certainly possible to create +FDEFNs outside of the compiler/loader, it would be unusual) +Morever, it was not even possible to delete FDEFN objects prior to the +rewrite of globaldb, which is to say, FDEFNs were *actually* immortal. + +A PACKAGE is generally expected to be long-lived, and again speed of +allocation is irrelevant. + +And finally, interned symbol usually live as long as their package does. + +For most of those objects, immobile placement does not seem to lead +to too much heap fragmentation in the immobile space. + +Prior to dumping a core image, we could allow motion on the otherwise +non-moving heap to squash the waste out. But this renders infeasible +the possibility of wiring in 32-bit immediate operands in assembly code +unless all code-components store their fixups for later use. +And indeed, assembly code makes frequent reference to SYMBOLs, FDEFNs, +and LAYOUTs, so there is something to be said for wiring them in. + +Card marking +============ +For FDEFNs and SYMBOLs, we can avoid OS-based write-protection while +incurring less of a penalty than was suggested by Paul Khuong's sw-write-barrier +branch. If symbols that "look like" special variables are immobile, +then a card-marking scheme can be used with a cost of only 1 instruction +for known symbols. + LOCK BTS [someplace], N ; set the "card dirty" bit + MOV [addr], val ; write it +Some fairly intricate cooperation with the fasloader will be required to +compute the absolute address of the mark bit, but not much more tricky +than the logic to assign symbol TLS indices at load time. +[And this doesn't help any with (SET SYM VAL) or with (PROGV)] + +We need two bits per card: one bit for whether the card is dirty, +and one bit for whether the card was ever the target of a preserve_pointer() +or the target of the effective address of a MOV instruction if that is +where the stop-for-gc signal occurred. +[This is the downside of not loading the symbol into a register] +In the latter case we can't clear the dirty bit even if the card seems clean, +because in the instruction sequence above, the second operation writes +the card in a way that would be accidentally invisible to GC if the dirty +bit were cleared after the first instruction. + + +Additional tweaks +================= + +1. If PACKAGEs and FDEFNs are both in low space (using the non-moving GC), +one 8-byte pointer slot in a SYMBOL can be made into 2 4-byte pointers +so that SYMBOL-FDEFN becomes a mere slot reference. +This is an utterly trivial change to the vops, and to the GC scavenge +function for a symbol. + +2. Non-moving GC could be used for non-toplevel CODE objects. +This might allow all functions within the same file to use the +ordinary x86 CALL instruction, under control of an optimize policy +(because it would be difficult to trace/redefine/encapsulate) + +3. If a closure header needs only 2 bytes for the length (probably true), +there is 1 byte available for a generation# and there are 4 bytes for the +layout of FUNCTION. The same is true for funcallable-instance and FUNCTION. +Note that the generation# for a SIMPLE-FUN will be stored in +the CODE-COMPONENT object, not the SIMPLE-FUN. + +[unrelated] +4. A build-time selection that limits array-total-size to 2^31 on 64-bit +would allow a vector length to be stuffed into the vector header, +which saves space for small vectors. A vector of length 3 would shrink +from 6 physical words to 4 words - a 33% savings. diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 164789189..54fc1c3ab 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -2908,6 +2908,7 @@ structure representations" "EMIT-CONSTANT-SEGMENT-HEADER" "SORT-INLINE-CONSTANTS" "EMIT-INLINE-CONSTANT") + "INSTANCE-DATA-START" "INSTANCE-HEADER-WIDETAG" "INSTANCE-POINTER-LOWTAG" "INSTANCE-SLOTS-OFFSET" "INSTANCE-USAGE" "INTERIOR-REG-SC-NUMBER" "INTERNAL-ERROR-ARGS" diff --git a/src/code/defbangstruct.lisp b/src/code/defbangstruct.lisp index f10249bb3..f1cbe080c 100644 --- a/src/code/defbangstruct.lisp +++ b/src/code/defbangstruct.lisp @@ -185,9 +185,11 @@ ;; raw-slot-metadata, 0 represents no untagged slots. (zerop (layout-raw-slot-metadata (info :type :compiler-layout name))))))) - (defun %instance-length (instance) + (defun %instance-layout (instance) (aver (or (typep instance 'structure!object) (xc-dumpable-structure-instance-p instance))) + (classoid-layout (find-classoid (type-of instance)))) + (defun %instance-length (instance) ;; INSTANCE-LENGTH tells you how many data words the backend is able to ;; physically access in this structure. Since every structure occupies ;; an even number of words, the storage slots comprise an odd number @@ -199,18 +201,23 @@ ;; more cells than there are in the declared structure because there ;; is no lower level storage that you can peek at. ;; So INSTANCE-LENGTH is exactly the same as LAYOUT-LENGTH on the host. - (layout-length (classoid-layout (find-classoid (type-of instance))))) + (layout-length (%instance-layout instance))) (defun %instance-ref (instance index) - (aver (or (typep instance 'structure!object) - (xc-dumpable-structure-instance-p instance))) - (let* ((class (find-classoid (type-of instance))) - (layout (classoid-layout class))) - (if (zerop index) - layout + (let ((layout (%instance-layout instance))) + ;; with compact headers, 0 is an ordinary slot index. + ;; without, it's the layout. + (if (eql index (1- sb!vm:instance-data-start)) + (error "XC Host should use %INSTANCE-LAYOUT, not %INSTANCE-REF 0") (let* ((dd (layout-info layout)) - (dsd (elt (dd-slots dd) (1- index))) + ;; If data starts at 1, then subtract 1 from index. + ;; otherwise use the index as-is. + (dsd (elt (dd-slots dd) + (- index sb!vm:instance-data-start))) (accessor-name (dsd-accessor-name dsd))) - (declare (type symbol accessor-name)) + ;; Why AVER these: because it is slightly abstraction-breaking + ;; to assume that the slot-index N is the NTH item in the DSDs. + ;; The target Lisp never assumes that. + (aver (and (eql (dsd-index dsd) index) (eq (dsd-raw-type dsd) t))) (funcall accessor-name instance))))) ;; I believe this approach is technically nonportable because CLHS says that ;; "The mechanism by which defstruct arranges for slot accessors to be usable @@ -218,14 +225,14 @@ ;; functions, setf expanders, or some other implementation-dependent ;; mechanism ..." ;; As it happens, many implementations provide both functions and expanders. + ;; But ... this seems never to be needed. (defun %instance-set (instance index new-value) - (aver (typep instance 'structure!object)) - (let* ((class (find-classoid (type-of instance))) - (layout (classoid-layout class))) - (if (zerop index) + (aver (typep instance 'structure!object)) ; a stronger condition than above + (let ((layout (%instance-layout instance))) + (if (< index sb!vm:instance-data-start) (error "can't set %INSTANCE-REF FOO 0 in cross-compilation host") (let* ((dd (layout-info layout)) - (dsd (elt (dd-slots dd) (1- index))) + (dsd (elt (dd-slots dd) (- index sb!vm:instance-data-start))) (accessor-name (dsd-accessor-name dsd))) (declare (type symbol accessor-name)) (funcall (fdefinition `(setf ,accessor-name)) diff --git a/src/code/defstruct.lisp b/src/code/defstruct.lisp index c01cf5c31..f49f7c529 100644 --- a/src/code/defstruct.lisp +++ b/src/code/defstruct.lisp @@ -1269,7 +1269,8 @@ unless :NAMED is also specified."))) ;; slot that is not present in DD-SLOTS. ;; All other bits start as 1 and are cleared if the word is tagged. ;; A final padding word, if any, is regarded as tagged. - (let ((bitmap (ldb (byte (dd-length dd) 0) -2))) + (let ((bitmap (ldb (byte (dd-length dd) 0) + (ash -1 sb!vm:instance-data-start)))) (dolist (slot (dd-slots dd) bitmap) (when (eql t (dsd-raw-type slot)) (setf (ldb (byte 1 (dsd-index slot)) bitmap) 0))))) diff --git a/src/code/early-raw-slots.lisp b/src/code/early-raw-slots.lisp index a0f422257..b6c4621b4 100644 --- a/src/code/early-raw-slots.lisp +++ b/src/code/early-raw-slots.lisp @@ -200,51 +200,43 @@ (defun raw-slot-words (type) (raw-slot-data-n-words (raw-slot-data-or-lose type))) -;; DO-INSTANCE-TAGGED-SLOT will iterate over the slots of THING that -;; contain tagged objects. INDEX-VAR is bound to successive slot-indices, +;; DO-INSTANCE-TAGGED-SLOT iterates over the manifest slots of THING +;; that contain tagged objects. (The LAYOUT does not count as a manifest slot). +;; INDEX-VAR is bound to successive slot-indices, ;; and is usually used as the second argument to %INSTANCE-REF. -;; START, if supplied, should be either 0 or 1 to include, -;; or respectively exclude, the object's layout slot. -;; END, if supplied, represents the upper bound of the scan and should be -;; the LAYOUT-LENGTH of the object; it defaults to %INSTANCE-LENGTH if -;; unsupplied, will therefore possibly include one word that can, in theory, -;; covertly hold one tagged object more than indicated by LAYOUT-LENGTH -;; depending on ODDP of LAYOUT-LENGTH. -;; END works correctly whether or not the backend supports slot interleaving, -;; but it is probably a bug if anyone uses the padding slot for storage. +;; EXCLUDE-PADDING, if T, skips a final word that may be present +;; at the end of the structure due to alignment requirements. ;; LAYOUT is optional and somewhat unnecessary, but since some uses of ;; this macro already have a layout in hand, it can be supplied. ;; [If the compiler were smarter about doing fewer memory accesses, ;; there would be no need at all for the LAYOUT - if it had already been ;; accessed, it shouldn't be another memory read] -;; Note also that THING is usually a STRUCTURE-OBJECT, not a condition or -;; standard-object. Iterating over a CONDITION means iterating over the -;; slots comprising the primitive representation, not the manifest slots. -;; Similarly for STANDARD-OBJECT. Additionally, in the latter case -;; it would be a bug to specify LAYOUT-LENGTH as the :END parameter. -(defmacro do-instance-tagged-slot ((index-var thing - &key (start 0) end layout) &body body) - (with-unique-names (instance limit bitmap) +;; * CAUTION: with a STANDARD-OBJECT you MUST NOT specify :EXCLUDE-PADDING T +;; because that equates to using LAYOUT-LENGTH rather than %INSTANCE-LENGTH +;; to compute the upper bound, but LAYOUT-LENGTH of a STANDARD-OBJECT +;; is not pertinent to the number of storage cells in the primitive object. +;; +(defmacro do-instance-tagged-slot ((index-var thing &key (layout nil layout-p) + exclude-padding) + &body body) + (with-unique-names (instance n-layout limit bitmap) (declare (ignorable bitmap)) - (unless layout - (setq layout `(%instance-layout ,instance))) - (unless end - (setq end `(%instance-length ,instance))) - `(let ((,instance ,thing)) - ;; If the macro is given both :LAYOUT and :END, it never uses - ;; the local rebinding of INSTANCE, which is ok. - (declare (ignorable ,instance)) - #!+interleaved-raw-slots - (let ((,bitmap (layout-untagged-bitmap ,layout))) - (do ((,index-var ,start (1+ ,index-var)) - (,limit ,end)) + (let ((end-expr (if exclude-padding + `(layout-length ,n-layout) + `(%instance-length ,instance)))) + `(let* (,@(if (and layout-p exclude-padding) nil `((,instance ,thing))) + (,n-layout ,(or layout `(%instance-layout ,instance)))) + #!+interleaved-raw-slots + (do ((,bitmap (layout-untagged-bitmap ,n-layout)) + (,index-var sb!vm:instance-data-start (1+ ,index-var)) + (,limit ,end-expr)) ((>= ,index-var ,limit)) (declare (type index ,index-var)) (unless (logbitp ,index-var ,bitmap) - ,@body))) - #!-interleaved-raw-slots - (do ((,index-var ,start (1+ ,index-var)) - (,limit (- ,end (layout-n-untagged-slots ,layout)))) - ((>= ,index-var ,limit)) - (declare (type index ,index-var)) - ,@body)))) + ,@body)) + #!-interleaved-raw-slots + (do ((,index-var 1 (1+ ,index-var)) + (,limit (- ,end-expr (layout-n-untagged-slots ,n-layout)))) + ((>= ,index-var ,limit)) + (declare (type index ,index-var)) + ,@body))))) diff --git a/src/code/fop.lisp b/src/code/fop.lisp index 58928469b..2053ecf5c 100644 --- a/src/code/fop.lisp +++ b/src/code/fop.lisp @@ -172,12 +172,15 @@ (define-cloned-fops (fop-struct 48 (layout)) (fop-small-struct 49) (let* ((size (clone-arg)) (res (%make-instance size)) ; number of words excluding header - (n-data-words (1- size))) ; ... and excluding layout + ;; Compute count of elements to pop from stack, sans layout. + ;; If instance-data-start is 0, then size is the count, + ;; otherwise subtract 1 because the layout consumes a slot. + (n-data-words (- size sb!vm:instance-data-start))) (declare (type index size)) (with-fop-stack (stack ptr n-data-words) (let ((ptr (+ ptr n-data-words))) (declare (type index ptr)) - (setf (%instance-ref res 0) layout) + (setf (%instance-layout res) layout) #!-interleaved-raw-slots (let* ((nuntagged (layout-n-untagged-slots layout)) (ntagged (- size nuntagged))) @@ -190,7 +193,7 @@ (fop-stack-ref (decf ptr))))) #!+interleaved-raw-slots (let ((metadata (layout-untagged-bitmap layout))) - (do ((i 1 (1+ i))) + (do ((i sb!vm:instance-data-start (1+ i))) ((>= i size)) (declare (type index i)) (let ((val (fop-stack-ref (decf ptr)))) diff --git a/src/code/pred.lisp b/src/code/pred.lisp index 31a4712b3..ed7394da7 100644 --- a/src/code/pred.lisp +++ b/src/code/pred.lisp @@ -372,18 +372,19 @@ length and have identical components. Other arrays must be EQ to be EQUAL." #!+interleaved-raw-slots (let ((metadata (layout-untagged-bitmap layout-x))) (if (zerop metadata) - (loop for i of-type index from 1 + (loop for i of-type index from sb!vm:instance-data-start below (layout-length layout-x) always (slot-ref-equalp)) (let ((comparators (layout-equalp-tests layout-x))) (unless (= (length comparators) - (1- (layout-length layout-x))) + (- (layout-length layout-x) sb!vm:instance-data-start)) (bug "EQUALP got incomplete instance layout")) ;; See remark at the source code for %TARGET-DEFSTRUCT ;; explaining how to use the vector of comparators. - (loop for i of-type index from 1 + (loop for i of-type index from sb!vm:instance-data-start below (layout-length layout-x) - for test = (data-vector-ref comparators (1- i)) + for test = (data-vector-ref + comparators (- i sb!vm:instance-data-start)) always (cond ((eql test 0) (slot-ref-equalp)) ((functionp test) (funcall test i x y)) diff --git a/src/code/room.lisp b/src/code/room.lisp index 1c5c2370e..6c4b18dd6 100644 --- a/src/code/room.lisp +++ b/src/code/room.lisp @@ -549,7 +549,7 @@ (declare (optimize (speed 3))) (when (eql type instance-header-widetag) (incf total-objects) - (let* ((classoid (layout-classoid (%instance-ref obj 0))) + (let* ((classoid (layout-classoid (%instance-layout obj))) (found (gethash classoid totals)) (size size)) (declare (fixnum size)) @@ -745,10 +745,11 @@ (eq (cdr obj) object)) (maybe-call fun obj))) (instance - (do-instance-tagged-slot (i obj) - (when (eq (%instance-ref obj i) object) - (maybe-call fun obj) - (return)))) + (when (or (eq (%instance-layout obj) object) + (do-instance-tagged-slot (i obj) + (when (eq (%instance-ref obj i) object) + (return t)))) + (maybe-call fun obj))) (code-component (let ((length (get-header-data obj))) (do ((i code-constants-offset (1+ i))) diff --git a/src/code/sharpm.lisp b/src/code/sharpm.lisp index d4145e748..fa5862309 100644 --- a/src/code/sharpm.lisp +++ b/src/code/sharpm.lisp @@ -266,8 +266,8 @@ (unless (eq old new) (setf (aref data i) new)))))) ((typep tree 'instance) - ;; We don't grovel slot index 0, the layout. - (do-instance-tagged-slot (i tree :start 1) + ;; We don't grovel the layout. + (do-instance-tagged-slot (i tree) (let* ((old (%instance-ref tree i)) (new (circle-subst old-new-alist old))) (unless (eq old new) diff --git a/src/code/target-defstruct.lisp b/src/code/target-defstruct.lisp index c65ec4f1c..3366e43ab 100644 --- a/src/code/target-defstruct.lisp +++ b/src/code/target-defstruct.lisp @@ -142,7 +142,7 @@ (layout (classoid-layout classoid))) (declare (ignorable layout)) #!+interleaved-raw-slots - ;; Make a vector of EQUALP slots comparators, indexed by (1- word-index). + ;; Make a vector of EQUALP slots comparators, indexed by (- word-index data-start). ;; This has to be assigned to something regardless of whether there are ;; raw slots just in case someone mutates a layout which had raw ;; slots into one which does not - although that would probably crash @@ -161,11 +161,15 @@ ;; - internal padding words which are truly ignored. ;; Other words are compared as tagged if the comparator is 0, ;; or as untagged if the comparator is a type-specific function. - (let ((comparators (make-array (1- (dd-length dd)) - :initial-element nil))) + (let ((comparators + ;; If data-start is 1, subtract 1 because we don't need + ;; a comparator for the LAYOUT slot. + (make-array (- (dd-length dd) sb!vm:instance-data-start) + :initial-element nil))) (dolist (slot (dd-slots dd) comparators) ;; -1 because LAYOUT (slot index 0) has no comparator stored. - (setf (aref comparators (1- (dsd-index slot))) + (setf (aref comparators + (- (dsd-index slot) sb!vm:instance-data-start)) (let ((raw-type (dsd-raw-type slot))) (if (eq raw-type t) 0 ; means recurse using EQUALP @@ -202,12 +206,16 @@ (%raw-instance-ref/word structure i)))) #!+interleaved-raw-slots (let ((metadata (layout-untagged-bitmap layout))) + ;; Don't assume that %INSTANCE-REF can access the layout. + (setf (%instance-layout res) (%instance-layout structure)) ;; With interleaved slots, the only difference between %instance-ref ;; and %raw-instance-ref/word is the storage class of the VOP operands. ;; Since x86(-64) doesn't partition the register set, the bitmap test ;; could be skipped if we wanted to copy everything as raw. (macrolet ((copy-loop (raw-p &optional step) - `(dotimes (i (layout-length layout)) + `(do ((i sb!vm:instance-data-start (1+ i))) + ((>= i len)) + (declare (index i)) (if ,raw-p (setf (%raw-instance-ref/word res i) (%raw-instance-ref/word structure i)) @@ -215,8 +223,7 @@ (%instance-ref structure i))) ,step))) (cond ((zerop metadata) ; no untagged slots. - (dotimes (i len) - (setf (%instance-ref res i) (%instance-ref structure i)))) + (copy-loop nil)) ;; The fixnum case uses fixnum operations for ODPP and ASH. ((fixnump metadata) ; shift and mask is faster than logbitp (copy-loop (oddp metadata) (setq metadata (ash metadata -1)))) diff --git a/src/code/target-sxhash.lisp b/src/code/target-sxhash.lisp index be00889bc..fccc909aa 100644 --- a/src/code/target-sxhash.lisp +++ b/src/code/target-sxhash.lisp @@ -350,9 +350,8 @@ (when (plusp depthoid) (let ((max-iterations depthoid) (depthoid (1- depthoid))) - ;; skipping slot #0, which is for LAYOUT - (do-instance-tagged-slot (i key :layout layout - :start 1 :end (layout-length layout)) + ;; We don't mix in LAYOUT here because it was already done above. + (do-instance-tagged-slot (i key :layout layout :exclude-padding t) (mixf result (psxhash (%instance-ref key i) depthoid)) (if (zerop (decf max-iterations)) (return))))) ;; [The following comment blurs some issues: indeed it would take diff --git a/src/compiler/dump.lisp b/src/compiler/dump.lisp index f17015a4d..90ea96870 100644 --- a/src/compiler/dump.lisp +++ b/src/compiler/dump.lisp @@ -1276,16 +1276,17 @@ (error "attempt to dump invalid structure:~% ~S~%How did this happen?" struct)) (note-potential-circularity struct file) - (aver (%instance-ref struct 0)) (do* ((length (%instance-length struct)) + (layout (%instance-layout struct)) #!-interleaved-raw-slots - (ntagged (- length (layout-n-untagged-slots (%instance-ref struct 0)))) + (ntagged (- length (layout-n-untagged-slots layout))) #!+interleaved-raw-slots - (bitmap (layout-untagged-bitmap (%instance-ref struct 0))) + (bitmap (layout-untagged-bitmap layout)) (circ (fasl-output-circularity-table file)) ;; last slot first on the stack, so that the layout is on top: (index (1- length) (1- index))) - ((minusp index) + ((< index sb!vm:instance-data-start) + (dump-non-immediate-object layout file) (dump-fop* length fop-small-struct fop-struct file)) (let* ((obj #!-interleaved-raw-slots (if (>= index ntagged) @@ -1296,23 +1297,23 @@ (%raw-instance-ref/word struct index) (%instance-ref struct index))) (ref (gethash obj circ))) - (cond (ref - (aver (not (zerop index))) - (push (make-circularity :type :struct-set - :object struct - :index index - :value obj - :enclosing-object ref) - *circularities-detected*) - (sub-dump-object nil file)) - (t - (sub-dump-object obj file)))))) + (sub-dump-object (cond (ref + (push (make-circularity :type :struct-set + :object struct + :index index + :value obj + :enclosing-object ref) + *circularities-detected*) + nil) + (t obj)) + file)))) (defun dump-layout (obj file) (when (layout-invalid obj) (compiler-error "attempt to dump reference to obsolete class: ~S" (layout-classoid obj))) (let ((name (classoid-name (layout-classoid obj)))) + ;; Q: Shouldn't we aver that NAME is the proper name for its classoid? (unless name (compiler-error "dumping anonymous layout: ~S" obj)) (dump-object name file)) diff --git a/src/compiler/fopcompile.lisp b/src/compiler/fopcompile.lisp index 72d0604e5..3faea508f 100644 --- a/src/compiler/fopcompile.lisp +++ b/src/compiler/fopcompile.lisp @@ -253,6 +253,8 @@ ;; it we bind *dump-only-valid-structures* to ;; NIL. (fasl-validate-structure value *compile-object*) + ;; The above FIXME notwithstanding, + ;; there's never a need to grovel a layout. (do-instance-tagged-slot (i value) (grovel (%instance-ref value i)))) (:ignore-it) diff --git a/src/compiler/generic/early-vm.lisp b/src/compiler/generic/early-vm.lisp index ca48a3ee7..05b015905 100644 --- a/src/compiler/generic/early-vm.lisp +++ b/src/compiler/generic/early-vm.lisp @@ -88,3 +88,8 @@ (when (< start other-start) (setf stop (min stop other-start)))) stop)) + +;; The lowest index that you can pass to %INSTANCE-REF accessing +;; a slot of data that is not the instance-layout. +;; To get a layout, you must call %INSTANCE-LAYOUT - don't assume index 0. +(def!constant instance-data-start 1) diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp index 24fb9df12..ead2a730d 100644 --- a/src/compiler/generic/genesis.lisp +++ b/src/compiler/generic/genesis.lisp @@ -1034,12 +1034,19 @@ core and return a descriptor to it." result)) +;; This is called to backpatch two small sets of objects: +;; - layouts which are made before layout-of-layout is made (4 of them) +;; - packages, which are made before layout-of-package is made (all of them) +(defun patch-instance-layout (thing layout) + ;; Layout pointer is in the word following the header + (write-wordindexed thing sb!vm:instance-slots-offset layout)) + (defun initialize-layouts () (clrhash *cold-layouts*) ;; This assertion is due to the fact that MAKE-COLD-LAYOUT does not ;; know how to set any raw slots. (aver (= 0 (layout-raw-slot-metadata (find-layout 'layout)))) - (setq *layout-layout* *nil-descriptor*) + (setq *layout-layout* (make-fixnum-descriptor 0)) (flet ((chill-layout (name &rest inherits) ;; Check that the number of specified INHERITS matches ;; the length of the layout's inherits in the cross-compiler. @@ -1058,8 +1065,7 @@ core and return a descriptor to it." (setf *layout-layout* (chill-layout 'layout t-layout s-o-layout s!o-layout)) (dolist (layout (list t-layout s-o-layout s!o-layout *layout-layout*)) - (write-wordindexed layout sb!vm:instance-slots-offset - *layout-layout*)) + (patch-instance-layout layout *layout-layout*)) (setf *package-layout* (chill-layout 'package ; *NOT* SB!XC:PACKAGE, or you lose t-layout s-o-layout s!o-layout))))) @@ -1088,8 +1094,7 @@ core and return a descriptor to it." (init-cold-package (name &optional docstring) (let ((cold-package (car (gethash name *cold-package-symbols*)))) ;; patch in the layout - (write-wordindexed cold-package sb!vm:instance-slots-offset - *package-layout*) + (patch-instance-layout cold-package *package-layout*) ;; Initialize string slots (set-slot cold-package '%name (base-string-to-core name)) (set-slot cold-package '%nicknames (chill-nicknames name)) diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index d3b046182..e3db9ea45 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -78,6 +78,8 @@ index offset)) ;;; The layout is stored in slot 0. +;;; *** These next two transforms should be the only code, aside from +;;; some parts of the C runtime, with knowledge of the layout index. (define-source-transform %instance-layout (x) `(truly-the layout (%instance-ref ,x 0))) (define-source-transform %set-instance-layout (x val) diff --git a/src/compiler/ir1tran.lisp b/src/compiler/ir1tran.lisp index 3e5d5d14b..1341e0181 100644 --- a/src/compiler/ir1tran.lisp +++ b/src/compiler/ir1tran.lisp @@ -357,28 +357,24 @@ ;; functions don't work on general instances, only on ;; STRUCTURE!OBJECTs. ;; + ;; Behold the wonderfully clear sense of this- + ;; WHEN (EMIT-MAKE-LOAD-FORM VALUE) + ;; meaning "when you're _NOT_ using a custom load-form" + ;; ;; FIXME: What about funcallable instances with ;; user-defined MAKE-LOAD-FORM methods? (when (emit-make-load-form value) - ;; "#+sb-xc-host 0" is actually necessary. - ;; DO-INSTANCE-TAGGED-SLOT does not work here - ;; due to a build order issue. - #!+interleaved-raw-slots - (let ((bitmap #+sb-xc-host 0 - #-sb-xc-host (layout-untagged-bitmap - (%instance-ref value 0)))) - (dotimes (i (%instance-length value)) - (unless (logbitp i bitmap) - (grovel (%instance-ref value i))))) - #!-interleaved-raw-slots - (dotimes (i (- (%instance-length value) - #+sb-xc-host 0 - #-sb-xc-host (layout-n-untagged-slots - (%instance-ref value 0)))) + #+sb-xc-host + (aver (zerop (layout-raw-slot-metadata + (%instance-layout value)))) + (do-instance-tagged-slot (i value) (grovel (%instance-ref value i))))) + ;; The cross-compiler can dump certain instances that are not + ;; subtypes of STRUCTURE!OBJECT, as long as it has processed + ;; the defstruct. #+sb-xc-host ((satisfies sb!kernel::xc-dumpable-structure-instance-p) - (dotimes (i (%instance-length value)) + (do-instance-tagged-slot (i value) (grovel (%instance-ref value i)))) (t (compiler-error diff --git a/src/runtime/gc-common.c b/src/runtime/gc-common.c index 235588d8e..c01176d63 100644 --- a/src/runtime/gc-common.c +++ b/src/runtime/gc-common.c @@ -494,13 +494,41 @@ trans_fun_header(lispobj object) * instances */ +static lispobj +trans_instance(lispobj object) +{ + lispobj header; + uword_t length; + + gc_assert(is_lisp_pointer(object)); + + header = *((lispobj *) native_pointer(object)); + length = instance_length(header) + 1; + length = CEILING(length, 2); + + return copy_object(object, length); +} + +static sword_t +size_instance(lispobj *where) +{ + lispobj header; + uword_t length; + + header = *where; + length = instance_length(header) + 1; + length = CEILING(length, 2); + + return length; +} + static sword_t scav_instance_pointer(lispobj *where, lispobj object) { lispobj copy, *first_pointer; /* Object is a pointer into from space - not a FP. */ - copy = trans_boxed(object); + copy = trans_instance(object); #ifdef LISP_FEATURE_GENCGC gc_assert(copy != object); @@ -735,19 +763,19 @@ instance_scan_interleaved(void (*proc)(), #endif static sword_t -scav_instance(lispobj *where, lispobj object) +scav_instance(lispobj *where, lispobj header) { - sword_t ntotal = HeaderValue(object); - lispobj layout = ((struct instance *)where)->slots[0]; + sword_t ntotal = instance_length(header); + lispobj* layout = (lispobj*)instance_layout(where); if (!layout) return 1; - layout = (lispobj)native_pointer(layout); - if (forwarding_pointer_p((lispobj*)layout)) - layout = (lispobj)native_pointer((lispobj)forwarding_pointer_value((lispobj*)layout)); + layout = native_pointer((lispobj)layout); + if (forwarding_pointer_p(layout)) + layout = native_pointer((lispobj)forwarding_pointer_value(layout)); #ifdef LISP_FEATURE_INTERLEAVED_RAW_SLOTS - instance_scan_interleaved(scavenge, where, ntotal, (lispobj*)layout); + instance_scan_interleaved(scavenge, where, ntotal, layout); #else lispobj nuntagged = ((struct layout*)layout)->n_untagged_slots; scavenge(where + 1, ntotal - fixnum_value(nuntagged)); @@ -2305,7 +2333,7 @@ gc_init_tables(void) transother[UNBOUND_MARKER_WIDETAG] = trans_immediate; transother[NO_TLS_VALUE_MARKER_WIDETAG] = trans_immediate; transother[WEAK_POINTER_WIDETAG] = trans_weak_pointer; - transother[INSTANCE_HEADER_WIDETAG] = trans_boxed; + transother[INSTANCE_HEADER_WIDETAG] = trans_instance; transother[FDEFN_WIDETAG] = trans_boxed; /* size table, initialized the same way as scavtab */ @@ -2451,7 +2479,7 @@ gc_init_tables(void) sizetab[UNBOUND_MARKER_WIDETAG] = size_immediate; sizetab[NO_TLS_VALUE_MARKER_WIDETAG] = size_immediate; sizetab[WEAK_POINTER_WIDETAG] = size_weak_pointer; - sizetab[INSTANCE_HEADER_WIDETAG] = size_boxed; + sizetab[INSTANCE_HEADER_WIDETAG] = size_instance; sizetab[FDEFN_WIDETAG] = size_boxed; } @@ -3208,7 +3236,7 @@ struct vector * instance_classoid_name(lispobj * instance) { if (forwarding_pointer_p(instance)) instance = native_pointer((lispobj)forwarding_pointer_value(instance)); - lispobj layout = ((struct instance*)instance)->slots[0]; + lispobj layout = instance_layout(instance); return lowtag_of(layout) != INSTANCE_POINTER_LOWTAG ? NULL : layout_classoid_name(native_pointer(layout)); } diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index b68cbc039..0f0aa6d89 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -3193,8 +3193,8 @@ verify_space(lispobj *start, size_t words) case INSTANCE_HEADER_WIDETAG: { - sword_t ntotal = HeaderValue(thing); - lispobj layout = ((struct instance *)start)->slots[0]; + sword_t ntotal = instance_length(thing); + lispobj layout = instance_layout(start); if (!layout) { count = 1; break; diff --git a/src/runtime/print.c b/src/runtime/print.c index 1f5fb61e1..7fcd88494 100644 --- a/src/runtime/print.c +++ b/src/runtime/print.c @@ -449,13 +449,13 @@ static void brief_struct(lispobj obj) classoid_name = instance_classoid_name((lispobj*)instance); if ( classoid_name ) { char * namestring = simple_base_stringize(classoid_name); - printf("#", - (unsigned long) instance->slots[0], namestring); + printf("#", + (void*)instance_layout((lispobj*)instance), namestring); if ( namestring != (char*)classoid_name->data ) free(namestring); } else { - printf("#", - (unsigned long) instance->slots[0]); + printf("#", + (void*)instance_layout((lispobj*)instance)); } } } @@ -491,10 +491,10 @@ static void print_struct(lispobj obj) if (!is_valid_lisp_addr((os_vm_address_t)instance)) { printf("(invalid address)"); } else { - lispobj layout_obj = ((struct instance *)native_pointer(obj))->slots[0]; + lispobj layout_obj = instance_layout(native_pointer(obj)); print_obj("type: ", layout_obj); struct layout * layout = (struct layout*)native_pointer(layout_obj); - for (i = 1; i < HeaderValue(instance->header); i++) { + for (i=INSTANCE_DATA_START; iheader); i++) { sprintf(buffer, "slot %d: ", i); if (layout==NULL || untagged_slot_p(layout, i)) { newline(NULL); diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 5a3d7ab51..9daca9ee9 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -260,6 +260,15 @@ HeaderValue(lispobj obj) return obj >> N_WIDETAG_BITS; } +static inline uword_t instance_length(lispobj header) +{ + return (header >> N_WIDETAG_BITS); +} +static inline lispobj instance_layout(lispobj* instance_ptr) // native ptr +{ + return instance_ptr[1]; // the word following the header is the layout +} + static inline struct cons * CONS(lispobj obj) { diff --git a/tests/raw-slots-interleaved.impure.lisp b/tests/raw-slots-interleaved.impure.lisp index 507b4de98..9f1c5a7f0 100644 --- a/tests/raw-slots-interleaved.impure.lisp +++ b/tests/raw-slots-interleaved.impure.lisp @@ -114,6 +114,7 @@ (with-test (:name :tagged-slot-iterator-macro) (setf (sb-kernel:%instance-ref *afoo* 10) 'magic) (let (l) + (push `(0 ,(sb-kernel:%instance-layout *afoo*)) l) (sb-kernel:do-instance-tagged-slot (i *afoo*) (push `(,i ,(sb-kernel:%instance-ref *afoo* i)) l)) (assert (oddp (sb-kernel:%instance-length *afoo*))) -- 2.11.4.GIT