From 9cf8c8a976a93a63c5998435b16378f4543d3376 Mon Sep 17 00:00:00 2001 From: Douglas Katzman Date: Tue, 17 Oct 2017 08:09:04 -0400 Subject: [PATCH] Avoid unnecessary write to hash-table instances during gc. This was causing write faults during GC that should really not happen. It still happens for weak tables, which demand an additional fix. --- src/code/hash-table.lisp | 5 ----- src/code/target-hash-table.lisp | 39 +++++++++++++++++++++++++++++++++------ src/runtime/coreparse.c | 6 ++---- src/runtime/gc-common.c | 12 ++++++------ src/runtime/immobile-space.c | 6 ++---- 5 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/code/hash-table.lisp b/src/code/hash-table.lisp index 480280272..29bbffe3c 100644 --- a/src/code/hash-table.lisp +++ b/src/code/hash-table.lisp @@ -87,11 +87,6 @@ ;; Used for locking GETHASH/(SETF GETHASH)/REMHASH (lock (sb!thread:make-mutex :name "hash-table lock") :type sb!thread:mutex :read-only t) - ;; The GC will set this to T if it moves an EQ-based key. This used - ;; to be signaled by a bit in the header of the kv vector, but that - ;; implementation caused some concurrency issues when we stopped - ;; inhibiting GC during hash-table lookup. - (needs-rehash-p nil :type (member nil t)) ;; Has user requested synchronization? (synchronized-p nil :type (member nil t) :read-only t) ;; For detecting concurrent accesses. diff --git a/src/code/target-hash-table.lisp b/src/code/target-hash-table.lisp index 53e035b2f..3800f3b23 100644 --- a/src/code/target-hash-table.lisp +++ b/src/code/target-hash-table.lisp @@ -226,6 +226,22 @@ Examples: (defconstant +min-hash-table-size+ 16) (defconstant +min-hash-table-rehash-threshold+ (float 1/16 1.0)) +;; The GC will set this to 1 if it moves an EQ-based key. This used +;; to be signaled by a bit in the header of the kv vector, but that +;; implementation caused some concurrency issues when we stopped +;; inhibiting GC during hash-table lookup. +;; +;; This indicator properly belongs to the k/v vector for at least 2 reasons: +;; - if the vector is on an already-written page but the table is not, +;; it avoids a write fault when setting to true. This a boon to gencgc +;; - if there were lock-free tables - which presumably operate by atomically +;; changing out the vector for a new one - whether the vector is bucketized +;; correctly after GC is an aspect of the vector, not the table +;; +;; We could do it with a single bit by implementing vops for atomic +;; read/modify/write on the header. In C there's sync_or_and_fetch, etc. +(defmacro kv-vector-needs-rehash (vector) `(svref ,vector 1)) + (defun make-hash-table (&key (test 'eql) (size +min-hash-table-size+) @@ -370,6 +386,10 @@ Examples: (scaled-size (truncate (/ (float size+1) rehash-threshold))) (length (power-of-two-ceiling (max scaled-size (1+ +min-hash-table-size+)))) + ;; FIXME: this is completely insane for 64-bit. + ;; We can not possibly support hash-tables that need + ;; such large indices. It doesn't work. + ;; Reducing this to (unsigned-byte 32) would save memory. (index-vector (make-array length :element-type '(unsigned-byte #.sb!vm:n-word-bits) @@ -394,12 +414,14 @@ Examples: index-vector next-vector (unless (eq test 'eq) + ;; See FIXME at INDEX-VECTOR. Same concern. (make-array size+1 :element-type '(unsigned-byte #.sb!vm:n-word-bits) :initial-element +magic-hash-vector-value+)) synchronized))) (declare (type index size+1 scaled-size length)) + (setf (kv-vector-needs-rehash kv-vector) 0) ;; Set up the free list, all free. These lists are 0 terminated. (do ((i 1 (1+ i))) ((>= i size)) @@ -480,6 +502,12 @@ multiple threads accessing the same hash-table without locking." ;; Disable GC tricks on the OLD-KV-VECTOR. (set-header-data old-kv-vector sb!vm:vector-normal-subtype) + ;; GC must never observe a value other than 0 or 1 in the 1st element + ;; of a vector marked as valid-hashing. The vector is initially filled + ;; with the unbound-marker, so rectify that. GC is inhibited (asserted + ;; on entry), so the store order here isn't terribly important. + (setf (kv-vector-needs-rehash new-kv-vector) 0) + ;; Non-empty weak hash tables always need GC support. (when (and (hash-table-weak-p table) (plusp (hash-table-count table))) (set-header-data new-kv-vector sb!vm:vector-valid-hashing-subtype)) @@ -544,10 +572,9 @@ multiple threads accessing the same hash-table without locking." (setf (hash-table-hash-vector table) new-hash-vector) ;; Fill the old kv-vector with 0 to help the conservative GC. Even ;; if nothing else were zeroed, it's important to clear the - ;; special first cells in old-kv-vector. + ;; special first cell in old-kv-vector. (fill old-kv-vector 0) - (setf (hash-table-rehash-trigger table) new-size) - (setf (hash-table-needs-rehash-p table) nil)) + (setf (hash-table-rehash-trigger table) new-size)) (values)) ;;; Use the same size as before, re-using the vectors. @@ -604,10 +631,10 @@ multiple threads accessing the same hash-table without locking." (type hash hashing)) ;; Push this slot into the next chain. (setf (aref next-vector i) next) - (setf (aref index-vector index) i))))))) + (setf (aref index-vector index) i)))))) ;; Clear the rehash bit only at the very end, otherwise another thread ;; might see a partially rehashed table as a normal one. - (setf (hash-table-needs-rehash-p table) nil) + (setf (kv-vector-needs-rehash kv-vector) 0)) (values)) (declaim (inline maybe-rehash)) @@ -618,7 +645,7 @@ multiple threads accessing the same hash-table without locking." (and ensure-free-slot-p (zerop (hash-table-next-free-kv hash-table)))) (rehash-without-growing-p () - (hash-table-needs-rehash-p hash-table))) + (not (eql 0 (kv-vector-needs-rehash (hash-table-table hash-table)))))) (declare (inline rehash-p rehash-without-growing-p)) (cond ((rehash-p) ;; Use recursive locks since for weak tables the lock has diff --git a/src/runtime/coreparse.c b/src/runtime/coreparse.c index 5e4509b71..4b29814d0 100644 --- a/src/runtime/coreparse.c +++ b/src/runtime/coreparse.c @@ -477,10 +477,8 @@ static void relocate_space(uword_t start, lispobj* end, struct heap_adjust* adj) needs_rehash = 1; } } - if (needs_rehash) { - struct hash_table *ht = (struct hash_table*)native_pointer(v->data[0]); - ht->needs_rehash_p = T; - } + if (needs_rehash) + data[1] = make_fixnum(1); continue; } // All the array header widetags. diff --git a/src/runtime/gc-common.c b/src/runtime/gc-common.c index e0b1050c0..e60fd7c8d 100644 --- a/src/runtime/gc-common.c +++ b/src/runtime/gc-common.c @@ -1098,8 +1098,8 @@ void scav_hash_table_entries (struct hash_table *hash_table, * help reduce collisions. */ gc_assert(next_vector_length*2 == kv_length); - if (kv_vector[1] != UNBOUND_MARKER_WIDETAG) - lose("unexpected empty-hash-table-slot marker: %p\n", kv_vector[1]); + if (kv_vector[1] && kv_vector[1] != make_fixnum(1)) + lose("unexpected need-to-rehash: %"OBJ_FMTX, kv_vector[1]); /* Work through the KV vector. */ int (*alivep_test)(lispobj,lispobj) = alivep[fixnum_value(hash_table->_weakness)]; @@ -1113,7 +1113,7 @@ void scav_hash_table_entries (struct hash_table *hash_table, /* If an EQ-based key has moved, mark the hash-table for rehash */ \ if (kv_vector[2*i] != old_key && \ (!hash_vector || hash_vector[i] == MAGIC_HASH_VECTOR_VALUE)) \ - hash_table->needs_rehash_p = T; \ + kv_vector[1] = make_fixnum(1); \ }} if (alivep_test) SCAV_ENTRIES(alivep_test(old_key, value)) @@ -1160,8 +1160,8 @@ scav_vector (lispobj *where, lispobj header) /* Verify that vector element 1 is as expected. Don't bother scavenging it, since we lose() if it's not an immediate. */ - if (where[3] != UNBOUND_MARKER_WIDETAG) - lose("unexpected empty-hash-table-slot marker: %p\n", where[3]); + if (where[3] && where[3] != make_fixnum(1)) + lose("unexpected need-to-rehash: %"OBJ_FMTX, where[3]); /* Scavenge hash table, which will fix the positions of the other * needed objects. */ @@ -1169,7 +1169,7 @@ scav_vector (lispobj *where, lispobj header) /* Cross-check the kv_vector. */ if (where != native_pointer(hash_table->table)) { - lose("hash_table table!=this table %x\n", hash_table->table); + lose("hash_table table!=this table %"OBJ_FMTX, hash_table->table); } if (!hash_table->_weakness) { diff --git a/src/runtime/immobile-space.c b/src/runtime/immobile-space.c index 90605fdc9..0dc487017 100644 --- a/src/runtime/immobile-space.c +++ b/src/runtime/immobile-space.c @@ -1798,10 +1798,8 @@ static void fixup_space(lispobj* where, size_t n_words) needs_rehash = 1; } } - if (needs_rehash) { - struct hash_table *ht = (struct hash_table*)native_pointer(v->data[0]); - ht->needs_rehash_p = T; - } + if (needs_rehash) + data[1] = make_fixnum(1); break; } else { // FALLTHROUGH_INTENDED -- 2.11.4.GIT