From e8bfd77b0c0d68465155dd7ebbfa4254a7e970b9 Mon Sep 17 00:00:00 2001 From: Douglas Katzman Date: Wed, 3 May 2017 16:38:25 -0400 Subject: [PATCH] Coalesce constant strings more aggressively maybe. Strings eligible to share data are those dumped into a fasl via FOP-{BASE,CHARACTER}-STRING, and symbol names. Whether the string is eligible for sharing is noted when allocated. The actual sharing is deferred until SAVE-LISP-AND-DIE since it requires an image-wide hashtable of strings. --- NEWS | 8 +++- make-target-2.sh | 1 + package-data-list.lisp-expr | 1 + src/code/fop.lisp | 10 +++-- src/code/string.lisp | 3 ++ src/code/symbol.lisp | 3 +- src/code/target-package.lisp | 5 ++- src/compiler/generic/genesis.lisp | 18 +++++---- src/compiler/target-disassem.lisp | 3 +- src/runtime/gc-common.c | 82 +++++++++++++++++++++++++++++++++++++++ src/runtime/gencgc.c | 14 +++++++ 11 files changed, 132 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index 3ad6a50ac..7875d05bc 100644 --- a/NEWS +++ b/NEWS @@ -9,7 +9,13 @@ changes relative to sbcl-1.3.17: from a tenured object or a thread stack by producing a proof as a sequence of pointers to follow. The file "tests/traceroot.test.sh" contains an example usage. - + * enhancement: if the alien symbol "gc_coalesce_string_literals" is + set to 1 prior to SAVE-LISP-AND-DIE, then similar string constants + loaded from different fasl files may be collapsed to one object. + For instance, two functions returning the literal string "HI" + might return EQ strings after collapsing, which may be undesired + in a particular use. The flag pertains to gencgc only. + changes in sbcl-1.3.17 relative to sbcl-1.3.16: * enhancement: memory overhead from the garbage collector's metadata is reduced on 64-bit architectures; no change for 32-bit. diff --git a/make-target-2.sh b/make-target-2.sh index 93497f0a7..cedc1baa6 100755 --- a/make-target-2.sh +++ b/make-target-2.sh @@ -49,6 +49,7 @@ if [ "$1" != --load ]; then fi echo //doing warm init - load and dump phase echo '(load "loader.lisp") (load-sbcl-file "make-target-2-load.lisp" nil) +(setf (extern-alien "gc_coalesce_string_literals" char) 1) (sb-ext:save-lisp-and-die "output/sbcl.core")' | \ ./src/runtime/sbcl \ --core output/cold-sbcl.core \ diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 8de0df537..59d6856e9 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1237,6 +1237,7 @@ possibly temporarily, because it might be used internally." ;; symbol-hacking idioms "GENSYMIFY" "GENSYMIFY*" "KEYWORDICATE" "SYMBOLICATE" "INTERNED-SYMBOL-P" "PACKAGE-SYMBOLICATE" + "LOGICALLY-READONLYIZE" ;; certainly doesn't belong in public extensions ;; FIXME: maybe belongs in %KERNEL with other typesystem stuff? diff --git a/src/code/fop.lisp b/src/code/fop.lisp index 3d8855ff9..1963c2082 100644 --- a/src/code/fop.lisp +++ b/src/code/fop.lisp @@ -420,15 +420,17 @@ ;;;; fops for loading arrays (!define-fop 100 (fop-base-string ((:operands length))) - (read-base-string-as-bytes (fasl-input-stream) - (make-string length :element-type 'base-char))) + (logically-readonlyize + (read-base-string-as-bytes (fasl-input-stream) + (make-string length :element-type 'base-char)))) ;; FIXME: can save space by UTF-8 encoding, or use 1 bit to indicate pure ASCII ;; in the fasl even though the result will be a non-base string. #!+sb-unicode (!define-fop 160 :not-host (fop-character-string ((:operands length))) - (read-string-as-unsigned-byte-32 (fasl-input-stream) - (make-string length))) + (logically-readonlyize + (read-string-as-unsigned-byte-32 (fasl-input-stream) + (make-string length)))) (!define-fop 92 (fop-vector ((:operands size))) (if (zerop size) diff --git a/src/code/string.lisp b/src/code/string.lisp index 926f859da..915e26643 100644 --- a/src/code/string.lisp +++ b/src/code/string.lisp @@ -577,3 +577,6 @@ new string COUNT long filled with the fill character." (defun string-trim (char-bag string) (generic-string-trim char-bag string t t)) + +(defun logically-readonlyize (string) + (set-header-data (the string string) 1)) diff --git a/src/code/symbol.lisp b/src/code/symbol.lisp index 58316cba2..806f52a00 100644 --- a/src/code/symbol.lisp +++ b/src/code/symbol.lisp @@ -332,7 +332,8 @@ distinct from the global value. Can also be SETF." (defun make-symbol (string) "Make and return a new symbol with the STRING as its print name." (declare (type string string)) - (%make-symbol 0 (if (simple-string-p string) string (subseq string 0)))) + (%make-symbol 0 (logically-readonlyize + (if (simple-string-p string) string (subseq string 0))))) ;;; All symbols go into immobile space if #!+immobile-symbols is enabled, ;;; but not if disabled. The win with immobile space that is that all symbols diff --git a/src/code/target-package.lisp b/src/code/target-package.lisp index bb9f2939b..ce141ac38 100644 --- a/src/code/target-package.lisp +++ b/src/code/target-package.lisp @@ -964,8 +964,9 @@ implementation it is ~S." *!default-package-use-list*) (if where (values symbol where) (let ((symbol-name - (replace (make-string length :element-type elt-type) - name))) + (logically-readonlyize + (replace (make-string length :element-type elt-type) + name)))) (with-single-package-locked-error (:package package "interning ~A" symbol-name) (let ((symbol ; Symbol kind: 1=keyword, 2=other interned diff --git a/src/compiler/generic/genesis.lisp b/src/compiler/generic/genesis.lisp index e339107bb..a5fea9909 100644 --- a/src/compiler/generic/genesis.lisp +++ b/src/compiler/generic/genesis.lisp @@ -559,7 +559,8 @@ (defun set-header-data (object data) (write-header-word object data (ldb (byte sb!vm:n-widetag-bits 0) - (read-bits-wordindexed object 0)))) + (read-bits-wordindexed object 0))) + object) ; return the object itself, like SB!KERNEL:SET-HEADER-DATA (defun get-header-data (object) (ash (read-bits-wordindexed object 0) (- sb!vm:n-widetag-bits))) @@ -1352,6 +1353,8 @@ core and return a descriptor to it." (defvar *cold-symbols*) (declaim (type hash-table *cold-symbols*)) +(defun set-readonly (string) (set-header-data string 1)) + (defun initialize-packages () (let ((package-data-list ;; docstrings are set in src/cold/warm. It would work to do it here, @@ -1373,8 +1376,9 @@ core and return a descriptor to it." (list* cold-package nil nil)) ;; Initialize string slots (write-slots cold-package package-layout - :%name (base-string-to-core - (target-package-name name)) + :%name (set-readonly + (base-string-to-core + (target-package-name name))) :%nicknames (chill-nicknames name) :doc-string (if docstring (base-string-to-core docstring) @@ -1608,7 +1612,7 @@ core and return a descriptor to it." ;; NIL's name is in dynamic space because any extra ;; bytes allocated in static space would need to ;; be accounted for by STATIC-SYMBOL-OFFSET. - (base-string-to-core "NIL" *dynamic*)) + (set-readonly (base-string-to-core "NIL" *dynamic*))) (setf (gethash (descriptor-bits result) *cold-symbols*) nil (get nil 'cold-intern-info) result))) @@ -2216,14 +2220,14 @@ core and return a descriptor to it." #!-sb-dynamic-core (dolist (symbol (sort (%hash-table-alist *cold-foreign-symbol-table*) #'string< :key #'car)) - (cold-push (cold-cons (base-string-to-core (car symbol)) + (cold-push (cold-cons (set-readonly (base-string-to-core (car symbol))) (number-to-core (cdr symbol))) result)) (cold-set '*!initial-foreign-symbols* result) #!+sb-dynamic-core (let ((runtime-linking-list *nil-descriptor*)) (dolist (symbol *dyncore-linkage-keys*) - (cold-push (cold-cons (base-string-to-core (car symbol)) + (cold-push (cold-cons (set-readonly (base-string-to-core (car symbol))) (cdr symbol)) runtime-linking-list)) (cold-set 'sb!vm::*required-runtime-c-symbols* @@ -2461,7 +2465,7 @@ core and return a descriptor to it." (define-cold-fop (fop-base-string (len)) (let ((string (make-string len))) (read-string-as-bytes (fasl-input-stream) string) - (base-string-to-core string))) + (set-readonly (base-string-to-core string)))) #!+sb-unicode (define-cold-fop (fop-character-string (len)) diff --git a/src/compiler/target-disassem.lisp b/src/compiler/target-disassem.lisp index eeaf3841c..969aff3da 100644 --- a/src/compiler/target-disassem.lisp +++ b/src/compiler/target-disassem.lisp @@ -800,7 +800,8 @@ (awhen (get symbol 'instruction-flavors) (setf (get symbol 'instruction-flavors) (collect-inst-variants - (string-upcase symbol) package it cache)))) + (logically-readonlyize (string-upcase symbol)) + package it cache)))) (apply 'format t "~&Disassembler: ~D printers, ~D prefilters, ~D labelers~%" (mapcar (lambda (x) (length (cdr x))) cache)))) diff --git a/src/runtime/gc-common.c b/src/runtime/gc-common.c index 4763427c5..7e76778ab 100644 --- a/src/runtime/gc-common.c +++ b/src/runtime/gc-common.c @@ -1938,3 +1938,85 @@ void gc_heapsort_uwords(heap array, int length) sift_down(array, 0, end); } } + +//// Coalescing of constant strings for SAVE-LISP-AND-DIE + +static void remap_string(lispobj* where, struct hopscotch_table* ht) +{ + lispobj obj = *where; + struct vector* s = (struct vector*)native_pointer(obj); + if (lowtag_of(obj) == OTHER_POINTER_LOWTAG && + (widetag_of(s->header) == SIMPLE_BASE_STRING_WIDETAG +#ifdef SIMPLE_CHARACTER_STRING_WIDETAG + || widetag_of(s->header) == SIMPLE_CHARACTER_STRING_WIDETAG +#endif + ) && HeaderValue(s->header) == 1) { /* readonly string indicator */ + int index = hopscotch_get(ht, (uword_t)s, 0); + if (!index) // Not found + hopscotch_insert(ht, (uword_t)s, 1); + else + *where = make_lispobj((void*)ht->keys[index-1], OTHER_POINTER_LOWTAG); + } +} + +static uword_t remap_strings(lispobj* where, lispobj* limit, uword_t arg) +{ + struct hopscotch_table* ht = (struct hopscotch_table*)arg; + lispobj layout, bitmap, *next; + sword_t nwords, i, j; + + for ( ; where < limit ; where = next ) { + lispobj header = *where; + if (is_cons_half(header)) { + remap_string(where+0, ht); + remap_string(where+1, ht); + next = where + 2; + } else { + int widetag = widetag_of(header); + nwords = sizetab[widetag](where); + next = where + nwords; + switch (widetag) { + case INSTANCE_WIDETAG: // mixed boxed/unboxed objects +#ifdef LISP_FEATURE_COMPACT_INSTANCE_HEADER + case FUNCALLABLE_INSTANCE_WIDETAG: +#endif + layout = instance_layout(where); + bitmap = ((struct layout*)native_pointer(layout))->bitmap; + for(i=1; ivalue, + (uword_t)&ht); + remap_strings((lispobj*)IMMOBILE_VARYOBJ_SUBSPACE_START, + (lispobj*)SYMBOL(IMMOBILE_SPACE_FREE_POINTER)->value, + (uword_t)&ht); +#endif + walk_generation(remap_strings, -1, (uword_t)&ht); + hopscotch_delete(&ht); +} diff --git a/src/runtime/gencgc.c b/src/runtime/gencgc.c index f4ca75ad0..ea5451efb 100644 --- a/src/runtime/gencgc.c +++ b/src/runtime/gencgc.c @@ -4581,6 +4581,7 @@ prepare_for_final_gc () } } +char gc_coalesce_string_literals = 0; /* Do a non-conservative GC, and then save a core with the initial * function being set to the value of the static symbol @@ -4613,6 +4614,19 @@ gc_and_save(char *filename, boolean prepend_runtime, gencgc_alloc_start_page = last_free_page; collect_garbage(HIGHEST_NORMAL_GENERATION+1); + if (gc_coalesce_string_literals) { + extern struct lisp_startup_options lisp_startup_options; + extern void coalesce_strings(); + boolean verbose = !lisp_startup_options.noinform; + if (verbose) { + printf("[coalescing similar strings... "); + fflush(stdout); + } + coalesce_strings(); + if (verbose) + printf("done]\n"); + } + prepare_for_final_gc(); gencgc_alloc_start_page = -1; collect_garbage(HIGHEST_NORMAL_GENERATION+1); -- 2.11.4.GIT