From 98db51588a21d11a6564b0aadb90a412ef112a20 Mon Sep 17 00:00:00 2001 From: Nikodemus Siivola Date: Sat, 9 Jun 2007 18:31:37 +0000 Subject: [PATCH] 1.0.6.41: optimized bignum printing * Cache the power-vectors, the computation of which is the real bottleneck of bignum printing. So that we don't keep huge bignums forever, make GC gently scrub the cache. * Rename %OUTPUT-FIXNUM-IN-BASE to %OUTPUT-REASONABLE-INTEGER-IN-BASE and %OUTPUT-BIGNUM-IN-BASE to %OUTPUT-HUGE-INTEGER-IN-BASE. * The ideal cutoff point between the two algorithms isn't the fixnum/bignum divide, but is (on x86/Darwin) around 87 bits -- so make the cutoff point N-POSITIVE-FIXNUM-BITS * 3, and hope that makes sense on other platforms as well. This improves (on x86/Darwin) bignum printing speed in the reasonable range by 40%, and by 30% while below 2048 bits. The benefit decreases after that, as the GC drops bignums with over 2048 bits from the cache -- this doesn't show in a tight benchmarking loop, though. --- NEWS | 2 + package-data-list.lisp-expr | 1 + src/code/gc.lisp | 3 ++ src/code/print.lisp | 112 ++++++++++++++++++++++++++++++++++++-------- version.lisp-expr | 2 +- 5 files changed, 99 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index b50e994f5..fa541ee89 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,8 @@ changes in sbcl-1.0.7 relative to sbcl-1.0.6: "a constant string". * enhancement: SB-POSIX now supports lockf(). (Thanks to Zach Beane.) * enhancement: SB-POSIX now supports getcwd(). (Thanks to Tassilo Horn.) + * optimization: bignum printing speed has been improved by 20-40% + (depending on the bignum size.) * bug fix: WITH-MUTEX and WITH-RECURSIVE-LOCK are now interrupt safe on Linux. * bug fix: the cache used by the CLOS to store precomputed effective diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr index 2b8f049fc..74ab81ded 100644 --- a/package-data-list.lisp-expr +++ b/package-data-list.lisp-expr @@ -1419,6 +1419,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries." "SCALE-DOUBLE-FLOAT" #!+long-float "SCALE-LONG-FLOAT" "SCALE-SINGLE-FLOAT" + "SCRUB-POWER-CACHE" "SEQUENCEP" "SEQUENCE-COUNT" "SEQUENCE-END" "SEQUENCE-OF-CHECKED-LENGTH-GIVEN-TYPE" "SET-ARRAY-HEADER" "SET-HEADER-DATA" "SHIFT-TOWARDS-END" diff --git a/src/code/gc.lisp b/src/code/gc.lisp index c0c7b022b..df569edfc 100644 --- a/src/code/gc.lisp +++ b/src/code/gc.lisp @@ -252,6 +252,9 @@ run in any thread.") ;; as having these cons more then we have space left leads to huge ;; badness. (scrub-control-stack) + ;; Power cache of the bignum printer: drops overly large bignums and + ;; removes duplicate entries. + (scrub-power-cache) ;; FIXME: CTYPE-OF-CACHE-CLEAR isn't thread-safe. #!-sb-thread (ctype-of-cache-clear)) diff --git a/src/code/print.lisp b/src/code/print.lisp index ffc2f7216..e9b58d0a8 100644 --- a/src/code/print.lisp +++ b/src/code/print.lisp @@ -982,17 +982,17 @@ (2 #\b) (8 #\o) (16 #\x) - (t (%output-fixnum-in-base base 10 stream) + (t (%output-reasonable-integer-in-base base 10 stream) #\r)) stream)) -(defun %output-fixnum-in-base (n base stream) +(defun %output-reasonable-integer-in-base (n base stream) (multiple-value-bind (q r) (truncate n base) ;; Recurse until you have all the digits pushed on ;; the stack. (unless (zerop q) - (%output-fixnum-in-base q base stream)) + (%output-reasonable-integer-in-base q base stream)) ;; Then as each recursive call unwinds, turn the ;; digit (in remainder) into a character and output ;; the character. @@ -1000,21 +1000,89 @@ (schar "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" r) stream))) +;;; *POWER-CACHE* is an alist mapping bases to power-vectors. It is +;;; filled and probed by POWERS-FOR-BASE. SCRUB-POWER-CACHE is called +;;; always prior a GC to drop overly large bignums from the cache. +;;; +;;; It doesn't need a lock, but if you work on SCRUB-POWER-CACHE or +;;; POWERS-FOR-BASE, see that you don't break the assumptions! +(defvar *power-cache* nil) + +(defconstant +power-cache-integer-length-limit+ 2048) + +(defun scrub-power-cache () + (let ((cache *power-cache*)) + (dolist (cell cache) + (let ((powers (cdr cell))) + (declare (simple-vector powers)) + (let ((too-big (position-if + (lambda (x) + (>= (integer-length x) + +power-cache-integer-length-limit+)) + powers))) + (when too-big + (setf (cdr cell) (subseq powers 0 too-big)))))) + ;; Since base 10 is overwhelmingly common, make sure it's at head. + ;; Try to keep other bases in a hopefully sensible order as well. + (if (eql 10 (caar cache)) + (setf *power-cache* cache) + ;; If we modify the list destructively we need to copy it, otherwise + ;; an alist lookup in progress might be screwed. + (setf *power-cache* (sort (copy-list cache) + (lambda (a b) + (declare (fixnum a b)) + (cond ((= 10 a) t) + ((= 10 b) nil) + ((= 16 a) t) + ((= 16 b) nil) + ((= 2 a) t) + ((= 2 b) nil) + (t (< a b)))) + :key #'car))))) + +;;; Compute (and cache) a power vector for a BASE and LIMIT: +;;; the vector holds integers for which +;;; (aref powers k) == (expt base (expt 2 k)) +;;; holds. +(defun powers-for-base (base limit) + (flet ((compute-powers (from) + (let (powers) + (do ((p from (* p p))) + ((> p limit) + ;; We don't actually need this, but we also + ;; prefer not to cons it up a second time... + (push p powers)) + (push p powers)) + (nreverse powers)))) + ;; Grab a local reference so that we won't stuff consed at the + ;; head by other threads -- or sorting by SCRUB-POWER-CACHE. + (let ((cache *power-cache*)) + (let ((cell (assoc base cache))) + (if cell + (let* ((powers (cdr cell)) + (len (length powers)) + (max (svref powers (1- len)))) + (if (> max limit) + powers + (let ((new + (concatenate 'vector powers + (compute-powers (* max max))))) + (setf (cdr cell) new) + new))) + (let ((powers (coerce (compute-powers base) 'vector))) + ;; Add new base to head: SCRUB-POWER-CACHE will later + ;; put it to a better place. + (setf *power-cache* (acons base powers cache)) + powers)))))) + ;; Algorithm by Harald Hanche-Olsen, sbcl-devel 2005-02-05 -(defun %output-bignum-in-base (n base stream) +(defun %output-huge-integer-in-base (n base stream) (declare (type bignum n) (type fixnum base)) - (let ((power (make-array 10 :adjustable t :fill-pointer 0))) - ;; Here there be the bottleneck for big bignums, in the (* p p). - ;; A special purpose SQUARE-BIGNUM might help a bit. See eg: Dan - ;; Zuras, "On Squaring and Multiplying Large Integers", ARITH-11: - ;; IEEE Symposium on Computer Arithmetic, 1993, pp. 260 to 271. - ;; Reprinted as "More on Multiplying and Squaring Large Integers", - ;; IEEE Transactions on Computers, volume 43, number 8, August - ;; 1994, pp. 899-908. - (do ((p base (* p p))) - ((> p n)) - (vector-push-extend p power)) - ;; (aref power k) == (expt base (expt 2 k)) + ;; POWER is a vector for which the following holds: + ;; (aref power k) == (expt base (expt 2 k)) + (let* ((power (powers-for-base base n)) + (k-start (or (position-if (lambda (x) (> x n)) power) + (bug "power-vector too short")))) (labels ((bisect (n k exactp) (declare (fixnum k)) ;; N is the number to bisect @@ -1036,15 +1104,19 @@ ;; doesn't get any leading zeros. (bisect q k exactp) (bisect r k (or exactp (plusp q)))))))) - (bisect n (fill-pointer power) nil)))) + (bisect n k-start nil)))) (defun %output-integer-in-base (integer base stream) (when (minusp integer) (write-char #\- stream) (setf integer (- integer))) - (if (fixnump integer) - (%output-fixnum-in-base integer base stream) - (%output-bignum-in-base integer base stream))) + ;; The ideal cutoff point between these two algorithms is almost + ;; certainly quite platform dependent: this gives 87 for 32 bit + ;; SBCL, which is about right at least for x86/Darwin. + (if (or (fixnump integer) + (< (integer-length integer) (* 3 sb!vm:n-positive-fixnum-bits))) + (%output-reasonable-integer-in-base integer base stream) + (%output-huge-integer-in-base integer base stream))) (defun output-integer (integer stream) (let ((base *print-base*)) diff --git a/version.lisp-expr b/version.lisp-expr index fed4c4183..5a0873751 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.6.40" +"1.0.6.41" -- 2.11.4.GIT