From 98db51588a21d11a6564b0aadb90a412ef112a20 Mon Sep 17 00:00:00 2001
From: Nikodemus Siivola <nikodemus@random-state.net>
Date: Sat, 9 Jun 2007 18:31:37 +0000
Subject: [PATCH] 1.0.6.41: optimized bignum printing

 * Cache the power-vectors, the computation of which is the real
   bottleneck of bignum printing. So that we don't keep huge bignums
   forever, make GC gently scrub the cache.

 * Rename %OUTPUT-FIXNUM-IN-BASE to %OUTPUT-REASONABLE-INTEGER-IN-BASE
   and %OUTPUT-BIGNUM-IN-BASE to %OUTPUT-HUGE-INTEGER-IN-BASE.

 * The ideal cutoff point between the two algorithms isn't the
   fixnum/bignum divide, but is (on x86/Darwin) around 87 bits -- so
   make the cutoff point N-POSITIVE-FIXNUM-BITS * 3, and hope that
   makes sense on other platforms as well.

 This improves (on x86/Darwin) bignum printing speed in the reasonable
 range by 40%, and by 30% while below 2048 bits. The benefit decreases
 after that, as the GC drops bignums with over 2048 bits from the
 cache -- this doesn't show in a tight benchmarking loop, though.
---
 NEWS                        |   2 +
 package-data-list.lisp-expr |   1 +
 src/code/gc.lisp            |   3 ++
 src/code/print.lisp         | 112 ++++++++++++++++++++++++++++++++++++--------
 version.lisp-expr           |   2 +-
 5 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/NEWS b/NEWS
index b50e994f5..fa541ee89 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,8 @@ changes in sbcl-1.0.7 relative to sbcl-1.0.6:
     "a constant string".
   * enhancement: SB-POSIX now supports lockf(). (Thanks to Zach Beane.)  
   * enhancement: SB-POSIX now supports getcwd(). (Thanks to Tassilo Horn.)
+  * optimization: bignum printing speed has been improved by 20-40%
+    (depending on the bignum size.)
   * bug fix: WITH-MUTEX and WITH-RECURSIVE-LOCK are now interrupt safe
     on Linux.
   * bug fix: the cache used by the CLOS to store precomputed effective
diff --git a/package-data-list.lisp-expr b/package-data-list.lisp-expr
index 2b8f049fc..74ab81ded 100644
--- a/package-data-list.lisp-expr
+++ b/package-data-list.lisp-expr
@@ -1419,6 +1419,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "SCALE-DOUBLE-FLOAT"
                #!+long-float "SCALE-LONG-FLOAT"
                "SCALE-SINGLE-FLOAT"
+               "SCRUB-POWER-CACHE"
                "SEQUENCEP" "SEQUENCE-COUNT" "SEQUENCE-END"
                "SEQUENCE-OF-CHECKED-LENGTH-GIVEN-TYPE"
                "SET-ARRAY-HEADER" "SET-HEADER-DATA" "SHIFT-TOWARDS-END"
diff --git a/src/code/gc.lisp b/src/code/gc.lisp
index c0c7b022b..df569edfc 100644
--- a/src/code/gc.lisp
+++ b/src/code/gc.lisp
@@ -252,6 +252,9 @@ run in any thread.")
   ;; as having these cons more then we have space left leads to huge
   ;; badness.
   (scrub-control-stack)
+  ;; Power cache of the bignum printer: drops overly large bignums and
+  ;; removes duplicate entries.
+  (scrub-power-cache)
   ;; FIXME: CTYPE-OF-CACHE-CLEAR isn't thread-safe.
   #!-sb-thread
   (ctype-of-cache-clear))
diff --git a/src/code/print.lisp b/src/code/print.lisp
index ffc2f7216..e9b58d0a8 100644
--- a/src/code/print.lisp
+++ b/src/code/print.lisp
@@ -982,17 +982,17 @@
                 (2 #\b)
                 (8 #\o)
                 (16 #\x)
-                (t (%output-fixnum-in-base base 10 stream)
+                (t (%output-reasonable-integer-in-base base 10 stream)
                    #\r))
               stream))
 
-(defun %output-fixnum-in-base (n base stream)
+(defun %output-reasonable-integer-in-base (n base stream)
   (multiple-value-bind (q r)
       (truncate n base)
     ;; Recurse until you have all the digits pushed on
     ;; the stack.
     (unless (zerop q)
-      (%output-fixnum-in-base q base stream))
+      (%output-reasonable-integer-in-base q base stream))
     ;; Then as each recursive call unwinds, turn the
     ;; digit (in remainder) into a character and output
     ;; the character.
@@ -1000,21 +1000,89 @@
      (schar "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" r)
      stream)))
 
+;;; *POWER-CACHE* is an alist mapping bases to power-vectors. It is
+;;; filled and probed by POWERS-FOR-BASE. SCRUB-POWER-CACHE is called
+;;; always prior a GC to drop overly large bignums from the cache.
+;;;
+;;; It doesn't need a lock, but if you work on SCRUB-POWER-CACHE or
+;;; POWERS-FOR-BASE, see that you don't break the assumptions!
+(defvar *power-cache* nil)
+
+(defconstant +power-cache-integer-length-limit+ 2048)
+
+(defun scrub-power-cache ()
+  (let ((cache *power-cache*))
+    (dolist (cell cache)
+      (let ((powers (cdr cell)))
+        (declare (simple-vector powers))
+        (let ((too-big (position-if
+                        (lambda (x)
+                          (>= (integer-length x)
+                              +power-cache-integer-length-limit+))
+                        powers)))
+          (when too-big
+            (setf (cdr cell) (subseq powers 0 too-big))))))
+    ;; Since base 10 is overwhelmingly common, make sure it's at head.
+    ;; Try to keep other bases in a hopefully sensible order as well.
+    (if (eql 10 (caar cache))
+        (setf *power-cache* cache)
+        ;; If we modify the list destructively we need to copy it, otherwise
+        ;; an alist lookup in progress might be screwed.
+        (setf *power-cache* (sort (copy-list cache)
+                                  (lambda (a b)
+                                    (declare (fixnum a b))
+                                    (cond ((= 10 a) t)
+                                          ((= 10 b) nil)
+                                          ((= 16 a) t)
+                                          ((= 16 b) nil)
+                                          ((= 2 a) t)
+                                          ((= 2 b) nil)
+                                          (t (< a b))))
+                                  :key #'car)))))
+
+;;; Compute (and cache) a power vector for a BASE and LIMIT:
+;;; the vector holds integers for which
+;;;    (aref powers k) == (expt base (expt 2 k))
+;;; holds.
+(defun powers-for-base (base limit)
+  (flet ((compute-powers (from)
+           (let (powers)
+             (do ((p from (* p p)))
+                 ((> p limit)
+                  ;; We don't actually need this, but we also
+                  ;; prefer not to cons it up a second time...
+                  (push p powers))
+               (push p powers))
+             (nreverse powers))))
+    ;; Grab a local reference so that we won't stuff consed at the
+    ;; head by other threads -- or sorting by SCRUB-POWER-CACHE.
+    (let ((cache *power-cache*))
+      (let ((cell (assoc base cache)))
+        (if cell
+            (let* ((powers (cdr cell))
+                   (len (length powers))
+                   (max (svref powers (1- len))))
+              (if (> max limit)
+                  powers
+                  (let ((new
+                         (concatenate 'vector powers
+                                      (compute-powers (* max max)))))
+                    (setf (cdr cell) new)
+                    new)))
+            (let ((powers (coerce (compute-powers base) 'vector)))
+              ;; Add new base to head: SCRUB-POWER-CACHE will later
+              ;; put it to a better place.
+              (setf *power-cache* (acons base powers cache))
+              powers))))))
+
 ;; Algorithm by Harald Hanche-Olsen, sbcl-devel 2005-02-05
-(defun %output-bignum-in-base (n base stream)
+(defun %output-huge-integer-in-base (n base stream)
   (declare (type bignum n) (type fixnum base))
-  (let ((power (make-array 10 :adjustable t :fill-pointer 0)))
-    ;; Here there be the bottleneck for big bignums, in the (* p p).
-    ;; A special purpose SQUARE-BIGNUM might help a bit. See eg: Dan
-    ;; Zuras, "On Squaring and Multiplying Large Integers", ARITH-11:
-    ;; IEEE Symposium on Computer Arithmetic, 1993, pp. 260 to 271.
-    ;; Reprinted as "More on Multiplying and Squaring Large Integers",
-    ;; IEEE Transactions on Computers, volume 43, number 8, August
-    ;; 1994, pp. 899-908.
-    (do ((p base (* p p)))
-        ((> p n))
-      (vector-push-extend p power))
-    ;; (aref power k) == (expt base (expt 2 k))
+  ;; POWER is a vector for which the following holds:
+  ;;   (aref power k) == (expt base (expt 2 k))
+  (let* ((power (powers-for-base base n))
+         (k-start (or (position-if (lambda (x) (> x n)) power)
+                      (bug "power-vector too short"))))
     (labels ((bisect (n k exactp)
                (declare (fixnum k))
                ;; N is the number to bisect
@@ -1036,15 +1104,19 @@
                         ;; doesn't get any leading zeros.
                         (bisect q k exactp)
                         (bisect r k (or exactp (plusp q))))))))
-      (bisect n (fill-pointer power) nil))))
+      (bisect n k-start nil))))
 
 (defun %output-integer-in-base (integer base stream)
   (when (minusp integer)
     (write-char #\- stream)
     (setf integer (- integer)))
-  (if (fixnump integer)
-      (%output-fixnum-in-base integer base stream)
-      (%output-bignum-in-base integer base stream)))
+  ;; The ideal cutoff point between these two algorithms is almost
+  ;; certainly quite platform dependent: this gives 87 for 32 bit
+  ;; SBCL, which is about right at least for x86/Darwin.
+  (if (or (fixnump integer)
+          (< (integer-length integer) (* 3 sb!vm:n-positive-fixnum-bits)))
+      (%output-reasonable-integer-in-base integer base stream)
+      (%output-huge-integer-in-base integer base stream)))
 
 (defun output-integer (integer stream)
   (let ((base *print-base*))
diff --git a/version.lisp-expr b/version.lisp-expr
index fed4c4183..5a0873751 100644
--- a/version.lisp-expr
+++ b/version.lisp-expr
@@ -17,4 +17,4 @@
 ;;; checkins which aren't released. (And occasionally for internal
 ;;; versions, especially for internal versions off the main CVS
 ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".)
-"1.0.6.40"
+"1.0.6.41"
-- 
2.11.4.GIT