From 94e0f68a627ce839d59e88b4c8faad486e75af91 Mon Sep 17 00:00:00 2001 From: Juho Snellman Date: Mon, 10 Dec 2007 05:35:10 +0000 Subject: [PATCH] 1.0.12.23: Optimize STRING-*-TRIM * Add deftransforms for STRING(-LEFT|-RIGHT|)-TRIM of simple strings. As a sleazy benchmark trick, also optimize for constant character bags. * Rewrite the function versions of the string trimmers for more code reuse. New versions also no longer cons up a new string when no trimming needs to be done. (Allowed in the spec, as pointed out by Attila Lendvai) * Add tests. --- src/code/string.lisp | 56 +++++++++++++++++++++++------------------------ src/compiler/seqtran.lisp | 40 +++++++++++++++++++++++++++++++++ tests/string.pure.lisp | 32 +++++++++++++++++++++++++++ version.lisp-expr | 2 +- 4 files changed, 101 insertions(+), 29 deletions(-) diff --git a/src/code/string.lisp b/src/code/string.lisp index 36bc060d4..dede9485c 100644 --- a/src/code/string.lisp +++ b/src/code/string.lisp @@ -404,36 +404,36 @@ new string COUNT long filled with the fill character." (%capitalize string start end)) ) ; FLET -(defun string-left-trim (char-bag string) +(defun generic-string-trim (char-bag string left-p right-p) (with-string string - (do ((index start (1+ index))) - ((or (= index (the fixnum end)) - (not (find (schar string index) char-bag :test #'char=))) - (subseq (the simple-string string) index end)) - (declare (fixnum index))))) + (let* ((left-end (if left-p + (do ((index start (1+ index))) + ((or (= index (the fixnum end)) + (not (find (schar string index) + char-bag + :test #'char=))) + index) + (declare (fixnum index))) + 0)) + (right-end (if right-p + (do ((index (1- (the fixnum end)) (1- index))) + ((or (< index left-end) + (not (find (schar string index) + char-bag + :test #'char=))) + (1+ index)) + (declare (fixnum index))) + (length string)))) + (if (and (eql left-end 0) + (eql right-end (length string))) + string + (subseq (the simple-string string) left-end right-end))))) + +(defun string-left-trim (char-bag string) + (generic-string-trim char-bag string t nil)) (defun string-right-trim (char-bag string) - (with-string string - (do ((index (1- (the fixnum end)) (1- index))) - ((or (< index start) - (not (find (schar string index) char-bag :test #'char=))) - (subseq (the simple-string string) start (1+ index))) - (declare (fixnum index))))) + (generic-string-trim char-bag string nil t)) (defun string-trim (char-bag string) - (with-string string - (let* ((left-end (do ((index start (1+ index))) - ((or (= index (the fixnum end)) - (not (find (schar string index) - char-bag - :test #'char=))) - index) - (declare (fixnum index)))) - (right-end (do ((index (1- (the fixnum end)) (1- index))) - ((or (< index left-end) - (not (find (schar string index) - char-bag - :test #'char=))) - (1+ index)) - (declare (fixnum index))))) - (subseq (the simple-string string) left-end right-end)))) + (generic-string-trim char-bag string t t)) diff --git a/src/compiler/seqtran.lisp b/src/compiler/seqtran.lisp index 78cb9dc91..b9f1e07d6 100644 --- a/src/compiler/seqtran.lisp +++ b/src/compiler/seqtran.lisp @@ -1275,3 +1275,43 @@ (effective-find-position-key key)))))) (define-find-position-if-not find-if-not 0) (define-find-position-if-not position-if-not 1)) + +(macrolet ((define-trimmer-transform (fun-name leftp rightp) + `(deftransform ,fun-name ((char-bag string) + (t simple-string)) + (let ((find-expr + (if (constant-lvar-p char-bag) + ;; If the bag is constant, use MEMBER + ;; instead of FIND, since we have a + ;; deftransform for MEMBER that can + ;; open-code all of the comparisons when + ;; the list is constant. -- JES, 2007-12-10 + `(not (member (schar string index) + ',(coerce (lvar-value char-bag) 'list) + :test #'char=)) + '(not (find (schar string index) char-bag :test #'char=))))) + `(flet ((char-not-in-bag (index) + ,find-expr)) + (let* ((end (length string)) + (left-end (if ,',leftp + (do ((index 0 (1+ index))) + ((or (= index (the fixnum end)) + (char-not-in-bag index)) + index) + (declare (fixnum index))) + 0)) + (right-end (if ,',rightp + (do ((index (1- end) (1- index))) + ((or (< index left-end) + (char-not-in-bag index)) + (1+ index)) + (declare (fixnum index))) + end))) + (if (and (eql left-end 0) + (eql right-end (length string))) + string + (subseq string left-end right-end)))))))) + (define-trimmer-transform string-left-trim t nil) + (define-trimmer-transform string-right-trim nil t) + (define-trimmer-transform string-trim t t)) + diff --git a/tests/string.pure.lisp b/tests/string.pure.lisp index 15e7afd1c..82ccc9400 100644 --- a/tests/string.pure.lisp +++ b/tests/string.pure.lisp @@ -87,3 +87,35 @@ :start1 a)) 9) 9)) + +;; String trimming. + +(flet ((make-test (string left right both) + (macrolet ((check (fun wanted) + `(let ((result (,fun " " string))) + (assert (equal result ,wanted)) + (when (equal string ,wanted) + ;; Check that the original string is + ;; returned when no changes are needed. Not + ;; required by the spec, but a desireable + ;; feature for performance. + (assert (eql result string)))))) + ;; Check the functional implementations + (locally + (declare (notinline string-left-trim string-right-trim + string-trim)) + (check string-left-trim left) + (check string-right-trim right) + (check string-trim both)) + ;; Check the transforms + (locally + (declare (type simple-string string)) + (check string-left-trim left) + (check string-right-trim right) + (check string-trim both))))) + (make-test "x " "x " "x" "x") + (make-test " x" "x" " x" "x") + (make-test " x " "x " " x" "x") + (make-test " x x " "x x " " x x" "x x")) + + diff --git a/version.lisp-expr b/version.lisp-expr index 070ef525f..291f9e3f2 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.12.22" +"1.0.12.23" -- 2.11.4.GIT