From b6517a60bccd6462d504c7c5accaf3626432b009 Mon Sep 17 00:00:00 2001 From: Douglas Katzman Date: Mon, 5 Jan 2015 22:23:22 -0500 Subject: [PATCH] Implement suggestions of Paul Khuong re. POPCNT --- src/assembly/x86-64/arith.lisp | 19 ++++++++++++++----- src/compiler/generic/vm-tran.lisp | 13 +++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/assembly/x86-64/arith.lisp b/src/assembly/x86-64/arith.lisp index 38ea0150c..5b3028f9c 100644 --- a/src/assembly/x86-64/arith.lisp +++ b/src/assembly/x86-64/arith.lisp @@ -448,14 +448,23 @@ (inst test (make-ea :byte :disp (make-fixup "cpuid_fn1_ecx" :foreign 2)) #x80) - (inst jmp :nz fast)) + (inst jmp :z slow) + ;; Intel's implementation of POPCNT on some models treats it as + ;; a 2-operand ALU op in the manner of ADD,SUB,etc which means that + ;; it falsely appears to need data from the destination register. + ;; The workaround is to clear the destination. + ;; See http://stackoverflow.com/questions/25078285 + (unless (location= result arg) + ;; We only break the spurious dep. chain if result isn't the same + ;; register as arg. (If they're location=, don't trash the arg!) + (inst xor result result)) + (inst popcnt result arg) + (inst jmp done)) + slow (move rdx arg) (inst mov rcx (make-fixup 'logcount :assembly-routine)) (inst call rcx) (move result rdx) - #!-win32 (inst jmp done) - fast - #!-win32 (inst popcnt result arg) - done)))) + done)))) (def-it unsigned-byte-64-count 14 unsigned-reg unsigned-num) (def-it positive-fixnum-count 13 any-reg positive-fixnum)) diff --git a/src/compiler/generic/vm-tran.lisp b/src/compiler/generic/vm-tran.lisp index 70d514d54..d3b046182 100644 --- a/src/compiler/generic/vm-tran.lisp +++ b/src/compiler/generic/vm-tran.lisp @@ -456,9 +456,22 @@ (end-1 (truncate (truly-the index (1- length)) sb!vm:n-word-bits))) ((>= index end-1) + ;; "(mod (1- length) ...)" is the bit index within the word + ;; of the array index of the ultimate bit to be examined. 
+ ;; "1+" it is the number of bits in that word. + ;; But I don't get why people are allowed to store random data that + ;; we mask off, as if we could accommodate all possible ways that + ;; unsafe code can spew bits where they don't belong. + ;; Does it have to do with %shrink-vector, perhaps? + ;; Some rationale would be nice... (let* ((extra (1+ (mod (1- length) sb!vm:n-word-bits))) (mask (ash #.(1- (ash 1 sb!vm:n-word-bits)) (- extra sb!vm:n-word-bits))) + ;; The above notwithstanding, for big-endian wouldn't it + ;; be possible to write this expression as a single shift? + ;; (LOGAND MOST-POSITIVE-WORD (ASH most-positive-word (- n-word-bits extra))) + ;; rather than a right-shift to fill in zeros on the left + ;; then by a left-shift to left-align the 1s? + (bits (logand (ash mask ,(ecase sb!c:*backend-byte-order* (:little-endian 0) -- 2.11.4.GIT