SSE intrinsics
authorPaul Khuong <pvk@pvk.ca>
Tue, 16 Jun 2009 00:42:02 +0000 (15 20:42 -0400)
committerPaul Khuong <pvk@pvk.ca>
Tue, 15 Dec 2009 13:57:33 +0000 (15 13:57 +0000)
19 files changed:
package-data-list.lisp-expr
src/code/class.lisp
src/code/interr.lisp
src/code/pred.lisp
src/code/print.lisp
src/compiler/generic/early-objdef.lisp
src/compiler/generic/interr.lisp
src/compiler/generic/late-type-vops.lisp
src/compiler/generic/objdef.lisp
src/compiler/generic/primtype.lisp
src/compiler/generic/vm-fndb.lisp
src/compiler/generic/vm-typetran.lisp
src/compiler/x86-64/float.lisp
src/compiler/x86-64/insts.lisp
src/compiler/x86-64/macros.lisp
src/compiler/x86-64/sap.lisp
src/compiler/x86-64/vm.lisp
src/runtime/gc-common.c
src/runtime/gencgc.c

index 2dfb02b..869bb21 100644 (file)
@@ -1257,6 +1257,7 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%MAKE-FUNCALLABLE-STRUCTURE-INSTANCE-ALLOCATOR"
                "%MAKE-RATIO" "%MAKE-LISP-OBJ"
                "%MAKE-INSTANCE"
+               #!+x86-64 "%MAKE-SSE-PACK"
                "%MAKE-STRUCTURE-INSTANCE"
                "%MAKE-STRUCTURE-INSTANCE-ALLOCATOR"
                "%MAP" "%MAP-TO-SIMPLE-VECTOR-ARITY-1"
@@ -1319,6 +1320,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "%SET-SYMBOL-HASH"
                "%SIN" "%SIN-QUICK" "%SINGLE-FLOAT"
                "%SINH" "%SQRT" "%SXHASH-SIMPLE-STRING"
+               #!+x86-64 "%SSE-PACK-LOW"
+               #!+x86-64 "%SSE-PACK-HIGH"
                "%SXHASH-SIMPLE-SUBSTRING" "%TAN" "%TAN-QUICK" "%TANH"
                "%UNARY-ROUND"
                "%UNARY-TRUNCATE"
@@ -1515,6 +1518,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "OBJECT-NOT-SIMPLE-ARRAY-COMPLEX-DOUBLE-FLOAT-ERROR"
                #!+long-float
                "OBJECT-NOT-SIMPLE-ARRAY-COMPLEX-LONG-FLOAT-ERROR"
+               #!+x86-64
+               "OBJECT-NOT-SSE-PACK-ERROR"
                "OBJECT-NOT-SIMPLE-ARRAY-COMPLEX-SINGLE-FLOAT-ERROR"
                "OBJECT-NOT-SIMPLE-ARRAY-DOUBLE-FLOAT-ERROR"
                "OBJECT-NOT-SIMPLE-ARRAY-ERROR"
@@ -1628,6 +1633,8 @@ is a good idea, but see SB-SYS re. blurring of boundaries."
                "SINGLE-FLOAT-INT-EXPONENT" "SINGLE-FLOAT-SIGNIFICAND"
                "SINGLE-VALUE-TYPE" "SINGLE-VALUE-SPECIFIER-TYPE"
                "SPECIALIZABLE" "SPECIALIZABLE-VECTOR" "SPECIFIER-TYPE"
+               #!+x86-64 "SSE-PACK"
+               #!+x86-64 "SSE-PACK-P"
                "STACK-REF" "STREAM-DESIGNATOR" "STRING-DESIGNATOR"
                "STRING-FILL*"
                "STRING-SUBSEQ*"
@@ -2492,10 +2499,13 @@ structure representations"
                #!+long-float "COMPLEX-LONG-FLOAT-WIDETAG"
                #!+long-float "COMPLEX-LONG-REG-SC-NUMBER"
                #!+long-float "COMPLEX-LONG-STACK-SC-NUMBER"
-               #!-x86-64 #!-x86-64
-               "COMPLEX-SINGLE-FLOAT-IMAG-SLOT" "COMPLEX-SINGLE-FLOAT-REAL-SLOT"
-               #!+x86-64
-               "COMPLEX-SINGLE-FLOAT-DATA-SLOT"
+               #!+x86-64 "SSE-PACK-HI-VALUE-SLOT"
+               #!+x86-64 "SSE-PACK-LO-VALUE-SLOT"
+               #!+x86-64 "SSE-PACK-SIZE"
+               #!+x86-64 "SSE-PACK-WIDETAG"
+               #!-x86-64 "COMPLEX-SINGLE-FLOAT-IMAG-SLOT"
+               #!-x86-64 "COMPLEX-SINGLE-FLOAT-REAL-SLOT"
+               #!+x86-64 "COMPLEX-SINGLE-FLOAT-DATA-SLOT"
                "COMPLEX-SINGLE-FLOAT-SIZE" "COMPLEX-SINGLE-FLOAT-WIDETAG"
                "COMPLEX-SINGLE-REG-SC-NUMBER" "COMPLEX-SINGLE-STACK-SC-NUMBER"
                "COMPLEX-SIZE" "COMPLEX-BASE-STRING-WIDETAG"
index 4e2d8fc..10e0f13 100644 (file)
 ;;; hierarchy).  See NAMED :COMPLEX-SUBTYPEP-ARG2
 (defvar *non-instance-classoid-types*
   '(symbol system-area-pointer weak-pointer code-component
-    lra fdefn random-class))
+    lra fdefn random-class sse-pack))
 
 ;;; KLUDGE: we need this because of the need to represent
 ;;; intersections of two classes, even when empty at a given time, as
       :inherits (complex number)
       :codes (#.sb!vm:complex-long-float-widetag)
       :prototype-form (complex 42l0 42l0))
+     #!+x86-64
+     (sse-pack
+      :codes (#.sb!vm:sse-pack-widetag))
      (real :translation real :inherits (number))
      (float
       :translation float
index e00663d..b4740dd 100644 (file)
          :datum object
          :expected-type '(complex long-float)))
 
+#!+x86-64
+(deferr object-not-sse-pack-error (object)
+  (error 'type-error
+         :datum object
+         :expected-type 'sse-pack))
+
 (deferr object-not-weak-pointer-error (object)
   (error 'type-error
          :datum object
index 417b1c0..482205a 100644 (file)
   (def-type-predicate-wrapper realp)
   (def-type-predicate-wrapper short-float-p)
   (def-type-predicate-wrapper single-float-p)
+  #!+x86-64 (def-type-predicate-wrapper sse-pack-p)
   (def-type-predicate-wrapper %instancep)
   (def-type-predicate-wrapper symbolp)
   (def-type-predicate-wrapper system-area-pointer-p)
index 31b48ab..8dec5ef 100644 (file)
      (output-code-component object stream))
     (fdefn
      (output-fdefn object stream))
+    (sse-pack
+     (output-sse-pack object stream))
     (t
      (output-random object stream))))
 \f
   (print-unreadable-object (fdefn stream)
     (write-string "FDEFINITION object for " stream)
     (output-object (fdefn-name fdefn) stream)))
+
+(defun output-sse-pack (pack stream)
+  (declare (type sse-pack pack))
+  (cond #+nil(*read-eval*
+         (format stream "#.(~S #X~8,'0X)" 'int-sap (sap-int sap)))
+        (t
+         (print-unreadable-object (pack stream)
+           (format stream "SSE pack: #X~16,'0X:#X~16,'0X"
+                   (%sse-pack-low  pack)
+                   (%sse-pack-high pack))))))
 \f
 ;;;; functions
 
index 550a515..ab92d1b 100644 (file)
   unused01-widetag
   #!+(and sb-lutex sb-thread)
   lutex-widetag                             ; 01011110
-  unused02-widetag                          ; 01100010
+  #!-x86-64
+  unused02-widetag
+  #!+x86-64
+  sse-pack-widetag                          ; 01100010
   unused03-widetag                          ; 01100110
   unused04-widetag                          ; 01101010
   unused05-widetag                          ; 01101110
index 539d632..8b0dd98 100644 (file)
   #!+long-float
   (object-not-complex-long-float
    "Object is not of type (COMPLEX LONG-FLOAT).")
+  #!+x86-64
+  (object-not-sse-pack
+   "Object is not of type SSE-PACK.")
   (object-not-weak-pointer
    "Object is not a WEAK-POINTER.")
   (object-not-instance
index 2774689..8c960e4 100644 (file)
@@ -39,6 +39,9 @@
 (!define-type-vops ratiop check-ratio ratio object-not-ratio-error
   (ratio-widetag))
 
+(!define-type-vops sse-pack-p check-sse-pack sse-pack object-not-sse-pack-error
+  (sse-pack-widetag))
+
 (!define-type-vops complexp check-complex complex object-not-complex-error
   (complex-widetag complex-single-float-widetag complex-double-float-widetag
                    #!+long-float complex-long-float-widetag))
index d768bca..59d8f10 100644 (file)
   (real :c-type "double" :length #!-x86-64 2 #!+x86-64 1)
   (imag :c-type "double" :length #!-x86-64 2 #!+x86-64 1))
 
+#!+x86-64
+(define-primitive-object (sse-pack
+                          :lowtag other-pointer-lowtag
+                          :widetag sse-pack-widetag)
+  (filler)
+  (lo-value :c-type "long" :type (unsigned-byte 64))
+  (hi-value :c-type "long" :type (unsigned-byte 64)))
+
 #!+(and sb-lutex sb-thread)
 (define-primitive-object (lutex
                           :lowtag other-pointer-lowtag
index 2b5e2c6..b321f2f 100644 (file)
 (/show0 "about to !DEF-PRIMITIVE-TYPE COMPLEX-DOUBLE-FLOAT")
 (!def-primitive-type complex-double-float (complex-double-reg descriptor-reg)
   :type (complex double-float))
-
+#!+x86-64
+(progn
+  (/show0 "about to !DEF-PRIMITIVE-TYPE SSE-PACK")
+  (!def-primitive-type sse-pack (sse-reg descriptor-reg)))
 
 ;;; primitive other-pointer array types
 (/show0 "primtype.lisp 96")
                (part-of character))))
         (built-in-classoid
          (case (classoid-name type)
-           ((complex function system-area-pointer weak-pointer)
+           ((complex function sse-pack system-area-pointer weak-pointer)
             (values (primitive-type-or-lose (classoid-name type)) t))
            (cons-type
             (part-of list))
index 492238f..3f9b905 100644 (file)
 (defknown make-value-cell (t) t
   (flushable movable))
 
+#!+x86-64
+(progn
+  (defknown sse-pack-p (t) boolean (foldable flushable))
+  (defknown %make-sse-pack ((unsigned-byte 64) (unsigned-byte 64))
+      sse-pack)
+  (defknown (%sse-pack-low %sse-pack-high) (sse-pack)
+      (unsigned-byte 64)))
+
 ;;;; threading
 
 #!+(and sb-lutex sb-thread)
index ae7d8e6..032823a 100644 (file)
 (define-type-predicate unsigned-byte-64-p (unsigned-byte 64))
 #!+#.(cl:if (cl:= 64 sb!vm:n-word-bits) '(and) '(or))
 (define-type-predicate signed-byte-64-p (signed-byte 64))
+#!+x86-64
+(define-type-predicate sse-pack-p sse-pack)
 (define-type-predicate vector-nil-p (vector nil))
 (define-type-predicate weak-pointer-p weak-pointer)
 (define-type-predicate code-component-p code-component)
index 5b21175..ec25d29 100644 (file)
     (ea-for-cxf-stack tn :double :real base))
   (defun ea-for-cdf-imag-stack (tn &optional (base rbp-tn))
     (ea-for-cxf-stack tn :double :imag base)))
+
+(defun ea-for-sse-stack (tn &optional (base rbp-tn))
+  (make-ea :qword :base base
+           :disp (- (* (+ (tn-offset tn)
+                          2)
+                       n-word-bytes))))
+
 \f
 ;;;; move functions
 
 (eval-when (:compile-toplevel :execute)
   (setf *read-default-float-format* 'single-float))
 \f
-;;;; complex float move functions
+;;;; complex float and SSE move functions
 
 ;;; X is source, Y is destination.
 (define-move-fun (load-complex-single 2) (vop x y)
 (define-move-fun (store-complex-double 2) (vop x y)
   ((complex-double-reg) (complex-double-stack))
   (inst movupd (ea-for-cdf-data-stack y) x))
+
+(define-move-fun (load-sse-pack 2) (vop x y)
+  ((sse-stack) (sse-reg))
+  (inst movdqu y (ea-for-sse-stack x)))
+
+(define-move-fun (store-sse-pack 2) (vop x y)
+  ((sse-reg) (sse-stack))
+  (inst movdqu (ea-for-sse-stack y) x))
 \f
 ;;;; move VOPs
 
   (frob single-move single-reg)
   (frob double-move double-reg)
   (frob complex-single-move complex-single-reg)
-  (frob complex-double-move complex-double-reg))
+  (frob complex-double-move complex-double-reg)
+  (frob sse-move sse-reg))
 
 \f
 ;;; Move from float to a descriptor reg. allocating a new float
 (define-move-vop move-from-double :move
   (double-reg) (descriptor-reg))
 
+(define-vop (move-from-sse)
+  (:args (x :scs (sse-reg)))
+  (:results (y :scs (descriptor-reg)))
+  (:node-var node)
+  (:note "SSE to pointer coercion")
+  (:generator 13
+     (with-fixed-allocation (y
+                             sse-pack-widetag
+                             sse-pack-size
+                             node)
+       (inst movdqa (make-ea-for-object-slot
+                     y sse-pack-lo-value-slot other-pointer-lowtag)
+             x))))
+(define-move-vop move-from-sse :move
+  (sse-reg) (descriptor-reg))
+
 ;;; Move from a descriptor to a float register.
 (define-vop (move-to-single)
   (:args (x :scs (descriptor-reg) :target tmp))
     (inst movsd y (ea-for-df-desc x))))
 (define-move-vop move-to-double :move (descriptor-reg) (double-reg))
 
+(define-vop (move-to-sse)
+  (:args (x :scs (descriptor-reg)))
+  (:results (y :scs (sse-reg)))
+  (:note "pointer to SSE coercion")
+  (:generator 2
+    (inst movdqa y (make-ea-for-object-slot
+                    x sse-pack-lo-value-slot other-pointer-lowtag))))
+(define-move-vop move-to-sse :move (descriptor-reg) (sse-reg))
+
 \f
 ;;; Move from complex float to a descriptor reg. allocating a new
 ;;; complex float object in the process.
   (frob move-single-float-arg single-reg single-stack :single)
   (frob move-double-float-arg double-reg double-stack :double))
 
+(define-vop (move-sse-arg)
+  (:args (x :scs (sse-reg) :target y)
+         (fp :scs (any-reg)
+             :load-if (not (sc-is y sse-reg))))
+  (:results (y))
+  (:note "SSE argument move")
+  (:generator 4
+     (sc-case y
+       (sse-reg
+        (unless (location= x y)
+          (inst movdqa y x)))
+       (sse-stack
+        (inst movdqa (ea-for-sse-stack y fp) x)))))
+(define-move-vop move-sse-arg :move-arg
+  (sse-reg descriptor-reg) (sse-reg))
+
 ;;;; complex float MOVE-ARG VOP
 (macrolet ((frob (name sc stack-sc format)
              `(progn
 
 (define-move-vop move-arg :move-arg
   (single-reg double-reg
-   complex-single-reg complex-double-reg)
+   complex-single-reg complex-double-reg
+   sse-reg)
   (descriptor-reg))
 
 \f
   (:ignore x)
   (:generator 0))
 
+;;; Additional function that must be provided by #!+complex-vops
+;;; platforms
 (defknown swap-complex ((complex float)) (complex float)
     (foldable flushable movable always-translatable))
 (defoptimizer (swap-complex derive-type) ((x))
   (:generator 2
      (move r x)
      (inst shufpd r r #b01)))
+
+\f
+;;;; SSE pack operation
+(define-vop (%sse-pack-low)
+  (:translate %sse-pack-low)
+  (:args (x :scs (sse-reg)))
+  (:arg-types sse-pack)
+  (:results (dst :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:policy :fast-safe)
+  (:generator 3
+    (inst movd dst x)))
+
+(defun %sse-pack-low (x)
+  (declare (type sse-pack x))
+  (%sse-pack-low x))
+
+(define-vop (%sse-pack-high)
+  (:translate %sse-pack-high)
+  (:args (x :scs (sse-reg)))
+  (:arg-types sse-pack)
+  (:temporary (:sc sse-reg) tmp)
+  (:results (dst :scs (unsigned-reg)))
+  (:result-types unsigned-num)
+  (:policy :fast-safe)
+  (:generator 3
+    (inst movdqa tmp x)
+    (inst psrldq tmp 8)
+    (inst movd dst tmp)))
+
+(defun %sse-pack-high (x)
+  (declare (type sse-pack x))
+  (%sse-pack-high x))
+
+(define-vop (%make-sse-pack)
+  (:translate %make-sse-pack)
+  (:policy :fast-safe)
+  (:args (lo :scs (unsigned-reg))
+         (hi :scs (unsigned-reg)))
+  (:arg-types unsigned-num unsigned-num)
+  (:temporary (:sc sse-stack) tmp)
+  (:results (dst :scs (sse-reg)))
+  (:result-types sse-pack)
+  (:generator 5
+    (let ((offset (- (* (1+ (tn-offset tmp))
+                        n-word-bytes))))
+      (inst mov (make-ea :qword :base rbp-tn :disp (- offset 8)) lo)
+      (inst mov (make-ea :qword :base rbp-tn :disp offset) hi))
+    (inst movdqa dst (ea-for-sse-stack tmp))))
+
+(defun %make-sse-pack (low high)
+  (declare (type (unsigned-byte 64) low high))
+  (%make-sse-pack low high))
index 28ac794..dc1b7cc 100644 (file)
      ;; FIXME: might as well be COND instead of having to use #. readmacro
      ;; to hack up the code
      (case (sc-name (tn-sc thing))
+       (#.*oword-sc-names*
+        :oword)
        (#.*qword-sc-names*
         :qword)
        (#.*dword-sc-names*
index 018b43c..b15d218 100644 (file)
@@ -26,6 +26,9 @@
          ((double-reg complex-double-reg)
           (aver (xmm-register-p ,n-src))
           (inst movapd ,n-dst ,n-src))
+         ((sse-reg sse-stack)
+          (aver (xmm-register-p ,n-src))
+          (inst movdqa ,n-dst ,n-src))
          (t
           (inst mov ,n-dst ,n-src))))))
 
index b255b07..bf827ea 100644 (file)
   (:results (sap :scs (sap-reg)))
   (:result-types system-area-pointer)
   (:generator 2
-    (move sap vector)
-    (inst add
-          sap
-          (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))))
+    (if (location= sap vector)
+        (inst add
+              sap
+              (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))
+        (inst lea sap
+              (make-ea :qword
+                       :base vector
+                       :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))))))
 
 
index 63af4f9..d5cf43a 100644 (file)
   (double-stack stack)
   (complex-single-stack stack)  ; complex-single-floats
   (complex-double-stack stack :element-size 2)  ; complex-double-floats
-
+  (sse-stack stack :element-size 2)
 
   ;;
   ;; magic SCs
                       :save-p t
                       :alternate-scs (complex-double-stack))
 
+  (sse-reg float-registers
+           :locations #.*float-regs*
+           :constant-scs ()
+           :save-p t
+           :alternate-scs (sse-stack))
+
   ;; a catch or unwind block
   (catch-block stack :element-size kludge-nondeterministic-catch-block-size))
 
 (defparameter *double-sc-names* '(double-reg double-stack))
 (defparameter *complex-sc-names* '(complex-single-reg complex-single-stack
                                    complex-double-reg complex-double-stack))
+(defparameter *oword-sc-names* '(sse-reg sse-stack))
 ) ; EVAL-WHEN
 \f
 ;;;; miscellaneous TNs for the various registers
index a9f69b6..37fca91 100644 (file)
@@ -1965,6 +1965,9 @@ gc_init_tables(void)
 #ifdef COMPLEX_LONG_FLOAT_WIDETAG
     scavtab[COMPLEX_LONG_FLOAT_WIDETAG] = scav_unboxed;
 #endif
+#ifdef SSE_PACK_WIDETAG
+    scavtab[SSE_PACK_WIDETAG] = scav_unboxed;
+#endif
     scavtab[SIMPLE_ARRAY_WIDETAG] = scav_boxed;
     scavtab[SIMPLE_BASE_STRING_WIDETAG] = scav_base_string;
 #ifdef SIMPLE_CHARACTER_STRING_WIDETAG
@@ -2203,6 +2206,9 @@ gc_init_tables(void)
     transother[SYMBOL_HEADER_WIDETAG] = trans_boxed;
     transother[CHARACTER_WIDETAG] = trans_immediate;
     transother[SAP_WIDETAG] = trans_unboxed;
+#ifdef SSE_PACK_WIDETAG
+    transother[SSE_PACK_WIDETAG] = trans_unboxed;
+#endif
     transother[UNBOUND_MARKER_WIDETAG] = trans_immediate;
     transother[NO_TLS_VALUE_MARKER_WIDETAG] = trans_immediate;
     transother[WEAK_POINTER_WIDETAG] = trans_weak_pointer;
@@ -2343,6 +2349,9 @@ gc_init_tables(void)
     sizetab[SYMBOL_HEADER_WIDETAG] = size_boxed;
     sizetab[CHARACTER_WIDETAG] = size_immediate;
     sizetab[SAP_WIDETAG] = size_unboxed;
+#ifdef SSE_PACK_WIDETAG
+    sizetab[SSE_PACK_WIDETAG] = size_unboxed;
+#endif
     sizetab[UNBOUND_MARKER_WIDETAG] = size_immediate;
     sizetab[NO_TLS_VALUE_MARKER_WIDETAG] = size_immediate;
     sizetab[WEAK_POINTER_WIDETAG] = size_weak_pointer;
index 5bd67c8..7ee5c9a 100644 (file)
@@ -2371,6 +2371,9 @@ looks_like_valid_lisp_pointer_p(lispobj *pointer, lispobj *start_addr)
 #ifdef COMPLEX_LONG_FLOAT_WIDETAG
         case COMPLEX_LONG_FLOAT_WIDETAG:
 #endif
+#ifdef SSE_PACK_WIDETAG
+        case SSE_PACK_WIDETAG:
+#endif
         case SIMPLE_ARRAY_WIDETAG:
         case COMPLEX_BASE_STRING_WIDETAG:
 #ifdef COMPLEX_CHARACTER_STRING_WIDETAG
@@ -3535,6 +3538,9 @@ verify_space(lispobj *start, size_t words)
 #ifdef COMPLEX_LONG_FLOAT_WIDETAG
                 case COMPLEX_LONG_FLOAT_WIDETAG:
 #endif
+#ifdef SSE_PACK_WIDETAG
+                case SSE_PACK_WIDETAG:
+#endif
                 case SIMPLE_BASE_STRING_WIDETAG:
 #ifdef SIMPLE_CHARACTER_STRING_WIDETAG
                 case SIMPLE_CHARACTER_STRING_WIDETAG: