1 #ifndef __SSE_DOUBLE_H__
2 #define __SSE_DOUBLE_H__
4 #if CMK_USE_AVX && defined(__AVX__)
21 SSEDouble(double d
) { val
= _mm256_set1_pd(d
); }
23 SSEDouble(double d0
, double d1
, double d2
, double d3
) { val
= _mm256_setr_pd(d0
,d1
,d2
,d3
); }
25 /* Arithmetic Operators*/
26 friend inline SSEDouble
operator -(const SSEDouble
&a
) {SSEDouble c
;c
.val
=_mm256_sub_pd(_mm256_setzero_pd(),a
.val
);return c
;}
28 friend inline SSEDouble
operator +(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_add_pd(a
.val
,b
.val
);return c
;}
30 friend inline SSEDouble
operator -(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_sub_pd(a
.val
,b
.val
);return c
;}
32 friend inline SSEDouble
operator *(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_mul_pd(a
.val
,b
.val
);return c
;}
34 friend inline SSEDouble
operator /(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_div_pd(a
.val
,b
.val
);return c
;}
36 friend inline SSEDouble
sqrt (const SSEDouble
&a
) { SSEDouble c
;c
.val
= _mm256_sqrt_pd(a
.val
);return c
;}
39 friend inline SSEDouble
operator +(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_add_pd(_mm256_set1_pd(a
),b
.val
);return c
;}
42 friend inline SSEDouble
operator -(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_sub_pd(_mm256_set1_pd(a
),b
.val
);return c
;}
44 friend inline SSEDouble
operator *(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_mul_pd(_mm256_set1_pd(a
),b
.val
);return c
;}
46 friend inline SSEDouble
operator /(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_div_pd(_mm256_set1_pd(a
),b
.val
);return c
;}
48 inline SSEDouble
& operator +=(const SSEDouble
&a
) {val
= _mm256_add_pd(val
,a
.val
);return *this;}
50 inline SSEDouble
& operator -=(const SSEDouble
&a
) {val
= _mm256_sub_pd(val
,a
.val
);return *this;}
52 inline SSEDouble
& operator *=(const SSEDouble
&a
) {val
= _mm256_mul_pd(val
,a
.val
);return *this;}
54 inline SSEDouble
& operator /=(const SSEDouble
&a
) {val
= _mm256_div_pd(val
,a
.val
);return *this;}
58 friend inline SSEDouble
operator &(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_and_pd(a
.val
,b
.val
);return c
;}
60 friend inline SSEDouble
operator |(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_or_pd(a
.val
,b
.val
);return c
;}
62 friend inline SSEDouble
operator ^(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_xor_pd(a
.val
,b
.val
);return c
;}
64 friend inline SSEDouble
andnot (const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_andnot_pd(a
.val
,b
.val
);return c
;}
66 /*Comparison Operators*/
68 //friend inline SSEDouble operator <(const SSEDouble &a, const SSEDouble &b) {SSEDouble c;c.val= _mm256_cmplt_pd(a.val,b.val);return c;}
69 friend inline SSEDouble
operator <(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_cmp_pd(a
.val
,b
.val
,_CMP_LT_OS
);return c
;}
71 //friend inline SSEDouble operator >(const SSEDouble &a, const SSEDouble &b) {SSEDouble c;c.val= _mm256_cmpgt_pd(a.val,b.val);return c;}
72 friend inline SSEDouble
operator >(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_cmp_pd(a
.val
,b
.val
,_CMP_GT_OS
);return c
;}
74 //friend inline SSEDouble operator ==(const SSEDouble &a, const SSEDouble &b) {SSEDouble c;c.val= _mm256_cmpeq_pd(a.val,b.val);return c;}
75 friend inline SSEDouble
operator ==(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm256_cmp_pd(a
.val
,b
.val
,_CMP_EQ_OQ
);return c
;}
77 //friend inline SSEDouble operator <(const SSEDouble &a, double b) {SSEDouble c;c.val= _mm256_cmplt_pd(a.val,_mm256_set1_pd(b));return c;}
78 friend inline SSEDouble
operator <(const SSEDouble
&a
, double b
) {SSEDouble c
;c
.val
= _mm256_cmp_pd(a
.val
,_mm256_set1_pd(b
),_CMP_LT_OS
);return c
;}
80 //friend inline SSEDouble operator >(const SSEDouble &a, double b) {SSEDouble c;c.val= _mm256_cmpgt_pd(a.val,_mm256_set1_pd(b));return c;}
81 friend inline SSEDouble
operator >(const SSEDouble
&a
, double b
) {SSEDouble c
;c
.val
= _mm256_cmp_pd(a
.val
,_mm256_set1_pd(b
),_CMP_GT_OS
);return c
;}
83 friend inline SSEDouble
max (const SSEDouble
&a
, SSEDouble
&b
) { SSEDouble c
; c
.val
= _mm256_max_pd(a
.val
,b
.val
);return c
;}
86 /*Masking Operations */
88 friend inline int movemask( const SSEDouble
&a
) {return _mm256_movemask_pd(a
.val
);}
93 friend inline void storeu(double *p
, const SSEDouble
&a
) { _mm256_storeu_pd(p
,a
.val
);}
107 #include<emmintrin.h>
123 SSEDouble(double d
) { val
= _mm_set1_pd(d
);}
125 SSEDouble(double d0
, double d1
) {val
= _mm_setr_pd(d0
,d1
);}
127 /* Arithmetic Operators*/
128 friend inline SSEDouble
operator -(const SSEDouble
&a
) {SSEDouble c
;c
.val
=_mm_sub_pd(_mm_setzero_pd(),a
.val
);return c
;}
130 friend inline SSEDouble
operator +(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_add_pd(a
.val
,b
.val
);return c
;}
132 friend inline SSEDouble
operator -(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_sub_pd(a
.val
,b
.val
);return c
;}
134 friend inline SSEDouble
operator *(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_mul_pd(a
.val
,b
.val
);return c
;}
136 friend inline SSEDouble
operator /(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_div_pd(a
.val
,b
.val
);return c
;}
138 friend inline SSEDouble
sqrt (const SSEDouble
&a
) { SSEDouble c
;c
.val
= _mm_sqrt_pd(a
.val
);return c
;}
141 friend inline SSEDouble
operator +(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_add_pd(_mm_set1_pd(a
),b
.val
);return c
;}
144 friend inline SSEDouble
operator -(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_sub_pd(_mm_set1_pd(a
),b
.val
);return c
;}
146 friend inline SSEDouble
operator *(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_mul_pd(_mm_set1_pd(a
),b
.val
);return c
;}
148 friend inline SSEDouble
operator /(double a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_div_pd(_mm_set1_pd(a
),b
.val
);return c
;}
150 inline SSEDouble
& operator +=(const SSEDouble
&a
) {val
= _mm_add_pd(val
,a
.val
);return *this;}
152 inline SSEDouble
& operator -=(const SSEDouble
&a
) {val
= _mm_sub_pd(val
,a
.val
);return *this;}
154 inline SSEDouble
& operator *=(const SSEDouble
&a
) {val
= _mm_mul_pd(val
,a
.val
);return *this;}
156 inline SSEDouble
& operator /=(const SSEDouble
&a
) {val
= _mm_div_pd(val
,a
.val
);return *this;}
158 /*Logical Operators*/
160 friend inline SSEDouble
operator &(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_and_pd(a
.val
,b
.val
);return c
;}
162 friend inline SSEDouble
operator |(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_or_pd(a
.val
,b
.val
);return c
;}
164 friend inline SSEDouble
operator ^(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_xor_pd(a
.val
,b
.val
);return c
;}
166 friend inline SSEDouble
andnot (const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_andnot_pd(a
.val
,b
.val
);return c
;}
168 /*Comparison Operators*/
171 friend inline SSEDouble
operator <(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_cmplt_pd(a
.val
,b
.val
);return c
;}
173 friend inline SSEDouble
operator >(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_cmpgt_pd(a
.val
,b
.val
);return c
;}
175 friend inline SSEDouble
operator ==(const SSEDouble
&a
, const SSEDouble
&b
) {SSEDouble c
;c
.val
= _mm_cmpeq_pd(a
.val
,b
.val
);return c
;}
177 friend inline SSEDouble
operator <(const SSEDouble
&a
, double b
) {SSEDouble c
;c
.val
= _mm_cmplt_pd(a
.val
,_mm_set1_pd(b
));return c
;}
179 friend inline SSEDouble
operator >(const SSEDouble
&a
, double b
) {SSEDouble c
;c
.val
= _mm_cmpgt_pd(a
.val
,_mm_set1_pd(b
));return c
;}
181 friend inline SSEDouble
max (const SSEDouble
&a
, SSEDouble
&b
) { SSEDouble c
; c
.val
= _mm_max_pd(a
.val
,b
.val
);return c
;}
184 /*Masking Operations */
186 friend inline int movemask( const SSEDouble
&a
) {return _mm_movemask_pd(a
.val
);}
191 friend inline void storel(double *p
, const SSEDouble
&a
) { _mm_storel_pd(p
,a
.val
);}
193 friend inline void storeh(double *p
, const SSEDouble
&a
) { _mm_storeh_pd(p
,a
.val
);}
205 void SSEDouble::display()
209 //_mm_storeh_pd(z,val);
218 // __m128d t1=_mm_setr_pd(3.0,0.0); __m128d t2 = _mm_setr_pd(5.0,0.0);
220 SSEDouble d1(2.0),d2(4.0),d4(25.0);
222 SSEDouble d3 = (25.0/5.0) + (d1 * d2) + d4 ;
232 // __m128d t = _mm_and_pd(t1,t2);
234 // cout << movemask(d3);
237 //int i = movemask(d4);
247 #endif //__SSE_DOUBLE_H__