Update references to hapi_src to hapi_impl and revert hapiRegisterCallbacks
[charm.git] / src / util / SSE-Double.h
blob9d00426188854b108d9f958b6a0a3bf0e8ab4310
1 #ifndef __SSE_DOUBLE_H__
2 #define __SSE_DOUBLE_H__
4 #if CMK_USE_AVX && defined(__AVX__)
6 #include <x86intrin.h>
8 #include<iostream>
/**
 * SSEDouble (AVX build): value wrapper around one __m256d register holding
 * four packed doubles.
 *
 * Every operation is applied lane by lane. Comparison operators do NOT
 * return bool: they return an SSEDouble whose lanes are all-ones bit masks
 * where the predicate holds and all-zero where it does not. Combine such
 * masks with &, |, ^ and andnot(), or collapse them to an int with
 * movemask().
 */
class SSEDouble
{
   public:
       /* Raw register; public so callers can mix in intrinsics directly. */
       __m256d val;

       /* Leaves val uninitialized; assign before reading. */
       SSEDouble() {}

       /* Broadcast d into all four lanes. */
       SSEDouble(double d) { val = _mm256_set1_pd(d); }

       /* Lane 0 = d0, lane 1 = d1, lane 2 = d2, lane 3 = d3. */
       SSEDouble(double d0, double d1, double d2, double d3) { val = _mm256_setr_pd(d0, d1, d2, d3); }

   private:
       /* Package a raw register as an SSEDouble. */
       static SSEDouble wrap(__m256d raw) { SSEDouble r; r.val = raw; return r; }

   public:
       /* Arithmetic Operators */

       /*
        * Negation flips the sign bit of each lane. The previous 0 - x form
        * returned +0.0 for +0.0, which is not what unary minus yields for a
        * scalar double.
        */
       friend inline SSEDouble operator -(const SSEDouble &a)
       { return wrap(_mm256_xor_pd(a.val, _mm256_set1_pd(-0.0))); }

       friend inline SSEDouble operator +(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_add_pd(a.val, b.val)); }

       friend inline SSEDouble operator -(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_sub_pd(a.val, b.val)); }

       friend inline SSEDouble operator *(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_mul_pd(a.val, b.val)); }

       friend inline SSEDouble operator /(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_div_pd(a.val, b.val)); }

       /* Lane-wise square root. */
       friend inline SSEDouble sqrt (const SSEDouble &a)
       { return wrap(_mm256_sqrt_pd(a.val)); }

       /* Scalar-on-the-left forms: the scalar is broadcast to all lanes first. */
       friend inline SSEDouble operator +(double a, const SSEDouble &b)
       { return wrap(_mm256_add_pd(_mm256_set1_pd(a), b.val)); }

       friend inline SSEDouble operator -(double a, const SSEDouble &b)
       { return wrap(_mm256_sub_pd(_mm256_set1_pd(a), b.val)); }

       friend inline SSEDouble operator *(double a, const SSEDouble &b)
       { return wrap(_mm256_mul_pd(_mm256_set1_pd(a), b.val)); }

       friend inline SSEDouble operator /(double a, const SSEDouble &b)
       { return wrap(_mm256_div_pd(_mm256_set1_pd(a), b.val)); }

       /* Compound assignment: update this object in place and return it. */
       inline SSEDouble& operator +=(const SSEDouble &a) { val = _mm256_add_pd(val, a.val); return *this; }

       inline SSEDouble& operator -=(const SSEDouble &a) { val = _mm256_sub_pd(val, a.val); return *this; }

       inline SSEDouble& operator *=(const SSEDouble &a) { val = _mm256_mul_pd(val, a.val); return *this; }

       inline SSEDouble& operator /=(const SSEDouble &a) { val = _mm256_div_pd(val, a.val); return *this; }

       /* Logical Operators (bitwise on the raw lane contents) */

       friend inline SSEDouble operator &(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_and_pd(a.val, b.val)); }

       friend inline SSEDouble operator |(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_or_pd(a.val, b.val)); }

       friend inline SSEDouble operator ^(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_xor_pd(a.val, b.val)); }

       /* Maps to _mm256_andnot_pd(a, b): note that it is the FIRST operand that is complemented. */
       friend inline SSEDouble andnot (const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_andnot_pd(a.val, b.val)); }

       /* Comparison Operators */

       /*
        * AVX has no _mm256_cmplt_pd / _mm256_cmpgt_pd / _mm256_cmpeq_pd;
        * the predicate is passed to _mm256_cmp_pd instead.
        */
       friend inline SSEDouble operator <(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_cmp_pd(a.val, b.val, _CMP_LT_OS)); }

       friend inline SSEDouble operator >(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_cmp_pd(a.val, b.val, _CMP_GT_OS)); }

       friend inline SSEDouble operator ==(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_cmp_pd(a.val, b.val, _CMP_EQ_OQ)); }

       /* Scalar-on-the-right comparisons: b is broadcast to all lanes first. */
       friend inline SSEDouble operator <(const SSEDouble &a, double b)
       { return wrap(_mm256_cmp_pd(a.val, _mm256_set1_pd(b), _CMP_LT_OS)); }

       friend inline SSEDouble operator >(const SSEDouble &a, double b)
       { return wrap(_mm256_cmp_pd(a.val, _mm256_set1_pd(b), _CMP_GT_OS)); }

       /*
        * Lane-wise maximum. The second operand is taken by const reference
        * (it used to be a non-const reference) so temporaries and const
        * objects are accepted, as with every other binary operation here.
        */
       friend inline SSEDouble max (const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm256_max_pd(a.val, b.val)); }

       /* Masking Operations */

       /* Gather the sign bit of each lane into bits 0..3 of the result. */
       friend inline int movemask( const SSEDouble &a) { return _mm256_movemask_pd(a.val); }

       /* Store Operations */

       /* Write all four lanes to p[0..3]; p need not be aligned. */
       friend inline void storeu(double *p, const SSEDouble &a) { _mm256_storeu_pd(p, a.val); }

       // void display();
};
104 #else
107 #include<emmintrin.h>
109 #include<iostream>
/**
 * SSEDouble (SSE2 build): value wrapper around one __m128d register holding
 * two packed doubles.
 *
 * Every operation is applied lane by lane. Comparison operators do NOT
 * return bool: they return an SSEDouble whose lanes are all-ones bit masks
 * where the predicate holds and all-zero where it does not. Combine such
 * masks with &, |, ^ and andnot(), or collapse them to an int with
 * movemask().
 */
class SSEDouble
{
   public:
       /* Raw register; public so callers can mix in intrinsics directly. */
       __m128d val;

       /* Leaves val uninitialized; assign before reading. */
       SSEDouble() {}

       /* Broadcast d into both lanes. */
       SSEDouble(double d) { val = _mm_set1_pd(d); }

       /* Lane 0 (low) = d0, lane 1 (high) = d1. */
       SSEDouble(double d0, double d1) { val = _mm_setr_pd(d0, d1); }

   private:
       /* Package a raw register as an SSEDouble. */
       static SSEDouble wrap(__m128d raw) { SSEDouble r; r.val = raw; return r; }

   public:
       /* Arithmetic Operators */

       /*
        * Negation flips the sign bit of each lane. The previous 0 - x form
        * returned +0.0 for +0.0, which is not what unary minus yields for a
        * scalar double.
        */
       friend inline SSEDouble operator -(const SSEDouble &a)
       { return wrap(_mm_xor_pd(a.val, _mm_set1_pd(-0.0))); }

       friend inline SSEDouble operator +(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_add_pd(a.val, b.val)); }

       friend inline SSEDouble operator -(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_sub_pd(a.val, b.val)); }

       friend inline SSEDouble operator *(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_mul_pd(a.val, b.val)); }

       friend inline SSEDouble operator /(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_div_pd(a.val, b.val)); }

       /* Lane-wise square root. */
       friend inline SSEDouble sqrt (const SSEDouble &a)
       { return wrap(_mm_sqrt_pd(a.val)); }

       /* Scalar-on-the-left forms: the scalar is broadcast to both lanes first. */
       friend inline SSEDouble operator +(double a, const SSEDouble &b)
       { return wrap(_mm_add_pd(_mm_set1_pd(a), b.val)); }

       friend inline SSEDouble operator -(double a, const SSEDouble &b)
       { return wrap(_mm_sub_pd(_mm_set1_pd(a), b.val)); }

       friend inline SSEDouble operator *(double a, const SSEDouble &b)
       { return wrap(_mm_mul_pd(_mm_set1_pd(a), b.val)); }

       friend inline SSEDouble operator /(double a, const SSEDouble &b)
       { return wrap(_mm_div_pd(_mm_set1_pd(a), b.val)); }

       /* Compound assignment: update this object in place and return it. */
       inline SSEDouble& operator +=(const SSEDouble &a) { val = _mm_add_pd(val, a.val); return *this; }

       inline SSEDouble& operator -=(const SSEDouble &a) { val = _mm_sub_pd(val, a.val); return *this; }

       inline SSEDouble& operator *=(const SSEDouble &a) { val = _mm_mul_pd(val, a.val); return *this; }

       inline SSEDouble& operator /=(const SSEDouble &a) { val = _mm_div_pd(val, a.val); return *this; }

       /* Logical Operators (bitwise on the raw lane contents) */

       friend inline SSEDouble operator &(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_and_pd(a.val, b.val)); }

       friend inline SSEDouble operator |(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_or_pd(a.val, b.val)); }

       friend inline SSEDouble operator ^(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_xor_pd(a.val, b.val)); }

       /* Maps to _mm_andnot_pd(a, b): note that it is the FIRST operand that is complemented. */
       friend inline SSEDouble andnot (const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_andnot_pd(a.val, b.val)); }

       /* Comparison Operators */

       friend inline SSEDouble operator <(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_cmplt_pd(a.val, b.val)); }

       friend inline SSEDouble operator >(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_cmpgt_pd(a.val, b.val)); }

       friend inline SSEDouble operator ==(const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_cmpeq_pd(a.val, b.val)); }

       /* Scalar-on-the-right comparisons: b is broadcast to both lanes first. */
       friend inline SSEDouble operator <(const SSEDouble &a, double b)
       { return wrap(_mm_cmplt_pd(a.val, _mm_set1_pd(b))); }

       friend inline SSEDouble operator >(const SSEDouble &a, double b)
       { return wrap(_mm_cmpgt_pd(a.val, _mm_set1_pd(b))); }

       /*
        * Lane-wise maximum. The second operand is taken by const reference
        * (it used to be a non-const reference) so temporaries and const
        * objects are accepted, as with every other binary operation here.
        */
       friend inline SSEDouble max (const SSEDouble &a, const SSEDouble &b)
       { return wrap(_mm_max_pd(a.val, b.val)); }

       /* Masking Operations */

       /* Gather the sign bit of each lane into bits 0..1 of the result. */
       friend inline int movemask( const SSEDouble &a) { return _mm_movemask_pd(a.val); }

       /* Store Operations */

       /* Write the low lane (lane 0) to *p. */
       friend inline void storel(double *p, const SSEDouble &a) { _mm_storel_pd(p, a.val); }

       /* Write the high lane (lane 1) to *p. */
       friend inline void storeh(double *p, const SSEDouble &a) { _mm_storeh_pd(p, a.val); }

       /*
        * Write both lanes to p[0..1]; p need not be aligned. Mirrors the
        * storeu() of the AVX variant so whole-vector stores can be written
        * the same way for either build.
        */
       friend inline void storeu(double *p, const SSEDouble &a) { _mm_storeu_pd(p, a.val); }

       // void display();
};
205 void SSEDouble::display()
208 storel(z,val);
209 //_mm_storeh_pd(z,val);
210 cout<<*z;
213 int main()
216 double i=1.0;
217 double *p=&i;
218 // __m128d t1=_mm_setr_pd(3.0,0.0); __m128d t2 = _mm_setr_pd(5.0,0.0);
220 SSEDouble d1(2.0),d2(4.0),d4(25.0);
222 SSEDouble d3 = (25.0/5.0) + (d1 * d2) + d4 ;
225 storel(p,d3);
227 cout<<*p;
228 // d3 = d1 ^ d2;
230 // d4 = sqrt(d2);
232 // __m128d t = _mm_and_pd(t1,t2);
234 // cout << movemask(d3);
235 d3.display();
237 //int i = movemask(d4);
239 //cout<<i;
245 #endif
247 #endif //__SSE_DOUBLE_H__