Update references to hapi_src to hapi_impl and revert hapiRegisterCallbacks
[charm.git] / src / util / SSE-Float.h
blobd3c52fca2dad711db220075ec0082793c84cf7fe
1 #ifndef __SSE_FLOAT_H__
2 #define __SSE_FLOAT_H__
4 #include<emmintrin.h>
5 #include<iostream>
/**
 * Thin value wrapper around one SSE register (`__m128`) holding four packed
 * single-precision floats.
 *
 * Every operation works lane-wise on all four floats at once and forwards to
 * exactly one `_mm_*_ps` intrinsic (plus a broadcast when a scalar operand is
 * involved).  The free functions are declared as in-class friends, so they
 * are found through argument-dependent lookup only.
 */
class SSEFloat
{
public:
  /// The four packed lanes.  Public so callers can hand it to raw intrinsics.
  __m128 val;

  /// Leaves `val` uninitialised; assign to the object before reading it.
  SSEFloat() {}

  /// Broadcasts `f` into all four lanes.
  /// Deliberately not `explicit`: there are no (SSEFloat, float) arithmetic
  /// overloads, so expressions such as `v + 1.0f` rely on this conversion.
  SSEFloat(float f) : val(_mm_set1_ps(f)) {}

  /// Builds a vector whose lanes, in memory order, are f0, f1, f2, f3.
  SSEFloat(float f0, float f1, float f2, float f3) : val(_mm_setr_ps(f0, f1, f2, f3)) {}

  /* Arithmetic Operators */

  /// Lane-wise negation, computed as (0 - a).
  friend inline SSEFloat operator-(const SSEFloat &a)
  { SSEFloat c; c.val = _mm_sub_ps(_mm_setzero_ps(), a.val); return c; }

  friend inline SSEFloat operator+(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_add_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator-(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_sub_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator*(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_mul_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator/(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_div_ps(a.val, b.val); return c; }

  /// Lane-wise square root.
  friend inline SSEFloat sqrt(const SSEFloat &a)
  { SSEFloat c; c.val = _mm_sqrt_ps(a.val); return c; }

  // Scalar-on-the-left forms: the scalar is broadcast to all lanes first.

  friend inline SSEFloat operator+(float a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_add_ps(_mm_set1_ps(a), b.val); return c; }

  friend inline SSEFloat operator-(float a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_sub_ps(_mm_set1_ps(a), b.val); return c; }

  friend inline SSEFloat operator*(float a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_mul_ps(_mm_set1_ps(a), b.val); return c; }

  friend inline SSEFloat operator/(float a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_div_ps(_mm_set1_ps(a), b.val); return c; }

  // Compound assignment: update this vector in place and return it.

  inline SSEFloat &operator+=(const SSEFloat &a) { val = _mm_add_ps(val, a.val); return *this; }

  inline SSEFloat &operator-=(const SSEFloat &a) { val = _mm_sub_ps(val, a.val); return *this; }

  inline SSEFloat &operator*=(const SSEFloat &a) { val = _mm_mul_ps(val, a.val); return *this; }

  inline SSEFloat &operator/=(const SSEFloat &a) { val = _mm_div_ps(val, a.val); return *this; }

  /* Logical Operators (bitwise on the raw lane bits) */

  friend inline SSEFloat operator&(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_and_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator|(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_or_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator^(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_xor_ps(a.val, b.val); return c; }

  /// Bitwise (~a) & b -- note that it is the FIRST operand that is inverted.
  friend inline SSEFloat andnot(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_andnot_ps(a.val, b.val); return c; }

  /* Comparison Operators */
  // These do NOT return bool.  Each lane of the result is a bit mask:
  // all bits set where the comparison holds, all bits clear where it does
  // not.  Combine with the logical operators above or reduce with movemask().

  friend inline SSEFloat operator<(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_cmplt_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator>(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_cmpgt_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator==(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_cmpeq_ps(a.val, b.val); return c; }

  friend inline SSEFloat operator<(const SSEFloat &a, float b)
  { SSEFloat c; c.val = _mm_cmplt_ps(a.val, _mm_set1_ps(b)); return c; }

  friend inline SSEFloat operator>(const SSEFloat &a, float b)
  { SSEFloat c; c.val = _mm_cmpgt_ps(a.val, _mm_set1_ps(b)); return c; }

  /// Lane-wise maximum.  Both operands are taken by const reference so that
  /// temporaries and const vectors are accepted, e.g. max(v, SSEFloat(0.0f)).
  friend inline SSEFloat max(const SSEFloat &a, const SSEFloat &b)
  { SSEFloat c; c.val = _mm_max_ps(a.val, b.val); return c; }

  /* Masking Operations */

  /// Packs the sign bit of each lane into bits 0..3 of the result
  /// (bit i comes from lane i).
  friend inline int movemask(const SSEFloat &a) { return _mm_movemask_ps(a.val); }

  /* Store Operations */

  /// Writes the four lanes to p[0..3]; `p` needs no particular alignment.
  friend inline void storeu(float *p, const SSEFloat &a) { _mm_storeu_ps(p, a.val); }

  // Disabled: the body uses a double-precision intrinsic (_pd) that does not
  // match this class's float* / __m128 operands.
  // friend void storeh(float *p, const SSEFloat &a) { _mm_storeh_pd(p,a.val);}

  // void display();
};
// NOTE(review): leftover scaffolding.  This defines display() on a class
// named `Double`, which is not declared anywhere in this header, and the
// matching `void display();` declaration inside SSEFloat is commented out.
// Neither `z` nor `storel` is declared in the visible part of this file, and
// the function's braces are not visible either -- presumably this fragment
// is disabled (or was copied from a double-precision sibling header);
// confirm before relying on it.
102 void Double::display()
// Apparent intent: store from `val` through `z`, then print the value at *z.
105 storel(z,val);
106 //_mm_storeh_pd(z,val);
// `cout` is unqualified and no using-declaration is visible in this file.
107 cout<<*z;
// Ad-hoc smoke test: multiplies two 4-lane vectors lane-wise, stores the
// result to a float array and prints the four products.
// NOTE(review): a live `main()` inside a header would clash with the main()
// of any program that includes it, so this is presumably disabled in the
// real file -- confirm.  `cout` is also used unqualified with no visible
// using-declaration.
112 int main()
// Destination buffer for the four lanes, and a cursor that walks through it.
115 float i[4];
116 float *p=i;
117 // __m128d t1=_mm_setr_pd(3.0,0.0); __m128d t2 = _mm_setr_pd(5.0,0.0);
// f1 and f2 are the operands; f4 (25.0 broadcast to all lanes) is not used
// by any of the active statements below.
119 SSEFloat f1(2.0,1.0,4.0,5.0),f2(4.0,7.0,2.0,5.0),f4(25.0);
// Lane-wise product: (2*4, 1*7, 4*2, 5*5) = (8, 7, 8, 25).
121 SSEFloat d3 = (f1 * f2) ;
// Copy all four lanes of d3 into i[0..3].
124 storeu(p,d3);
// Print the lanes one after another; no separator is written between them.
126 cout<<*p;
128 p++;cout<<*p;
130 p++;cout<<*p;
132 p++;cout<<*p;
// The remaining lines are disabled experiments; they reference names
// (d1, d2, d4) that are not declared in the active code above.
134 // d3 = d1 ^ d2;
136 // d4 = sqrt(d2);
138 // __m128d t = _mm_and_pd(t1,t2);
140 // cout << movemask(d3);
141 // d3.display();
143 //int i = movemask(d4);
145 //cout<<i;
152 #endif // __SSE_FLOAT_H__