6 * $Date: 2012-07-06 15:04:28 +0200 (Fr, 06. Jul 2012) $
7 ***************************************************************/
10 * \brief Definition of class ComplexDouble for fast complex number arithmetic.
12 * \author Martin Gronemann
15 * This file is part of the Open Graph Drawing Framework (OGDF).
19 * See README.txt in the root directory of the OGDF installation for details.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * Version 2 or 3 as published by the Free Software Foundation;
25 * see the file LICENSE.txt included in the packaging of this file
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU General Public License for more details.
35 * You should have received a copy of the GNU General Public
36 * License along with this program; if not, write to the Free
37 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
38 * Boston, MA 02110-1301, USA.
40 * \see http://www.gnu.org/copyleft/gpl.html
41 ***************************************************************/
43 #ifndef OGDF_COMPLEX_DOUBLE_H
44 #define OGDF_COMPLEX_DOUBLE_H
46 #include "FastUtils.h"
52 //! Class to generate instrinsics for complex number arithmetic functions
53 #ifdef OGDF_FME_KERNEL_USE_SSE
59 // ---------------------------------------------------
61 // ---------------------------------------------------
62 inline ComplexDouble()
64 reg
= _mm_setzero_pd();
67 inline ComplexDouble(const ComplexDouble
& other
)
72 inline ComplexDouble(double x
)
74 reg
= _mm_setr_pd((x
), (0));
77 inline ComplexDouble(double x
, double y
)
79 reg
= _mm_setr_pd((x
), (y
));
82 inline ComplexDouble(const double* ptr
)
84 reg
= _mm_load_pd(ptr
);
88 inline ComplexDouble(__m128d r
) : reg(r
)
92 inline ComplexDouble(float x
, float y
)
94 reg
= _mm_cvtps_pd(_mm_setr_ps((x
), (y
), 0, 0));
97 // ---------------------------------------------------
98 // Standard arithmetic
99 // ---------------------------------------------------
100 inline ComplexDouble
operator+(const ComplexDouble
& other
) const
102 return ComplexDouble( _mm_add_pd(reg
, other
.reg
) );
105 inline ComplexDouble
operator-(const ComplexDouble
& other
) const
107 return ComplexDouble( _mm_sub_pd(reg
, other
.reg
) );
110 inline ComplexDouble
operator-(void) const
112 return ComplexDouble( _mm_sub_pd(_mm_setzero_pd(), reg
) );
115 inline ComplexDouble
operator*(const ComplexDouble
& other
) const
117 // ---------------------------------
118 // | a0*b0 - a1*b1 | a0*b1 + a1*b0 |
119 // ---------------------------------
121 __m128d b_t
= _mm_shuffle_pd(other
.reg
, other
.reg
, _MM_SHUFFLE2(0, 1));
122 // left = | a0*b0 | a1*b1 |
123 __m128d left
= _mm_mul_pd(reg
, other
.reg
);
124 // right = | a0*b1 | a1*b0 |
125 __m128d right
= _mm_mul_pd(reg
, b_t
);
126 // left = | a0*b0 | -a1*b1 |
127 left
= _mm_mul_pd(left
, _mm_setr_pd(1.0, -1.0) ) ;
128 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
129 return ComplexDouble( _mm_hadd_pd ( left
, right
) );
132 inline ComplexDouble
operator/(const ComplexDouble
& other
) const
134 // 1/(length(other)^2 * this * other.conj;
136 __m128d conj_reg
= _mm_mul_pd(other
.reg
, _mm_setr_pd(1.0, -1.0) ) ;
138 __m128d b_t
= _mm_shuffle_pd(conj_reg
, conj_reg
, _MM_SHUFFLE2(0, 1));
139 // left = | a0*b0 | a1*b1 |
140 __m128d left
= _mm_mul_pd(reg
, conj_reg
);
141 // right = | a0*b1 | a1*b0 |
142 __m128d right
= _mm_mul_pd(reg
, b_t
);
143 // left = | a0*b0 | -a1*b1 |
144 left
= _mm_mul_pd(left
, _mm_setr_pd(1.0, -1.0) ) ;
145 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
146 __m128d product
= _mm_hadd_pd ( left
, right
) ;
147 // product = reg*other.reg.conj
149 __m128d l
= _mm_mul_pd(conj_reg
, conj_reg
);
150 // l = b0*b0 + b1*b1 | b0*b0 + b1*b1
151 l
= _mm_hadd_pd(l
, l
);
152 // l = length^2 | length^2
153 return ComplexDouble( _mm_div_pd(product
, l
));
156 inline ComplexDouble
operator*(double scalar
) const
158 return ComplexDouble( _mm_mul_pd(reg
, _mm_setr_pd(scalar
, scalar
)) );
161 inline ComplexDouble
operator/(double scalar
) const
163 //double rcp = 1.0/scalar;
164 return ComplexDouble( _mm_div_pd(reg
, _mm_setr_pd(scalar
, scalar
)) );
167 inline ComplexDouble
operator*(unsigned int scalar
) const
169 return ComplexDouble( _mm_mul_pd(reg
, _mm_setr_pd((double)scalar
, (double)scalar
)) );
172 inline void operator+=(const ComplexDouble
& other
)
174 reg
= _mm_add_pd(reg
, other
.reg
);
177 inline void operator-=(const ComplexDouble
& other
)
179 reg
= _mm_sub_pd(reg
, other
.reg
);
182 inline void operator*=(const ComplexDouble
& other
)
185 __m128d b_t
= _mm_shuffle_pd(other
.reg
, other
.reg
, _MM_SHUFFLE2(0, 1));
186 // left = | a0*b0 | a1*b1 |
187 __m128d left
= _mm_mul_pd(reg
, other
.reg
);
188 // right = | a0*b1 | a1*b0 |
189 __m128d right
= _mm_mul_pd(reg
, b_t
);
190 // left = | a0*b0 | -a1*b1 |
191 left
= _mm_mul_pd(left
, _mm_setr_pd(1.0, -1.0) ) ;
192 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
193 reg
= _mm_hadd_pd ( left
, right
) ;
196 inline void operator*=(double scalar
)
198 // (real*scalar, imag*scalar)
199 reg
= _mm_mul_pd(reg
, _mm_setr_pd(scalar
, scalar
));
202 inline void operator/=(const ComplexDouble
& other
)
204 // 1/(length(other)^2 * this * other.conj;
206 __m128d conj_reg
= _mm_mul_pd(other
.reg
, _mm_setr_pd(1.0, -1.0) ) ;
208 __m128d b_t
= _mm_shuffle_pd(conj_reg
, conj_reg
, _MM_SHUFFLE2(0, 1));
209 // left = | a0*b0 | a1*b1 |
210 __m128d left
= _mm_mul_pd(reg
, conj_reg
);
211 // right = | a0*b1 | a1*b0 |
212 __m128d right
= _mm_mul_pd(reg
, b_t
);
213 // left = | a0*b0 | -a1*b1 |
214 left
= _mm_mul_pd(left
, _mm_setr_pd(1.0, -1.0) ) ;
215 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
216 __m128d product
= _mm_hadd_pd ( left
, right
) ;
218 __m128d l
= _mm_mul_pd(conj_reg
, conj_reg
);
219 // l = b0*b0 + b1*b1 | b0*b0 + b1*b1
220 l
= _mm_hadd_pd(l
, l
);
221 // l = length^2 | length^2
222 reg
= _mm_div_pd(product
, l
);
225 // ---------------------------------------------------
226 // Additional arithmetic
227 // ---------------------------------------------------
228 inline double length() const
230 // sqrt(real*real + imag*imag)
232 __m128d r
= _mm_mul_pd(reg
, reg
);
233 r
= _mm_hadd_pd(r
, _mm_setzero_pd());
234 r
= _mm_sqrt_sd(r
, r
);
235 _mm_store_sd(&res
, r
);
239 inline ComplexDouble
conj() const
242 return ComplexDouble( _mm_mul_pd(reg
, _mm_setr_pd(1.0, -1.0) ) );
245 // ---------------------------------------------------
247 // ---------------------------------------------------
248 inline void operator=(const ComplexDouble
& other
)
253 //! load from 16byte aligned ptr
254 inline void operator=(double* ptr
)
256 reg
= _mm_load_pd(ptr
);
260 // ---------------------------------------------------
262 // ---------------------------------------------------
264 //! load from 16byte aligned ptr
265 inline void load(const double* ptr
)
267 reg
= _mm_load_pd(ptr
);
270 //! load from unaligned ptr
271 inline void load_unaligned(const double* ptr
)
273 reg
= _mm_loadu_pd(ptr
);
276 //! store to 16byte aligned ptr
277 inline void store(double* ptr
) const
279 _mm_store_pd(ptr
, reg
);
282 //! store to unaligned ptr
283 inline void store_unaligned(double* ptr
) const
285 _mm_storeu_pd(ptr
, reg
);
295 // ---------------------------------------------------
297 // ---------------------------------------------------
298 inline ComplexDouble( )
304 inline ComplexDouble(const ComplexDouble
& other
)
306 reg
[0] = other
.reg
[0];
307 reg
[1] = other
.reg
[1];
310 inline ComplexDouble(double x
)
316 inline ComplexDouble(double x
, double y
)
322 inline ComplexDouble(double* ptr
)
328 // ---------------------------------------------------
329 // Standard arithmetic
330 // ---------------------------------------------------
331 inline ComplexDouble
operator+(const ComplexDouble
& other
) const
333 return ComplexDouble( reg
[0] + other
.reg
[0], reg
[1] + other
.reg
[1] );
336 inline ComplexDouble
operator-(const ComplexDouble
& other
) const
338 return ComplexDouble( reg
[0] - other
.reg
[0], reg
[1] - other
.reg
[1] );
341 inline ComplexDouble
operator-(void) const
343 return ComplexDouble( -reg
[0] , -reg
[1] );
346 inline ComplexDouble
operator*(const ComplexDouble
& other
) const
348 return ComplexDouble( reg
[0]*other
.reg
[0] - reg
[1]*other
.reg
[1], reg
[0]*other
.reg
[1] + reg
[1]*other
.reg
[0] );
351 inline ComplexDouble
operator/(const ComplexDouble
& other
) const
353 return ((*this) *other
.conj() / (other
.reg
[0]*other
.reg
[0] + other
.reg
[1]*other
.reg
[1]));
356 inline ComplexDouble
operator*(double scalar
) const
358 return ComplexDouble( reg
[0]*scalar
, reg
[1]*scalar
);
361 inline ComplexDouble
operator/(double scalar
) const
363 return ComplexDouble( reg
[0]/scalar
, reg
[1]/scalar
);
366 inline ComplexDouble
operator*(unsigned int scalar
) const
368 return ComplexDouble( reg
[0]*(double)scalar
, reg
[1]*(double)scalar
);
371 inline void operator+=(const ComplexDouble
& other
)
373 reg
[0] += other
.reg
[0];
374 reg
[1] += other
.reg
[1];
377 inline void operator-=(const ComplexDouble
& other
)
379 reg
[0] -= other
.reg
[0];
380 reg
[1] -= other
.reg
[1];
383 inline void operator*=(const ComplexDouble
& other
)
386 t
[0] = reg
[0]*other
.reg
[0] - reg
[1]*other
.reg
[1];
387 t
[1] = reg
[0]*other
.reg
[1] + reg
[1]*other
.reg
[0];
392 inline void operator*=(double scalar
)
398 inline void operator/=(const ComplexDouble
& other
)
400 ComplexDouble t
= other
.conj() / (other
.reg
[0]*other
.reg
[0] + other
.reg
[1]*other
.reg
[1]);
402 r
[0] = reg
[0]*t
.reg
[0] - reg
[1]*t
.reg
[1];
403 r
[1] = reg
[0]*t
.reg
[1] + reg
[1]*t
.reg
[0];
408 // ---------------------------------------------------
409 // Additional arithmetic
410 // ---------------------------------------------------
411 inline double length() const
413 // sqrt(real*real + imag*imag)
414 return sqrt(reg
[0]*reg
[0] + reg
[1]*reg
[1]);
417 inline ComplexDouble
conj() const
420 return ComplexDouble( reg
[0], -reg
[1] );
424 // ---------------------------------------------------
426 // ---------------------------------------------------
427 inline void operator=(const ComplexDouble
& other
)
429 reg
[0] = other
.reg
[0];
430 reg
[1] = other
.reg
[1];
433 //! load from 16byte aligned ptr
434 inline void operator=(double* ptr
)
440 // ---------------------------------------------------
442 // ---------------------------------------------------
444 //! load from 16byte aligned ptr
445 inline void load(const double* ptr
)
451 //! load from unaligned ptr
452 inline void load_unaligned(const double* ptr
)
458 //! store to 16byte aligned ptr
459 inline void store(double* ptr
) const
465 //! store to unaligned ptr
466 inline void store_unaligned(double* ptr
) const
476 } // end of namespace ogdf::sse
478 #endif // _COMPLEX_DOUBLE_H_