ext/OGDF/src/energybased/ComplexDouble.h

   1 /*
   2  * $Revision: 2559 $
   3  *
   4  * last checkin:
   5  *   $Author: gutwenger $
   6  *   $Date: 2012-07-06 15:04:28 +0200 (Fr, 06. Jul 2012) $
   7  ***************************************************************/
   8
   9 /** \file
  10  * \brief Definition of class ComplexDouble for fast complex number arithmetic.
  11  *
  12  * \author Martin Gronemann
  13  *
  14  * \par License:
  15  * This file is part of the Open Graph Drawing Framework (OGDF).
  16  *
  17  * \par
  18  * Copyright (C)<br>
  19  * See README.txt in the root directory of the OGDF installation for details.
  20  *
  21  * \par
  22  * This program is free software; you can redistribute it and/or
  23  * modify it under the terms of the GNU General Public License
  24  * Version 2 or 3 as published by the Free Software Foundation;
  25  * see the file LICENSE.txt included in the packaging of this file
  26  * for details.
  27  *
  28  * \par
  29  * This program is distributed in the hope that it will be useful,
  30  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  31  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32  * GNU General Public License for more details.
  33  *
  34  * \par
  35  * You should have received a copy of the GNU General Public
  36  * License along with this program; if not, write to the Free
  37  * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  38  * Boston, MA 02110-1301, USA.
  39  *
  40  * \see  http://www.gnu.org/copyleft/gpl.html
  41  ***************************************************************/
  42
  43 #ifndef OGDF_COMPLEX_DOUBLE_H
  44 #define OGDF_COMPLEX_DOUBLE_H
  45
  46 #include "FastUtils.h"
  47 #include <math.h>
  48
  49 namespace ogdf {
  50 namespace sse {
  51
  52 //! Class to generate intrinsics for complex number arithmetic functions
  53 #ifdef OGDF_FME_KERNEL_USE_SSE
  54 class ComplexDouble
  55 {
  56 public:
  57         __m128d reg;
  58
  59         // ---------------------------------------------------
  60         //      CONSTRUCTORS
  61         // ---------------------------------------------------
  62         inline ComplexDouble()
  63         {
  64                 reg = _mm_setzero_pd();
  65         }
  66
  67         inline ComplexDouble(const ComplexDouble& other)
  68         {
  69                 reg = other.reg;
  70         }
  71
  72         inline ComplexDouble(double x)
  73         {
  74                 reg = _mm_setr_pd((x), (0));
  75         }
  76
  77         inline ComplexDouble(double x, double y)
  78         {
  79                 reg = _mm_setr_pd((x), (y));
  80         }
  81
  82         inline ComplexDouble(const double* ptr)
  83         {
  84                 reg = _mm_load_pd(ptr);
  85         }
  86
  87
  88         inline ComplexDouble(__m128d r) : reg(r)
  89         {
  90         }
  91
  92         inline ComplexDouble(float x, float y)
  93         {
  94                 reg =  _mm_cvtps_pd(_mm_setr_ps((x), (y), 0, 0));
  95         }
  96
  97         // ---------------------------------------------------
  98         //      Standard arithmetic
  99         // ---------------------------------------------------
 100         inline ComplexDouble operator+(const ComplexDouble& other) const
 101         {
 102                 return ComplexDouble( _mm_add_pd(reg, other.reg) );
 103         }
 104
 105         inline ComplexDouble operator-(const ComplexDouble& other) const
 106         {
 107                 return ComplexDouble( _mm_sub_pd(reg, other.reg) );
 108         }
 109
 110         inline ComplexDouble operator-(void) const
 111         {
 112                 return ComplexDouble( _mm_sub_pd(_mm_setzero_pd(), reg) );
 113         }
 114
 115         inline ComplexDouble operator*(const ComplexDouble& other) const
 116         {
 117                 // ---------------------------------
 118                 // | a0*b0 - a1*b1 | a0*b1 + a1*b0 |
 119                 // ---------------------------------
 120                 // bt = | b1 | b0 |
 121                 __m128d b_t = _mm_shuffle_pd(other.reg, other.reg, _MM_SHUFFLE2(0, 1));
 122                 // left = | a0*b0 | a1*b1 |
 123                 __m128d left = _mm_mul_pd(reg, other.reg);
 124                 // right = | a0*b1 | a1*b0 |
 125                 __m128d right = _mm_mul_pd(reg, b_t);
 126                 // left = | a0*b0 | -a1*b1 |
 127                 left = _mm_mul_pd(left, _mm_setr_pd(1.0,  -1.0) ) ;
 128                 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
 129                 return ComplexDouble( _mm_hadd_pd ( left, right ) );
 130         }
 131
 132         inline ComplexDouble operator/(const ComplexDouble& other) const
 133         {
 134                 // 1/(length(other)^2 * this * other.conj;
 135                 // bt = | b0 | -b1 |
 136                 __m128d conj_reg = _mm_mul_pd(other.reg, _mm_setr_pd(1.0, -1.0) ) ;
 137                 // bt = | b1 | b0 |
 138                 __m128d b_t = _mm_shuffle_pd(conj_reg, conj_reg, _MM_SHUFFLE2(0, 1));
 139                 // left = | a0*b0 | a1*b1 |
 140                 __m128d left = _mm_mul_pd(reg, conj_reg);
 141                 // right = | a0*b1 | a1*b0 |
 142                 __m128d right = _mm_mul_pd(reg, b_t);
 143                 // left = | a0*b0 | -a1*b1 |
 144                 left = _mm_mul_pd(left, _mm_setr_pd(1.0, -1.0) ) ;
 145                 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
 146                 __m128d product = _mm_hadd_pd ( left, right ) ;
 147                 // product = reg*other.reg.conj
 148                 // l = b0*b0 | b1*b1
 149                 __m128d l = _mm_mul_pd(conj_reg, conj_reg );
 150                 // l = b0*b0 + b1*b1 | b0*b0 + b1*b1
 151                 l = _mm_hadd_pd(l, l);
 152                 // l = length^2 | length^2
 153                 return ComplexDouble( _mm_div_pd(product, l));
 154         }
 155
 156         inline ComplexDouble operator*(double scalar) const
 157         {
 158                 return ComplexDouble( _mm_mul_pd(reg, _mm_setr_pd(scalar, scalar)) );
 159         }
 160
 161         inline ComplexDouble operator/(double scalar) const
 162         {
 163                 //double rcp = 1.0/scalar;
 164                 return ComplexDouble( _mm_div_pd(reg, _mm_setr_pd(scalar, scalar)) );
 165         }
 166
 167         inline ComplexDouble operator*(unsigned int scalar) const
 168         {
 169                 return ComplexDouble( _mm_mul_pd(reg, _mm_setr_pd((double)scalar, (double)scalar)) );
 170         }
 171
 172         inline void operator+=(const ComplexDouble& other)
 173         {
 174                 reg = _mm_add_pd(reg, other.reg);
 175         }
 176
 177         inline void operator-=(const ComplexDouble& other)
 178         {
 179                 reg = _mm_sub_pd(reg, other.reg);
 180         }
 181
 182         inline void operator*=(const ComplexDouble& other)
 183         {
 184                 // bt = | b1 | b0 |
 185                 __m128d b_t = _mm_shuffle_pd(other.reg, other.reg, _MM_SHUFFLE2(0, 1));
 186                 // left = | a0*b0 | a1*b1 |
 187                 __m128d left = _mm_mul_pd(reg, other.reg);
 188                 // right = | a0*b1 | a1*b0 |
 189                 __m128d right = _mm_mul_pd(reg, b_t);
 190                 // left = | a0*b0 | -a1*b1 |
 191                 left = _mm_mul_pd(left, _mm_setr_pd(1.0, -1.0) ) ;
 192                 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
 193                 reg = _mm_hadd_pd ( left, right ) ;
 194         }
 195
 196         inline void operator*=(double scalar)
 197         {
 198                 // (real*scalar, imag*scalar)
 199                 reg = _mm_mul_pd(reg, _mm_setr_pd(scalar, scalar));
 200         }
 201
 202         inline void operator/=(const ComplexDouble& other)
 203         {
 204                 // 1/(length(other)^2 * this * other.conj;
 205                 // bt = | b0 | -b1 |
 206                 __m128d conj_reg = _mm_mul_pd(other.reg, _mm_setr_pd(1.0, -1.0) ) ;
 207                 // bt = | b1 | b0 |
 208                 __m128d b_t = _mm_shuffle_pd(conj_reg, conj_reg, _MM_SHUFFLE2(0, 1));
 209                 // left = | a0*b0 | a1*b1 |
 210                 __m128d left = _mm_mul_pd(reg, conj_reg);
 211                 // right = | a0*b1 | a1*b0 |
 212                 __m128d right = _mm_mul_pd(reg, b_t);
 213                 // left = | a0*b0 | -a1*b1 |
 214                 left = _mm_mul_pd(left, _mm_setr_pd(1.0, -1.0) ) ;
 215                 // left = | a0*b0 + (-a1*b1) | a0*b1 + a1*b0 |
 216                 __m128d product = _mm_hadd_pd ( left, right ) ;
 217                 // l = b0*b0 | b1*b1
 218                 __m128d l = _mm_mul_pd(conj_reg, conj_reg );
 219                 // l = b0*b0 + b1*b1 | b0*b0 + b1*b1
 220                 l = _mm_hadd_pd(l, l);
 221                 // l = length^2 | length^2
 222                 reg = _mm_div_pd(product, l);
 223         }
 224
 225         // ---------------------------------------------------
 226         //      Additional arithmetic
 227         // ---------------------------------------------------
 228         inline double length() const
 229         {
 230                 // sqrt(real*real + imag*imag)
 231                 double res;
 232                 __m128d r = _mm_mul_pd(reg, reg );
 233                 r = _mm_hadd_pd(r, _mm_setzero_pd());
 234                 r = _mm_sqrt_sd(r, r);
 235                 _mm_store_sd(&res, r);
 236                 return res;
 237         }
 238
 239         inline ComplexDouble conj() const
 240         {
 241                 // (real, -imag)
 242                 return ComplexDouble( _mm_mul_pd(reg, _mm_setr_pd(1.0, -1.0) ) );
 243         }
 244
 245         // ---------------------------------------------------
 246         //      Assignment
 247         // ---------------------------------------------------
 248         inline void operator=(const ComplexDouble& other)
 249         {
 250                 reg = other.reg;
 251         }
 252
 253         //! load from 16byte aligned ptr
 254         inline void operator=(double* ptr)
 255         {
 256                 reg = _mm_load_pd(ptr);
 257         }
 258
 259
 260         // ---------------------------------------------------
 261         //      LOAD, STORE
 262         // ---------------------------------------------------
 263
 264         //! load from 16byte aligned ptr
 265         inline void load(const double* ptr)
 266         {
 267                 reg = _mm_load_pd(ptr);
 268         }
 269
 270         //! load from unaligned ptr
 271         inline void load_unaligned(const double* ptr)
 272         {
 273                 reg = _mm_loadu_pd(ptr);
 274         }
 275
 276         //! store to 16byte aligned ptr
 277         inline void store(double* ptr) const
 278         {
 279                 _mm_store_pd(ptr, reg);
 280         }
 281
 282         //! store to unaligned ptr
 283         inline void store_unaligned(double* ptr) const
 284         {
 285                 _mm_storeu_pd(ptr, reg);
 286         }
 287 };
 288
 289 #else
 290 class ComplexDouble
 291 {
 292 public:
 293         double reg[2];
 294
 295         // ---------------------------------------------------
 296         //      CONSTRUCTORS
 297         // ---------------------------------------------------
 298         inline ComplexDouble( )
 299         {
 300                 reg[0] = 0.0;
 301                 reg[1] = 0.0;
 302         }
 303
 304         inline ComplexDouble(const ComplexDouble& other)
 305         {
 306                 reg[0] = other.reg[0];
 307                 reg[1] = other.reg[1];
 308         }
 309
 310         inline ComplexDouble(double x)
 311         {
 312                 reg[0] = x;
 313                 reg[1] = 0;
 314         }
 315
 316         inline ComplexDouble(double x, double y)
 317         {
 318                 reg[0] = x;
 319                 reg[1] = y;
 320         }
 321
 322         inline ComplexDouble(double* ptr)
 323         {
 324                 reg[0] = ptr[0];
 325                 reg[1] = ptr[1];
 326         }
 327
 328         // ---------------------------------------------------
 329         //      Standard arithmetic
 330         // ---------------------------------------------------
 331         inline ComplexDouble operator+(const ComplexDouble& other) const
 332         {
 333                 return ComplexDouble( reg[0] + other.reg[0], reg[1] + other.reg[1] );
 334         }
 335
 336         inline ComplexDouble operator-(const ComplexDouble& other) const
 337         {
 338                 return ComplexDouble( reg[0] - other.reg[0], reg[1] - other.reg[1] );
 339         }
 340
 341         inline ComplexDouble operator-(void) const
 342         {
 343                 return ComplexDouble( -reg[0] , -reg[1] );
 344         }
 345
 346         inline ComplexDouble operator*(const ComplexDouble& other) const
 347         {
 348                 return ComplexDouble( reg[0]*other.reg[0] - reg[1]*other.reg[1], reg[0]*other.reg[1] + reg[1]*other.reg[0] );
 349         }
 350
 351         inline ComplexDouble operator/(const ComplexDouble& other) const
 352         {
 353                 return ((*this) *other.conj() / (other.reg[0]*other.reg[0] + other.reg[1]*other.reg[1]));
 354         }
 355
 356         inline ComplexDouble operator*(double scalar) const
 357         {
 358                 return ComplexDouble( reg[0]*scalar, reg[1]*scalar );
 359         }
 360
 361         inline ComplexDouble operator/(double scalar) const
 362         {
 363                 return ComplexDouble( reg[0]/scalar, reg[1]/scalar );
 364         }
 365
 366         inline ComplexDouble operator*(unsigned int scalar) const
 367         {
 368                 return ComplexDouble( reg[0]*(double)scalar, reg[1]*(double)scalar );
 369         }
 370
 371         inline void operator+=(const ComplexDouble& other)
 372         {
 373                 reg[0] += other.reg[0];
 374                 reg[1] += other.reg[1];
 375         }
 376
 377         inline void operator-=(const ComplexDouble& other)
 378         {
 379                 reg[0] -= other.reg[0];
 380                 reg[1] -= other.reg[1];
 381         }
 382
 383         inline void operator*=(const ComplexDouble& other)
 384         {
 385                 double t[2];
 386                 t[0] = reg[0]*other.reg[0] - reg[1]*other.reg[1];
 387                 t[1] = reg[0]*other.reg[1] + reg[1]*other.reg[0];
 388                 reg[0] = t[0];
 389                 reg[1] = t[1];
 390         }
 391
 392         inline void operator*=(double scalar)
 393         {
 394                 reg[0] *= scalar;
 395                 reg[1] *= scalar;
 396         }
 397
 398         inline void operator/=(const ComplexDouble& other)
 399         {
 400                 ComplexDouble t = other.conj() / (other.reg[0]*other.reg[0] + other.reg[1]*other.reg[1]);
 401                 double r[2];
 402                 r[0] = reg[0]*t.reg[0] - reg[1]*t.reg[1];
 403                 r[1] = reg[0]*t.reg[1] + reg[1]*t.reg[0];
 404                 reg[0] = r[0];
 405                 reg[1] = r[1];
 406         }
 407
 408         // ---------------------------------------------------
 409         //      Additional arithmetic
 410         // ---------------------------------------------------
 411         inline double length() const
 412         {
 413                 // sqrt(real*real + imag*imag)
 414                 return sqrt(reg[0]*reg[0] + reg[1]*reg[1]);
 415         }
 416
 417         inline ComplexDouble conj() const
 418         {
 419                 // (real, -imag)
 420                 return ComplexDouble( reg[0], -reg[1] );
 421         }
 422
 423
 424         // ---------------------------------------------------
 425         //      Assignment
 426         // ---------------------------------------------------
 427         inline void operator=(const ComplexDouble& other)
 428         {
 429                 reg[0] = other.reg[0];
 430                 reg[1] = other.reg[1];
 431         }
 432
 433         //! load from 16byte aligned ptr
 434         inline void operator=(double* ptr)
 435         {
 436                 reg[0] = ptr[0];
 437                 reg[1] = ptr[1];
 438         }
 439
 440         // ---------------------------------------------------
 441         //      LOAD, STORE
 442         // ---------------------------------------------------
 443
 444         //! load from 16byte aligned ptr
 445         inline void load(const double* ptr)
 446         {
 447                 reg[0] = ptr[0];
 448                 reg[1] = ptr[1];
 449         }
 450
 451         //! load from unaligned ptr
 452         inline void load_unaligned(const double* ptr)
 453         {
 454                 reg[0] = ptr[0];
 455                 reg[1] = ptr[1];
 456         }
 457
 458         //! store to 16byte aligned ptr
 459         inline void store(double* ptr) const
 460         {
 461                 ptr[0] = reg[0];
 462                 ptr[1] = reg[1];
 463         }
 464
 465         //! store to unaligned ptr
 466         inline void store_unaligned(double* ptr) const
 467         {
 468                 ptr[0] = reg[0];
 469                 ptr[1] = reg[1];
 470         }
 471 };
 472
 473 #endif
 474 };
 475
 476 } // end of namespace ogdf
 477
 478 #endif // OGDF_COMPLEX_DOUBLE_H
 479