common/pffft.h

   1 /* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
   2
   3    Based on original fortran 77 code from FFTPACKv4 from NETLIB,
   4    authored by Dr Paul Swarztrauber of NCAR, in 1985.
   5
   6    As confirmed by the NCAR fftpack software curators, the following
   7    FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
   8    released under the same terms.
   9
  10    FFTPACK license:
  11
  12    http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
  13
  14    Copyright (c) 2004 the University Corporation for Atmospheric
  15    Research ("UCAR"). All rights reserved. Developed by NCAR's
  16    Computational and Information Systems Laboratory, UCAR,
  17    www.cisl.ucar.edu.
  18
  19    Redistribution and use of the Software in source and binary forms,
  20    with or without modification, is permitted provided that the
  21    following conditions are met:
  22
  23    - Neither the names of NCAR's Computational and Information Systems
  24    Laboratory, the University Corporation for Atmospheric Research,
  25    nor the names of its sponsors or contributors may be used to
  26    endorse or promote products derived from this Software without
  27    specific prior written permission.
  28
  29    - Redistributions of source code must retain the above copyright
  30    notices, this list of conditions, and the disclaimer below.
  31
  32    - Redistributions in binary form must reproduce the above copyright
  33    notice, this list of conditions, and the disclaimer below in the
  34    documentation and/or other materials provided with the
  35    distribution.
  36
  37    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  38    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
  39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  40    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
  41    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
  42    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  43    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  44    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
  45    SOFTWARE.
  46 */
  47
  48 /* PFFFT : a Pretty Fast FFT.
  49  *
  50  * This is basically an adaptation of the single precision fftpack (v4) as
  51  * found on netlib taking advantage of SIMD instructions found on CPUs such as
  52  * Intel x86 (SSE1), PowerPC (Altivec), and Arm (NEON).
  53  *
  54  * For architectures where SIMD instructions aren't available, the code falls
  55  * back to a scalar version.
  56  *
  57  * Restrictions:
  58  *
  59  * - 1D transforms only, with 32-bit single precision.
  60  *
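 * - supports only transforms for inputs of length N of the form
 * N=(2^a)*(3^b)*(5^c), given a >= 5, b >= 0, c >= 0 (32, 48, 64, 96, 128, 144,
 * 160, etc are all acceptable lengths). Performance is best for 128<=N<=8192.
 * NOTE(review): 48 = (2^4)*3 and 144 = (2^4)*(3^2) have a = 4, which does not
 * match the stated a >= 5 -- confirm the actual lower bound on a.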
  64  *
  65  * - all (float*) pointers for the functions below are expected to have a
  66  * "SIMD-compatible" alignment, that is 16 bytes.
  67  *
  68  * You can allocate such buffers with the pffft_aligned_malloc function, and
  69  * deallocate them with pffft_aligned_free (or with stuff like posix_memalign,
  70  * aligned_alloc, etc).
  71  *
  72  * Note that for the z-domain data of real transforms, when in the canonical
  73  * order (as interleaved complex numbers) both 0-frequency and half-frequency
  74  * components, which are real, are assembled in the first entry as
  75  * F(0)+i*F(n/2+1). The original fftpack placed F(n/2+1) at the end of the
  76  * arrays instead.
  77  */
  78
  79 #ifndef PFFFT_H
  80 #define PFFFT_H
  81
  82 #include <cstddef>
  83 #include <memory>
  84
  85 #include "almalloc.h"
  86
  87
  88 /* opaque struct holding internal stuff (precomputed twiddle factors) this
  89  * struct can be shared by many threads as it contains only read-only data.
  90  */
  91 struct PFFFT_Setup;
  92
  93 /* direction of the transform */
  94 enum pffft_direction_t { PFFFT_FORWARD, PFFFT_BACKWARD };
  95
  96 /* type of transform */
  97 enum pffft_transform_t { PFFFT_REAL, PFFFT_COMPLEX };
  98
  99 void pffft_destroy_setup(gsl::owner<PFFFT_Setup*> setup) noexcept;
 100 struct PFFFTSetupDeleter {
 101     void operator()(gsl::owner<PFFFT_Setup*> setup) const noexcept { pffft_destroy_setup(setup); }
 102 };
 103 using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>;
 104
 105 /**
 106  * Prepare for performing transforms of size N -- the returned PFFFT_Setup
 107  * structure is read-only so it can safely be shared by multiple concurrent
 108  * threads.
 109  */
 110 PFFFTSetupPtr pffft_new_setup(unsigned int N, pffft_transform_t transform);
 111
 112 /**
 113  * Perform a Fourier transform. The z-domain data is stored in the most
 114  * efficient order for transforming back or using for convolution, and as
 115  * such, there's no guarantee to the order of the values. If you need to have
 116  * its content sorted in the usual way, that is as an array of interleaved
 117  * complex numbers, either use pffft_transform_ordered, or call pffft_zreorder
 118  * after the forward fft and before the backward fft.
 119  *
 120  * Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. Typically
 121  * you will want to scale the backward transform by 1/N.
 122  *
 123  * The 'work' pointer must point to an area of N (2*N for complex fft) floats,
 124  * properly aligned. It cannot be NULL.
 125  *
 126  * The input and output parameters may alias.
 127  */
 128 void pffft_transform(const PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 129
 130 /**
 131  * Similar to pffft_transform, but handles the complex values in the usual form
 132  * (interleaved complex numbers). This is similar to calling
 133  * pffft_transform(..., PFFFT_FORWARD) followed by
 134  * pffft_zreorder(..., PFFFT_FORWARD), or
 135  * pffft_zreorder(..., PFFFT_BACKWARD) followed by
 136  * pffft_transform(..., PFFFT_BACKWARD), for the given direction.
 137  *
 138  * The input and output parameters may alias.
 139  */
 140 void pffft_transform_ordered(const PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
 141
 142 /**
 143  * Reorder the z-domain data. For PFFFT_FORWARD, it reorders from the internal
 144  * representation to the "canonical" order (as interleaved complex numbers).
 145  * For PFFFT_BACKWARD, it reorders from the canonical order to the internal
 146  * order suitable for pffft_transform(..., PFFFT_BACKWARD) or
 147  * pffft_zconvolve_accumulate.
 148  *
 149  * The input and output parameters should not alias.
 150  */
 151 void pffft_zreorder(const PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
 152
 153 /**
 154  * Perform a multiplication of the z-domain data in dft_a and dft_b, and scale
 155  * and accumulate into dft_ab. The arrays should have been obtained with
 156  * pffft_transform(..., PFFFT_FORWARD) or pffft_zreorder(..., PFFFT_BACKWARD)
 157  * and should *not* be in the usual order (otherwise just perform the operation
 158  * yourself as the dft coeffs are stored as interleaved complex numbers).
 159  *
 160  * The operation performed is: dft_ab += (dft_a * dft_b)*scaling
 161  *
 162  * The dft_a, dft_b, and dft_ab parameters may alias.
 163  */
 164 void pffft_zconvolve_scale_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
 165
 166 /**
 167  * Perform a multiplication of the z-domain data in dft_a and dft_b, and
 168  * accumulate into dft_ab.
 169  *
 170  * The operation performed is: dft_ab += dft_a * dft_b
 171  *
 172  * The dft_a, dft_b, and dft_ab parameters may alias.
 173  */
 174 void pffft_zconvolve_accumulate(const PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab);
 175
 176
 177 struct PFFFTSetup {
 178     PFFFTSetupPtr mSetup{};
 179
 180     PFFFTSetup() = default;
 181     PFFFTSetup(const PFFFTSetup&) = delete;
 182     PFFFTSetup(PFFFTSetup&& rhs) noexcept = default;
 183     explicit PFFFTSetup(std::nullptr_t) noexcept { }
 184     explicit PFFFTSetup(unsigned int n, pffft_transform_t transform)
 185         : mSetup{pffft_new_setup(n, transform)}
 186     { }
 187     ~PFFFTSetup() = default;
 188
 189     PFFFTSetup& operator=(const PFFFTSetup&) = delete;
 190     PFFFTSetup& operator=(PFFFTSetup&& rhs) noexcept = default;
 191
 192     [[nodiscard]] explicit operator bool() const noexcept { return mSetup != nullptr; }
 193
 194     void transform(const float *input, float *output, float *work, pffft_direction_t direction) const
 195     { pffft_transform(mSetup.get(), input, output, work, direction); }
 196
 197     void transform_ordered(const float *input, float *output, float *work,
 198         pffft_direction_t direction) const
 199     { pffft_transform_ordered(mSetup.get(), input, output, work, direction); }
 200
 201     void zreorder(const float *input, float *output, pffft_direction_t direction) const
 202     { pffft_zreorder(mSetup.get(), input, output, direction); }
 203
 204     void zconvolve_scale_accumulate(const float *dft_a, const float *dft_b, float *dft_ab,
 205         float scaling) const
 206     { pffft_zconvolve_scale_accumulate(mSetup.get(), dft_a, dft_b, dft_ab, scaling); }
 207
 208     void zconvolve_accumulate(const float *dft_a, const float *dft_b, float *dft_ab) const
 209     { pffft_zconvolve_accumulate(mSetup.get(), dft_a, dft_b, dft_ab); }
 210 };
 211
 212 #endif // PFFFT_H