Highly Efficient FFT for Exascale: HeFFTe v2.3
heffte_backend_fftw.h
1 /*
2  -- heFFTe --
3  Univ. of Tennessee, Knoxville
4  @date
5 */
6 
7 #ifndef HEFFTE_BACKEND_FFTW_H
8 #define HEFFTE_BACKEND_FFTW_H
9 
10 #include "heffte_r2r_executor.h"
11 
12 #ifdef Heffte_ENABLE_FFTW
13 
14 #include "fftw3.h"
15 
27 namespace heffte{
28 
29 namespace backend{
34  template<> struct is_enabled<fftw> : std::true_type{};
35 
40  template<> struct is_enabled<fftw_cos> : std::true_type{};
45  template<> struct is_enabled<fftw_sin> : std::true_type{};
46 
47 // Specialization is not necessary since the default behavior assumes CPU parameters.
48 // template<>
49 // struct buffer_traits<fftw>{
50 // using location = tag::cpu;
51 // template<typename T> using container = std::vector<T>;
52 // };
53 #ifndef Heffte_ENABLE_MKL
58  template<> struct default_backend<tag::cpu> {
59  using type = fftw;
60  };
61 #endif
62 }
63 
68 template<> struct is_ccomplex<fftwf_complex> : std::true_type{};
73 template<> struct is_zcomplex<fftw_complex> : std::true_type{};
74 
83 template<typename, direction> struct plan_fftw{};
84 
91 template<direction dir>
92 struct plan_fftw<std::complex<float>, dir>{
101  plan_fftw(int size, int howmanyffts, int stride, int dist) :
102  plan(fftwf_plan_many_dft(1, &size, howmanyffts, nullptr, nullptr, stride, dist,
103  nullptr, nullptr, stride, dist,
104  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
105  ))
106  {}
117  plan_fftw(int size1, int size2, std::array<int, 2> const &embed, int howmanyffts, int stride, int dist){
118  std::array<int, 2> size = {size2, size1};
119 
120  if (embed[0] == 0 and embed[1] == 0){
121  plan = fftwf_plan_many_dft(2, size.data(), howmanyffts, nullptr, nullptr, stride, dist,
122  nullptr, nullptr, stride, dist,
123  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
124  );
125  }else{
126  plan = fftwf_plan_many_dft(2, size.data(), howmanyffts, nullptr, embed.data(), stride, dist,
127  nullptr, embed.data(), stride, dist,
128  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
129  );
130  }
131  }
133  plan_fftw(int size1, int size2, int size3){
134  std::array<int, 3> size = {size3, size2, size1};
135  plan = fftwf_plan_many_dft(3, size.data(), 1, nullptr, nullptr, 1, 1, nullptr, nullptr, 1, 1,
136  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE);
137  }
139  ~plan_fftw(){ fftwf_destroy_plan(plan); }
141  operator fftwf_plan() const{ return plan; }
143  fftwf_plan plan;
144 };
149 template<direction dir>
150 struct plan_fftw<std::complex<double>, dir>{
152  plan_fftw(int size, int howmanyffts, int stride, int dist) :
153  plan(fftw_plan_many_dft(1, &size, howmanyffts, nullptr, nullptr, stride, dist,
154  nullptr, nullptr, stride, dist,
155  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
156  ))
157  {}
159  plan_fftw(int size1, int size2, std::array<int, 2> const &embed, int howmanyffts, int stride, int dist){
160  std::array<int, 2> size = {size2, size1};
161 
162  if (embed[0] == 0 and embed[1] == 0){
163  plan = fftw_plan_many_dft(2, size.data(), howmanyffts, nullptr, nullptr, stride, dist,
164  nullptr, nullptr, stride, dist,
165  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
166  );
167  }else{
168  plan = fftw_plan_many_dft(2, size.data(), howmanyffts, nullptr, embed.data(), stride, dist,
169  nullptr, embed.data(), stride, dist,
170  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE
171  );
172  }
173  }
175  plan_fftw(int size1, int size2, int size3){
176  std::array<int, 3> size = {size3, size2, size1};
177  plan = fftw_plan_many_dft(3, size.data(), 1, nullptr, nullptr, 1, 1, nullptr, nullptr, 1, 1,
178  (dir == direction::forward) ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_ESTIMATE);
179  }
181  ~plan_fftw(){ fftw_destroy_plan(plan); }
183  operator fftw_plan() const{ return plan; }
185  fftw_plan plan;
186 };
187 
201 public:
209  template<typename index>
210  fftw_executor(void*, box3d<index> const box, int dimension) :
211  size(box.size[dimension]), size2(0),
212  howmanyffts(fft1d_get_howmany(box, dimension)),
213  stride(fft1d_get_stride(box, dimension)),
214  dist((dimension == box.order[0]) ? size : 1),
215  blocks((dimension == box.order[1]) ? box.osize(2) : 1),
216  block_stride(box.osize(0) * box.osize(1)),
217  total_size(box.count()),
218  embed({0, 0})
219  {}
221  template<typename index>
222  fftw_executor(void*, box3d<index> const box, int dir1, int dir2) :
223  size(box.size[std::min(dir1, dir2)]), size2(box.size[std::max(dir1, dir2)]),
224  blocks(1), block_stride(0), total_size(box.count()), embed({0, 0})
225  {
226  int odir1 = box.find_order(dir1);
227  int odir2 = box.find_order(dir2);
228 
229  if (std::min(odir1, odir2) == 0 and std::max(odir1, odir2) == 1){
230  stride = 1;
231  dist = size * size2;
232  howmanyffts = box.size[2];
233  }else if (std::min(odir1, odir2) == 1 and std::max(odir1, odir2) == 2){
234  stride = box.size[0];
235  dist = 1;
236  howmanyffts = box.size[0];
237  }else{ // case of directions (0, 2)
238  stride = 1;
239  dist = size;
240  embed = {static_cast<int>(box.size[2]), static_cast<int>(box.size[1] * box.size[0])};
241  howmanyffts = box.size[1];
242  }
243  }
245  template<typename index>
246  fftw_executor(void*, box3d<index> const box) :
247  size(box.size[0]), size2(box.size[1]), howmanyffts(box.size[2]),
248  stride(0), dist(0),
249  blocks(1), block_stride(0),
250  total_size(box.count()),
251  embed({0, 0})
252  {}
253 
255  void forward(std::complex<float> data[], std::complex<float>*) const override{
256  make_plan(cforward);
257  for(int i=0; i<blocks; i++){
258  fftwf_complex* block_data = reinterpret_cast<fftwf_complex*>(data + i * block_stride);
259  fftwf_execute_dft(*cforward, block_data, block_data);
260  }
261  }
263  void backward(std::complex<float> data[], std::complex<float>*) const override{
264  make_plan(cbackward);
265  for(int i=0; i<blocks; i++){
266  fftwf_complex* block_data = reinterpret_cast<fftwf_complex*>(data + i * block_stride);
267  fftwf_execute_dft(*cbackward, block_data, block_data);
268  }
269  }
271  void forward(std::complex<double> data[], std::complex<double>*) const override{
272  make_plan(zforward);
273  for(int i=0; i<blocks; i++){
274  fftw_complex* block_data = reinterpret_cast<fftw_complex*>(data + i * block_stride);
275  fftw_execute_dft(*zforward, block_data, block_data);
276  }
277  }
279  void backward(std::complex<double> data[], std::complex<double>*) const override{
280  make_plan(zbackward);
281  for(int i=0; i<blocks; i++){
282  fftw_complex* block_data = reinterpret_cast<fftw_complex*>(data + i * block_stride);
283  fftw_execute_dft(*zbackward, block_data, block_data);
284  }
285  }
286 
288  void forward(float const indata[], std::complex<float> outdata[], std::complex<float> *workspace) const override{
289  for(int i=0; i<total_size; i++) outdata[i] = std::complex<float>(indata[i]);
290  forward(outdata, workspace);
291  }
293  void backward(std::complex<float> indata[], float outdata[], std::complex<float> *workspace) const override{
294  backward(indata, workspace);
295  for(int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
296  }
298  void forward(double const indata[], std::complex<double> outdata[], std::complex<double> *workspace) const override{
299  for(int i=0; i<total_size; i++) outdata[i] = std::complex<double>(indata[i]);
300  forward(outdata, workspace);
301  }
303  void backward(std::complex<double> indata[], double outdata[], std::complex<double> *workspace) const override{
304  backward(indata, workspace);
305  for(int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
306  }
307 
309  int box_size() const override{ return total_size; }
311  size_t workspace_size() const override{ return 0; }
312 
313 private:
315  template<typename scalar_type, direction dir>
316  void make_plan(std::unique_ptr<plan_fftw<scalar_type, dir>> &plan) const{
317  if (not plan){
318  if (dist == 0)
319  plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(new plan_fftw<scalar_type, dir>(size, size2, howmanyffts));
320  else if (size2 == 0)
321  plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(new plan_fftw<scalar_type, dir>(size, howmanyffts, stride, dist));
322  else
323  plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(new plan_fftw<scalar_type, dir>(size, size2, embed, howmanyffts, stride, dist));
324  }
325  }
326 
327  int size, size2, howmanyffts, stride, dist, blocks, block_stride, total_size;
328  std::array<int, 2> embed;
329  mutable std::unique_ptr<plan_fftw<std::complex<float>, direction::forward>> cforward;
330  mutable std::unique_ptr<plan_fftw<std::complex<float>, direction::backward>> cbackward;
331  mutable std::unique_ptr<plan_fftw<std::complex<double>, direction::forward>> zforward;
332  mutable std::unique_ptr<plan_fftw<std::complex<double>, direction::backward>> zbackward;
333 };
334 
339 template<direction dir>
340 struct plan_fftw<float, dir>{
350  plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist) :
351  plan((dir == direction::forward) ?
352  fftwf_plan_many_dft_r2c(1, &size, howmanyffts, nullptr, nullptr, stride, rdist,
353  nullptr, nullptr, stride, cdist,
354  FFTW_ESTIMATE
355  ) :
356  fftwf_plan_many_dft_c2r(1, &size, howmanyffts, nullptr, nullptr, stride, cdist,
357  nullptr, nullptr, stride, rdist,
358  FFTW_ESTIMATE
359  ))
360  {}
362  ~plan_fftw(){ fftwf_destroy_plan(plan); }
364  operator fftwf_plan() const{ return plan; }
366  fftwf_plan plan;
367 };
372 template<direction dir>
373 struct plan_fftw<double, dir>{
375  plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist) :
376  plan((dir == direction::forward) ?
377  fftw_plan_many_dft_r2c(1, &size, howmanyffts, nullptr, nullptr, stride, rdist,
378  nullptr, nullptr, stride, cdist,
379  FFTW_ESTIMATE
380  ) :
381  fftw_plan_many_dft_c2r(1, &size, howmanyffts, nullptr, nullptr, stride, cdist,
382  nullptr, nullptr, stride, rdist,
383  FFTW_ESTIMATE
384  ))
385  {}
387  ~plan_fftw(){ fftw_destroy_plan(plan); }
389  operator fftw_plan() const{ return plan; }
391  fftw_plan plan;
392 };
393 
403 public:
413  template<typename index>
414  fftw_executor_r2c(void*, box3d<index> const box, int dimension) :
415  size(box.size[dimension]),
416  howmanyffts(fft1d_get_howmany(box, dimension)),
417  stride(fft1d_get_stride(box, dimension)),
418  blocks((dimension == box.order[1]) ? box.osize(2) : 1),
419  rdist((dimension == box.order[0]) ? size : 1),
420  cdist((dimension == box.order[0]) ? size/2 + 1 : 1),
421  rblock_stride(box.osize(0) * box.osize(1)),
422  cblock_stride(box.osize(0) * (box.osize(1)/2 + 1)),
423  rsize(box.count()),
424  csize(box.r2c(dimension).count())
425  {}
426 
428  void forward(float const indata[], std::complex<float> outdata[], std::complex<float>*) const override{
429  make_plan(sforward);
430  for(int i=0; i<blocks; i++){
431  float *rdata = const_cast<float*>(indata + i * rblock_stride);
432  fftwf_complex* cdata = reinterpret_cast<fftwf_complex*>(outdata + i * cblock_stride);
433  fftwf_execute_dft_r2c(*sforward, rdata, cdata);
434  }
435  }
437  void backward(std::complex<float> indata[], float outdata[], std::complex<float>*) const override{
438  make_plan(sbackward);
439  for(int i=0; i<blocks; i++){
440  fftwf_complex* cdata = const_cast<fftwf_complex*>(reinterpret_cast<fftwf_complex const*>(indata + i * cblock_stride));
441  fftwf_execute_dft_c2r(*sbackward, cdata, outdata + i * rblock_stride);
442  }
443  }
445  void forward(double const indata[], std::complex<double> outdata[], std::complex<double>*) const override{
446  make_plan(dforward);
447  for(int i=0; i<blocks; i++){
448  double *rdata = const_cast<double*>(indata + i * rblock_stride);
449  fftw_complex* cdata = reinterpret_cast<fftw_complex*>(outdata + i * cblock_stride);
450  fftw_execute_dft_r2c(*dforward, rdata, cdata);
451  }
452  }
454  void backward(std::complex<double> indata[], double outdata[], std::complex<double>*) const override{
455  make_plan(dbackward);
456  for(int i=0; i<blocks; i++){
457  fftw_complex* cdata = const_cast<fftw_complex*>(reinterpret_cast<fftw_complex const*>(indata + i * cblock_stride));
458  fftw_execute_dft_c2r(*dbackward, cdata, outdata + i * rblock_stride);
459  }
460  }
461 
463  int box_size() const override{ return rsize; }
465  int complex_size() const override{ return csize; }
467  size_t workspace_size() const override{ return 0; }
468 
469 private:
471  template<typename scalar_type, direction dir>
472  void make_plan(std::unique_ptr<plan_fftw<scalar_type, dir>> &plan) const{
473  if (!plan) plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(new plan_fftw<scalar_type, dir>(size, howmanyffts, stride, rdist, cdist));
474  }
475 
476  int size, howmanyffts, stride, blocks;
477  int rdist, cdist, rblock_stride, cblock_stride, rsize, csize;
478  mutable std::unique_ptr<plan_fftw<float, direction::forward>> sforward;
479  mutable std::unique_ptr<plan_fftw<double, direction::forward>> dforward;
480  mutable std::unique_ptr<plan_fftw<float, direction::backward>> sbackward;
481  mutable std::unique_ptr<plan_fftw<double, direction::backward>> dbackward;
482 };
483 
490 template<> struct one_dim_backend<backend::fftw>{
495 };
496 
503 template<> struct one_dim_backend<backend::fftw_cos>{
507  using executor_r2c = void;
508 };
515 template<> struct one_dim_backend<backend::fftw_sin>{
519  using executor_r2c = void;
520 };
521 
526 template<> struct default_plan_options<backend::fftw>{
528  static const bool use_reorder = true;
529 };
530 
535 template<> struct default_plan_options<backend::fftw_cos>{
537  static const bool use_reorder = true;
538 };
543 template<> struct default_plan_options<backend::fftw_sin>{
545  static const bool use_reorder = true;
546 };
547 
548 }
549 
550 #endif
551 
552 #endif /* HEFFTE_BACKEND_FFTW_H */
Base class for all backend executors.
Definition: heffte_common.h:486
virtual int complex_size() const
Return the size of the complex-box (r2c executors).
Definition: heffte_common.h:519
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition: heffte_common.h:495
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition: heffte_common.h:491
Wrapper to fftw3 API for real-to-complex transform with shortening of the data.
Definition: heffte_backend_fftw.h:402
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *) const override
Forward transform, single precision.
Definition: heffte_backend_fftw.h:428
int box_size() const override
Returns the size of the box with real data.
Definition: heffte_backend_fftw.h:463
int complex_size() const override
Returns the size of the box with complex coefficients.
Definition: heffte_backend_fftw.h:465
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *) const override
Forward transform, double precision.
Definition: heffte_backend_fftw.h:445
fftw_executor_r2c(void *, box3d< index > const box, int dimension)
Constructor defines the box and the dimension of reduction.
Definition: heffte_backend_fftw.h:414
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *) const override
Backward transform, double precision.
Definition: heffte_backend_fftw.h:454
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *) const override
Backward transform, single precision.
Definition: heffte_backend_fftw.h:437
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_fftw.h:467
Wrapper around the FFTW3 API.
Definition: heffte_backend_fftw.h:200
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *workspace) const override
Converts the deal data to complex and performs double-complex forward transform.
Definition: heffte_backend_fftw.h:298
void forward(std::complex< double > data[], std::complex< double > *) const override
Forward fft, double-complex case.
Definition: heffte_backend_fftw.h:271
virtual void backward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:495
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *workspace) const override
Performs backward float-complex transform and truncates the complex part of the result.
Definition: heffte_backend_fftw.h:293
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *workspace) const override
Converts the deal data to complex and performs float-complex forward transform.
Definition: heffte_backend_fftw.h:288
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_fftw.h:311
virtual void forward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:491
fftw_executor(void *, box3d< index > const box, int dimension)
Constructor, specifies the box and dimension.
Definition: heffte_backend_fftw.h:210
fftw_executor(void *, box3d< index > const box, int dir1, int dir2)
Merges two FFTs into one.
Definition: heffte_backend_fftw.h:222
void backward(std::complex< float > data[], std::complex< float > *) const override
Backward fft, float-complex case.
Definition: heffte_backend_fftw.h:263
void forward(std::complex< float > data[], std::complex< float > *) const override
Forward fft, float-complex case.
Definition: heffte_backend_fftw.h:255
fftw_executor(void *, box3d< index > const box)
Merges three FFTs into one.
Definition: heffte_backend_fftw.h:246
void backward(std::complex< double > data[], std::complex< double > *) const override
Backward fft, double-complex case.
Definition: heffte_backend_fftw.h:279
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *workspace) const override
Performs backward double-complex transform and truncates the complex part of the result.
Definition: heffte_backend_fftw.h:303
int box_size() const override
Returns the size of the box.
Definition: heffte_backend_fftw.h:309
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:169
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Defines inverse mapping from the location tag to a default backend tag.
Definition: heffte_common.h:380
stock type
Defaults to the stock backend.
Definition: heffte_common.h:382
Type-tag for the Cosine Transform using the FFTW backend.
Definition: heffte_common.h:104
Type-tag for the Sine Transform using the FFTW backend.
Definition: heffte_common.h:109
Type-tag for the FFTW backend.
Definition: heffte_common.h:99
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
Defines a set of default plan options for a given backend.
Definition: heffte_common.h:642
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_fftw.h:507
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_fftw.h:519
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
fftw_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:391
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:387
plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:375
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:362
fftwf_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:366
plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist)
Constructor taking into account the different sizes for the real and complex parts.
Definition: heffte_backend_fftw.h:350
fftw_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:185
plan_fftw(int size1, int size2, std::array< int, 2 > const &embed, int howmanyffts, int stride, int dist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:159
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:181
plan_fftw(int size, int howmanyffts, int stride, int dist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:152
plan_fftw(int size1, int size2, int size3)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:175
plan_fftw(int size1, int size2, int size3)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:133
plan_fftw(int size, int howmanyffts, int stride, int dist)
Constructor, takes inputs identical to fftwf_plan_many_dft().
Definition: heffte_backend_fftw.h:101
~plan_fftw()
Destructor, deletes the plan.
Definition: heffte_backend_fftw.h:139
plan_fftw(int size1, int size2, std::array< int, 2 > const &embed, int howmanyffts, int stride, int dist)
Constructor, takes inputs identical to fftwf_plan_many_dft().
Definition: heffte_backend_fftw.h:117
fftwf_plan plan
The FFTW3 opaque structure (pointer to struct).
Definition: heffte_backend_fftw.h:143
Base plan for fftw, using only the specialization for float and double complex.
Definition: heffte_backend_fftw.h:83
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135