7 #ifndef HEFFTE_BACKEND_FFTW_H
8 #define HEFFTE_BACKEND_FFTW_H
10 #include "heffte_r2r_executor.h"
12 #ifdef Heffte_ENABLE_FFTW
53 #ifndef Heffte_ENABLE_MKL
68 template<>
struct is_ccomplex<fftwf_complex> : std::true_type{};
73 template<>
struct is_zcomplex<fftw_complex> : std::true_type{};
91 template<direction dir>
101 plan_fftw(
int size,
int howmanyffts,
int stride,
int dist) :
102 plan(fftwf_plan_many_dft(1, &size, howmanyffts, nullptr, nullptr, stride, dist,
103 nullptr, nullptr, stride, dist,
117 plan_fftw(
int size1,
int size2, std::array<int, 2>
const &embed,
int howmanyffts,
int stride,
int dist){
118 std::array<int, 2> size = {size2, size1};
120 if (embed[0] == 0 and embed[1] == 0){
121 plan = fftwf_plan_many_dft(2, size.data(), howmanyffts,
nullptr,
nullptr, stride, dist,
122 nullptr,
nullptr, stride, dist,
126 plan = fftwf_plan_many_dft(2, size.data(), howmanyffts,
nullptr, embed.data(), stride, dist,
127 nullptr, embed.data(), stride, dist,
134 std::array<int, 3> size = {size3, size2, size1};
135 plan = fftwf_plan_many_dft(3, size.data(), 1,
nullptr,
nullptr, 1, 1,
nullptr,
nullptr, 1, 1,
141 operator fftwf_plan()
const{
return plan; }
149 template<direction dir>
152 plan_fftw(
int size,
int howmanyffts,
int stride,
int dist) :
153 plan(fftw_plan_many_dft(1, &size, howmanyffts, nullptr, nullptr, stride, dist,
154 nullptr, nullptr, stride, dist,
159 plan_fftw(
int size1,
int size2, std::array<int, 2>
const &embed,
int howmanyffts,
int stride,
int dist){
160 std::array<int, 2> size = {size2, size1};
162 if (embed[0] == 0 and embed[1] == 0){
163 plan = fftw_plan_many_dft(2, size.data(), howmanyffts,
nullptr,
nullptr, stride, dist,
164 nullptr,
nullptr, stride, dist,
168 plan = fftw_plan_many_dft(2, size.data(), howmanyffts,
nullptr, embed.data(), stride, dist,
169 nullptr, embed.data(), stride, dist,
176 std::array<int, 3> size = {size3, size2, size1};
177 plan = fftw_plan_many_dft(3, size.data(), 1,
nullptr,
nullptr, 1, 1,
nullptr,
nullptr, 1, 1,
183 operator fftw_plan()
const{
return plan; }
209 template<
typename index>
211 size(box.size[dimension]), size2(0),
214 dist((dimension == box.order[0]) ? size : 1),
215 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
216 block_stride(box.osize(0) * box.osize(1)),
217 total_size(box.count()),
221 template<
typename index>
223 size(box.size[std::min(dir1, dir2)]), size2(box.size[std::max(dir1, dir2)]),
224 blocks(1), block_stride(0), total_size(box.count()), embed({0, 0})
226 int odir1 = box.find_order(dir1);
227 int odir2 = box.find_order(dir2);
229 if (std::min(odir1, odir2) == 0 and std::max(odir1, odir2) == 1){
232 howmanyffts = box.size[2];
233 }
else if (std::min(odir1, odir2) == 1 and std::max(odir1, odir2) == 2){
234 stride = box.size[0];
236 howmanyffts = box.size[0];
240 embed = {
static_cast<int>(box.size[2]),
static_cast<int>(box.size[1] * box.size[0])};
241 howmanyffts = box.size[1];
245 template<
typename index>
247 size(box.size[0]), size2(box.size[1]), howmanyffts(box.size[2]),
249 blocks(1), block_stride(0),
250 total_size(box.count()),
255 void forward(std::complex<float> data[], std::complex<float>*)
const override{
257 for(
int i=0; i<blocks; i++){
258 fftwf_complex* block_data =
reinterpret_cast<fftwf_complex*
>(data + i * block_stride);
259 fftwf_execute_dft(*cforward, block_data, block_data);
263 void backward(std::complex<float> data[], std::complex<float>*)
const override{
264 make_plan(cbackward);
265 for(
int i=0; i<blocks; i++){
266 fftwf_complex* block_data =
reinterpret_cast<fftwf_complex*
>(data + i * block_stride);
267 fftwf_execute_dft(*cbackward, block_data, block_data);
271 void forward(std::complex<double> data[], std::complex<double>*)
const override{
273 for(
int i=0; i<blocks; i++){
274 fftw_complex* block_data =
reinterpret_cast<fftw_complex*
>(data + i * block_stride);
275 fftw_execute_dft(*zforward, block_data, block_data);
279 void backward(std::complex<double> data[], std::complex<double>*)
const override{
280 make_plan(zbackward);
281 for(
int i=0; i<blocks; i++){
282 fftw_complex* block_data =
reinterpret_cast<fftw_complex*
>(data + i * block_stride);
283 fftw_execute_dft(*zbackward, block_data, block_data);
288 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float> *workspace)
const override{
289 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<float>(indata[i]);
293 void backward(std::complex<float> indata[],
float outdata[], std::complex<float> *workspace)
const override{
295 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
298 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double> *workspace)
const override{
299 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<double>(indata[i]);
303 void backward(std::complex<double> indata[],
double outdata[], std::complex<double> *workspace)
const override{
305 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
309 int box_size()
const override{
return total_size; }
315 template<
typename scalar_type, direction dir>
323 plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(
new plan_fftw<scalar_type, dir>(size, size2, embed, howmanyffts, stride, dist));
327 int size, size2, howmanyffts, stride, dist, blocks, block_stride, total_size;
328 std::array<int, 2> embed;
331 mutable std::unique_ptr<plan_fftw<std::complex<double>,
direction::forward>> zforward;
339 template<direction dir>
350 plan_fftw(
int size,
int howmanyffts,
int stride,
int rdist,
int cdist) :
352 fftwf_plan_many_dft_r2c(1, &size, howmanyffts, nullptr, nullptr, stride, rdist,
353 nullptr, nullptr, stride, cdist,
356 fftwf_plan_many_dft_c2r(1, &size, howmanyffts, nullptr, nullptr, stride, cdist,
357 nullptr, nullptr, stride, rdist,
364 operator fftwf_plan()
const{
return plan; }
372 template<direction dir>
375 plan_fftw(
int size,
int howmanyffts,
int stride,
int rdist,
int cdist) :
377 fftw_plan_many_dft_r2c(1, &size, howmanyffts, nullptr, nullptr, stride, rdist,
378 nullptr, nullptr, stride, cdist,
381 fftw_plan_many_dft_c2r(1, &size, howmanyffts, nullptr, nullptr, stride, cdist,
382 nullptr, nullptr, stride, rdist,
389 operator fftw_plan()
const{
return plan; }
413 template<
typename index>
415 size(box.size[dimension]),
418 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
419 rdist((dimension == box.order[0]) ? size : 1),
420 cdist((dimension == box.order[0]) ? size/2 + 1 : 1),
421 rblock_stride(box.osize(0) * box.osize(1)),
422 cblock_stride(box.osize(0) * (box.osize(1)/2 + 1)),
424 csize(box.r2c(dimension).count())
428 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float>*)
const override{
430 for(
int i=0; i<blocks; i++){
431 float *rdata =
const_cast<float*
>(indata + i * rblock_stride);
432 fftwf_complex* cdata =
reinterpret_cast<fftwf_complex*
>(outdata + i * cblock_stride);
433 fftwf_execute_dft_r2c(*sforward, rdata, cdata);
437 void backward(std::complex<float> indata[],
float outdata[], std::complex<float>*)
const override{
438 make_plan(sbackward);
439 for(
int i=0; i<blocks; i++){
440 fftwf_complex* cdata =
const_cast<fftwf_complex*
>(
reinterpret_cast<fftwf_complex const*
>(indata + i * cblock_stride));
441 fftwf_execute_dft_c2r(*sbackward, cdata, outdata + i * rblock_stride);
445 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double>*)
const override{
447 for(
int i=0; i<blocks; i++){
448 double *rdata =
const_cast<double*
>(indata + i * rblock_stride);
449 fftw_complex* cdata =
reinterpret_cast<fftw_complex*
>(outdata + i * cblock_stride);
450 fftw_execute_dft_r2c(*dforward, rdata, cdata);
454 void backward(std::complex<double> indata[],
double outdata[], std::complex<double>*)
const override{
455 make_plan(dbackward);
456 for(
int i=0; i<blocks; i++){
457 fftw_complex* cdata =
const_cast<fftw_complex*
>(
reinterpret_cast<fftw_complex const*
>(indata + i * cblock_stride));
458 fftw_execute_dft_c2r(*dbackward, cdata, outdata + i * rblock_stride);
471 template<
typename scalar_type, direction dir>
473 if (!plan) plan = std::unique_ptr<plan_fftw<scalar_type, dir>>(
new plan_fftw<scalar_type, dir>(size, howmanyffts, stride, rdist, cdist));
476 int size, howmanyffts, stride, blocks;
477 int rdist, cdist, rblock_stride, cblock_stride, rsize, csize;
478 mutable std::unique_ptr<plan_fftw<float, direction::forward>> sforward;
479 mutable std::unique_ptr<plan_fftw<double, direction::forward>> dforward;
480 mutable std::unique_ptr<plan_fftw<float, direction::backward>> sbackward;
481 mutable std::unique_ptr<plan_fftw<double, direction::backward>> dbackward;
528 static const bool use_reorder =
true;
537 static const bool use_reorder =
true;
545 static const bool use_reorder =
true;
Base class for all backend executors.
Definition: heffte_common.h:486
virtual int complex_size() const
Return the size of the complex-box (r2c executors).
Definition: heffte_common.h:519
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition: heffte_common.h:495
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition: heffte_common.h:491
Wrapper to fftw3 API for real-to-complex transform with shortening of the data.
Definition: heffte_backend_fftw.h:402
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *) const override
Forward transform, single precision.
Definition: heffte_backend_fftw.h:428
int box_size() const override
Returns the size of the box with real data.
Definition: heffte_backend_fftw.h:463
int complex_size() const override
Returns the size of the box with complex coefficients.
Definition: heffte_backend_fftw.h:465
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *) const override
Forward transform, double precision.
Definition: heffte_backend_fftw.h:445
fftw_executor_r2c(void *, box3d< index > const box, int dimension)
Constructor defines the box and the dimension of reduction.
Definition: heffte_backend_fftw.h:414
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *) const override
Backward transform, double precision.
Definition: heffte_backend_fftw.h:454
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *) const override
Backward transform, single precision.
Definition: heffte_backend_fftw.h:437
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_fftw.h:467
Wrapper around the FFTW3 API.
Definition: heffte_backend_fftw.h:200
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *workspace) const override
Converts the deal data to complex and performs double-complex forward transform.
Definition: heffte_backend_fftw.h:298
void forward(std::complex< double > data[], std::complex< double > *) const override
Forward fft, double-complex case.
Definition: heffte_backend_fftw.h:271
virtual void backward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:495
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *workspace) const override
Performs backward float-complex transform and truncates the complex part of the result.
Definition: heffte_backend_fftw.h:293
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *workspace) const override
Converts the deal data to complex and performs float-complex forward transform.
Definition: heffte_backend_fftw.h:288
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_fftw.h:311
virtual void forward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:491
fftw_executor(void *, box3d< index > const box, int dimension)
Constructor, specifies the box and dimension.
Definition: heffte_backend_fftw.h:210
fftw_executor(void *, box3d< index > const box, int dir1, int dir2)
Merges two FFTs into one.
Definition: heffte_backend_fftw.h:222
void backward(std::complex< float > data[], std::complex< float > *) const override
Backward fft, float-complex case.
Definition: heffte_backend_fftw.h:263
void forward(std::complex< float > data[], std::complex< float > *) const override
Forward fft, float-complex case.
Definition: heffte_backend_fftw.h:255
fftw_executor(void *, box3d< index > const box)
Merges three FFTs into one.
Definition: heffte_backend_fftw.h:246
void backward(std::complex< double > data[], std::complex< double > *) const override
Backward fft, double-complex case.
Definition: heffte_backend_fftw.h:279
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *workspace) const override
Performs backward double-complex transform and truncates the complex part of the result.
Definition: heffte_backend_fftw.h:303
int box_size() const override
Returns the size of the box.
Definition: heffte_backend_fftw.h:309
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:169
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Defines inverse mapping from the location tag to a default backend tag.
Definition: heffte_common.h:380
stock type
Defaults to the stock backend.
Definition: heffte_common.h:382
Type-tag for the Cosine Transform using the FFTW backend.
Definition: heffte_common.h:104
Type-tag for the Sine Transform using the FFTW backend.
Definition: heffte_common.h:109
Type-tag for the FFTW backend.
Definition: heffte_common.h:99
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
Defines a set of default plan options for a given backend.
Definition: heffte_common.h:642
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_fftw.h:507
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_fftw.h:519
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
fftw_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:391
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:387
plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:375
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:362
fftwf_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:366
plan_fftw(int size, int howmanyffts, int stride, int rdist, int cdist)
Constructor taking into account the different sizes for the real and complex parts.
Definition: heffte_backend_fftw.h:350
fftw_plan plan
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:185
plan_fftw(int size1, int size2, std::array< int, 2 > const &embed, int howmanyffts, int stride, int dist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:159
~plan_fftw()
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:181
plan_fftw(int size, int howmanyffts, int stride, int dist)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:152
plan_fftw(int size1, int size2, int size3)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:175
plan_fftw(int size1, int size2, int size3)
Identical to the float-complex specialization.
Definition: heffte_backend_fftw.h:133
plan_fftw(int size, int howmanyffts, int stride, int dist)
Constructor, takes inputs identical to fftwf_plan_many_dft().
Definition: heffte_backend_fftw.h:101
~plan_fftw()
Destructor, deletes the plan.
Definition: heffte_backend_fftw.h:139
plan_fftw(int size1, int size2, std::array< int, 2 > const &embed, int howmanyffts, int stride, int dist)
Constructor, takes inputs identical to fftwf_plan_many_dft().
Definition: heffte_backend_fftw.h:117
fftwf_plan plan
The FFTW3 opaque structure (pointer to struct).
Definition: heffte_backend_fftw.h:143
Base plan for fftw, using only the specialization for float and double complex.
Definition: heffte_backend_fftw.h:83
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135