7 #ifndef HEFFTE_BACKEND_STOCK_FFT_H
8 #define HEFFTE_BACKEND_STOCK_FFT_H
10 #include "heffte_r2r_executor.h"
12 #include "stock_fft/heffte_stock_tree.h"
49 template<>
struct is_ccomplex<stock::Complex<float, 1>> : std::true_type{};
50 template<>
struct is_zcomplex<stock::Complex<double, 1>> : std::true_type{};
51 #ifdef Heffte_ENABLE_AVX
56 template<>
struct is_ccomplex<stock::Complex<float, 4>> : std::true_type{};
57 template<>
struct is_ccomplex<stock::Complex<float, 8>> : std::true_type{};
58 #ifdef Heffte_ENABLE_AVX512
59 template<>
struct is_ccomplex<stock::Complex<float, 16>> : std::true_type{};
66 template<>
struct is_zcomplex<stock::Complex<double, 2>> : std::true_type{};
67 template<>
struct is_zcomplex<stock::Complex<double, 4>> : std::true_type{};
68 #ifdef Heffte_ENABLE_AVX512
69 template<>
struct is_zcomplex<stock::Complex<double, 8>> : std::true_type{};
73 #ifdef Heffte_ENABLE_AVX
76 template<
typename F,
int L>
77 stock::Complex<F, L> copy_pad(std::complex<F>
const *c,
int c_len,
int i_stride) {
79 throw std::runtime_error(
"Invalid padding requested!\n");
81 std::complex<F> ret[L];
82 for(
int i = 0; i < c_len; i++) ret[i] = c[i*i_stride];
83 return stock::Complex<F,L> (ret);
86 template<
typename F,
int L>
87 stock::Complex<F,L> copy_pad(F
const *c,
int c_len,
int i_stride) {
89 throw std::runtime_error(
"Invalid padding requested!\n");
91 std::complex<F> ret[L];
92 for(
int i = 0; i < c_len; i++) ret[i] = std::complex<F>(c[i*i_stride], 0.0);
93 return stock::Complex<F,L> (ret);
96 template<
typename F>
struct pack_size { };
97 #ifdef Heffte_ENABLE_AVX512
98 template<>
struct pack_size<float> {constexpr
static int size = 16;};
99 template<>
struct pack_size<double>{constexpr
static int size = 8;};
101 template<>
struct pack_size<float> {constexpr
static int size = 8;};
102 template<>
struct pack_size<double>{constexpr
static int size = 4;};
114 template<
typename F, direction dir>
115 struct plan_stock_fft{
125 plan_stock_fft(
int size,
int howmanyffts,
int stride,
int rdist,
int cdist):
126 N(size), num_ffts(howmanyffts), stride_sz(stride), real_d(rdist), comp_d(cdist) {
127 numNodes = stock::getNumNodes(
N);
128 root = std::unique_ptr<stock::biFuncNode<F,L>[]>(
new stock::biFuncNode<F,L>[numNodes]);
129 init_fft_tree(root.get(),
N);
132 int N, num_ffts, stride_sz, real_d, comp_d, numNodes;
133 constexpr
static int L = pack_size<F>::size;
134 std::unique_ptr<stock::biFuncNode<F,L>[]> root;
136 void execute(std::complex<F>
const idata[], F odata[]) {
138 stock::complex_vector<F,L> inp (
N);
139 stock::complex_vector<F,L> out (
N);
142 for(
int p = 0; p+((L/2)-1) < num_ffts; p += (L/2)) {
144 inp[0] = stock::Complex<F,L> (&idata[p*comp_d], comp_d);
145 for(
int i = 1; i < (
N+2)/2; i++) {
146 int idx = p*comp_d + i*stride_sz;
147 inp[i] = stock::Complex<F,L> (&idata[idx], comp_d);
148 inp[
N-i] = inp[i].conjugate();
151 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
153 for(
int i = 0; i <
N; i++) {
154 int idx = p*real_d + i*stride_sz;
155 stock::Complex<F,L> arr = out[i];
156 for(
int j = 0; j < L/2; j++) odata[idx+j*real_d] = arr[j].real();
161 if(num_ffts % (L/2) > 0) {
162 int rem = num_ffts % (L/2);
164 int p = num_ffts - rem;
165 inp[0] = copy_pad<F,L>(&idata[p*comp_d], rem, comp_d);
166 for(
int i = 1; i < (
N+2)/2; i++) {
167 int idx = p*comp_d + i*stride_sz;
168 inp[i] = copy_pad<F,L>(&idata[idx], rem, comp_d);
169 inp[
N-i] = inp[i].conjugate();
171 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
172 for(
int i = 0; i <
N; i++) {
173 int idx = p*real_d + i*stride_sz;
174 stock::Complex<F,L> arr = out[i];
176 for(
int k = 0; k < rem; k++) {
177 odata[idx + k*real_d] = arr[k].real();
183 void execute(F
const idata[], std::complex<F> odata[]) {
185 stock::complex_vector<F,L> inp (
N);
186 stock::complex_vector<F,L> out (
N);
189 for(
int p = 0; p+((L/2)-1) < num_ffts; p += L/2) {
191 for(
int i = 0; i <
N; i++) {
192 int idx = p*real_d + i*stride_sz;
193 inp[i] = copy_pad<F,L>(&idata[idx], L/2, real_d);
196 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
198 for(
int i = 0; i < (
N+2)/2; i++) {
199 int idx = p*comp_d + i*stride_sz;
200 stock::Complex<F,L> arr = out[i];
201 for(
int j = 0; j < L/2; j++) odata[idx+j*comp_d] = arr[j];
206 if(num_ffts % (L/2) > 0) {
207 int rem = num_ffts % (L/2);
209 int p = num_ffts - rem;
210 for(
int i = 0; i <
N; i++) {
211 int idx = p*real_d + i*stride_sz;
213 inp[i] = copy_pad<F,L>(&idata[idx], rem, real_d);
215 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
216 for(
int i = 0; i < (
N+2)/2; i++) {
217 int idx = p*comp_d + i*stride_sz;
218 stock::Complex<F,L> arr = out[i];
220 for(
int k = 0; k < rem; k++) {
221 odata[idx + k*comp_d] = arr[k];
234 template<
typename F, direction dir>
235 struct plan_stock_fft<std::complex<F>, dir>{
245 N(size), num_ffts(howmanyffts), stride_sz(stride), idist(dist), odist(dist) {
246 numNodes = stock::getNumNodes(
N);
247 root = std::unique_ptr<stock::biFuncNode<F,L>[]>(
new stock::biFuncNode<F,L>[numNodes]);
248 init_fft_tree(root.get(),
N);
251 int N, num_ffts, stride_sz, idist, odist, numNodes;
252 constexpr
static int L = pack_size<F>::size;
253 std::unique_ptr<stock::biFuncNode<F, L>[]> root;
255 void execute(std::complex<F> data[]) {
257 stock::complex_vector<F,L> inp (
N);
258 stock::complex_vector<F,L> out (
N);
261 for(
int p = 0; p + (L/2 - 1) < num_ffts; p += L/2) {
263 for(
int i = 0; i <
N; i++) {
264 int idx = p*idist + i*stride_sz;
265 inp[i] = stock::Complex<F,L> (&data[idx], idist);
268 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
270 for(
int i = 0; i <
N; i++) {
271 int idx = p*odist + i*stride_sz;
272 stock::Complex<F,L> arr = out[i];
273 for(
int j = 0; j < L/2; j++) data[idx+j*odist] = arr[j];
278 if(num_ffts % (L/2) > 0) {
279 int rem = num_ffts % (L/2);
281 int p = num_ffts - rem;
282 for(
int i = 0; i <
N; i++) {
283 int idx = p*idist + i*stride_sz;
285 inp[i] = copy_pad<F,L>(&data[idx], rem, idist);
287 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
288 for(
int i = 0; i <
N; i++) {
289 int idx = p*odist + i*stride_sz;
290 stock::Complex<F,L> arr = out[i];
292 for(
int k = 0; k < rem; k++) {
293 data[idx + k*odist] = arr[k];
308 template<
typename F, direction dir>
320 N(size), num_ffts(howmanyffts), stride_sz(stride), real_d(rdist), comp_d(cdist) {
321 numNodes = stock::getNumNodes(
N);
323 init_fft_tree(root.get(),
N);
326 int N, num_ffts, stride_sz, real_d, comp_d, numNodes;
327 std::unique_ptr<stock::biFuncNode<F, 1>[]> root;
329 void execute(std::complex<F>
const idata[], F odata[]) {
331 stock::complex_vector<F,1> inp (
N);
332 stock::complex_vector<F,1> out (
N);
335 for(
int p = 0; p < num_ffts; p++) {
338 for(
int i = 1; i < (
N+2)/2; i++) {
339 int idx = p*comp_d + i*stride_sz;
344 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
346 for(
int i = 0; i <
N; i++) {
347 int idx = p*real_d + i*stride_sz;
349 odata[idx+0*real_d] = arr[0].real();
354 void execute(F
const idata[], std::complex<F> odata[]) {
356 stock::complex_vector<F,1> inp (
N);
357 stock::complex_vector<F,1> out (
N);
360 for(
int p = 0; p < num_ffts; p++) {
362 for(
int i = 0; i <
N; i++) {
363 int idx = p*real_d + i*stride_sz;
367 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
369 for(
int i = 0; i < (
N+2)/2; i++) {
370 int idx = p*comp_d + i*stride_sz;
372 odata[idx+0*comp_d] = arr[0];
384 template<
typename F, direction dir>
395 N(size), num_ffts(howmanyffts), stride_sz(stride), idist(dist), odist(dist) {
396 numNodes = stock::getNumNodes(
N);
398 init_fft_tree(root.get(),
N);
401 int N, num_ffts, stride_sz, idist, odist, numNodes;
402 std::unique_ptr<stock::biFuncNode<F,1>[]> root;
406 stock::complex_vector<F,1> inp (
N);
407 stock::complex_vector<F,1> out (
N);
410 for(
int p = 0; p < num_ffts; p++) {
412 for(
int i = 0; i <
N; i++) {
413 int idx = p*idist + i*stride_sz;
417 root[0].fptr(inp.data(), out.data(), 1, 1, root.get(), dir);
419 for(
int i = 0; i <
N; i++) {
420 int idx = p*odist + i*stride_sz;
422 data[idx+0*odist] = arr[0];
450 template<
typename index>
452 size(box.size[dimension]),
455 dist((dimension == box.order[0]) ? size : 1),
456 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
457 block_stride(box.osize(0) * box.osize(1)),
458 total_size(box.count())
462 throw std::runtime_error(
"2D transforms for the stock backend are not available yet!");
466 throw std::runtime_error(
"3D transforms for the stock backend are not available yet!");
470 void forward(std::complex<float> data[], std::complex<float>*)
const override{
472 for(
int i=0; i<blocks; i++) {
473 cforward->execute(data + i * block_stride);
477 void backward(std::complex<float> data[], std::complex<float>*)
const override{
478 make_plan(cbackward);
479 for(
int i=0; i<blocks; i++) {
480 cbackward->execute(data + i * block_stride);
484 void forward(std::complex<double> data[], std::complex<double>*)
const override{
486 for(
int i=0; i<blocks; i++) {
487 zforward->execute(data + i * block_stride);
491 void backward(std::complex<double> data[], std::complex<double>*)
const override{
492 make_plan(zbackward);
493 for(
int i=0; i<blocks; i++) {
494 zbackward->execute(data + i * block_stride);
499 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float> *workspace)
const override{
500 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<float>(indata[i]);
504 void backward(std::complex<float> indata[],
float outdata[], std::complex<float> *workspace)
const override{
506 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
509 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double> *workspace)
const override{
510 for(
int i=0; i<total_size; i++) outdata[i] = std::complex<double>(indata[i]);
514 void backward(std::complex<double> indata[],
double outdata[], std::complex<double> *workspace)
const override{
516 for(
int i=0; i<total_size; i++) outdata[i] = std::real(indata[i]);
520 int box_size()
const override{
return total_size; }
527 template<
typename scalar_type, direction dir>
532 int size, num_ffts, stride, dist, blocks, block_stride, total_size;
533 mutable std::unique_ptr<plan_stock_fft<std::complex<float>,
direction::forward>> cforward;
534 mutable std::unique_ptr<plan_stock_fft<std::complex<float>,
direction::backward>> cbackward;
535 mutable std::unique_ptr<plan_stock_fft<std::complex<double>,
direction::forward>> zforward;
536 mutable std::unique_ptr<plan_stock_fft<std::complex<double>,
direction::backward>> zbackward;
558 template<
typename index>
560 size(box.size[dimension]),
563 blocks((dimension == box.order[1]) ? box.osize(2) : 1),
564 rdist((dimension == box.order[0]) ? size : 1),
565 cdist((dimension == box.order[0]) ? size/2 + 1 : 1),
566 rblock_stride(box.osize(0) * box.osize(1)),
567 cblock_stride(box.osize(0) * (box.osize(1)/2 + 1)),
569 csize(box.r2c(dimension).count())
573 void forward(
float const indata[], std::complex<float> outdata[], std::complex<float>*)
const override{
575 for(
int i=0; i<blocks; i++) {
576 sforward->execute(indata + i*rblock_stride, outdata + i*cblock_stride);
580 void backward(std::complex<float> indata[],
float outdata[], std::complex<float>*)
const override{
581 make_plan(sbackward);
582 for(
int i=0; i<blocks; i++) {
583 sbackward->execute(indata + i*cblock_stride, outdata + i*rblock_stride);
587 void forward(
double const indata[], std::complex<double> outdata[], std::complex<double>*)
const override{
589 for(
int i=0; i<blocks; i++) {
590 dforward->execute(indata + i*rblock_stride, outdata + i*cblock_stride);
594 void backward(std::complex<double> indata[],
double outdata[], std::complex<double>*)
const override{
595 make_plan(dbackward);
596 for(
int i=0; i<blocks; i++) {
597 dbackward->execute(indata + i*cblock_stride, outdata + i*rblock_stride);
610 template<
typename scalar_type, direction dir>
615 int size, num_ffts, stride, blocks;
616 int rdist, cdist, rblock_stride, cblock_stride, rsize, csize;
617 mutable std::unique_ptr<plan_stock_fft<float, direction::forward>> sforward;
618 mutable std::unique_ptr<plan_stock_fft<double, direction::forward>> dforward;
619 mutable std::unique_ptr<plan_stock_fft<float, direction::backward>> sbackward;
620 mutable std::unique_ptr<plan_stock_fft<double, direction::backward>> dbackward;
665 static const bool use_reorder =
true;
673 static const bool use_reorder =
true;
681 static const bool use_reorder =
true;
Base class for all backend executors.
Definition: heffte_common.h:486
virtual int complex_size() const
Return the size of the complex-box (r2c executors).
Definition: heffte_common.h:519
virtual void backward(float[], float *) const
Backward r2r, single precision.
Definition: heffte_common.h:495
virtual void forward(float[], float *) const
Forward r2r, single precision.
Definition: heffte_common.h:491
Custom complex type taking advantage of vectorization A Complex Type intrinsic to HeFFTe that takes a...
Definition: heffte_stock_complex.h:29
Complex< F, L > conjugate()
Conjugate the current complex number.
Definition: heffte_stock_complex.h:183
Wrapper to stock API for real-to-complex transform with shortening of the data.
Definition: heffte_backend_stock.h:547
int box_size() const override
Returns the size of the box with real data.
Definition: heffte_backend_stock.h:602
int complex_size() const override
Returns the size of the box with complex coefficients.
Definition: heffte_backend_stock.h:604
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_stock.h:606
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *) const override
Backward transform, single precision.
Definition: heffte_backend_stock.h:580
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *) const override
Forward transform, single precision.
Definition: heffte_backend_stock.h:573
stock_fft_executor_r2c(void *, box3d< index > const box, int dimension)
Constructor defines the box and the dimension of reduction.
Definition: heffte_backend_stock.h:559
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *) const override
Forward transform, double precision.
Definition: heffte_backend_stock.h:587
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *) const override
Backward transform, double precision.
Definition: heffte_backend_stock.h:594
Wrapper around the Stock FFT API.
Definition: heffte_backend_stock.h:441
void forward(float const indata[], std::complex< float > outdata[], std::complex< float > *workspace) const override
Converts the real data to complex and performs float-complex forward transform.
Definition: heffte_backend_stock.h:499
virtual void backward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:495
void backward(std::complex< float > indata[], float outdata[], std::complex< float > *workspace) const override
Performs backward float-complex transform and truncates the complex part of the result.
Definition: heffte_backend_stock.h:504
size_t workspace_size() const override
Return the size of the needed workspace.
Definition: heffte_backend_stock.h:523
void forward(std::complex< double > data[], std::complex< double > *) const override
Forward fft, double-complex case.
Definition: heffte_backend_stock.h:484
void forward(std::complex< float > data[], std::complex< float > *) const override
Forward fft, float-complex case.
Definition: heffte_backend_stock.h:470
void backward(std::complex< double > data[], std::complex< double > *) const override
Backward fft, double-complex case.
Definition: heffte_backend_stock.h:491
virtual void forward(float[], float *) const
Bring forth method that have not been overloaded.
Definition: heffte_common.h:491
stock_fft_executor(void *, box3d< index > const, int, int)
Placeholder, unimplemented.
Definition: heffte_backend_stock.h:461
void backward(std::complex< float > data[], std::complex< float > *) const override
Backward fft, float-complex case.
Definition: heffte_backend_stock.h:477
void forward(double const indata[], std::complex< double > outdata[], std::complex< double > *workspace) const override
Converts the deal data to complex and performs double-complex forward transform.
Definition: heffte_backend_stock.h:509
stock_fft_executor(void *, box3d< index > const)
Placeholder, unimplemented.
Definition: heffte_backend_stock.h:465
int box_size() const override
Returns the size of the box.
Definition: heffte_backend_stock.h:520
void backward(std::complex< double > indata[], double outdata[], std::complex< double > *workspace) const override
Performs backward double-complex transform and truncates the complex part of the result.
Definition: heffte_backend_stock.h:514
stock_fft_executor(void *, box3d< index > const box, int dimension)
Constructor, specifies the box and dimension.
Definition: heffte_backend_stock.h:451
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:169
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
Type-tag for the Cosine Transform using the stock FFT backend.
Definition: heffte_common.h:120
Type-tag for the Sine Transform using the stock FFT backend.
Definition: heffte_common.h:125
Type-tag for the stock FFT backend.
Definition: heffte_common.h:115
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
Defines a set of default plan options for a given backend.
Definition: heffte_common.h:642
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_stock.h:644
void executor_r2c
There is no real-to-complex variant.
Definition: heffte_backend_stock.h:656
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
plan_stock_fft(int size, int howmanyffts, int stride, int dist)
Constructor to plan out an FFT using the stock backend.
Definition: heffte_backend_stock.h:394
void execute(std::complex< F > data[])
Execute an FFT inplace on std::complex<F> data.
Definition: heffte_backend_stock.h:404
Specialization for r2c single precision.
Definition: heffte_backend_stock.h:309
void execute(std::complex< F > const idata[], F odata[])
Execute C2R FFT.
Definition: heffte_backend_stock.h:329
void execute(F const idata[], std::complex< F > odata[])
Execute R2C FFT.
Definition: heffte_backend_stock.h:354
plan_stock_fft(int size, int howmanyffts, int stride, int rdist, int cdist)
Constructor taking into account the different sizes for the real and complex parts.
Definition: heffte_backend_stock.h:319
int N
Identical to the F-complex specialization.
Definition: heffte_backend_stock.h:326
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135
Class to represent the call-graph nodes.
Definition: heffte_stock_algos.h:78