Highly Efficient FFT for Exascale: HeFFTe v2.3
heffte_r2r_executor.h
1 /*
2  -- heFFTe --
3  Univ. of Tennessee, Knoxville
4  @date
5 */
6 
7 #ifndef HEFFFTE_COS_EXECUTOR_H
8 #define HEFFFTE_COS_EXECUTOR_H
9 
10 #include "heffte_pack3d.h"
11 
25 namespace heffte {
26 
31 template<typename index>
33  std::array<index, 3> high{box.size[0]-1, box.size[1]-1, box.size[2]-1};
34  high[box.order[0]] = 4 * box.osize(0) - 1;
35  return box3d<index>(std::array<index, 3>{0, 0, 0}, high, box.order);
36 }
37 
44  template<typename precision>
45  static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
46  for(int i = 0; i < length; i++){
47  fft_signal[2*i] = 0;
48  fft_signal[2*i+1] = input[i];
49  }
50  fft_signal[2*length] = 0;
51  for(int i = 0; i < 2*length; i++){
52  fft_signal[4*length-i] = fft_signal[i];
53  }
54  }
56  template<typename precision>
57  static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
58  for(int i = 0; i < length; i++){
59  result[i] = std::real(fft_result[i]);
60  }
61  }
63  template<typename precision>
64  static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
65  for(int i = 0; i < length; i++){
66  fft_signal[i] = std::complex<precision>(input[i]);
67  }
68  fft_signal[length] = 0.0;
69 
70  int index = length-1;
71  for(int i = length+1; i < 2*length+1; i++){
72  fft_signal[i] = std::complex<precision>(-1.0 * input[index]);
73  index --;
74  }
75  }
77  template<typename precision>
78  static void post_backward(void*, int length, precision const fft_result[], precision result[]){
79  for(int i=0; i<length; i++)
80  result[i] = fft_result[2*i + 1];
81  }
82 };
83 
90  template<typename precision>
91  static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
92  for(int i=0; i<length; i++){
93  fft_signal[2*i] = 0.0;
94  fft_signal[2*i+1] = input[i];
95  }
96  fft_signal[2*length] = 0.;
97  for(int i=0; i<length; i++){
98  fft_signal[4*length-2*i] = 0.0;
99  fft_signal[4*length-2*i-1]= -input[i];
100  }
101  }
103  template<typename precision>
104  static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
105  for(int i=0; i < length; i++)
106  result[i] = -std::imag(fft_result[i+1]);
107  }
109  template<typename precision>
110  static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
111  fft_signal[0] = std::complex<precision>(0.0);
112  for(int i=0; i < length; i++){
113  fft_signal[i+1] = std::complex<precision>(0.0, -input[i]);
114  }
115  fft_signal[2*length] = std::complex<precision>(0.0);
116  for(int i=0; i < length-1; i++){
117  fft_signal[length + i + 1] = std::complex<precision>(0.0, -input[length - i - 2]);
118  }
119  }
121  template<typename precision>
122  static void post_backward(void*, int length, precision const fft_result[], precision result[]){
123  cpu_cos_pre_pos_processor::post_backward(nullptr, length, fft_result, result);
124  }
125 };
126 
134 template<typename fft_backend_tag, typename prepost_processor>
137  template<typename index>
138  real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const box, int dimension) :
139  stream(cstream),
140  length(box.osize(0)),
141  num_batch(box.osize(1) * box.osize(2)),
142  total_size(box.count()),
143  fft(make_executor_r2c<fft_backend_tag>(stream, make_cos_box(box), dimension))
144  {
145  assert(dimension == box.order[0]); // supporting only ordered operations (for now)
146  }
148  template<typename index>
149  real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const, int, int) : stream(cstream)
150  { throw std::runtime_error("2D real-to-real transform is not yet implemented!"); }
152  template<typename index>
153  real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const) : stream(cstream)
154  { throw std::runtime_error("3D real-to-real transform is not yet implemented!"); }
155 
157  template<typename scalar_type>
158  void forward(scalar_type data[], scalar_type workspace[]) const{
159  scalar_type* temp = workspace;
160  std::complex<scalar_type>* ctemp = align_pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
161  std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
162  for(int i=0; i<num_batch; i++){
163  prepost_processor::pre_forward(stream, length, data + i * length, temp + i * 4 * length);
164  }
165  fft->forward(temp, ctemp, fft_work);
166  for(int i=0; i<num_batch; i++)
167  prepost_processor::post_forward(stream, length, ctemp + i * (2 * length + 1), data + i * length);
168  }
170  template<typename scalar_type>
171  void backward(scalar_type data[], scalar_type workspace[]) const{
172  scalar_type* temp = workspace;
173  std::complex<scalar_type>* ctemp = align_pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
174  std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ? nullptr : ctemp + fft->complex_size();
175  for(int i=0; i<num_batch; i++)
176  prepost_processor::pre_backward(stream, length, data + i * length, ctemp + i * (2 * length + 1));
177  fft->backward(ctemp, temp, fft_work);
178  for(int i=0; i<num_batch; i++)
179  prepost_processor::post_backward(stream, length, temp + 4 * i * length, data + i * length);
180  }
181 
183  template<typename precision>
184  void forward(precision const[], std::complex<precision>[]) const{
185  throw std::runtime_error("Calling cos-transform with real-to-complex data! This should not happen!");
186  }
188  template<typename precision>
189  void backward(std::complex<precision> indata[], precision outdata[]) const{ forward(outdata, indata); }
190 
192  int box_size() const override{ return total_size; }
194  size_t workspace_size() const override{
195  return fft->box_size() + 1 + 2 * fft->complex_size() + 2 * fft->workspace_size()
196  + ((std::is_same<fft_backend_tag, backend::cufft>::value) ? 1 : 0);
197  }
199  template<typename scalar_type>
200  std::complex<scalar_type>* align_pntr(std::complex<scalar_type> *p) const{
201  if (std::is_same<fft_backend_tag, backend::cufft>::value){
202  return (reinterpret_cast<size_t>(p) % sizeof(std::complex<scalar_type>) == 0) ? p :
203  reinterpret_cast<std::complex<scalar_type>*>(reinterpret_cast<scalar_type*>(p) + 1);
204  }else{
205  return p;
206  }
207  }
209  virtual void forward(float data[], float *workspace) const override{ forward<float>(data, workspace); }
211  virtual void forward(double data[], double *workspace) const override{ forward<double>(data, workspace); }
213  virtual void backward(float data[], float *workspace) const override{ backward<float>(data, workspace); }
215  virtual void backward(double data[], double *workspace) const override{ backward<double>(data, workspace); }
216 
217 private:
219 
220  int length, num_batch, total_size;
221 
222  std::unique_ptr<typename one_dim_backend<fft_backend_tag>::executor_r2c> fft;
223 };
224 
225 
226 }
227 
228 #endif
Base class for all backend executors.
Definition: heffte_common.h:486
box3d< index > make_cos_box(box3d< index > const &box)
Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
Definition: heffte_r2r_executor.h:32
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition: heffte_geometry.h:129
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition: heffte_geometry.h:131
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition: heffte_geometry.h:123
Pre/Post processing for the Cosine transform using the CPU.
Definition: heffte_r2r_executor.h:42
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:57
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:45
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:78
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:64
Pre/Post processing for the Sine transform using the CPU.
Definition: heffte_r2r_executor.h:88
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:104
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:110
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:122
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:91
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135
int box_size() const override
Returns the size of the box.
Definition: heffte_r2r_executor.h:192
size_t workspace_size() const override
Returns the size of the box.
Definition: heffte_r2r_executor.h:194
void forward(scalar_type data[], scalar_type workspace[]) const
Forward transform.
Definition: heffte_r2r_executor.h:158
virtual void forward(double data[], double *workspace) const override
Forward r2r, double precision.
Definition: heffte_r2r_executor.h:211
void backward(scalar_type data[], scalar_type workspace[]) const
Inverse transform.
Definition: heffte_r2r_executor.h:171
virtual void backward(double data[], double *workspace) const override
Backward r2r, double precision.
Definition: heffte_r2r_executor.h:215
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const box, int dimension)
Construct a plan for batch 1D transforms.
Definition: heffte_r2r_executor.h:138
void backward(std::complex< precision > indata[], precision outdata[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:189
std::complex< scalar_type > * align_pntr(std::complex< scalar_type > *p) const
Moves the pointer forward to be aligned to the size of std::complex<scalar_type>, used for CUDA only.
Definition: heffte_r2r_executor.h:200
virtual void backward(float data[], float *workspace) const override
Backward r2r, single precision.
Definition: heffte_r2r_executor.h:213
void forward(precision const[], std::complex< precision >[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:184
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const, int, int)
Construct a plan for batch 2D transforms, not implemented currently.
Definition: heffte_r2r_executor.h:149
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const)
Construct a plan for a single 3D transform, not implemented currently.
Definition: heffte_r2r_executor.h:153
virtual void forward(float data[], float *workspace) const override
Forward r2r, single precision.
Definition: heffte_r2r_executor.h:209
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38