7 #ifndef HEFFFTE_COS_EXECUTOR_H
8 #define HEFFFTE_COS_EXECUTOR_H
10 #include "heffte_pack3d.h"
31 template<
typename index>
33 std::array<index, 3> high{box.
size[0]-1, box.
size[1]-1, box.
size[2]-1};
44 template<
typename precision>
45 static void pre_forward(
void*,
int length, precision
const input[], precision fft_signal[]){
46 for(
int i = 0; i < length; i++){
48 fft_signal[2*i+1] = input[i];
50 fft_signal[2*length] = 0;
51 for(
int i = 0; i < 2*length; i++){
52 fft_signal[4*length-i] = fft_signal[i];
56 template<
typename precision>
57 static void post_forward(
void*,
int length, std::complex<precision>
const fft_result[], precision result[]){
58 for(
int i = 0; i < length; i++){
59 result[i] = std::real(fft_result[i]);
63 template<
typename precision>
64 static void pre_backward(
void*,
int length, precision
const input[], std::complex<precision> fft_signal[]){
65 for(
int i = 0; i < length; i++){
66 fft_signal[i] = std::complex<precision>(input[i]);
68 fft_signal[length] = 0.0;
71 for(
int i = length+1; i < 2*length+1; i++){
72 fft_signal[i] = std::complex<precision>(-1.0 * input[index]);
77 template<
typename precision>
78 static void post_backward(
void*,
int length, precision
const fft_result[], precision result[]){
79 for(
int i=0; i<length; i++)
80 result[i] = fft_result[2*i + 1];
90 template<
typename precision>
91 static void pre_forward(
void*,
int length, precision
const input[], precision fft_signal[]){
92 for(
int i=0; i<length; i++){
93 fft_signal[2*i] = 0.0;
94 fft_signal[2*i+1] = input[i];
96 fft_signal[2*length] = 0.;
97 for(
int i=0; i<length; i++){
98 fft_signal[4*length-2*i] = 0.0;
99 fft_signal[4*length-2*i-1]= -input[i];
103 template<
typename precision>
104 static void post_forward(
void*,
int length, std::complex<precision>
const fft_result[], precision result[]){
105 for(
int i=0; i < length; i++)
106 result[i] = -std::imag(fft_result[i+1]);
109 template<
typename precision>
110 static void pre_backward(
void*,
int length, precision
const input[], std::complex<precision> fft_signal[]){
111 fft_signal[0] = std::complex<precision>(0.0);
112 for(
int i=0; i < length; i++){
113 fft_signal[i+1] = std::complex<precision>(0.0, -input[i]);
115 fft_signal[2*length] = std::complex<precision>(0.0);
116 for(
int i=0; i < length-1; i++){
117 fft_signal[length + i + 1] = std::complex<precision>(0.0, -input[length - i - 2]);
121 template<
typename precision>
122 static void post_backward(
void*,
int length, precision
const fft_result[], precision result[]){
134 template<
typename fft_backend_tag,
typename prepost_processor>
137 template<
typename index>
140 length(box.osize(0)),
141 num_batch(box.osize(1) * box.osize(2)),
142 total_size(box.count()),
143 fft(make_executor_r2c<fft_backend_tag>(stream,
make_cos_box(box), dimension))
145 assert(dimension == box.
order[0]);
148 template<
typename index>
150 {
throw std::runtime_error(
"2D real-to-real transform is not yet implemented!"); }
152 template<
typename index>
154 {
throw std::runtime_error(
"3D real-to-real transform is not yet implemented!"); }
157 template<
typename scalar_type>
158 void forward(scalar_type data[], scalar_type workspace[])
const{
159 scalar_type* temp = workspace;
160 std::complex<scalar_type>* ctemp =
align_pntr(
reinterpret_cast<std::complex<scalar_type>*
>(workspace + fft->box_size() + 1));
161 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ?
nullptr : ctemp + fft->complex_size();
162 for(
int i=0; i<num_batch; i++){
163 prepost_processor::pre_forward(stream, length, data + i * length, temp + i * 4 * length);
165 fft->forward(temp, ctemp, fft_work);
166 for(
int i=0; i<num_batch; i++)
167 prepost_processor::post_forward(stream, length, ctemp + i * (2 * length + 1), data + i * length);
170 template<
typename scalar_type>
171 void backward(scalar_type data[], scalar_type workspace[])
const{
172 scalar_type* temp = workspace;
173 std::complex<scalar_type>* ctemp =
align_pntr(
reinterpret_cast<std::complex<scalar_type>*
>(workspace + fft->box_size() + 1));
174 std::complex<scalar_type>* fft_work = (fft->workspace_size() == 0) ?
nullptr : ctemp + fft->complex_size();
175 for(
int i=0; i<num_batch; i++)
176 prepost_processor::pre_backward(stream, length, data + i * length, ctemp + i * (2 * length + 1));
177 fft->backward(ctemp, temp, fft_work);
178 for(
int i=0; i<num_batch; i++)
179 prepost_processor::post_backward(stream, length, temp + 4 * i * length, data + i * length);
183 template<
typename precision>
184 void forward(precision
const[], std::complex<precision>[])
const{
185 throw std::runtime_error(
"Calling cos-transform with real-to-complex data! This should not happen!");
188 template<
typename precision>
189 void backward(std::complex<precision> indata[], precision outdata[])
const{
forward(outdata, indata); }
192 int box_size()
const override{
return total_size; }
195 return fft->box_size() + 1 + 2 * fft->complex_size() + 2 * fft->workspace_size()
196 + ((std::is_same<fft_backend_tag, backend::cufft>::value) ? 1 : 0);
199 template<
typename scalar_type>
200 std::complex<scalar_type>*
align_pntr(std::complex<scalar_type> *p)
const{
201 if (std::is_same<fft_backend_tag, backend::cufft>::value){
202 return (
reinterpret_cast<size_t>(p) %
sizeof(std::complex<scalar_type>) == 0) ? p :
203 reinterpret_cast<std::complex<scalar_type>*
>(
reinterpret_cast<scalar_type*
>(p) + 1);
209 virtual void forward(
float data[],
float *workspace)
const override{ forward<float>(data, workspace); }
211 virtual void forward(
double data[],
double *workspace)
const override{ forward<double>(data, workspace); }
213 virtual void backward(
float data[],
float *workspace)
const override{ backward<float>(data, workspace); }
215 virtual void backward(
double data[],
double *workspace)
const override{ backward<double>(data, workspace); }
220 int length, num_batch, total_size;
222 std::unique_ptr<typename one_dim_backend<fft_backend_tag>::executor_r2c> fft;
Base class for all backend executors.
Definition: heffte_common.h:486
box3d< index > make_cos_box(box3d< index > const &box)
Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
Definition: heffte_r2r_executor.h:32
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition: heffte_geometry.h:129
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition: heffte_geometry.h:131
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition: heffte_geometry.h:123
Pre/Post processing for the Cosine transform using the CPU.
Definition: heffte_r2r_executor.h:42
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:57
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:45
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:78
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:64
Pre/Post processing for the Sine transform using the CPU.
Definition: heffte_r2r_executor.h:88
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:104
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:110
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:122
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:91
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135
int box_size() const override
Returns the size of the box.
Definition: heffte_r2r_executor.h:192
size_t workspace_size() const override
Returns the size of the box.
Definition: heffte_r2r_executor.h:194
void forward(scalar_type data[], scalar_type workspace[]) const
Forward transform.
Definition: heffte_r2r_executor.h:158
virtual void forward(double data[], double *workspace) const override
Forward r2r, double precision.
Definition: heffte_r2r_executor.h:211
void backward(scalar_type data[], scalar_type workspace[]) const
Inverse transform.
Definition: heffte_r2r_executor.h:171
virtual void backward(double data[], double *workspace) const override
Backward r2r, double precision.
Definition: heffte_r2r_executor.h:215
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const box, int dimension)
Construct a plan for batch 1D transforms.
Definition: heffte_r2r_executor.h:138
void backward(std::complex< precision > indata[], precision outdata[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:189
std::complex< scalar_type > * align_pntr(std::complex< scalar_type > *p) const
Moves the pointer forward to be aligned to the size of std::complex<scalar_type>, used for CUDA only.
Definition: heffte_r2r_executor.h:200
virtual void backward(float data[], float *workspace) const override
Backward r2r, single precision.
Definition: heffte_r2r_executor.h:213
void forward(precision const[], std::complex< precision >[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:184
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const, int, int)
Construct a plan for batch 2D transforms, not implemented currently.
Definition: heffte_r2r_executor.h:149
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const)
Construct a plan for a single 3D transform, not implemented currently.
Definition: heffte_r2r_executor.h:153
virtual void forward(float data[], float *workspace) const override
Forward r2r, single precision.
Definition: heffte_r2r_executor.h:209
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38