heffte/heffte__r2r__executor_8h_source.html

 /*

     -- heFFTe --

        Univ. of Tennessee, Knoxville

        @date

 */


 #ifndef HEFFFTE_COS_EXECUTOR_H

 #define HEFFFTE_COS_EXECUTOR_H


 #include "heffte_pack3d.h"


 namespace heffte {


 /*!
  * \brief Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
  *
  * The returned box starts at index (0, 0, 0) and keeps the dimension order of \b box.
  * Every direction keeps the size of \b box, except the direction listed first
  * in the order, which is stretched to four times its ordered size.
  *
  * \param box is the box holding the original real data
  *
  * \returns the enlarged box, using inclusive upper indexes
  */
 template<typename index>
 box3d<index> make_cos_box(box3d<index> const &box){
     std::array<index, 3> const origin{0, 0, 0};
     // inclusive upper bounds that reproduce the original sizes
     std::array<index, 3> upper{box.size[0]-1, box.size[1]-1, box.size[2]-1};
     // the leading (fastest) direction holds the 4x extended signal
     upper[box.order[0]] = 4 * box.osize(0) - 1;
     return box3d<index>(origin, upper, box.order);
 }


 /*!
  * \brief Pre/Post processing for the Cosine transform using the CPU.
  *
  * All methods work on a single 1D line of \b length real entries.
  * The first (void*) argument is ignored by every method.
  */
 struct cpu_cos_pre_pos_processor{
     /*!
      * \brief Pre-process in the forward transform.
      *
      * Builds a real signal with 4 * length entries: zeros at the even indexes,
      * the input at the odd indexes of the first half, and the mirror image of
      * the first half (fft_signal[4*length - i] = fft_signal[i]) in the second half.
      * Only indexes 0 ... 4*length - 1 are written; nothing is written when length <= 0.
      *
      * \param length is the number of entries of \b input
      * \param input is the original real data, \b length entries
      * \param fft_signal is the extended signal, at least 4 * length entries
      */
     template<typename precision>
     static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
         if (length <= 0) return; // empty line, the extended signal has no entries

         for(int i = 0; i < length; i++){
             fft_signal[2*i] = 0;
             fft_signal[2*i+1] = input[i];
         }
         fft_signal[2*length] = 0;
         // start at 1: the mirror of index 0 is index 4*length, which is the periodic
         // image of entry 0 and sits one past the end of the extended signal
         for(int i = 1; i < 2*length; i++){
             fft_signal[4*length-i] = fft_signal[i];
         }
     }
     /*!
      * \brief Post-process in the forward transform.
      *
      * Copies the real parts of the first \b length complex entries into \b result.
      */
     template<typename precision>
     static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
         for(int i = 0; i < length; i++){
             result[i] = std::real(fft_result[i]);
         }
     }
     /*!
      * \brief Pre-process in the inverse transform.
      *
      * Fills 2 * length + 1 complex entries with zero imaginary part:
      * the input, followed by a zero, followed by the negated input in reverse order.
      */
     template<typename precision>
     static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
         for(int i = 0; i < length; i++){
             fft_signal[i] = std::complex<precision>(input[i]);
         }
         fft_signal[length] = std::complex<precision>(0.0);

         // entries length+1 ... 2*length hold -input[length-1] ... -input[0]
         for(int i = 0; i < length; i++){
             fft_signal[length + 1 + i] = std::complex<precision>(-input[length - 1 - i]);
         }
     }
     /*!
      * \brief Post-process in the inverse transform.
      *
      * Extracts the odd-indexed entries of the real signal, i.e., result[i] = fft_result[2*i + 1].
      */
     template<typename precision>
     static void post_backward(void*, int length, precision const fft_result[], precision result[]){
         for(int i = 0; i < length; i++){
             result[i] = fft_result[2*i + 1];
         }
     }
 };


 /*!
  * \brief Pre/Post processing for the Sine transform using the CPU.
  *
  * All methods work on a single 1D line of \b length real entries.
  * The first (void*) argument is ignored by every method.
  */
 struct cpu_sin_pre_pos_processor{
     /*!
      * \brief Pre-process in the forward transform.
      *
      * Builds a real signal with 4 * length entries: zeros at the even indexes,
      * the input at the odd indexes of the first half, and the negated input in
      * reverse order at the odd indexes of the second half.
      * Only indexes 0 ... 4*length - 1 are written; nothing is written when length <= 0.
      *
      * \param length is the number of entries of \b input
      * \param input is the original real data, \b length entries
      * \param fft_signal is the extended signal, at least 4 * length entries
      */
     template<typename precision>
     static void pre_forward(void*, int length, precision const input[], precision fft_signal[]){
         for(int i=0; i<length; i++){
             fft_signal[2*i]   = 0.0;
             fft_signal[2*i+1] = input[i];
         }
         for(int i=0; i<length; i++){
             fft_signal[4*length-2*i-1] = -input[i];
             // zero the entry below the mirrored sample (not the one above it), so the
             // largest index touched is 4*length-1; the last pass zeroes the midpoint 2*length
             fft_signal[4*length-2*i-2] = 0.0;
         }
     }
     /*!
      * \brief Post-process in the forward transform.
      *
      * Stores the negated imaginary parts of complex entries 1 ... length into \b result.
      */
     template<typename precision>
     static void post_forward(void*, int length, std::complex<precision> const fft_result[], precision result[]){
         for(int i=0; i < length; i++){
             result[i] = -std::imag(fft_result[i+1]);
         }
     }
     /*!
      * \brief Pre-process in the inverse transform.
      *
      * Fills 2 * length + 1 purely imaginary complex entries: zero at positions 0 and 2 * length,
      * -input[i] at positions 1 ... length, and -input[length-2] ... -input[0]
      * at positions length+1 ... 2*length-1.
      */
     template<typename precision>
     static void pre_backward(void*, int length, precision const input[], std::complex<precision> fft_signal[]){
         fft_signal[0] = std::complex<precision>(0.0);
         for(int i=0; i < length; i++){
             fft_signal[i+1] = std::complex<precision>(0.0, -input[i]);
         }
         fft_signal[2*length] = std::complex<precision>(0.0);
         // reflect around position length, the entry at length itself is not repeated
         for(int i=0; i < length-1; i++){
             fft_signal[length + i + 1] = std::complex<precision>(0.0, -input[length - i - 2]);
         }
     }
     /*!
      * \brief Post-process in the inverse transform.
      *
      * Identical to the Cosine case, the work is delegated to cpu_cos_pre_pos_processor::post_backward().
      */
     template<typename precision>
     static void post_backward(void*, int length, precision const fft_result[], precision result[]){
         cpu_cos_pre_pos_processor::post_backward(nullptr, length, fft_result, result);
     }
 };


 /*!
  * \brief Template algorithm for the Sine and Cosine transforms.
  *
  * The transform is carried out with a real-to-complex executor of the selected backend
  * that operates on the enlarged box returned by make_cos_box().
  * The \b prepost_processor supplies the static methods pre_forward(), post_forward(),
  * pre_backward() and post_backward() that convert between the user data and the
  * signals handed to the real-to-complex executor, one 1D line at a time.
  *
  * \tparam fft_backend_tag selects the backend of the underlying real-to-complex executor
  * \tparam prepost_processor provides the pre- and post-processing methods
  */
 template<typename fft_backend_tag, typename prepost_processor>
 struct real2real_executor : public executor_base{
     //! \brief Construct a plan for batch 1D transforms.
     template<typename index>
     real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const box, int dimension) :
         stream(cstream),
         length(box.osize(0)),
         num_batch(box.osize(1) * box.osize(2)),
         total_size(box.count()),
         fft(make_executor_r2c<fft_backend_tag>(stream, make_cos_box(box), dimension))
     {
         assert(dimension == box.order[0]); // supporting only ordered operations (for now)
     }
     //! \brief Construct a plan for batch 2D transforms, not implemented currently.
     template<typename index>
     real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const, int, int) : stream(cstream)
     {
         throw std::runtime_error("2D real-to-real transform is not yet implemented!");
     }
     //! \brief Construct a plan for a single 3D transform, not implemented currently.
     template<typename index>
     real2real_executor(typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type cstream, box3d<index> const) : stream(cstream)
     {
         throw std::runtime_error("3D real-to-real transform is not yet implemented!");
     }

     /*!
      * \brief Forward transform.
      *
      * The workspace is split into three consecutive pieces: the real extended signals
      * (fft->box_size() entries followed by one spare entry), the complex output of the
      * real-to-complex executor, and, only if the executor asks for it, the scratch
      * space of the executor itself.
      *
      * \param data holds num_batch lines of \b length entries each, overwritten with the result
      * \param workspace has at least workspace_size() entries
      */
     template<typename scalar_type>
     void forward(scalar_type data[], scalar_type workspace[]) const{
         scalar_type* const extended = workspace;
         std::complex<scalar_type>* const spectrum = align_pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
         std::complex<scalar_type>* backend_scratch = nullptr;
         if (fft->workspace_size() != 0){
             backend_scratch = spectrum + fft->complex_size();
         }

         // each line of length entries becomes an extended real line of 4 * length entries
         for(int batch = 0; batch < num_batch; batch++){
             prepost_processor::pre_forward(stream, length, data + batch * length, extended + batch * 4 * length);
         }

         fft->forward(extended, spectrum, backend_scratch);

         // each complex line spans 2 * length + 1 entries
         for(int batch = 0; batch < num_batch; batch++){
             prepost_processor::post_forward(stream, length, spectrum + batch * (2 * length + 1), data + batch * length);
         }
     }
     /*!
      * \brief Inverse transform.
      *
      * Uses the same workspace layout as forward(), but the data flows in the opposite direction:
      * the complex lines are assembled first and the real extended lines are the output
      * of the complex-to-real executor.
      *
      * \param data holds num_batch lines of \b length entries each, overwritten with the result
      * \param workspace has at least workspace_size() entries
      */
     template<typename scalar_type>
     void backward(scalar_type data[], scalar_type workspace[]) const{
         scalar_type* const extended = workspace;
         std::complex<scalar_type>* const spectrum = align_pntr(reinterpret_cast<std::complex<scalar_type>*>(workspace + fft->box_size() + 1));
         std::complex<scalar_type>* backend_scratch = nullptr;
         if (fft->workspace_size() != 0){
             backend_scratch = spectrum + fft->complex_size();
         }

         for(int batch = 0; batch < num_batch; batch++){
             prepost_processor::pre_backward(stream, length, data + batch * length, spectrum + batch * (2 * length + 1));
         }

         fft->backward(spectrum, extended, backend_scratch);

         for(int batch = 0; batch < num_batch; batch++){
             prepost_processor::post_backward(stream, length, extended + 4 * batch * length, data + batch * length);
         }
     }

     //! \brief Placeholder for template type consistency, should never be called.
     template<typename precision>
     void forward(precision const[], std::complex<precision>[]) const{
         throw std::runtime_error("Calling cos-transform with real-to-complex data! This should not happen!");
     }
     //! \brief Placeholder for template type consistency, should never be called.
     template<typename precision>
     void backward(std::complex<precision> indata[], precision outdata[]) const{
         // lands in the throwing real-to-complex placeholder above
         forward(outdata, indata);
     }

     //! \brief Returns the size of the box.
     int box_size() const override{ return total_size; }
     /*!
      * \brief Returns the size of the workspace.
      *
      * Adds up the real extended signals plus one spare entry, twice the complex size and
      * twice the workspace of the real-to-complex executor, plus one more entry for the
      * cufft backend to leave room for the shift applied by align_pntr().
      */
     size_t workspace_size() const override{
         int const alignment_slack = (std::is_same<fft_backend_tag, backend::cufft>::value) ? 1 : 0;
         return fft->box_size() + 1 + 2 * fft->complex_size() + 2 * fft->workspace_size()
                + alignment_slack;
     }
     /*!
      * \brief Moves the pointer forward to be aligned to the size of std::complex<scalar_type>, used for CUDA only.
      *
      * For any backend other than cufft the pointer is returned unchanged.
      */
     template<typename scalar_type>
     std::complex<scalar_type>* align_pntr(std::complex<scalar_type> *p) const{
         if (not std::is_same<fft_backend_tag, backend::cufft>::value){
             return p;
         }
         if (reinterpret_cast<size_t>(p) % sizeof(std::complex<scalar_type>) == 0){
             return p; // already aligned
         }
         // step forward by a single real entry
         return reinterpret_cast<std::complex<scalar_type>*>(reinterpret_cast<scalar_type*>(p) + 1);
     }

     //! \brief Forward r2r, single precision.
     void forward(float data[], float *workspace) const override{ forward<float>(data, workspace); }
     //! \brief Forward r2r, double precision.
     void forward(double data[], double *workspace) const override{ forward<double>(data, workspace); }
     //! \brief Backward r2r, single precision.
     void backward(float data[], float *workspace) const override{ backward<float>(data, workspace); }
     //! \brief Backward r2r, double precision.
     void backward(double data[], double *workspace) const override{ backward<double>(data, workspace); }

 private:
     //! \brief Stream passed to the pre/post processor and to the real-to-complex executor.
     typename backend::device_instance<typename backend::buffer_traits<fft_backend_tag>::location>::stream_type stream;

     //! \brief Number of entries in a single 1D line.
     int length;
     //! \brief Number of 1D lines in the box.
     int num_batch;
     //! \brief Total number of entries in the box.
     int total_size;

     //! \brief Real-to-complex executor working on the enlarged box.
     std::unique_ptr<typename one_dim_backend<fft_backend_tag>::executor_r2c> fft;
 };


 }


 #endif

heffte::executor_base
Base class for all backend executors.
Definition: heffte_common.h:486

heffte::make_cos_box
box3d< index > make_cos_box(box3d< index > const &box)
Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
Definition: heffte_r2r_executor.h:32

heffte
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38

heffte::backend::device_instance
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358

heffte::box3d
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67

heffte::box3d::size
std::array< index, 3 > const size
The number of indexes in each direction.
Definition: heffte_geometry.h:129

heffte::box3d::order
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition: heffte_geometry.h:131

heffte::box3d::osize
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition: heffte_geometry.h:123

heffte::cpu_cos_pre_pos_processor
Pre/Post processing for the Cosine transform using the CPU.
Definition: heffte_r2r_executor.h:42

heffte::cpu_cos_pre_pos_processor::post_forward
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:57

heffte::cpu_cos_pre_pos_processor::pre_forward
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:45

heffte::cpu_cos_pre_pos_processor::post_backward
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:78

heffte::cpu_cos_pre_pos_processor::pre_backward
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:64

heffte::cpu_sin_pre_pos_processor
Pre/Post processing for the Sine transform using the CPU.
Definition: heffte_r2r_executor.h:88

heffte::cpu_sin_pre_pos_processor::post_forward
static void post_forward(void *, int length, std::complex< precision > const fft_result[], precision result[])
Post-process in the forward transform.
Definition: heffte_r2r_executor.h:104

heffte::cpu_sin_pre_pos_processor::pre_backward
static void pre_backward(void *, int length, precision const input[], std::complex< precision > fft_signal[])
Pre-process in the inverse transform.
Definition: heffte_r2r_executor.h:110

heffte::cpu_sin_pre_pos_processor::post_backward
static void post_backward(void *, int length, precision const fft_result[], precision result[])
Post-process in the inverse transform.
Definition: heffte_r2r_executor.h:122

heffte::cpu_sin_pre_pos_processor::pre_forward
static void pre_forward(void *, int length, precision const input[], precision fft_signal[])
Pre-process in the forward transform.
Definition: heffte_r2r_executor.h:91

heffte::real2real_executor
Template algorithm for the Sine and Cosine transforms.
Definition: heffte_r2r_executor.h:135

heffte::real2real_executor::box_size
int box_size() const override
Returns the size of the box.
Definition: heffte_r2r_executor.h:192

heffte::real2real_executor::workspace_size
size_t workspace_size() const override
Returns the size of the workspace.
Definition: heffte_r2r_executor.h:194

heffte::real2real_executor::forward
void forward(scalar_type data[], scalar_type workspace[]) const
Forward transform.
Definition: heffte_r2r_executor.h:158

heffte::real2real_executor::forward
virtual void forward(double data[], double *workspace) const override
Forward r2r, double precision.
Definition: heffte_r2r_executor.h:211

heffte::real2real_executor::backward
void backward(scalar_type data[], scalar_type workspace[]) const
Inverse transform.
Definition: heffte_r2r_executor.h:171

heffte::real2real_executor::backward
virtual void backward(double data[], double *workspace) const override
Backward r2r, double precision.
Definition: heffte_r2r_executor.h:215

heffte::real2real_executor::real2real_executor
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const box, int dimension)
Construct a plan for batch 1D transforms.
Definition: heffte_r2r_executor.h:138

heffte::real2real_executor::backward
void backward(std::complex< precision > indata[], precision outdata[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:189

heffte::real2real_executor::align_pntr
std::complex< scalar_type > * align_pntr(std::complex< scalar_type > *p) const
Moves the pointer forward to be aligned to the size of std::complex<scalar_type>, used for CUDA only.
Definition: heffte_r2r_executor.h:200

heffte::real2real_executor::backward
virtual void backward(float data[], float *workspace) const override
Backward r2r, single precision.
Definition: heffte_r2r_executor.h:213

heffte::real2real_executor::forward
void forward(precision const[], std::complex< precision >[]) const
Placeholder for template type consistency, should never be called.
Definition: heffte_r2r_executor.h:184

heffte::real2real_executor::real2real_executor
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const, int, int)
Construct a plan for batch 2D transforms, not implemented currently.
Definition: heffte_r2r_executor.h:149

heffte::real2real_executor::real2real_executor
real2real_executor(typename backend::device_instance< typename backend::buffer_traits< fft_backend_tag >::location >::stream_type cstream, box3d< index > const)
Construct a plan for a single 3D transform, not implemented currently.
Definition: heffte_r2r_executor.h:153

heffte::real2real_executor::forward
virtual void forward(float data[], float *workspace) const override
Forward r2r, single precision.
Definition: heffte_r2r_executor.h:209

heffte::tag::cpu
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38