heffte/heffte__fft3d_8h_source.html

 /*

     -- heFFTe --

        Univ. of Tennessee, Knoxville

        @date

 */


 #ifndef HEFFTE_FFT3D_H

 #define HEFFTE_FFT3D_H


 #include "heffte_compute_transform.h"


 namespace heffte {


 /*!
  * \brief Defines the relationship between pairs of input-output types in the FFT algorithms.
  *
  * The generic template maps a type onto itself; the two specializations below
  * map float and double onto the std::complex type with the same precision.
  */
 template<typename scalar_type>
 struct fft_output
 {
     //! \brief The output type corresponding to the scalar_type.
     using type = scalar_type;
 };

 //! \brief Specialization for float input, the output is std::complex<float>.
 template<>
 struct fft_output<float>
 {
     //! \brief The output for a float data is std::complex<float>.
     using type = std::complex<float>;
 };

 //! \brief Specialization for double input, the output is std::complex<double>.
 template<>
 struct fft_output<double>
 {
     //! \brief The output for a double data is std::complex<double>.
     using type = std::complex<double>;
 };


 /*!
  * \brief Defines the relationship between pairs of input-output types in a general transform algorithm.
  *
  * The primary template is intentionally empty; the member \b type is provided by one of
  * the two partial specializations below, selected by backend::uses_fft_types for the backend_tag.
  */
 template<typename scalar_type, typename backend_tag, typename = void>
 struct transform_output
 {
 };

 //! \brief FFT case: the backend uses the FFT types, so the output type is taken from fft_output.
 template<typename scalar_type, typename backend_tag>
 struct transform_output<
     scalar_type,
     backend_tag,
     typename std::enable_if<backend::uses_fft_types<backend_tag>::value>::type>
 {
     //! \brief The output type corresponding to the scalar_type and backend_tag (FFT case).
     using type = typename fft_output<scalar_type>::type;
 };

 //! \brief Non-FFT case (e.g., Cosine Transform): the output type is the same as the input type.
 template<typename scalar_type, typename backend_tag>
 struct transform_output<
     scalar_type,
     backend_tag,
     typename std::enable_if<not backend::uses_fft_types<backend_tag>::value>::type>
 {
     //! \brief The output type corresponding to the scalar_type and backend_tag (Cosine Transform case).
     using type = scalar_type;
 };


 //! \brief Indicates the scaling factor to apply on the result of an FFT operation.
 enum class scale
 {
     //! \brief No scale, leave the result unperturbed similar to the FFTW API.
     none,
     //! \brief Apply the full scale, divide by the number of elements in the world box.
     full,
     //! \brief Symmetric scaling, apply the square-root of the full scaling.
     symmetric
 };


 template<typename backend_tag, typename index = int>

 class fft3d : public backend::device_instance<typename backend::buffer_traits<backend_tag>::location>{

 public:

     //! \brief Alias to the wrapper class for the one dimensional backend library.
     using backend_executor = typename one_dim_backend<backend_tag>::executor;

     //! \brief Alias to the container template associated with the backend.
     template<typename T> using buffer_container = typename backend::buffer_traits<backend_tag>::template container<T>;

     //! \brief Container of real values corresponding to the complex type T (uses the value_type of the standard type of T).
     template<typename T> using real_buffer_container = buffer_container<typename define_standard_type<T>::type::value_type>;

     //! \brief Container of the output type corresponding to T, as defined by transform_output for this backend.
     template<typename T> using output_buffer_container = buffer_container<typename transform_output<T, backend_tag>::type>;


     //! \brief Type-tag for the location of the data, taken from the buffer_traits of the backend.
     using location_tag = typename backend::buffer_traits<backend_tag>::location;


     /*!
      * \brief Creates a plan for a transform across the given communicator using the box geometry of this rank.
      *
      * The boxes of all ranks are gathered with mpi::gather_boxes(), the options are adjusted
      * by set_options() for the backend, and the logic plan produced by plan_operations()
      * is handed to the private constructor.
      *
      * \param inbox is passed to mpi::gather_boxes() as the input box of this rank
      * \param outbox is passed to mpi::gather_boxes() as the output box of this rank
      * \param comm is the MPI communicator used to gather the boxes and to build the plan
      * \param options are the plan options, by default default_options() for the backend
      */
     fft3d(box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
           plan_options const options = default_options<backend_tag>())
         : fft3d(plan_operations(mpi::gather_boxes(inbox, outbox, comm),
                                 -1, // the r2c_direction argument of plan_operations()
                                 set_options<backend_tag>(options),
                                 mpi::comm_rank(comm)),
                 comm)
     {
         static_assert(backend::is_enabled<backend_tag>::value,
                       "The requested backend is invalid or has not been enabled.");
     }

     /*!
      * \brief Identical to the box-based constructor but accepts a GPU stream or queue.
      *
      * \param gpu_stream is forwarded to the private constructor, which passes it to the device_instance base
      * \param inbox is passed to mpi::gather_boxes() as the input box of this rank
      * \param outbox is passed to mpi::gather_boxes() as the output box of this rank
      * \param comm is the MPI communicator used to gather the boxes and to build the plan
      * \param options are the plan options, by default default_options() for the backend
      */
     fft3d(typename backend::device_instance<location_tag>::stream_type gpu_stream,
           box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
           plan_options const options = default_options<backend_tag>())
         : fft3d(gpu_stream,
                 plan_operations(mpi::gather_boxes(inbox, outbox, comm),
                                 -1, // the r2c_direction argument of plan_operations()
                                 set_options<backend_tag>(options),
                                 mpi::comm_rank(comm)),
                 comm)
     {
         static_assert(backend::is_enabled<backend_tag>::value,
                       "The requested backend is invalid or has not been enabled.");
     }


     /*!
      * \brief Internal use only, used by the Fortran interface.
      *
      * Builds the input box from the (il, ih, io) integer triples, the output box from the
      * (ol, oh, oo) triples and the plan options from the three flags, then delegates to the
      * box-based constructor. The integer \b algorithm is converted to reshape_algorithm with a static_cast.
      * NOTE(review): the third triple of each box is presumably the index order - confirm against box3d.
      */
     fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
           int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
           MPI_Comm const comm,
           bool use_reorder, int algorithm, bool use_pencils)
         : fft3d(
               box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
               box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
               comm,
               plan_options(use_reorder, static_cast<reshape_algorithm>(algorithm), use_pencils)
           )
     {
     }

     /*!
      * \brief Internal use only, used by the Fortran interface.
      *
      * Builds the input and output boxes from three integer triples each and delegates to
      * the box-based constructor, which applies the default plan options.
      */
     fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2,
           int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2,
           MPI_Comm const comm)
         : fft3d(
               box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
               box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
               comm
           )
     {
     }

     /*!
      * \brief Internal use only, used by the Fortran interface.
      *
      * Builds the input and output boxes from two integer triples each (il/ih and ol/oh)
      * and delegates to the box-based constructor, which applies the default plan options.
      */
     fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2,
           int ol0, int ol1, int ol2, int oh0, int oh1, int oh2,
           MPI_Comm const comm)
         : fft3d(
               box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}),
               box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}),
               comm
           )
     {
     }


     //! \brief Returns the size of the inbox defined in the constructor.
     long long size_inbox() const
     {
         return pinbox->count();
     }

     //! \brief Returns the size of the outbox defined in the constructor.
     long long size_outbox() const
     {
         return poutbox->count();
     }

     //! \brief Returns the inbox (a copy of the stored box).
     box3d<index> inbox() const
     {
         return *pinbox;
     }

     //! \brief Returns the outbox (a copy of the stored box).
     box3d<index> outbox() const
     {
         return *poutbox;
     }


     template<typename input_type, typename output_type>

     void forward(input_type const input[], output_type output[], scale scaling = scale::none) const{

         static_assert(backend::check_types<backend_tag, input_type, output_type>::value,

                       "Using either an unknown complex type or an incompatible pair of types!");


         auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->stream(), size_workspace());

         forward(input, output, workspace.data(), scaling);

     }


     /*!
      * \brief An overload utilizing a user-allocated workspace buffer.
      *
      * Equivalent to the batch overload with a batch of one transform.
      *
      * \param input is the array with the input data
      * \param output is the array that receives the result
      * \param workspace is the user-provided scratch buffer, see size_workspace()
      * \param scaling is the scaling applied to the output, by default scale::none
      */
     template<typename input_type, typename output_type>
     void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling = scale::none) const{
         static_assert(backend::check_types<backend_tag, input_type, output_type>::value,
                       "Using either an unknown complex type or an incompatible pair of types!");

         // a single transform is a batch of size 1
         forward(1, input, output, workspace, scaling);
     }

     /*!
      * \brief An overload allowing for a batch of FFTs to be performed in a single command.
      *
      * Converts the arrays to the standard C++ types, runs compute_transform() with the forward
      * reshapers and executors, and then applies the requested scaling to the output.
      *
      * \param batch_size is the number of transforms in the batch
      * \param input is the array with the input data
      * \param output is the array that receives the result
      * \param workspace is the user-provided scratch buffer
      * \param scaling is the scaling applied to the output, by default scale::none
      */
     template<typename input_type, typename output_type>
     void forward(int const batch_size, input_type const input[], output_type output[],
                  output_type workspace[], scale scaling = scale::none) const{
         static_assert(backend::check_types<backend_tag, input_type, output_type>::value,
                       "Using either an unknown complex type or an incompatible pair of types!");

         compute_transform<location_tag, index>(
             this->stream(), batch_size,
             convert_to_standard(input), convert_to_standard(output), convert_to_standard(workspace),
             executor_buffer_offset, size_comm_buffers(),
             forward_shaper, forward_executors(), direction::forward);

         apply_scale(batch_size, direction::forward, scaling, output);
     }

     template<typename input_type, typename output_type>

     void forward(int const batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{

         static_assert(backend::check_types<backend_tag, input_type, output_type>::value,

                       "Using either an unknown complex type or an incompatible pair of types!");


         auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->stream(), batch_size * size_workspace());


         forward(batch_size, input, output, workspace.data(), scaling);

     }


     /*!
      * \brief Vector variant of forward() using input and output buffer_container classes.
      *
      * \param input is the container with the input data, must hold at least size_inbox() entries
      * \param scaling is the scaling applied to the output, by default scale::none
      *
      * \returns a newly allocated container with size_outbox() entries holding the result
      *
      * \throws std::invalid_argument if input.size() is less than size_inbox()
      */
     template<typename input_type>
     output_buffer_container<input_type> forward(buffer_container<input_type> const &input, scale scaling = scale::none){
         auto const required_entries = static_cast<size_t>(size_inbox());
         if (input.size() < required_entries){
             throw std::invalid_argument("The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
         }

         using output_scalar = typename transform_output<input_type, backend_tag>::type;
         auto output = make_buffer_container<output_scalar>(this->stream(), size_outbox());

         forward(input.data(), output.data(), scaling);
         return output;
     }


     template<typename input_type, typename output_type>

     void backward(input_type const input[], output_type output[], scale scaling = scale::none) const{

         static_assert(backend::check_types<backend_tag, output_type, input_type>::value,

                       "Using either an unknown complex type or an incompatible pair of types!");


         auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->stream(), size_workspace());

         backward(input, output, workspace.data(), scaling);

     }


     /*!
      * \brief Overload with user-provided workspace buffer, see the corresponding overload of forward().
      *
      * Equivalent to the batch overload with a batch of one transform.
      *
      * \param input is the array with the input data
      * \param output is the array that receives the result
      * \param workspace is the user-provided scratch buffer, see size_workspace()
      * \param scaling is the scaling applied to the output, by default scale::none
      */
     template<typename input_type, typename output_type>
     void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling = scale::none) const{
         static_assert(backend::check_types<backend_tag, output_type, input_type>::value,
                       "Using either an unknown complex type or an incompatible pair of types!");

         // a single transform is a batch of size 1
         backward(1, input, output, workspace, scaling);
     }

     /*!
      * \brief Overload for batch transforms, see the corresponding overload of forward().
      *
      * Converts the arrays to the standard C++ types, runs compute_transform() with the backward
      * reshapers and executors, and then applies the requested scaling to the output.
      *
      * \param batch_size is the number of transforms in the batch
      * \param input is the array with the input data
      * \param output is the array that receives the result
      * \param workspace is the user-provided scratch buffer
      * \param scaling is the scaling applied to the output, by default scale::none
      */
     template<typename input_type, typename output_type>
     void backward(int const batch_size, input_type const input[], output_type output[],
                   input_type workspace[], scale scaling = scale::none) const{
         static_assert(backend::check_types<backend_tag, output_type, input_type>::value,
                       "Using either an unknown complex type or an incompatible pair of types!");

         compute_transform<location_tag, index>(
             this->stream(), batch_size,
             convert_to_standard(input), convert_to_standard(output), convert_to_standard(workspace),
             executor_buffer_offset, size_comm_buffers(),
             backward_shaper, backward_executors(), direction::backward);

         apply_scale(batch_size, direction::backward, scaling, output);
     }

     template<typename input_type, typename output_type>

     void backward(int const batch_size, input_type const input[], output_type output[], scale scaling = scale::none) const{

         static_assert(backend::check_types<backend_tag, output_type, input_type>::value,

                       "Using either an unknown complex type or an incompatible pair of types!");


         auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->stream(), batch_size * size_workspace());

         backward(batch_size, input, output, workspace.data(), scaling);

     }


     /*!
      * \brief Perform complex-to-complex backward FFT using vector API.
      *
      * \param input is the container with the input data, must hold at least size_outbox() entries
      * \param scaling is the scaling applied to the result, by default scale::none
      *
      * \returns a newly allocated container with size_inbox() entries holding the result
      *
      * \throws std::invalid_argument if input.size() is less than size_outbox()
      */
     template<typename scalar_type>
     buffer_container<scalar_type> backward(buffer_container<scalar_type> const &input, scale scaling = scale::none){
         static_assert(is_ccomplex<scalar_type>::value or is_zcomplex<scalar_type>::value,
                       "Either calling backward() with non-complex input or using an unknown complex type.");

         auto const required_entries = static_cast<size_t>(size_outbox());
         if (input.size() < required_entries){
             throw std::invalid_argument("The input vector is smaller than size_outbox(), i.e., not enough entries provided to fill the outbox.");
         }

         auto result = make_buffer_container<scalar_type>(this->stream(), size_inbox());
         backward(input.data(), result.data(), scaling);
         return result;
     }


     /*!
      * \brief Perform complex-to-real backward FFT using vector API (truncates the complex part).
      *
      * \param input is the container with the complex input data, must hold at least size_outbox() entries
      * \param scaling is the scaling applied to the result, by default scale::none
      *
      * \returns a newly allocated container of real values with size_inbox() entries
      *
      * \throws std::invalid_argument if input.size() is less than size_outbox(),
      *         matching the check done by the complex-to-complex vector variant of backward()
      */
     template<typename scalar_type>
     real_buffer_container<scalar_type> backward_real(buffer_container<scalar_type> const &input, scale scaling = scale::none){
         static_assert(is_ccomplex<scalar_type>::value or is_zcomplex<scalar_type>::value,
                       "Either calling backward_real() with non-complex input or using an unknown complex type.");

         // without this guard a short input would be read past its end by the array variant of backward()
         if (input.size() < static_cast<size_t>(size_outbox()))
             throw std::invalid_argument("The input vector is smaller than size_outbox(), i.e., not enough entries provided to fill the outbox.");

         auto result = make_buffer_container<typename define_standard_type<scalar_type>::type::value_type>(this->stream(), size_inbox());
         backward(input.data(), result.data(), scaling);
         return result;
     }


     /*!
      * \brief Returns the scale factor for the given scaling.
      *
      * The full factor is the stored scale_factor when the backend uses the FFT types,
      * otherwise it is scale_factor divided by 64. For scale::symmetric the square-root of
      * the full factor is returned; every other value, including scale::none, yields the full factor.
      * NOTE(review): the 64 is presumably tied to the normalization of the non-FFT transforms - confirm.
      */
     double get_scale_factor(scale scaling) const{
         double const full_factor = (backend::uses_fft_types<backend_tag>::value)
                                    ? scale_factor
                                    : scale_factor / 64.0;

         if (scaling == scale::symmetric)
             return std::sqrt(full_factor);

         return full_factor;
     }


     //! \brief Returns the workspace size that will be used, size is measured in complex numbers.
     size_t size_workspace() const
     {
         return size_buffer_work;
     }

     //! \brief Returns the size used by the communication workspace buffers (internal use).
     size_t size_comm_buffers() const
     {
         return comm_buffer_offset;
     }


 private:

     /*!
      * \brief Private constructor that builds the object from an already computed logic plan.
      *
      * Stores this rank's first input shape and last output shape as the inbox and outbox,
      * sets the full scale factor to one over plan.index_count, and calls setup() to create
      * the reshape and executor objects.
      */
     fft3d(logic_plan3d<index> const &plan, MPI_Comm const comm)  :

         backend::device_instance<location_tag>(),

         // the inbox is the input of stage 0 and the outbox is the output of stage 3, both for this rank
         pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),

         scale_factor(1.0 / static_cast<double>(plan.index_count))

         #ifdef Heffte_ENABLE_MAGMA

         , hmagma(this->stream())

         #endif

     {

         setup(plan, comm);

     }


     /*!
      * \brief Private constructor from a logic plan that also accepts a GPU stream or queue.
      *
      * Same as the other private constructor, except that gpu_stream is passed to the
      * device_instance base class.
      */
     fft3d(typename backend::device_instance<location_tag>::stream_type gpu_stream,

           logic_plan3d<index> const &plan, MPI_Comm const comm) :

         backend::device_instance<location_tag>(gpu_stream),

         // the inbox is the input of stage 0 and the outbox is the output of stage 3, both for this rank
         pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),

         scale_factor(1.0 / static_cast<double>(plan.index_count))

         #ifdef Heffte_ENABLE_MAGMA

         , hmagma(this->stream())

         #endif

     {

         setup(plan, comm);

     }


     /*!
      * \brief Creates the reshape and executor objects and computes the workspace sizes from the logic plan.
      *
      * \param plan is the logic plan holding the input/output shapes of the four stages, the options,
      *        the fft directions and the rank of this process
      * \param comm is the MPI communicator passed to the reshape factory
      */
     void setup(logic_plan3d<index> const &plan, MPI_Comm const comm){

         // one reshape per stage; the backward reshapes swap input/output shapes and are stored in reverse order
         for(int i=0; i<4; i++){

             forward_shaper[i]    = make_reshape3d<backend_tag>(this->stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);

             backward_shaper[3-i] = make_reshape3d<backend_tag>(this->stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);

         }


         int const my_rank = plan.mpi_rank;


         // NOTE(review): a null reshape presumably means that no data movement is needed at that stage - confirm
         // case 1: the backend has a 3D executor and both middle reshapes are null, use a single executor
         if (has_executor3d<backend_tag>() and not forward_shaper[1] and not forward_shaper[2]){

             executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank]);

         // case 2: the backend has a 2D executor and one of the middle reshapes is null,
         //         use one executor for two directions and one for the remaining direction
         }else if (has_executor2d<backend_tag>() and (not forward_shaper[1] or not forward_shaper[2])){

             if (not forward_shaper[1]){

                 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank],

                                                           plan.fft_direction[0], plan.fft_direction[1]);

                 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);

             }else{

                 executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);

                 executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank],

                                                           plan.fft_direction[1], plan.fft_direction[2]);

             }

         // case 3: one executor per direction
         }else{

             executors[0] = make_executor<backend_tag>(this->stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);

             executors[1] = make_executor<backend_tag>(this->stream(), plan.out_shape[1][my_rank], plan.fft_direction[1]);

             executors[2] = make_executor<backend_tag>(this->stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);

         }


         size_t executor_workspace_size = get_max_work_size(executors);

         // the communication buffers must fit the larger of the forward and backward reshape workspaces
         comm_buffer_offset = std::max(get_workspace_size(forward_shaper), get_workspace_size(backward_shaper));

         // the last chunk of (executors[0]->box_size() + 1) / 2 is used only when doing complex-to-real backward transform

         // maybe update the API to call for different size buffers for different complex/real types

         int last_chunk = (executors[0] == nullptr) ? 0 : (((backward_shaper[3]) ? (executors[0]->box_size() + 1) / 2 : 0));

         size_buffer_work =  comm_buffer_offset + executor_workspace_size

                           + get_max_box_size(executors)

                           + last_chunk;

         // when the executors need a workspace, it occupies the last executor_workspace_size entries of the work buffer
         executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;

     }

     std::array<executor_base*, 3> forward_executors() const{

         return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};

     }

     std::array<executor_base*, 3> backward_executors() const{

         return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};

     }


     /*!
      * \brief Multiplies the result of a transform by the factor returned by get_scale_factor().
      *
      * Nothing is done for scale::none. The number of scaled entries is batch_size times
      * size_outbox() for the forward direction and batch_size times size_inbox() otherwise.
      * When MAGMA is enabled and the data location is tag::gpu, the MAGMA handle is used
      * instead of data_scaling::apply().
      *
      * \param batch_size is the number of transforms in the batch
      * \param dir is the direction of the transform that produced the data
      * \param scaling is the requested scaling
      * \param data is the array to scale in place
      */
     template<typename scalar_type>
     void apply_scale(int const batch_size, direction dir, scale scaling, scalar_type data[]) const{
         if (scaling == scale::none)
             return;

         add_trace name("scale");

         auto const num_entries = batch_size * ((dir == direction::forward) ? size_outbox() : size_inbox());

         #ifdef Heffte_ENABLE_MAGMA
         if (std::is_same<location_tag, tag::gpu>::value){
             hmagma.scal(num_entries, get_scale_factor(scaling), data);
             return;
         }
         #endif

         data_scaling::apply(this->stream(), num_entries, data, get_scale_factor(scaling));
     }


     // boxes set in the constructor from the first input shape and the last output shape of the plan
     std::unique_ptr<box3d<index>> pinbox, poutbox; // inbox/output for this process

     // full scale factor, one over plan.index_count, see get_scale_factor()
     double scale_factor;

     // reshape operations for the four stages of the forward transform, entries are tested for null in setup()
     std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;

     // reshape operations for the backward transform, stored in the reverse order of the forward ones
     std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;


     // executors created in setup(), depending on the plan only some of the entries are set
     std::array<std::unique_ptr<executor_base>, 3> executors;

     #ifdef Heffte_ENABLE_MAGMA

     // handle used by apply_scale() when the data location is tag::gpu
     gpu::magma_handle<typename backend::buffer_traits<backend_tag>::location> hmagma;

     #endif


     // cache some values for faster read

     // total workspace size, size of the communication buffers, and offset of the executor workspace, all computed in setup()
     size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;

 };


 //! \brief Alias of fft3d, the same class is used under the fft2d name.
 //! NOTE(review): presumably intended for two dimensional transforms described with 3D boxes - confirm.
 template<typename backend_tag, typename index = int>

 using fft2d = fft3d<backend_tag, index>;


 //! \brief Alias of fft3d, the same class is used under the rtransform name.
 //! NOTE(review): presumably intended for backends that do not use the FFT types (real transforms) - confirm.
 template<typename backend_tag, typename index = int>

 using rtransform = fft3d<backend_tag, index>;


 /*!
  * \brief Factory method that auto-detects the index type based on the box.
  *
  * \tparam backend_tag is the backend to use, must be enabled
  * \tparam index is deduced from the boxes and must be either int or long long
  *
  * \param inbox is the input box of this rank
  * \param outbox is the output box of this rank
  * \param comm is the MPI communicator
  * \param options are the plan options, by default default_options() for the backend
  *
  * \returns an fft3d instance constructed with the given arguments
  */
 template<typename backend_tag, typename index>
 fft3d<backend_tag, index> make_fft3d(box3d<index> const inbox, box3d<index> const outbox, MPI_Comm const comm,
                                      plan_options const options = default_options<backend_tag>())
 {
     constexpr bool supported_index = std::is_same<index, int>::value or std::is_same<index, long long>::value;
     static_assert(supported_index,
                   "heFFTe works with 'int' and 'long long' indexing only");

     constexpr bool backend_enabled = backend::is_enabled<backend_tag>::value;
     static_assert(backend_enabled,
                   "the backend_tag is not valid, perhaps it needs to be enabled in the build system");

     return fft3d<backend_tag, index>(inbox, outbox, comm, options);
 }


 }


 #endif

heffte::fft3d
Defines the plan for a 3-dimensional discrete Fourier transform performed on MPI-distributed data.
Definition: heffte_fft3d.h:213

heffte::fft3d::size_inbox
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition: heffte_fft3d.h:299

heffte::fft3d::forward
void forward(int const batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload allowing for a batch of FFTs to be performed in a single command.
Definition: heffte_fft3d.h:365

heffte::fft3d::backward_real
real_buffer_container< scalar_type > backward_real(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-real backward FFT using vector API (truncates the complex part).
Definition: heffte_fft3d.h:509

heffte::fft3d::fft3d
fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition: heffte_fft3d.h:243

heffte::fft3d::backward
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition: heffte_fft3d.h:442

heffte::fft3d::forward
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition: heffte_fft3d.h:327

heffte::fft3d::inbox
box3d< index > inbox() const
Returns the inbox.
Definition: heffte_fft3d.h:303

heffte::fft3d::output_buffer_container
buffer_container< typename transform_output< T, backend_tag >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition: heffte_fft3d.h:228

heffte::fft3d::get_scale_factor
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition: heffte_fft3d.h:518

heffte::fft3d::location_tag
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition: heffte_fft3d.h:233

heffte::fft3d::outbox
box3d< index > outbox() const
Returns the outbox.
Definition: heffte_fft3d.h:305

heffte::fft3d::fft3d
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:284

heffte::fft3d::size_outbox
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition: heffte_fft3d.h:301

heffte::fft3d::backward
buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-complex backward FFT using vector API.
Definition: heffte_fft3d.h:495

heffte::fft3d::backward
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload with user-provided workspace buffer, see the corresponding overload of forward().
Definition: heffte_fft3d.h:454

heffte::fft3d::backward
void backward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload for batch transforms with internally allocated workspace.
Definition: heffte_fft3d.h:483

heffte::fft3d::forward
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload utilizing a user-allocated workspace buffer.
Definition: heffte_fft3d.h:348

heffte::fft3d::size_workspace
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition: heffte_fft3d.h:527

heffte::fft3d::fft3d
fft3d(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Identical to the other constructor but accepts a GPU stream or queue.
Definition: heffte_fft3d.h:265

heffte::fft3d::backward
void backward(int const batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload for batch transforms, see the corresponding overload of forward().
Definition: heffte_fft3d.h:468

heffte::fft3d::backend_executor
typename one_dim_backend< backend_tag >::executor backend_executor
Alias to the wrapper class for the one dimensional backend library.
Definition: heffte_fft3d.h:216

heffte::fft3d::real_buffer_container
buffer_container< typename define_standard_type< T >::type::value_type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition: heffte_fft3d.h:226

heffte::fft3d::fft3d
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:292

heffte::fft3d::forward
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition: heffte_fft3d.h:414

heffte::fft3d::buffer_container
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend.
Definition: heffte_fft3d.h:224

heffte::fft3d::size_comm_buffers
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition: heffte_fft3d.h:529

heffte::fft3d::forward
void forward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
An overload that allocates workspace internally.
Definition: heffte_fft3d.h:380

heffte::fft3d::fft3d
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:274

heffte::reshape_algorithm
reshape_algorithm
Defines list of potential communication algorithms.
Definition: heffte_plan_logic.h:48

heffte::set_options
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition: heffte_plan_logic.h:207

heffte::scale
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition: heffte_fft3d.h:113

heffte::make_fft3d
fft3d< backend_tag, index > make_fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition: heffte_fft3d.h:716

heffte::scale::none
@ none
No scale, leave the result unperturbed similar to the FFTW API.

heffte::scale::full
@ full
Apply the full scale, divide by the number of elements in the world box.

heffte::scale::symmetric
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.

heffte::backend::name
std::string name()
Returns the human readable name of the backend.
Definition: heffte_common.h:240

heffte::direction
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535

heffte::get_max_box_size
size_t get_max_box_size(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition: heffte_utils.h:402

heffte::get_max_work_size
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition: heffte_utils.h:423

heffte::direction::backward
@ backward
Inverse DFT transform.

heffte::direction::forward
@ forward
Forward DFT transform.

heffte::plan_operations
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition: heffte_plan_logic.cpp:421

heffte::data_scaling::apply
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796

heffte::mpi::comm_rank
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition: heffte_utils.h:78

heffte::mpi::gather_boxes
ioboxes< index > gather_boxes(box3d< index > const my_inbox, box3d< index > const my_outbox, MPI_Comm const comm)
Gather all boxes across all ranks in the comm.
Definition: heffte_geometry.h:697

heffte::get_workspace_size
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition: heffte_reshape3d.h:115

heffte
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38

heffte::convert_to_standard
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition: heffte_utils.h:354

heffte::backend::buffer_traits
Defines the container for the temporary buffers.
Definition: heffte_common.h:212

heffte::backend::buffer_traits::location
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition: heffte_common.h:214

heffte::backend::check_types
Set to true/false type depending whether the types are compatible with the backend transform.
Definition: heffte_common.h:395

heffte::backend::device_instance
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358

heffte::backend::device_instance::stream_type
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition: heffte_common.h:370

heffte::backend::device_instance< backend::buffer_traits< backend_tag >::location >::stream
void * stream()
Returns the nullptr.
Definition: heffte_common.h:364

heffte::backend::device_instance< backend::buffer_traits< backend_tag >::location >::device_instance
device_instance(void *=nullptr)
Empty constructor.
Definition: heffte_common.h:360

heffte::backend::is_enabled
Allows defining whether a specific backend interface has been enabled.
Definition: heffte_common.h:201

heffte::backend::uses_fft_types
Defines whether the backend accepts the standard FFT real-complex or complex-complex transform.
Definition: heffte_common.h:389

heffte::box3d
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67

heffte::fft_output< double >::type
std::complex< double > type
The output for a double data is std::complex<double>
Definition: heffte_fft3d.h:76

heffte::fft_output< float >::type
std::complex< float > type
The output for a float data is std::complex<float>
Definition: heffte_fft3d.h:68

heffte::fft_output
Defines the relationship between pairs of input-output types in the FFT algorithms.
Definition: heffte_fft3d.h:58

heffte::fft_output::type
scalar_type type
The output type corresponding to the scalar_type.
Definition: heffte_fft3d.h:60

heffte::is_ccomplex
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251

heffte::is_zcomplex
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269

heffte::logic_plan3d
The logic plan incorporates the order and types of operations in a transform.
Definition: heffte_plan_logic.h:275

heffte::one_dim_backend
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546

heffte::plan_options
Defines a set of tweaks and options to use in the plan generation.
Definition: heffte_plan_logic.h:131

heffte::tag::cpu
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38

heffte::transform_output< scalar_type, backend_tag, typename std::enable_if< not backend::uses_fft_types< backend_tag >::value >::type >::type
scalar_type type
The output type corresponding to the scalar_type and backend_tag (Cosine Transform case).
Definition: heffte_fft3d.h:103

heffte::transform_output< scalar_type, backend_tag, typename std::enable_if< backend::uses_fft_types< backend_tag >::value >::type >::type
typename fft_output< scalar_type >::type type
The output type corresponding to the scalar_type and backend_tag (FFT case).
Definition: heffte_fft3d.h:94

heffte::transform_output
Defines the relationship between pairs of input-output types in a general transform algorithm.
Definition: heffte_fft3d.h:86