7 #ifndef HEFFTE_FFT3D_R2C_H
8 #define HEFFTE_FFT3D_R2C_H
10 #include "heffte_fft3d.h"
45 template<
typename backend_tag,
typename index =
int>
77 plan_options const options = default_options<backend_tag>()) :
79 assert(r2c_direction == 0 or r2c_direction == 1 or r2c_direction == 2);
87 plan_options const options = default_options<backend_tag>()) :
91 assert(r2c_direction == 0 or r2c_direction == 1 or r2c_direction == 2);
96 fft3d_r2c(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
int io0,
int io1,
int io2,
97 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
int oo0,
int oo1,
int oo2,
98 int r2c_direction, MPI_Comm
const comm,
99 bool use_reorder,
int algorithm,
bool use_pencils)
100 :
fft3d_r2c(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
101 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
103 plan_options(use_reorder,
static_cast<reshape_algorithm>(algorithm), use_pencils))
106 fft3d_r2c(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
int io0,
int io1,
int io2,
107 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
int oo0,
int oo1,
int oo2,
108 int r2c_direction, MPI_Comm
const comm)
109 :
fft3d_r2c(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
110 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
114 fft3d_r2c(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
115 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
116 int r2c_direction, MPI_Comm
const comm)
117 :
fft3d_r2c(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), r2c_direction, comm)
146 template<
typename input_type,
typename output_type>
150 "Using either an unknown complex type or an incompatible pair of types!");
153 forward(input, output, workspace.data(), scaling);
157 template<
typename input_type,
typename output_type>
161 "Using either an unknown complex type or an incompatible pair of types!");
170 template<
typename input_type,
typename output_type>
171 void forward(
int batch_size, input_type
const input[], output_type output[],
175 "Using either an unknown complex type or an incompatible pair of types!");
184 template<
typename input_type,
typename output_type>
188 "Using either an unknown complex type or an incompatible pair of types!");
190 auto workspace = make_buffer_container<output_type>(this->
stream(), batch_size *
size_workspace());
191 forward(batch_size, input, output, workspace.data(), scaling);
210 template<
typename input_type>
212 if (input.size() <
static_cast<size_t>(
size_inbox()))
213 throw std::invalid_argument(
"The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
214 static_assert(std::is_same<input_type, float>::value or std::is_same<input_type, double>::value,
215 "The input to forward() must be real, i.e., either float or double.");
216 auto output = make_buffer_container<typename fft_output<input_type>::type>(this->
stream(),
size_outbox());
217 forward(input.data(), output.data(), scaling);
234 template<
typename input_type,
typename output_type>
238 "Using either an unknown complex type or an incompatible pair of types!");
241 backward(input, output, workspace.data(), scaling);
245 template<
typename input_type,
typename output_type>
249 "Using either an unknown complex type or an incompatible pair of types!");
258 template<
typename input_type,
typename output_type>
259 void backward(
int batch_size, input_type
const input[], output_type output[],
263 "Using either an unknown complex type or an incompatible pair of types!");
272 template<
typename input_type,
typename output_type>
276 "Using either an unknown complex type or an incompatible pair of types!");
278 auto workspace = make_buffer_container<input_type>(this->
stream(), batch_size *
size_workspace());
279 backward(batch_size, input, output, workspace.data(), scaling);
285 template<
typename scalar_type>
288 "Either calling backward() with non-complex input or using an unknown complex type.");
289 auto result = make_buffer_container<typename define_standard_type<scalar_type>::type::value_type>(this->
stream(),
size_inbox());
290 backward(input.data(), result.data(), scaling);
302 pinbox(new
box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new
box3d<index>(plan.out_shape[3][plan.mpi_rank])),
303 scale_factor(1.0 / static_cast<double>(plan.index_count))
304 #ifdef Heffte_ENABLE_MAGMA
313 logic_plan3d<index>
const &plan, MPI_Comm
const comm) :
315 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
316 scale_factor(1.0 / static_cast<double>(plan.index_count))
317 #ifdef Heffte_ENABLE_MAGMA
325 void setup(logic_plan3d<index>
const &plan, MPI_Comm
const comm){
326 for(
int i=0; i<4; i++){
327 forward_shaper[i] = make_reshape3d<backend_tag>(this->
stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
328 backward_shaper[3-i] = make_reshape3d<backend_tag>(this->
stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
331 executors[0] = make_executor_r2c<backend_tag>(this->
stream(), plan.out_shape[0][
mpi::comm_rank(comm)], plan.fft_direction[0]);
332 executors[1] = make_executor<backend_tag>(this->
stream(), plan.out_shape[1][
mpi::comm_rank(comm)], plan.fft_direction[1]);
333 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][
mpi::comm_rank(comm)], plan.fft_direction[2]);
337 size_buffer_work = comm_buffer_offset
339 executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
342 std::array<executor_base*, 3> forward_executors()
const{
343 return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
346 std::array<executor_base*, 3> backward_executors()
const{
347 return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
351 template<
typename scalar_type>
352 void apply_scale(
int const batch_size,
direction dir,
scale scaling, scalar_type data[])
const{
354 add_trace
name(
"scale");
355 #ifdef Heffte_ENABLE_MAGMA
356 if (std::is_same<location_tag, tag::gpu>::value){
368 std::unique_ptr<box3d<index>> pinbox, poutbox;
370 std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
371 std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
373 std::array<std::unique_ptr<executor_base>, 3> executors;
374 #ifdef Heffte_ENABLE_MAGMA
375 gpu::magma_handle<location_tag> hmagma;
379 size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
386 template<
typename backend_tag,
typename index =
int>
393 template<
typename backend_tag,
typename index>
395 int r2c_direction, MPI_Comm
const comm,
396 plan_options const options = default_options<backend_tag>()){
397 static_assert(std::is_same<index, int>::value or std::is_same<index, long long>::value,
398 "heFFTe works with 'int' and 'long long' indexing only");
400 "The backend_tag is not valid, perhaps it needs to be enabled in the build system");
Similar to heffte::fft3d, but computed fewer redundant coefficients when the input is real.
Definition: heffte_fft3d_r2c.h:46
buffer_container< typename define_standard_type< T >::type::value_type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition: heffte_fft3d_r2c.h:62
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d_r2c.h:96
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition: heffte_fft3d_r2c.h:147
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend (allows for RAII memory management).
Definition: heffte_fft3d_r2c.h:60
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition: heffte_fft3d_r2c.h:131
void backward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload that performs a batch transform using internally allocated workspace.
Definition: heffte_fft3d_r2c.h:273
typename one_dim_backend< backend_tag >::executor_r2c backend_executor_r2c
FFT executor for the real-to-complex dimension.
Definition: heffte_fft3d_r2c.h:51
fft3d_r2c(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
See the documentation for fft3d::fft3d()
Definition: heffte_fft3d_r2c.h:85
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition: heffte_fft3d_r2c.h:121
fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition: heffte_fft3d_r2c.h:76
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition: heffte_fft3d_r2c.h:55
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition: heffte_fft3d_r2c.h:123
buffer_container< typename fft_output< T >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition: heffte_fft3d_r2c.h:64
void forward(int batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a batch transform.
Definition: heffte_fft3d_r2c.h:171
real_buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Variant of backward() that uses buffer_container for RAII style of resource management.
Definition: heffte_fft3d_r2c.h:286
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d_r2c.h:114
void backward(int batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload that performs a batch transform.
Definition: heffte_fft3d_r2c.h:259
box3d< index > inbox() const
Returns the inbox.
Definition: heffte_fft3d_r2c.h:125
box3d< index > outbox() const
Returns the outbox.
Definition: heffte_fft3d_r2c.h:127
typename one_dim_backend< backend_tag >::executor backend_executor_c2c
FFT executor for the complex-to-complex dimensions.
Definition: heffte_fft3d_r2c.h:49
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition: heffte_fft3d_r2c.h:158
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition: heffte_fft3d_r2c.h:129
fft3d_r2c(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, int r2c_direction, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d_r2c.h:106
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload utilizing a user provided buffer.
Definition: heffte_fft3d_r2c.h:246
void forward(int batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload utilizing a batch transform using internally allocated workspace.
Definition: heffte_fft3d_r2c.h:185
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition: heffte_fft3d_r2c.h:297
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition: heffte_fft3d_r2c.h:211
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition: heffte_fft3d_r2c.h:235
reshape_algorithm
Defines list of potential communication algorithms.
Definition: heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition: heffte_plan_logic.h:207
fft3d_r2c< backend_tag, index > make_fft3d_r2c(box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition: heffte_fft3d_r2c.h:394
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition: heffte_fft3d.h:113
@ none
No scale, leave the result unperturbed similar to the FFTW API.
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
std::string name()
Returns the human readable name of the backend.
Definition: heffte_common.h:240
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
size_t get_max_box_size_r2c(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition: heffte_utils.h:413
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition: heffte_utils.h:423
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition: heffte_plan_logic.cpp:421
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition: heffte_utils.h:78
ioboxes< index > gather_boxes(box3d< index > const my_inbox, box3d< index > const my_outbox, MPI_Comm const comm)
Gather all boxes across all ranks in the comm.
Definition: heffte_geometry.h:697
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition: heffte_reshape3d.h:115
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition: heffte_utils.h:354
Defines the container for the temporary buffers.
Definition: heffte_common.h:212
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition: heffte_common.h:370
void * stream()
Returns the nullptr.
Definition: heffte_common.h:364
device_instance(void *=nullptr)
Empty constructor.
Definition: heffte_common.h:360
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
The logic plan incorporates the order and types of operations in a transform.
Definition: heffte_plan_logic.h:275
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
Defines a set of tweaks and options to use in the plan generation.
Definition: heffte_plan_logic.h:131
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38