10 #include "heffte_compute_transform.h"
68 using type = std::complex<float>;
76 using type = std::complex<double>;
85 template<
typename scalar_type,
typename backend_tag,
typename =
void>
91 template<
typename scalar_type,
typename backend_tag>
92 struct transform_output<scalar_type, backend_tag, typename std::enable_if<backend::uses_fft_types<backend_tag>::value>
::type>{
100 template<
typename scalar_type,
typename backend_tag>
101 struct transform_output<scalar_type, backend_tag, typename std::enable_if<not backend::uses_fft_types<backend_tag>::value>
::type>{
212 template<
typename backend_tag,
typename index =
int>
244 plan_options const options = default_options<backend_tag>()) :
267 plan_options const options = default_options<backend_tag>()) :
274 fft3d(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
int io0,
int io1,
int io2,
275 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
int oo0,
int oo1,
int oo2,
277 bool use_reorder,
int algorithm,
bool use_pencils)
278 :
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
279 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
281 plan_options(use_reorder,
static_cast<reshape_algorithm>(algorithm), use_pencils))
284 fft3d(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
int io0,
int io1,
int io2,
285 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
int oo0,
int oo1,
int oo2,
287 :
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
288 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
292 fft3d(
int il0,
int il1,
int il2,
int ih0,
int ih1,
int ih2,
293 int ol0,
int ol1,
int ol2,
int oh0,
int oh1,
int oh2,
295 :
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), comm)
326 template<
typename input_type,
typename output_type>
329 "Using either an unknown complex type or an incompatible pair of types!");
331 auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->
stream(),
size_workspace());
332 forward(input, output, workspace.data(), scaling);
347 template<
typename input_type,
typename output_type>
350 "Using either an unknown complex type or an incompatible pair of types!");
364 template<
typename input_type,
typename output_type>
365 void forward(
int const batch_size, input_type
const input[], output_type output[],
368 "Using either an unknown complex type or an incompatible pair of types!");
379 template<
typename input_type,
typename output_type>
382 "Using either an unknown complex type or an incompatible pair of types!");
384 auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->
stream(), batch_size *
size_workspace());
386 forward(batch_size, input, output, workspace.data(), scaling);
413 template<
typename input_type>
415 if (input.size() <
static_cast<size_t>(
size_inbox()))
416 throw std::invalid_argument(
"The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
417 auto output = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(),
size_outbox());
418 forward(input.data(), output.data(), scaling);
441 template<
typename input_type,
typename output_type>
444 "Using either an unknown complex type or an incompatible pair of types!");
446 auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(),
size_workspace());
447 backward(input, output, workspace.data(), scaling);
453 template<
typename input_type,
typename output_type>
456 "Using either an unknown complex type or an incompatible pair of types!");
467 template<
typename input_type,
typename output_type>
468 void backward(
int const batch_size, input_type
const input[], output_type output[],
471 "Using either an unknown complex type or an incompatible pair of types!");
482 template<
typename input_type,
typename output_type>
485 "Using either an unknown complex type or an incompatible pair of types!");
487 auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(), batch_size *
size_workspace());
488 backward(batch_size, input, output, workspace.data(), scaling);
494 template<
typename scalar_type>
497 "Either calling backward() with non-complex input or using an unknown complex type.");
498 if (input.size() <
static_cast<size_t>(
size_outbox()))
499 throw std::invalid_argument(
"The input vector is smaller than size_outbox(), i.e., not enough entries provided to fill the outbox.");
500 auto result = make_buffer_container<scalar_type>(this->
stream(),
size_inbox());
501 backward(input.data(), result.data(), scaling);
508 template<
typename scalar_type>
511 "Either calling backward() with non-complex input or using an unknown complex type.");
512 auto result = make_buffer_container<typename define_standard_type<scalar_type>::type::value_type>(this->
stream(),
size_inbox());
513 backward(input.data(), result.data(), scaling);
520 return (scaling ==
scale::symmetric) ? std::sqrt(scale_factor) : scale_factor;
522 return (scaling ==
scale::symmetric) ? std::sqrt(scale_factor / 64.0) : scale_factor / 64.0;
553 pinbox(new
box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new
box3d<index>(plan.out_shape[3][plan.mpi_rank])),
554 scale_factor(1.0 / static_cast<double>(plan.index_count))
555 #ifdef Heffte_ENABLE_MAGMA
564 logic_plan3d<index>
const &plan, MPI_Comm
const comm) :
566 pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
567 scale_factor(1.0 / static_cast<double>(plan.index_count))
568 #ifdef Heffte_ENABLE_MAGMA
576 void setup(logic_plan3d<index>
const &plan, MPI_Comm
const comm){
577 for(
int i=0; i<4; i++){
578 forward_shaper[i] = make_reshape3d<backend_tag>(this->
stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
579 backward_shaper[3-i] = make_reshape3d<backend_tag>(this->
stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
582 int const my_rank = plan.mpi_rank;
584 if (has_executor3d<backend_tag>() and not forward_shaper[1] and not forward_shaper[2]){
585 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank]);
586 }
else if (has_executor2d<backend_tag>() and (not forward_shaper[1] or not forward_shaper[2])){
587 if (not forward_shaper[1]){
588 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank],
589 plan.fft_direction[0], plan.fft_direction[1]);
590 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
592 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
593 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank],
594 plan.fft_direction[1], plan.fft_direction[2]);
597 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
598 executors[1] = make_executor<backend_tag>(this->
stream(), plan.out_shape[1][my_rank], plan.fft_direction[1]);
599 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
606 int last_chunk = (executors[0] ==
nullptr) ? 0 : (((backward_shaper[3]) ? (executors[0]->box_size() + 1) / 2 : 0));
607 size_buffer_work = comm_buffer_offset + executor_workspace_size
610 executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
613 std::array<executor_base*, 3> forward_executors()
const{
614 return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
617 std::array<executor_base*, 3> backward_executors()
const{
618 return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
622 template<
typename scalar_type>
623 void apply_scale(
int const batch_size,
direction dir,
scale scaling, scalar_type data[])
const{
625 add_trace
name(
"scale");
626 #ifdef Heffte_ENABLE_MAGMA
640 std::unique_ptr<box3d<index>> pinbox, poutbox;
642 std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
643 std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
645 std::array<std::unique_ptr<executor_base>, 3> executors;
646 #ifdef Heffte_ENABLE_MAGMA
647 gpu::magma_handle<typename backend::buffer_traits<backend_tag>::location> hmagma;
651 size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
662 template<
typename backend_tag,
typename index =
int>
708 template<
typename backend_tag,
typename index =
int>
715 template<
typename backend_tag,
typename index>
717 plan_options const options = default_options<backend_tag>()){
718 static_assert(std::is_same<index, int>::value or std::is_same<index, long long>::value,
719 "heFFTe works with 'int' and 'long long' indexing only");
721 "the backend_tag is not valid, perhaps it needs to be enabled in the build system");
Defines the plan for a 3-dimensional discrete Fourier transform performed on a MPI distributed data.
Definition: heffte_fft3d.h:213
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition: heffte_fft3d.h:299
void forward(int const batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload allowing for a batch of FFTs to be performed in a single command.
Definition: heffte_fft3d.h:365
real_buffer_container< scalar_type > backward_real(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-real backward FFT using vector API (truncates the complex part).
Definition: heffte_fft3d.h:509
fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition: heffte_fft3d.h:243
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition: heffte_fft3d.h:442
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition: heffte_fft3d.h:327
box3d< index > inbox() const
Returns the inbox.
Definition: heffte_fft3d.h:303
buffer_container< typename transform_output< T, backend_tag >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition: heffte_fft3d.h:228
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition: heffte_fft3d.h:518
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition: heffte_fft3d.h:233
box3d< index > outbox() const
Returns the outbox.
Definition: heffte_fft3d.h:305
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:284
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition: heffte_fft3d.h:301
buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-complex backward FFT using vector API.
Definition: heffte_fft3d.h:495
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload with user-provided workspace buffer, see the corresponding overload of forward().
Definition: heffte_fft3d.h:454
void backward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload for batch transforms with internally allocated workspace.
Definition: heffte_fft3d.h:483
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload utilizing a user-allocated workspace buffer.
Definition: heffte_fft3d.h:348
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition: heffte_fft3d.h:527
fft3d(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Identical to the other constructor but accepts a GPU stream or queue.
Definition: heffte_fft3d.h:265
void backward(int const batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload for batch transforms, see the corresponding overload of forward().
Definition: heffte_fft3d.h:468
typename one_dim_backend< backend_tag >::executor backend_executor
Alias to the wrapper class for the one dimensional backend library.
Definition: heffte_fft3d.h:216
buffer_container< typename define_standard_type< T >::type::value_type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition: heffte_fft3d.h:226
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:292
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition: heffte_fft3d.h:414
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend.
Definition: heffte_fft3d.h:224
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition: heffte_fft3d.h:529
void forward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
An overload that allocates workspace internally.
Definition: heffte_fft3d.h:380
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:274
reshape_algorithm
Defines list of potential communication algorithms.
Definition: heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition: heffte_plan_logic.h:207
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition: heffte_fft3d.h:113
fft3d< backend_tag, index > make_fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition: heffte_fft3d.h:716
@ none
No scale, leave the result unperturbed similar to the FFTW API.
@ full
Apply the full scale, divide by the number of elements in the world box.
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
std::string name()
Returns the human readable name of the backend.
Definition: heffte_common.h:240
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
size_t get_max_box_size(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition: heffte_utils.h:402
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition: heffte_utils.h:423
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition: heffte_plan_logic.cpp:421
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition: heffte_utils.h:78
ioboxes< index > gather_boxes(box3d< index > const my_inbox, box3d< index > const my_outbox, MPI_Comm const comm)
Gather all boxes across all ranks in the comm.
Definition: heffte_geometry.h:697
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition: heffte_reshape3d.h:115
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition: heffte_utils.h:354
Defines the container for the temporary buffers.
Definition: heffte_common.h:212
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition: heffte_common.h:214
Set to true/false type depending whether the types are compatible with the backend transform.
Definition: heffte_common.h:395
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition: heffte_common.h:370
void * stream()
Returns the nullptr.
Definition: heffte_common.h:364
device_instance(void *=nullptr)
Empty constructor.
Definition: heffte_common.h:360
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
Defines whether the backend accepts the standard FFT real-complex or complex-complex transform.
Definition: heffte_common.h:389
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
std::complex< double > type
The output for a double data is std::complex<double>
Definition: heffte_fft3d.h:76
std::complex< float > type
The output for a float data is std::complex<float>
Definition: heffte_fft3d.h:68
Defines the relationship between pairs of input-output types in the FFT algorithms.
Definition: heffte_fft3d.h:58
scalar_type type
The output type corresponding to the scalar_type.
Definition: heffte_fft3d.h:60
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
The logic plan incorporates the order and types of operations in a transform.
Definition: heffte_plan_logic.h:275
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
Defines a set of tweaks and options to use in the plan generation.
Definition: heffte_plan_logic.h:131
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38