10 #include "heffte_compute_transform.h" 
   68     using type = std::complex<float>;
 
   76     using type = std::complex<double>;
 
   85 template<
typename scalar_type, 
typename backend_tag, 
typename = 
void>
 
   91 template<
typename scalar_type, 
typename backend_tag>
 
   92 struct transform_output<scalar_type, backend_tag, typename std::enable_if<backend::uses_fft_types<backend_tag>::value>
::type>{
 
  100 template<
typename scalar_type, 
typename backend_tag>
 
  101 struct transform_output<scalar_type, backend_tag, typename std::enable_if<not backend::uses_fft_types<backend_tag>::value>
::type>{
 
  212 template<
typename backend_tag, 
typename index = 
int>
 
  244           plan_options const options = default_options<backend_tag>()) :
 
  267           plan_options const options = default_options<backend_tag>()) :
 
  274     fft3d(
int il0, 
int il1, 
int il2, 
int ih0, 
int ih1, 
int ih2, 
int io0, 
int io1, 
int io2,
 
  275           int ol0, 
int ol1, 
int ol2, 
int oh0, 
int oh1, 
int oh2, 
int oo0, 
int oo1, 
int oo2,
 
  277           bool use_reorder, 
int algorithm, 
bool use_pencils)
 
  278         : 
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
 
  279                 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
 
  281                 plan_options(use_reorder, 
static_cast<reshape_algorithm>(algorithm), use_pencils))
 
  284     fft3d(
int il0, 
int il1, 
int il2, 
int ih0, 
int ih1, 
int ih2, 
int io0, 
int io1, 
int io2,
 
  285           int ol0, 
int ol1, 
int ol2, 
int oh0, 
int oh1, 
int oh2, 
int oo0, 
int oo1, 
int oo2,
 
  287         : 
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}, {io0, io1, io2}),
 
  288                 box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}, {oo0, oo1, oo2}),
 
  292     fft3d(
int il0, 
int il1, 
int il2, 
int ih0, 
int ih1, 
int ih2,
 
  293           int ol0, 
int ol1, 
int ol2, 
int oh0, 
int oh1, 
int oh2,
 
  295         : 
fft3d(
box3d<index>({il0, il1, il2}, {ih0, ih1, ih2}), box3d<index>({ol0, ol1, ol2}, {oh0, oh1, oh2}), comm)
 
  326     template<
typename input_type, 
typename output_type>
 
  329                       "Using either an unknown complex type or an incompatible pair of types!");
 
  331         auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->
stream(), 
size_workspace());
 
  332         forward(input, output, workspace.data(), scaling);
 
  347     template<
typename input_type, 
typename output_type>
 
  350                       "Using either an unknown complex type or an incompatible pair of types!");
 
  364     template<
typename input_type, 
typename output_type>
 
  365     void forward(
int const batch_size, input_type 
const input[], output_type output[],
 
  368                       "Using either an unknown complex type or an incompatible pair of types!");
 
  379     template<
typename input_type, 
typename output_type>
 
  382                       "Using either an unknown complex type or an incompatible pair of types!");
 
  384         auto workspace = make_buffer_container<typename transform_output<typename define_standard_type<output_type>::type, backend_tag>::type>(this->
stream(), batch_size * 
size_workspace());
 
  386         forward(batch_size, input, output, workspace.data(), scaling);
 
  413     template<
typename input_type>
 
  415         if (input.size() < 
static_cast<size_t>(
size_inbox()))
 
  416             throw std::invalid_argument(
"The input vector is smaller than size_inbox(), i.e., not enough entries provided to fill the inbox.");
 
  417         auto output = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(), 
size_outbox());
 
  418         forward(input.data(), output.data(), scaling);
 
  441     template<
typename input_type, 
typename output_type>
 
  444                       "Using either an unknown complex type or an incompatible pair of types!");
 
  446         auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(), 
size_workspace());
 
  447         backward(input, output, workspace.data(), scaling);
 
  453     template<
typename input_type, 
typename output_type>
 
  456                       "Using either an unknown complex type or an incompatible pair of types!");
 
  467     template<
typename input_type, 
typename output_type>
 
  468     void backward(
int const batch_size, input_type 
const input[], output_type output[],
 
  471                       "Using either an unknown complex type or an incompatible pair of types!");
 
  482     template<
typename input_type, 
typename output_type>
 
  485                       "Using either an unknown complex type or an incompatible pair of types!");
 
  487         auto workspace = make_buffer_container<typename transform_output<input_type, backend_tag>::type>(this->
stream(), batch_size * 
size_workspace());
 
  488         backward(batch_size, input, output, workspace.data(), scaling);
 
  494     template<
typename scalar_type>
 
  497                       "Either calling backward() with non-complex input or using an unknown complex type.");
 
  498         if (input.size() < 
static_cast<size_t>(
size_outbox()))
 
  499             throw std::invalid_argument(
"The input vector is smaller than size_outbox(), i.e., not enough entries provided to fill the outbox.");
 
  500         auto result = make_buffer_container<scalar_type>(this->
stream(), 
size_inbox());
 
  501         backward(input.data(), result.data(), scaling);
 
  508     template<
typename scalar_type>
 
  511                       "Either calling backward() with non-complex input or using an unknown complex type.");
 
  512         auto result = make_buffer_container<typename define_standard_type<scalar_type>::type::value_type>(this->
stream(), 
size_inbox());
 
  513         backward(input.data(), result.data(), scaling);
 
  520             return (scaling == 
scale::symmetric) ? std::sqrt(scale_factor) : scale_factor;
 
  522             return (scaling == 
scale::symmetric) ? std::sqrt(scale_factor / 64.0) : scale_factor / 64.0;
 
  553         pinbox(new 
box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new 
box3d<index>(plan.out_shape[3][plan.mpi_rank])),
 
  554         scale_factor(1.0 / static_cast<double>(plan.index_count))
 
  555         #ifdef Heffte_ENABLE_MAGMA
 
  564           logic_plan3d<index> 
const &plan, MPI_Comm 
const comm) :
 
  566         pinbox(new box3d<index>(plan.in_shape[0][plan.mpi_rank])), poutbox(new box3d<index>(plan.out_shape[3][plan.mpi_rank])),
 
  567         scale_factor(1.0 / static_cast<double>(plan.index_count))
 
  568         #ifdef Heffte_ENABLE_MAGMA
 
  576     void setup(logic_plan3d<index> 
const &plan, MPI_Comm 
const comm){
 
  577         for(
int i=0; i<4; i++){
 
  578             forward_shaper[i]    = make_reshape3d<backend_tag>(this->
stream(), plan.in_shape[i], plan.out_shape[i], comm, plan.options);
 
  579             backward_shaper[3-i] = make_reshape3d<backend_tag>(this->
stream(), plan.out_shape[i], plan.in_shape[i], comm, plan.options);
 
  582         int const my_rank = plan.mpi_rank;
 
  584         if (has_executor3d<backend_tag>() and not forward_shaper[1] and not forward_shaper[2]){
 
  585             executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank]);
 
  586         }
else if (has_executor2d<backend_tag>() and (not forward_shaper[1] or not forward_shaper[2])){
 
  587             if (not forward_shaper[1]){
 
  588                 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank],
 
  589                                                           plan.fft_direction[0], plan.fft_direction[1]);
 
  590                 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
 
  592                 executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
 
  593                 executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank],
 
  594                                                           plan.fft_direction[1], plan.fft_direction[2]);
 
  597             executors[0] = make_executor<backend_tag>(this->
stream(), plan.out_shape[0][my_rank], plan.fft_direction[0]);
 
  598             executors[1] = make_executor<backend_tag>(this->
stream(), plan.out_shape[1][my_rank], plan.fft_direction[1]);
 
  599             executors[2] = make_executor<backend_tag>(this->
stream(), plan.out_shape[2][my_rank], plan.fft_direction[2]);
 
  606         int last_chunk = (executors[0] == 
nullptr) ? 0 : (((backward_shaper[3]) ? (executors[0]->box_size() + 1) / 2 : 0));
 
  607         size_buffer_work =  comm_buffer_offset + executor_workspace_size
 
  610         executor_buffer_offset = (executor_workspace_size == 0) ? 0 : size_buffer_work - executor_workspace_size;
 
  613     std::array<executor_base*, 3> forward_executors()
 const{
 
  614         return std::array<executor_base*, 3>{executors[0].get(), executors[1].get(), executors[2].get()};
 
  617     std::array<executor_base*, 3> backward_executors()
 const{
 
  618         return std::array<executor_base*, 3>{executors[2].get(), executors[1].get(), executors[0].get()};
 
  622     template<
typename scalar_type>
 
  623     void apply_scale(
int const batch_size, 
direction dir, 
scale scaling, scalar_type data[])
 const{
 
  625             add_trace 
name(
"scale");
 
  626             #ifdef Heffte_ENABLE_MAGMA 
  640     std::unique_ptr<box3d<index>> pinbox, poutbox; 
 
  642     std::array<std::unique_ptr<reshape3d_base<index>>, 4> forward_shaper;
 
  643     std::array<std::unique_ptr<reshape3d_base<index>>, 4> backward_shaper;
 
  645     std::array<std::unique_ptr<executor_base>, 3> executors;
 
  646     #ifdef Heffte_ENABLE_MAGMA 
  647     gpu::magma_handle<typename backend::buffer_traits<backend_tag>::location> hmagma;
 
  651     size_t size_buffer_work, comm_buffer_offset, executor_buffer_offset;
 
  662 template<
typename backend_tag, 
typename index = 
int>
 
  708 template<
typename backend_tag, 
typename index = 
int>
 
  715 template<
typename backend_tag, 
typename index>
 
  717                                      plan_options const options = default_options<backend_tag>()){
 
  718     static_assert(std::is_same<index, int>::value or std::is_same<index, long long>::value,
 
  719                   "heFFTe works with 'int' and 'long long' indexing only");
 
  721                   "the backend_tag is not valid, perhaps it needs to be enabled in the build system");
 
Defines the plan for a 3-dimensional discrete Fourier transform performed on a MPI distributed data.
Definition: heffte_fft3d.h:213
long long size_inbox() const
Returns the size of the inbox defined in the constructor.
Definition: heffte_fft3d.h:299
void forward(int const batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload allowing for a batch of FFTs to be performed in a single command.
Definition: heffte_fft3d.h:365
real_buffer_container< scalar_type > backward_real(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-real backward FFT using vector API (truncates the complex part).
Definition: heffte_fft3d.h:509
fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Constructor creating a plan for FFT transform across the given communicator and using the box geometr...
Definition: heffte_fft3d.h:243
void backward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a backward Fourier transform using two arrays.
Definition: heffte_fft3d.h:442
void forward(input_type const input[], output_type output[], scale scaling=scale::none) const
Performs a forward Fourier transform using two arrays.
Definition: heffte_fft3d.h:327
box3d< index > inbox() const
Returns the inbox.
Definition: heffte_fft3d.h:303
buffer_container< typename transform_output< T, backend_tag >::type > output_buffer_container
Container of the output type corresponding to T, see the table of compatible input and output types.
Definition: heffte_fft3d.h:228
double get_scale_factor(scale scaling) const
Returns the scale factor for the given scaling.
Definition: heffte_fft3d.h:518
typename backend::buffer_traits< backend_tag >::location location_tag
Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.
Definition: heffte_fft3d.h:233
box3d< index > outbox() const
Returns the outbox.
Definition: heffte_fft3d.h:305
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:284
long long size_outbox() const
Returns the size of the outbox defined in the constructor.
Definition: heffte_fft3d.h:301
buffer_container< scalar_type > backward(buffer_container< scalar_type > const &input, scale scaling=scale::none)
Perform complex-to-complex backward FFT using vector API.
Definition: heffte_fft3d.h:495
void backward(input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload with user-provided workspace buffer, see the corresponding overload of forward().
Definition: heffte_fft3d.h:454
void backward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
Overload for batch transforms with internally allocated workspace.
Definition: heffte_fft3d.h:483
void forward(input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
An overload utilizing a user-allocated workspace buffer.
Definition: heffte_fft3d.h:348
size_t size_workspace() const
Returns the workspace size that will be used, size is measured in complex numbers.
Definition: heffte_fft3d.h:527
fft3d(typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Identical to the other constructor but accepts a GPU stream or queue.
Definition: heffte_fft3d.h:265
void backward(int const batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
Overload for batch transforms, see the corresponding overload of forward().
Definition: heffte_fft3d.h:468
typename one_dim_backend< backend_tag >::executor backend_executor
Alias to the wrapper class for the one dimensional backend library.
Definition: heffte_fft3d.h:216
buffer_container< typename define_standard_type< T >::type::value_type > real_buffer_container
Container of real values corresponding to the complex type T.
Definition: heffte_fft3d.h:226
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, MPI_Comm const comm)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:292
output_buffer_container< input_type > forward(buffer_container< input_type > const &input, scale scaling=scale::none)
Vector variant of forward() using input and output buffer_container classes.
Definition: heffte_fft3d.h:414
typename backend::buffer_traits< backend_tag >::template container< T > buffer_container
Alias to the container template associated with the backend.
Definition: heffte_fft3d.h:224
size_t size_comm_buffers() const
Returns the size used by the communication workspace buffers (internal use).
Definition: heffte_fft3d.h:529
void forward(int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
An overload that allocates workspace internally.
Definition: heffte_fft3d.h:380
fft3d(int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
Internal use only, used by the Fortran interface.
Definition: heffte_fft3d.h:274
reshape_algorithm
Defines list of potential communication algorithms.
Definition: heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition: heffte_plan_logic.h:207
scale
Indicates the scaling factor to apply on the result of an FFT operation.
Definition: heffte_fft3d.h:113
fft3d< backend_tag, index > make_fft3d(box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
Factory method that auto-detects the index type based on the box.
Definition: heffte_fft3d.h:716
@ none
No scale, leave the result unperturbed similar to the FFTW API.
@ full
Apply the full scale, divide by the number of elements in the world box.
@ symmetric
Symmetric scaling, apply the square-root of the full scaling.
std::string name()
Returns the human readable name of the backend.
Definition: heffte_common.h:240
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
size_t get_max_box_size(std::array< some_class, 3 > const &executors)
Returns the max of the box_size() for each of the executors.
Definition: heffte_utils.h:402
size_t get_max_work_size(std::array< some_class, 3 > const &executors)
Returns the max of the workspace_size() for each of the executors.
Definition: heffte_utils.h:423
@ backward
Inverse DFT transform.
@ forward
Forward DFT transform.
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition: heffte_plan_logic.cpp:421
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition: heffte_utils.h:78
ioboxes< index > gather_boxes(box3d< index > const my_inbox, box3d< index > const my_outbox, MPI_Comm const comm)
Gather all boxes across all ranks in the comm.
Definition: heffte_geometry.h:697
size_t get_workspace_size(std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shapers)
Returns the maximum workspace size used by the shapers.
Definition: heffte_reshape3d.h:115
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
define_standard_type< scalar_type >::type * convert_to_standard(scalar_type input[])
Converts an array of some type to an array of the C++ equivalent type.
Definition: heffte_utils.h:354
Defines the container for the temporary buffers.
Definition: heffte_common.h:212
tag::cpu location
Tags the raw-array location tag::cpu or tag::gpu, used by the packers.
Definition: heffte_common.h:214
Set to true/false type depending whether the types are compatible with the backend transform.
Definition: heffte_common.h:395
Holds the auxiliary variables needed by each backend.
Definition: heffte_common.h:358
void * stream_type
The type for the internal stream, the cpu uses just a void pointer.
Definition: heffte_common.h:370
void * stream()
Returns the nullptr.
Definition: heffte_common.h:364
device_instance(void *=nullptr)
Empty constructor.
Definition: heffte_common.h:360
Allows to define whether a specific backend interface has been enabled.
Definition: heffte_common.h:201
Defines whether the backend accepts the standard FFT real-complex or complex-complex transform.
Definition: heffte_common.h:389
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
std::complex< double > type
The output for a double data is std::complex<double>
Definition: heffte_fft3d.h:76
std::complex< float > type
The output for a float data is std::complex<float>
Definition: heffte_fft3d.h:68
Defines the relationship between pairs of input-output types in the FFT algorithms.
Definition: heffte_fft3d.h:58
scalar_type type
The output type corresponding to the scalar_type.
Definition: heffte_fft3d.h:60
Struct to specialize to allow HeFFTe to recognize custom single precision complex types.
Definition: heffte_utils.h:251
Struct to specialize to allow HeFFTe to recognize custom double precision complex types.
Definition: heffte_utils.h:269
The logic plan incorporates the order and types of operations in a transform.
Definition: heffte_plan_logic.h:275
Indicates the structure that will be used by the fft backend.
Definition: heffte_common.h:546
Defines a set of tweaks and options to use in the plan generation.
Definition: heffte_plan_logic.h:131
Indicates the use of cpu backend and that all input/output data and arrays will be bound to the cpu.
Definition: heffte_common.h:38