Defines the plan for a 3-dimensional discrete Fourier transform performed on a MPI distributed data. More...

#include <heffte_fft3d.h>

Public Types
using	backend_executor = typename one_dim_backend< backend_tag >::executor
	Alias to the wrapper class for the one dimensional backend library.

template<typename T >
using	buffer_container = typename backend::buffer_traits< backend_tag >::template container< T >
	Alias to the container template associated with the backend. More...

template<typename T >
using	real_buffer_container = buffer_container< typename define_standard_type< T >::type::value_type >
	Container of real values corresponding to the complex type T.

template<typename T >
using	output_buffer_container = buffer_container< typename transform_output< T, backend_tag >::type >
	Container of the output type corresponding to T, see the table of compatible input and output types.

using	location_tag = typename backend::buffer_traits< backend_tag >::location
	Type-tag that is either tag::cpu or tag::gpu to indicate the location of the data.

Public Types inherited from heffte::backend::device_instance< backend::buffer_traits< backend_tag >::location >
using	stream_type = void *
	The type for the internal stream, the cpu uses just a void pointer.

Public Member Functions
	fft3d (box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
	Constructor creating a plan for FFT transform across the given communicator and using the box geometry. More...

	fft3d (typename backend::device_instance< location_tag >::stream_type gpu_stream, box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >())
	Identical to the other constructor but accepts a GPU stream or queue. More...

	fft3d (int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm, bool use_reorder, int algorithm, bool use_pencils)
	Internal use only, used by the Fortran interface.

	fft3d (int il0, int il1, int il2, int ih0, int ih1, int ih2, int io0, int io1, int io2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, int oo0, int oo1, int oo2, MPI_Comm const comm)
	Internal use only, used by the Fortran interface.

	fft3d (int il0, int il1, int il2, int ih0, int ih1, int ih2, int ol0, int ol1, int ol2, int oh0, int oh1, int oh2, MPI_Comm const comm)
	Internal use only, used by the Fortran interface.

long long	size_inbox () const
	Returns the size of the inbox defined in the constructor.

long long	size_outbox () const
	Returns the size of the outbox defined in the constructor.

box3d< index >	inbox () const
	Returns the inbox.

box3d< index >	outbox () const
	Returns the outbox.

template<typename input_type , typename output_type >
void	forward (input_type const input[], output_type output[], scale scaling=scale::none) const
	Performs a forward Fourier transform using two arrays. More...

template<typename input_type , typename output_type >
void	forward (input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
	An overload utilizing a user-allocated workspace buffer. More...

template<typename input_type , typename output_type >
void	forward (int const batch_size, input_type const input[], output_type output[], output_type workspace[], scale scaling=scale::none) const
	An overload allowing for a batch of FFTs to be performed in a single command. More...

template<typename input_type , typename output_type >
void	forward (int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
	An overload that allocates workspace internally.

template<typename input_type >
output_buffer_container< input_type >	forward (buffer_container< input_type > const &input, scale scaling=scale::none)
	Vector variant of forward() using input and output buffer_container classes. More...

template<typename input_type , typename output_type >
void	backward (input_type const input[], output_type output[], scale scaling=scale::none) const
	Performs a backward Fourier transform using two arrays. More...

template<typename input_type , typename output_type >
void	backward (input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
	Overload with user-provided workspace buffer, see the corresponding overload of forward().

template<typename input_type , typename output_type >
void	backward (int const batch_size, input_type const input[], output_type output[], input_type workspace[], scale scaling=scale::none) const
	Overload for batch transforms, see the corresponding overload of forward().

template<typename input_type , typename output_type >
void	backward (int const batch_size, input_type const input[], output_type output[], scale scaling=scale::none) const
	Overload for batch transforms with internally allocated workspace.

template<typename scalar_type >
buffer_container< scalar_type >	backward (buffer_container< scalar_type > const &input, scale scaling=scale::none)
	Perform complex-to-complex backward FFT using vector API.

template<typename scalar_type >
real_buffer_container< scalar_type >	backward_real (buffer_container< scalar_type > const &input, scale scaling=scale::none)
	Perform complex-to-real backward FFT using vector API (truncates the complex part).

double	get_scale_factor (scale scaling) const
	Returns the scale factor for the given scaling.

size_t	size_workspace () const
	Returns the workspace size that will be used, size is measured in complex numbers.

size_t	size_comm_buffers () const
	Returns the size used by the communication workspace buffers (internal use).

Public Member Functions inherited from heffte::backend::device_instance< backend::buffer_traits< backend_tag >::location >
	device_instance (void *=nullptr)
	Empty constructor.

virtual	~device_instance ()=default
	Default destructor.

void *	stream ()
	Returns the nullptr.

void *	stream () const
	Returns the nullptr (const case).

void	synchronize_device () const
	Syncs the execution with the queue, no-op in the CPU case.

Detailed Description

template<typename backend_tag, typename index = int>
class heffte::fft3d< backend_tag, index >

Defines the plan for a 3-dimensional discrete Fourier transform performed on a MPI distributed data.

Overview: HeFFTe provides the frontend MPI communication algorithms that sync data movement across the MPI ranks, but relies on a backend implementation of FFT algorithms in one dimension. Multiple backends are supported (currently only the fftw3 library), an available backend has to be specified via a template tag. Forward and backward (inverse) transforms can be performed with different precision using the same heffte::fft3d object so long as the input and output use the same distributed geometry.

Boxes and Data Distribution

HeFFTe assumes that the input and output data is organized in three dimensional boxes, each MPI rank containing one input and one output box (currently those should not be empty). Each box is defined by three low and three high indexes, the indexes contained within a box range from the low to the high inclusively, i.e., the box heffte::box3d({0, 0, 0}, {0, 0, 2}) contains three indexes (0, 0, 0), (0, 0, 1) and (0, 0, 2). The following conventions are observed:

global indexing starts at 0
the boxes do not overlap (input can overlap with output, but the individual in/out boxed do not)
input and output boxes may be the same but do not have to overlap
no assumption is being made regarding the organization of ranks and boxes

Real and Complex Transforms

HeFFTe supports forward discrete Fourier transforms that take real or complex entries into complex output. The backward (inverse) transform takes complex data entries back to real or complex data. The precision must always match, e.g., float to std::complex<float>, double and std::complex<double>.

Forward transform input	Forward transform output	Backward transform input	Backward transform output
float	std::complex<float>	std::complex<float>	float
double	std::complex<double>	std::complex<double>	double
std::complex<float>	std::complex<float>	std::complex<float>	std::complex<float>
std::complex<double>	std::complex<double>	std::complex<double>	std::complex<double>

Complex Numbers

By default, HeFFTe works with the C++ native std::complex types, those are supported on both the CPU and GPU devices. However, many libraries provide their own complex types definitions and even though those are usually ABI compatible with the C++ standard types, the compiler treats those as distinct entities. Thus, HeFFTe recognizes the types defined by the backend libraries and additional types can be accepted with a specialization of heffte::is_ccomplex and heffte::is_zcomplex.

Backend	Type	C++ Equivalent
FFTW3	fftwf_complex	std::complex<float>
FFTW3	fftw_complex	std::complex<double>
MKL	float _Complex	std::complex<float>
MKL	double _Complex	std::complex<double>
cuFFT	cufftComplex	std::complex<float>
cuFFT	cufftDoubleComplex	std::complex<double>

Scaling

Applying a forward and inverse DFT operations will leave the result as the original data multiplied by the total number of entries in the world box. Thus, the forward and backward operations are not truly inversing, unless the correct scaling is applied. By default, HeFFTe does not apply scaling, but the methods accept an optional parameter with three different options, see also heffte::scale.

Forward	Inverse
forward(a, b, scaling::none)	forward(a, b, scaling::full)
forward(a, b, scaling::symmetric)	forward(a, b, scaling::symmetric)
forward(a, b, scaling::full)	forward(a, b, scaling::none)

Batch FFTs: If multiple signals with the same distribution across the MPI-ranks need to be transformed, then heFFTe can perform batch operations. Observe the following scenario where we take multiple FFT operations on different signals stored contiguously:
std::vector<std::complex<double>> workspace(fft.size_workspace());

for(int i=0; i<batch_size; i++)

fft.forward(input + i * fft.size_inbox(), output + i * fft.size_outbox(), workspace.data());

This can be done with the following call:
std::vector<std::complex<double>> workspace(batch_size * fft.size_workspace());

fft.forward(batch_size, input, output, workspace.data());

The advantage of the batch operations is that communication buffers can be lumped together which reduces the latency when working with small signals. However, note the increased size of the workspace.

Member Typedef Documentation

◆ buffer_container

template<typename backend_tag , typename index = int>

template<typename T >

using heffte::fft3d< backend_tag, index >::buffer_container = typename backend::buffer_traits<backend_tag>::template container<T>

Alias to the container template associated with the backend.

Following C++ RAII style of resource management, HeFFTe uses containers to manage the temporary buffers used during transformation and communication. The CPU backends use std::vector while the GPU backends use heffte::cuda::vector.

Constructor & Destructor Documentation

◆ fft3d() [1/2]

template<typename backend_tag , typename index = int>

heffte::fft3d< backend_tag, index >::fft3d	(	box3d< index > const	inbox,
		box3d< index > const	outbox,
		MPI_Comm const	comm,
		plan_options const	options = `default_options<backend_tag>()`
	)

inline

Constructor creating a plan for FFT transform across the given communicator and using the box geometry.

Parameters

inbox	is the box for the non-transformed data, i.e., the input for the forward() transform and the output of the backward() transform.
outbox	is the box for the transformed data, i.e., the output for the forward() transform and the input of the backward() transform.
comm	is the MPI communicator with all ranks that will participate in the FFT.
options	is a set of options that define the FFT plan, see heffte::plan_options for details.

◆ fft3d() [2/2]

template<typename backend_tag , typename index = int>

heffte::fft3d< backend_tag, index >::fft3d	(	typename backend::device_instance< location_tag >::stream_type	gpu_stream,
		box3d< index > const	inbox,
		box3d< index > const	outbox,
		MPI_Comm const	comm,
		plan_options const	options = `default_options<backend_tag>()`
	)

inline

Identical to the other constructor but accepts a GPU stream or queue.

Parameters

gpu_stream is an initialized GPU stream or queue, the actual type depends on the backend as follows:

CPU backend	void*, the stream is never referenced
CUDA backend	cudaStream_t
ROCm backend	hipStream_t
oneAPI backend	sycl::queue &

In all cases, heFFTe takes a non-owning reference or alias to the stream; deleting the stream is a responsibility of the user but should not be done before the heFFTe object is destroyed. If no stream is provided, heFFTe will use the default CUDA or HIP stream or a default internal SYCL queue; note in the SYCL case the internal queue will create a new SYCL context which is probably not optimal.

Member Function Documentation

◆ forward() [1/4]

template<typename backend_tag , typename index = int>

template<typename input_type , typename output_type >

void heffte::fft3d< backend_tag, index >::forward	(	input_type const	input[],
		output_type	output[],
		scale	scaling = `scale::none`
	)		const

inline

Performs a forward Fourier transform using two arrays.

Template Parameters

input_type	is a type compatible with the input of a forward FFT.
output_type	is a type compatible with the output of a forward FFT.

The input_type and output_type must be compatible, see the table of compatible types.

Parameters

input	is an array of size at least size_inbox() holding the input data corresponding to the inbox
output	is an array of size at least size_outbox() and will be overwritten with the result from the transform corresponding to the outbox
scaling	defines the type of scaling to apply (default no-scaling).

Note that in the complex-to-complex case, the two arrays can be the same, in which case the size must be at least std::max(size_inbox(), size_outbox()). Whether the same or different, padded entities of the arrays will not be accessed.

◆ forward() [2/4]

template<typename backend_tag , typename index = int>

template<typename input_type , typename output_type >

void heffte::fft3d< backend_tag, index >::forward	(	input_type const	input[],
		output_type	output[],
		output_type	workspace[],
		scale	scaling = `scale::none`
	)		const

inline

An overload utilizing a user-allocated workspace buffer.

HeFFTe requires additional buffers to for various MPI operations, e.g., pack-send-receive-unpack. In the standard overload, the extra memory will be allocated during the call to forward() and released right after. However, allocating and deallocation of large buffers can have a measurable negative effect on performance. Optionally, the use can allocate the workspace buffer externally and pass it into the HeFFTe calls.

The workspace buffer must have size equal to size_workspace() and measured in number of complex scalars, e.g., std::complex<float> or std::complex<double> for single and double precision respectively.

◆ forward() [3/4]

template<typename backend_tag , typename index = int>

template<typename input_type , typename output_type >

void heffte::fft3d< backend_tag, index >::forward	(	int const	batch_size,
		input_type const	input[],
		output_type	output[],
		output_type	workspace[],
		scale	scaling = `scale::none`
	)		const

inline

An overload allowing for a batch of FFTs to be performed in a single command.

The inputs are the same as the overload that utilizes a workspace with the added batch_size. The size of the workspace must be a batch_size * size_workspace()

◆ forward() [4/4]

template<typename backend_tag , typename index = int>

template<typename input_type >

output_buffer_container<input_type> heffte::fft3d< backend_tag, index >::forward	(	buffer_container< input_type > const &	input,
		scale	scaling = `scale::none`
	)

inline

Vector variant of forward() using input and output buffer_container classes.

Returns either std::vector or heffte::cuda:vector using only the C++ standard types.

Template Parameters

input_type is a type compatible with the input of a backward FFT, see the table of compatible types.

Parameters

input	is a std::vector or heffte::cuda::vector with size at least size_inbox() corresponding to the input of forward().
scaling	defines the type of scaling to apply (default no-scaling).

Returns: std::vector or heffte::cuda::vector with entries corresponding to the output type and with size equal to size_outbox() corresponding to the output of forward().

Exceptions

std::invalid_argument is the size of the input is less than size_inbox().

This method allow for a more C++-like calls of the form:

std::vector<double> x = ....;
...
heffte::fft3d fft(inbox, outbox, comm);
auto y = fft.forward(x); // y will be std::vector<std::complex<double>>

◆ backward()

template<typename backend_tag , typename index = int>

template<typename input_type , typename output_type >

void heffte::fft3d< backend_tag, index >::backward	(	input_type const	input[],
		output_type	output[],
		scale	scaling = `scale::none`
	)		const

inline

Performs a backward Fourier transform using two arrays.

Template Parameters

input_type	is a type compatible with the input of a backward FFT.
output_type	is a type compatible with the output of a backward FFT.

The input_type and output_type must be compatible, see the table of compatible types.

Parameters

input	is an array of size at least size_outbox() holding the input data corresponding to the outbox
output	is an array of size at least size_inbox() and will be overwritten with the result from the transform corresponding to the inbox
scaling	defines the type of scaling to apply (default no-scaling)

Note that in the complex-to-complex case, the two arrays can be the same, in which case the size must be at least std::max(size_inbox(), size_outbox()). Whether the same or different, padded entities of the arrays will not be accessed.

The documentation for this class was generated from the following file:

include/heffte_fft3d.h

Public Types

Public Member Functions

Detailed Description

template<typename backend_tag, typename index = int> class heffte::fft3d< backend_tag, index >

Member Typedef Documentation

◆ buffer_container

Constructor & Destructor Documentation

◆ fft3d() [1/2]

◆ fft3d() [2/2]

Member Function Documentation

◆ forward() [1/4]

◆ forward() [2/4]

◆ forward() [3/4]

◆ forward() [4/4]

◆ backward()

template<typename backend_tag, typename index = int>
class heffte::fft3d< backend_tag, index >