Highly Efficient FFT for Exascale: HeFFTe v2.3
Collaboration diagram for Backend cufft:

Classes

struct  heffte::cuda::cos_pre_pos_processor
 Implementation of Cosine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::cuda::sin_pre_pos_processor
 Implementation of Sine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::backend::is_enabled< cufft >
 Indicate that the cuFFT backend has been enabled. More...
 
struct  heffte::backend::is_enabled< cufft_cos >
 Indicate that the cuFFT backend has been enabled for Cosine Transform. More...
 
struct  heffte::backend::is_enabled< cufft_sin >
 Indicate that the cuFFT backend has been enabled for Sine Transform. More...
 
struct  heffte::backend::device_instance< tag::gpu >
 The CUDA backend uses a CUDA stream. More...
 
struct  heffte::backend::default_backend< tag::gpu >
 In CUDA mode, the default GPU backend is cufft. More...
 
struct  heffte::backend::data_manipulator< tag::gpu >
 Specialization for the data operations in CUDA mode. More...
 
struct  heffte::backend::buffer_traits< cufft >
 Defines the location type-tag and the cuda container. More...
 
struct  heffte::backend::buffer_traits< cufft_cos >
 Defines the location type-tag and the cuda container. More...
 
struct  heffte::backend::buffer_traits< cufft_sin >
 Defines the location type-tag and the cuda container. More...
 
struct  heffte::is_ccomplex< cufftComplex >
 Recognize the cuFFT single precision complex type. More...
 
struct  heffte::is_zcomplex< cufftDoubleComplex >
 Recognize the cuFFT double precision complex type. More...
 
struct  heffte::plan_cufft< scalar_type >
 Wrapper around cufftHandle plans, set for float or double complex. More...
 
class  heffte::cufft_executor
 Wrapper around the cuFFT API. More...
 
struct  heffte::plan_cufft_r2c< scalar_type >
 Plan for the r2c single and double precision transform. More...
 
class  heffte::cufft_executor_r2c
 Wrapper to cuFFT API for real-to-complex transform with shortening of the data. More...
 
struct  heffte::one_dim_backend< backend::cufft >
 Helper struct that defines the types and creates instances of one-dimensional executors. More...
 
struct  heffte::one_dim_backend< backend::cufft_cos >
 Helper struct that defines the types and creates instances of one-dimensional executors. More...
 
struct  heffte::one_dim_backend< backend::cufft_sin >
 Helper struct that defines the types and creates instances of one-dimensional executors. More...
 
struct  heffte::default_plan_options< backend::cufft >
 Sets the default options for the cufft backend. More...
 
struct  heffte::default_plan_options< backend::cufft_cos >
 Sets the default options for the cufft backend. More...
 
struct  heffte::default_plan_options< backend::cufft_sin >
 Sets the default options for the cufft backend. More...
 
struct  heffte::oapi::cos_pre_pos_processor
 Implementation of Cosine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::oapi::sin_pre_pos_processor
 Implementation of Sine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::rocm::cos_pre_pos_processor
 Implementation of Cosine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::rocm::sin_pre_pos_processor
 Implementation of Sine Transform pre-post processing methods using CUDA. More...
 
struct  heffte::backend::cufft
 Type-tag for the cuFFT backend. More...
 
struct  heffte::backend::cufft_cos
 Type-tag for the Cosine Transform using the cuFFT backend. More...
 
struct  heffte::backend::cufft_sin
 Type-tag for the Sine Transform using the cuFFT backend. More...
 

Functions

void heffte::cuda::check_error (cudaError_t status, const char *function_name)
 Checks the status of a CUDA command and in case of a failure, converts it to a C++ exception.
 
void heffte::cuda::check_error (cufftResult status, const char *function_name)
 Checks the status of a cuFFT command and, in case of a failure, converts it to a C++ exception.
 
template<typename precision_type , typename index >
void heffte::cuda::convert (cudaStream_t stream, index num_entries, precision_type const source[], std::complex< precision_type > destination[])
 Convert real numbers to complex when both are located on the GPU device. More...
 
template<typename precision_type , typename index >
void heffte::cuda::convert (cudaStream_t stream, index num_entries, std::complex< precision_type > const source[], precision_type destination[])
 Convert complex numbers to real when both are located on the GPU device. More...
 
template<typename scalar_type , typename index >
void heffte::cuda::scale_data (cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
 Scales real data (double or float) by the scaling factor.
 
template<typename scalar_type , typename index >
void heffte::cuda::direct_pack (cudaStream_t stream, index nfast, index nmid, index nslow, index line_stride, index plane_stide, scalar_type const source[], scalar_type destination[])
 Performs a direct-pack operation for data sitting on the GPU device. More...
 
template<typename scalar_type , typename index >
void heffte::cuda::direct_unpack (cudaStream_t stream, index nfast, index nmid, index nslow, index line_stride, index plane_stide, scalar_type const source[], scalar_type destination[])
 Performs a direct-unpack operation for data sitting on the GPU device. More...
 
template<typename scalar_type , typename index >
void heffte::cuda::transpose_unpack (cudaStream_t stream, index nfast, index nmid, index nslow, index line_stride, index plane_stide, index buff_line_stride, index buff_plane_stride, int map0, int map1, int map2, scalar_type const source[], scalar_type destination[])
 Performs a transpose-unpack operation for data sitting on the GPU device. More...
 
template<typename scalar_type , typename index >
void heffte::data_scaling::apply (cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
 Multiplies each of the num_entries values in data by the scale_factor.
 
template<typename precision_type , typename index >
void heffte::data_scaling::apply (cudaStream_t stream, index num_entries, std::complex< precision_type > *data, double scale_factor)
 Complex by real scaling.
 
template<>
std::string heffte::backend::name< cufft > ()
 Returns the human readable name of the cuFFT backend.
 
template<>
std::string heffte::backend::name< cufft_cos > ()
 Returns the human readable name of the cuFFT Cosine Transform backend.
 
template<>
std::string heffte::backend::name< cufft_sin > ()
 Returns the human readable name of the cuFFT Sine Transform backend.
 

Detailed Description

Wrappers and template specializations related to the cuFFT backend. Requires CMake option:

-D Heffte_ENABLE_CUDA=ON

In addition to the cuFFT wrappers, this also includes a series of kernels for packing/unpacking/scaling the data, as well as a simple container that wraps around CUDA arrays for RAII-style resource management.

Function Documentation

◆ convert() [1/2]

template<typename precision_type , typename index >
void heffte::cuda::convert ( cudaStream_t  stream,
index  num_entries,
precision_type const  source[],
std::complex< precision_type >  destination[] 
)

Convert real numbers to complex when both are located on the GPU device.

Launches a CUDA kernel.

◆ convert() [2/2]

template<typename precision_type , typename index >
void heffte::cuda::convert ( cudaStream_t  stream,
index  num_entries,
std::complex< precision_type > const  source[],
precision_type  destination[] 
)

Convert complex numbers to real when both are located on the GPU device.

Launches a CUDA kernel.

◆ direct_pack()

template<typename scalar_type , typename index >
void heffte::cuda::direct_pack ( cudaStream_t  stream,
index  nfast,
index  nmid,
index  nslow,
index  line_stride,
index  plane_stide,
scalar_type const  source[],
scalar_type  destination[] 
)

Performs a direct-pack operation for data sitting on the GPU device.

Launches a CUDA kernel.

◆ direct_unpack()

template<typename scalar_type , typename index >
void heffte::cuda::direct_unpack ( cudaStream_t  stream,
index  nfast,
index  nmid,
index  nslow,
index  line_stride,
index  plane_stide,
scalar_type const  source[],
scalar_type  destination[] 
)

Performs a direct-unpack operation for data sitting on the GPU device.

Launches a CUDA kernel.

◆ transpose_unpack()

template<typename scalar_type , typename index >
void heffte::cuda::transpose_unpack ( cudaStream_t  stream,
index  nfast,
index  nmid,
index  nslow,
index  line_stride,
index  plane_stide,
index  buff_line_stride,
index  buff_plane_stride,
int  map0,
int  map1,
int  map2,
scalar_type const  source[],
scalar_type  destination[] 
)

Performs a transpose-unpack operation for data sitting on the GPU device.

Launches a CUDA kernel.