|
struct | is_ccomplex< cufftComplex > |
| Recognize the cuFFT single precision complex type. More...
|
|
struct | is_zcomplex< cufftDoubleComplex > |
| Recognize the cuFFT double precision complex type. More...
|
|
struct | plan_cufft |
| Wrapper around cufftHandle plans, set for float or double complex. More...
|
|
class | cufft_executor |
| Wrapper around the cuFFT API. More...
|
|
struct | plan_cufft_r2c |
| Plan for the r2c single and double precision transform. More...
|
|
class | cufft_executor_r2c |
| Wrapper to cuFFT API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::cufft > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::cufft_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::cufft_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | direct_packer< tag::gpu > |
| Simple packer that copies sub-boxes without transposing the order of the indexes. More...
|
|
struct | transpose_packer< tag::gpu > |
| GPU version of the transpose packer. More...
|
|
struct | default_plan_options< backend::cufft > |
| Sets the default options for the cufft backend. More...
|
|
struct | default_plan_options< backend::cufft_cos > |
| Sets the default options for the cufft backend. More...
|
|
struct | default_plan_options< backend::cufft_sin > |
| Sets the default options for the cufft backend. More...
|
|
struct | is_ccomplex< fftwf_complex > |
| Recognize the FFTW single precision complex type. More...
|
|
struct | is_zcomplex< fftw_complex > |
| Recognize the FFTW double precision complex type. More...
|
|
struct | plan_fftw |
| Base plan for fftw, using only the specialization for float and double complex. More...
|
|
struct | plan_fftw< std::complex< float >, dir > |
| Plan for the single precision complex transform. More...
|
|
struct | plan_fftw< std::complex< double >, dir > |
| Specialization for double complex. More...
|
|
class | fftw_executor |
| Wrapper around the FFTW3 API. More...
|
|
struct | plan_fftw< float, dir > |
| Specialization for r2c single precision. More...
|
|
struct | plan_fftw< double, dir > |
| Specialization for r2c double precision. More...
|
|
class | fftw_executor_r2c |
| Wrapper to fftw3 API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::fftw > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::fftw_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::fftw_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | default_plan_options< backend::fftw > |
| Sets the default options for the fftw backend. More...
|
|
struct | default_plan_options< backend::fftw_cos > |
| Sets the default options for the fftw backend. More...
|
|
struct | default_plan_options< backend::fftw_sin > |
| Sets the default options for the fftw backend. More...
|
|
struct | is_ccomplex< float _Complex > |
| Recognize the MKL single precision complex type. More...
|
|
struct | is_zcomplex< double _Complex > |
| Recognize the MKL double precision complex type. More...
|
|
struct | plan_mkl |
| Base plan for backend::mkl, works only for float and double complex. More...
|
|
class | mkl_executor |
| Wrapper around the MKL API. More...
|
|
struct | plan_mkl_r2c |
| Unlike the C2C plan, the R2C plan is non-symmetric and requires that the direction be built into the plan. More...
|
|
class | mkl_executor_r2c |
| Wrapper to mkl API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::mkl > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::mkl_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::mkl_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | default_plan_options< backend::mkl > |
| Sets the default options for the mkl backend. More...
|
|
struct | default_plan_options< backend::mkl_cos > |
| Sets the default options for the mkl backend. More...
|
|
struct | default_plan_options< backend::mkl_sin > |
| Sets the default options for the mkl backend. More...
|
|
class | onemkl_executor |
| Wrapper around the oneMKL API. More...
|
|
class | onemkl_executor_r2c |
| Wrapper to oneMKL API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::onemkl > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::onemkl_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::onemkl_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | default_plan_options< backend::onemkl > |
| Sets the default options for the oneMKL backend. More...
|
|
struct | default_plan_options< backend::onemkl_cos > |
| Sets the default options for the oneMKL backend. More...
|
|
struct | default_plan_options< backend::onemkl_sin > |
| Sets the default options for the oneMKL backend. More...
|
|
struct | plan_rocfft |
| Plan for the r2c single precision transform. More...
|
|
struct | plan_rocfft< std::complex< precision_type >, dir > |
| Plan for the complex transform in the precision given by precision_type. More...
|
|
class | rocfft_executor |
| Wrapper around the rocFFT API. More...
|
|
class | rocfft_executor_r2c |
| Wrapper to rocFFT API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::rocfft > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::rocfft_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::rocfft_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | default_plan_options< backend::rocfft > |
| Sets the default options for the rocfft backend. More...
|
|
struct | default_plan_options< backend::rocfft_cos > |
| Sets the default options for the rocfft backend. More...
|
|
struct | default_plan_options< backend::rocfft_sin > |
| Sets the default options for the rocfft backend. More...
|
|
struct | is_ccomplex< stock::Complex< float, 1 > > |
| Recognize stock FFT single complex (which are std::complex) types. More...
|
|
struct | is_zcomplex< stock::Complex< double, 1 > > |
|
struct | plan_stock_fft |
| Specialization for r2c single precision. More...
|
|
struct | plan_stock_fft< std::complex< F >, dir > |
| Plan for the single precision complex transform. More...
|
|
class | stock_fft_executor |
| Wrapper around the Stock FFT API. More...
|
|
class | stock_fft_executor_r2c |
| Wrapper to stock API for real-to-complex transform with shortening of the data. More...
|
|
struct | one_dim_backend< backend::stock > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::stock_cos > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | one_dim_backend< backend::stock_sin > |
| Helper struct that defines the types and creates instances of one-dimensional executors. More...
|
|
struct | default_plan_options< backend::stock > |
| Sets the default options for the stock fft backend. More...
|
|
struct | default_plan_options< backend::stock_cos > |
| Sets the default options for the stock fft backend. More...
|
|
struct | default_plan_options< backend::stock_sin > |
| Sets the default options for the stock fft backend. More...
|
|
class | executor_base |
| Base class for all backend executors. More...
|
|
struct | one_dim_backend |
| Indicates the structure that will be used by the fft backend. More...
|
|
struct | default_plan_options |
| Defines a set of default plan options for a given backend. More...
|
|
struct | fft_output |
| Defines the relationship between pairs of input-output types in the FFT algorithms. More...
|
|
struct | fft_output< float > |
| Specialization mapping float to std::complex<float>. More...
|
|
struct | fft_output< double > |
| Specialization mapping double to std::complex<double>. More...
|
|
struct | transform_output |
| Defines the relationship between pairs of input-output types in a general transform algorithm. More...
|
|
struct | transform_output< scalar_type, backend_tag, typename std::enable_if< backend::uses_fft_types< backend_tag >::value >::type > |
| Specialization for standard FFT. More...
|
|
struct | transform_output< scalar_type, backend_tag, typename std::enable_if< not backend::uses_fft_types< backend_tag >::value >::type > |
| Specialization for Cosine Transform. More...
|
|
class | fft3d |
| Defines the plan for a 3-dimensional discrete Fourier transform performed on MPI distributed data. More...
|
|
class | fft3d_r2c |
| Similar to heffte::fft3d, but computes fewer redundant coefficients when the input is real. More...
|
|
struct | box3d |
| A generic container that describes a 3d box of indexes. More...
|
|
struct | rank_remap |
| Keeps the local rank and the map from the global rank to the sub-ranks used in the work. More...
|
|
struct | ioboxes |
| Pair of lists of input-output boxes as used by the heffte::fft3d. More...
|
|
struct | pack_plan_3d |
| Holds the plan for a pack/unpack operation. More...
|
|
struct | packer_backend |
| The packer needs to know whether the data will be on the CPU or GPU devices. More...
|
|
struct | direct_packer |
| Defines the direct packer without implementation, use the specializations to get the CPU or GPU implementation. More...
|
|
struct | direct_packer< tag::cpu > |
| Simple packer that copies sub-boxes without transposing the order of the indexes. More...
|
|
struct | transpose_packer |
| Defines the transpose packer without implementation, use the specializations to get the CPU implementation. More...
|
|
struct | transpose_packer< tag::cpu > |
| Transpose packer that packs sub-boxes without transposing, but unpacks applying a transpose operation. More...
|
|
struct | plan_options |
| Defines a set of tweaks and options to use in the plan generation. More...
|
|
struct | logic_plan3d |
| The logic plan incorporates the order and types of operations in a transform. More...
|
|
struct | cpu_cos_pre_pos_processor |
| Pre/Post processing for the Cosine transform using the CPU. More...
|
|
struct | cpu_sin_pre_pos_processor |
| Pre/Post processing for the Sine transform using the CPU. More...
|
|
struct | real2real_executor |
| Template algorithm for the Sine and Cosine transforms. More...
|
|
class | reshape3d_base |
| Base reshape interface. More...
|
|
class | reshape3d_alltoall |
| Reshape algorithm based on the MPI_Alltoall() method. More...
|
|
class | reshape3d_alltoallv |
| Reshape algorithm based on the MPI_Alltoallv() method. More...
|
|
class | reshape3d_pointtopoint |
| Reshape algorithm based on the MPI_Send() and MPI_Irecv() methods. More...
|
|
class | reshape3d_transpose |
| Special case of the reshape that does not involve MPI communication but applies a transpose instead. More...
|
|
struct | event |
| A tracing event. More...
|
|
struct | add_trace |
| hefftetrace More...
|
|
struct | is_ccomplex |
| Struct to specialize to allow HeFFTe to recognize custom single precision complex types. More...
|
|
struct | is_zcomplex |
| Struct to specialize to allow HeFFTe to recognize custom double precision complex types. More...
|
|
struct | is_ccomplex< std::complex< float > > |
| By default, HeFFTe recognizes std::complex<float>. More...
|
|
struct | is_zcomplex< std::complex< double > > |
| By default, HeFFTe recognizes std::complex<double>. More...
|
|
struct | define_standard_type |
| Struct to specialize that returns the C++ equivalent of each type. More...
|
|
struct | define_standard_type< float, void > |
| Type float is equivalent to float. More...
|
|
struct | define_standard_type< double, void > |
| Type double is equivalent to double. More...
|
|
struct | define_standard_type< scalar_type, typename std::enable_if< is_ccomplex< scalar_type >::value >::type > |
| Every type with specialization of heffte::is_ccomplex to std::true_type is equivalent to std::complex<float>. More...
|
|
struct | define_standard_type< scalar_type, typename std::enable_if< is_zcomplex< scalar_type >::value >::type > |
| Every type with specialization of heffte::is_zcomplex to std::true_type is equivalent to std::complex<double>. More...
|
|
|
void | check_error (MKL_LONG status, std::string const &function_name) |
| Checks the status of a call to the MKL backend.
|
|
template<typename scalar_type > |
std::vector< scalar_type > | make_buffer_container (void *, size_t size) |
| Factory method to create new buffer container for the CPU backends.
|
|
template<typename backend_tag > |
constexpr bool | has_executor2d () |
| Defines whether the executor has a 2D version (slabs).
|
|
template<typename backend_tag > |
constexpr bool | has_executor3d () |
| Defines whether the executor has a 3D version (single rank).
|
|
template<typename location_tag , typename index , typename scalar_type > |
void | compute_transform (typename backend::data_manipulator< location_tag >::stream_type stream, int const batch_size, scalar_type const input[], scalar_type output[], scalar_type workspace[], size_t executor_buffer_offset, size_t size_comm_buffers, std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shaper, std::array< executor_base *, 3 > const &executor, direction dir) |
|
template<typename location_tag , typename index , typename scalar_type > |
void | compute_transform (typename backend::data_manipulator< location_tag >::stream_type stream, int const batch_size, scalar_type const input[], std::complex< scalar_type > output[], std::complex< scalar_type > workspace[], size_t executor_buffer_offset, size_t size_comm_buffers, std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shaper, std::array< executor_base *, 3 > const &executor, direction) |
|
template<typename location_tag , typename index , typename scalar_type > |
void | compute_transform (typename backend::data_manipulator< location_tag >::stream_type stream, int const batch_size, std::complex< scalar_type > const input[], scalar_type output[], std::complex< scalar_type > workspace[], size_t executor_buffer_offset, size_t size_comm_buffers, std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shaper, std::array< executor_base *, 3 > const &executor, direction) |
|
template<typename backend_tag , typename index > |
fft3d< backend_tag, index > | make_fft3d (box3d< index > const inbox, box3d< index > const outbox, MPI_Comm const comm, plan_options const options=default_options< backend_tag >()) |
| Factory method that auto-detects the index type based on the box.
|
|
template<typename backend_tag , typename index > |
fft3d_r2c< backend_tag, index > | make_fft3d_r2c (box3d< index > const inbox, box3d< index > const outbox, int r2c_direction, MPI_Comm const comm, plan_options const options=default_options< backend_tag >()) |
| Factory method that auto-detects the index type based on the box.
|
|
template<typename index > |
std::ostream & | operator<< (std::ostream &os, box3d< index > const box) |
| Debugging info, writes out the box to a stream.
|
|
template<typename index > |
int | fft1d_get_howmany (box3d< index > const box, int const dimension) |
| Return the number of 1-D ffts contained in the box in the given dimension.
|
|
template<typename index > |
int | fft1d_get_stride (box3d< index > const box, int const dimension) |
| Return the stride of the 1-D ffts contained in the box in the given dimension.
|
|
template<typename index > |
box3d< index > | find_world (std::vector< box3d< index >> const &boxes) |
| Returns the box that encapsulates all other boxes. More...
|
|
template<typename index > |
bool | match (std::vector< box3d< index >> const &shape0, std::vector< box3d< index >> const &shape1) |
| Compares two vectors of boxes, returns true if all boxes match.
|
|
template<typename index > |
bool | world_complete (std::vector< box3d< index >> const &boxes, box3d< index > const world) |
| Returns true if the geometry of the world is as expected. More...
|
|
std::vector< std::array< int, 2 > > | get_factors (int const n) |
| fft3dmisc More...
|
|
int | get_area (std::array< int, 2 > const &dims) |
| Get the surface area of a processor grid. More...
|
|
std::array< int, 2 > | make_procgrid (int const num_procs) |
| Factorize the MPI ranks into a 2D grid. More...
|
|
template<typename index > |
std::array< int, 3 > | make_procgrid2d (box3d< index > const world, int direction_1d, std::array< int, 2 > const candidate_grid) |
| Factorize the MPI ranks into a 2D grid with specific constraints. More...
|
|
template<typename index > |
std::vector< box3d< index > > | split_world (box3d< index > const world, std::array< int, 3 > const proc_grid, rank_remap const &remap=rank_remap()) |
| Splits the world box into a set of boxes that will be assigned to a process in the process grid. More...
|
|
template<typename index > |
bool | is_pencils (box3d< index > const world, std::vector< box3d< index >> const &shape, int direction) |
| Returns true if the shape forms pencils in the given direction.
|
|
template<typename index > |
bool | is_slab (box3d< index > const world, std::vector< box3d< index >> const &shape, int direction1, int direction2) |
| Returns true if the shape forms slabs in the given directions.
|
|
template<typename index > |
std::vector< box3d< index > > | reorder (std::vector< box3d< index >> const &shape, std::array< int, 3 > order) |
| Returns the same shape, but sets a different order for each box.
|
|
template<typename index > |
std::vector< box3d< index > > | maximize_overlap (std::vector< box3d< index >> const &new_boxes, std::vector< box3d< index >> const &old_boxes, std::array< int, 3 > const order, rank_remap const &remap) |
| Shuffle the new boxes to maximize the overlap with the old boxes. More...
|
|
template<typename index > |
long long | count_connections (std::vector< box3d< index >> const &new_boxes, std::vector< box3d< index >> const &old_boxes) |
| Counts the number of point-to-point connections between the old and new box geometries. More...
|
|
template<typename index > |
std::vector< box3d< index > > | make_pencils (box3d< index > const world, std::array< int, 2 > const proc_grid, int const dimension, std::vector< box3d< index >> const &source, std::array< int, 3 > const order, rank_remap const &remap=rank_remap()) |
| Breaks the world into a grid of pencils and orders the pencils to the ranks that will minimize communication. More...
|
|
template<typename index > |
std::vector< box3d< index > > | make_slabs (box3d< index > const world, int num_slabs, int const dimension1, int const dimension2, std::vector< box3d< index >> const &source, std::array< int, 3 > const order, rank_remap const &remap) |
| Breaks the world into a set of slabs that span the given dimensions. More...
|
|
template<typename index > |
std::array< int, 3 > | proc_setup_min_surface (box3d< index > const world, int num_procs) |
| Creates a grid of mpi-ranks that will minimize the area of each of the boxes. More...
|
|
template<typename index > |
std::ostream & | operator<< (std::ostream &os, pack_plan_3d< index > const &plan) |
| Writes a plan to the stream, useful for debugging.
|
|
std::ostream & | operator<< (std::ostream &os, plan_options const options) |
| Simple I/O for the plan options struct.
|
|
template<typename backend_tag , bool use_r2c = false> |
plan_options | set_options (plan_options opts) |
| Adjusts the user provided options to what can be handled by the backend. More...
|
|
plan_options | force_reorder (plan_options opts) |
| Forces the reorder logic for the ROCM r2c variant.
|
|
template<typename backend_tag > |
plan_options | default_options () |
| Returns the default backend options associated with the given backend.
|
|
template<typename index > |
std::array< bool, 3 > | pencil_directions (box3d< index > const world, std::vector< box3d< index >> const &boxes) |
| Returns true for each direction where the boxes form pencils (i.e., where the size matches the world size).
|
|
template<typename index > |
logic_plan3d< index > | plan_operations (ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank) |
| Creates the logic plan with the provided user input. More...
|
|
template<typename index > |
std::vector< std::array< int, 3 > > | compute_grids (logic_plan3d< index > const &plan) |
| Assuming the shapes in the plan form grids, reverse engineer the grid dimensions (used in the benchmark).
|
|
template<typename index > |
box3d< index > | make_cos_box (box3d< index > const &box) |
| Create a box with larger dimension that will exploit the symmetry for the Sine and Cosine Transforms.
|
|
template<typename index > |
void | compute_overlap_map_transpose_pack (int me, int nprocs, box3d< index > const destination, std::vector< box3d< index >> const &boxes, std::vector< int > &proc, std::vector< int > &offset, std::vector< int > &sizes, std::vector< pack_plan_3d< index >> &plans) |
| Generates an unpack plan where the boxes and the destination do not have the same order. More...
|
|
template<typename index > |
size_t | get_workspace_size (std::array< std::unique_ptr< reshape3d_base< index >>, 4 > const &shapers) |
| Returns the maximum workspace size used by the shapers.
|
|
template<typename location_tag , template< typename device > class packer = direct_packer, typename index > |
std::unique_ptr< reshape3d_alltoall< location_tag, packer, index > > | make_reshape3d_alltoall (typename backend::device_instance< location_tag >::stream_type q, std::vector< box3d< index >> const &input_boxes, std::vector< box3d< index >> const &output_boxes, bool uses_gpu_aware, MPI_Comm const comm) |
| Factory method that does all the necessary work to establish the communication patterns. More...
|
|
template<typename location_tag , template< typename device > class packer = direct_packer, typename index > |
std::unique_ptr< reshape3d_alltoallv< location_tag, packer, index > > | make_reshape3d_alltoallv (typename backend::device_instance< location_tag >::stream_type q, std::vector< box3d< index >> const &input_boxes, std::vector< box3d< index >> const &output_boxes, bool use_gpu_aware, MPI_Comm const comm) |
| Factory method that does all the necessary work to establish the communication patterns. More...
|
|
template<typename location_tag , template< typename device > class packer = direct_packer, typename index > |
std::unique_ptr< reshape3d_pointtopoint< location_tag, packer, index > > | make_reshape3d_pointtopoint (typename backend::device_instance< location_tag >::stream_type q, std::vector< box3d< index >> const &input_boxes, std::vector< box3d< index >> const &output_boxes, reshape_algorithm algorithm, bool use_gpu_aware, MPI_Comm const comm) |
| Factory method that does all the necessary work to establish the communication patterns. More...
|
|
template<typename backend_tag , typename index > |
std::unique_ptr< reshape3d_base< index > > | make_reshape3d (typename backend::device_instance< typename backend::buffer_traits< backend_tag >::location >::stream_type stream, std::vector< box3d< index >> const &input_boxes, std::vector< box3d< index >> const &output_boxes, MPI_Comm const comm, plan_options const options) |
| Factory method to create a reshape3d instance. More...
|
|
void | init_tracing (std::string root_filename) |
| Initialize tracing and remember the root filename for output, see the Detailed Description.
|
|
void | finalize_tracing () |
| Finalize tracing and write the result to a file, see the Detailed Description.
|
|
template<class T , class U = T> |
T | c11_exchange (T &obj, U &&new_value) |
| Replace with the C++ 2014 std::exchange later.
|
|
template<typename scalar_type > |
define_standard_type< scalar_type >::type * | convert_to_standard (scalar_type input[]) |
| Converts an array of some type to an array of the C++ equivalent type.
|
|
template<typename scalar_type > |
define_standard_type< scalar_type >::type const * | convert_to_standard (scalar_type const input[]) |
| Converts a const array of some type to a const array of the C++ equivalent type.
|
|
template<typename some_class > |
int | get_last_active (std::array< std::unique_ptr< some_class >, 4 > const &shaper) |
| Return the index of the last active (non-null) unique_ptr. More...
|
|
template<typename some_class > |
int | count_active (std::array< std::unique_ptr< some_class >, 4 > const &shaper) |
| Return the number of active (non-null) unique_ptr.
|
|
template<typename some_class > |
size_t | get_max_box_size (std::array< some_class, 3 > const &executors) |
| Returns the max of the box_size() for each of the executors.
|
|
template<typename some_class > |
size_t | get_max_box_size_r2c (std::array< some_class, 3 > const &executors) |
| Returns the max of the box_size() for each of the executors.
|
|
template<typename some_class > |
size_t | get_max_work_size (std::array< some_class, 3 > const &executors) |
| Returns the max of the workspace_size() for each of the executors.
|
|
template<typename some_class_r2c , typename some_class > |
size_t | get_max_work_size (some_class_r2c const &executors_r2c, std::array< some_class, 2 > const &executors) |
| Returns the max of the workspace_size() for each of the executors.
|
|
int | direction_sign (direction dir) |
| Find the sign given a direction.
|
|
int | get_any_valid (std::array< int, 3 > current) |
| Returns one of 0, 1, or 2, chosen so that it does not match any of the current values.
|
|
template<typename index > |
bool | is_pencils (box3d< index > const world, std::vector< box3d< index >> const &shape, std::vector< int > const directions) |
| Checks whether the shape forms pencils in multiple directions simultaneously.
|
|
template<typename index > |
std::vector< box3d< index > > | apply_r2c (std::vector< box3d< index >> const &shape, int r2c_direction) |
| Applies the r2c direction reduction to the set of boxes.
|
|
template<typename index > |
bool | order_is_identical (std::vector< box3d< index >> const &shape) |
| Checks whether all boxes in the shape have the same order.
|
|
std::array< int, 3 > | new_order (std::array< int, 3 > current_order, int dimension) |
| Swaps the entries so that the dimension will come first.
|
|
template<typename index > |
std::vector< box3d< index > > | next_pencils_shape (box3d< index > const world, std::array< int, 2 > const proc_grid, int const dimension, std::vector< box3d< index >> const &source, bool const use_reorder, box3d< index > const world_out, std::vector< int > const test_directions, std::vector< box3d< index >> const &boxes_out, rank_remap const &remap) |
| Creates the next box geometry that corresponds to pencils in the given dimension. More...
|
|
template<typename index > |
std::vector< box3d< index > > | next_pencils_shape0 (box3d< index > const world, std::array< int, 2 > const proc_grid, int const dimension, int const r2c_direction, std::vector< box3d< index >> const &source, bool const use_reorder, box3d< index > const world_out, std::vector< int > const test_directions, std::vector< box3d< index >> const &boxes_out, rank_remap const &remap) |
| Similar to next_pencils_shape() but handles a special case of the r2c transformation.
|
|
template<typename index > |
logic_plan3d< index > | plan_pencil_reshapes (box3d< index > world_in, box3d< index > world_out, ioboxes< index > const &boxes, int r2c_direction, plan_options const opts, rank_remap const &remap) |
| Creates a plan of reshape operations using pencil decomposition. More...
|
|
template<typename index > |
std::vector< box3d< index > > | reorder_slabs (std::vector< box3d< index >> const &slabs, int dimension, bool use_reorder) |
| If use_reorder is false, then returns a copy of the slabs, otherwise changes the order so that dimension comes first.
|
|
template<typename index > |
logic_plan3d< index > | plan_slab_reshapes (box3d< index > world_in, box3d< index > world_out, ioboxes< index > const &boxes, int r2c_direction, plan_options const opts, rank_remap const &remap) |
| Creates a plan of reshape operations using slab decomposition.
|
|
template logic_plan3d< int > | plan_operations< int > (ioboxes< int > const &, int, plan_options const, int const) |
| Instantiate for int.
|
|
template logic_plan3d< long long > | plan_operations< long long > (ioboxes< long long > const &, int, plan_options const, int const) |
| Instantiate for long long.
|
|
template std::vector< std::array< int, 3 > > | compute_grids< int > (logic_plan3d< int > const &) |
|
template std::vector< std::array< int, 3 > > | compute_grids< long long > (logic_plan3d< long long > const &) |
|