Highly Efficient FFT for Exascale: HeFFTe v2.3
heffte_plan_logic.h
1 /*
2  -- heFFTe --
3  Univ. of Tennessee, Knoxville
4  @date
5 */
6 
7 #ifndef HEFFTE_PLAN_LOGIC_H
8 #define HEFFTE_PLAN_LOGIC_H
9 
10 #include "heffte_common.h"
11 
20 namespace heffte {
21 
/// @brief Defines the list of potential communication algorithms.
///
/// Every enumerator carries an explicit value, so listing them here in
/// ascending numeric order does not change any of the constants.
enum class reshape_algorithm{
    /// Using the MPI_Alltoallv options, no padding on the data (default option).
    alltoallv = 0,
    /// Using MPI_Isend and MPI_Irecv, all sending, receiving, packing, and unpacking are pipelined.
    p2p_plined = 1,
    /// Using MPI_Send and MPI_Irecv, receive is pipelined with packing and sending.
    p2p = 2,
    /// Using the MPI_Alltoall options, with padding on the data.
    alltoall = 3
};
58 
// Constructor, initializes all options with the default values for the given backend tag.
// The visible initializers turn on pencil decomposition and GPU-aware MPI, set the
// sub-rank count to -1 and the sub-communicator to MPI_COMM_NULL.
// NOTE(review): the listing jumps from line 133 to line 136, so the leading ':' of the
// initializer list and the initializers that come before use_pencils are not present
// here (presumably those for use_reorder and algorithm, which the three-argument
// constructor sets) - restore them from the original header before compiling.
133  template<typename backend_tag> plan_options(backend_tag const)
136  use_pencils(true),
137  use_gpu_aware(true),
138  num_sub(-1),
139  subcomm(MPI_COMM_NULL)
140  {}
142  plan_options(bool reorder, reshape_algorithm alg, bool pencils)
143  : use_reorder(reorder), algorithm(alg), use_pencils(pencils), use_gpu_aware(true), num_sub(-1), subcomm(MPI_COMM_NULL)
144  {}
154  void use_num_subranks(int num_subranks){ num_sub = num_subranks; }
167  void use_subcomm(MPI_Comm comm){
168  num_sub = 1;
169  subcomm = comm;
170  }
172  int get_subranks() const{ return num_sub; }
173 private:
// Number of sub-ranks; the visible constructors initialize it to -1, use_num_subranks()
// overwrites it with the caller's value and use_subcomm() sets it to 1.
174  int num_sub;
// Sub-communicator handle; MPI_COMM_NULL after construction until use_subcomm() stores one.
175  MPI_Comm subcomm;
176 };
177 
182 inline std::ostream & operator << (std::ostream &os, plan_options const options){
183  std::string algorithm = "";
184  switch (options.algorithm){
185  case reshape_algorithm::alltoallv : algorithm = "mpi:alltoallv"; break;
186  case reshape_algorithm::alltoall : algorithm = "mpi:alltoall"; break;
187  case reshape_algorithm::p2p_plined : algorithm = "mpi:point-to-point-pipelined"; break;
188  case reshape_algorithm::p2p : algorithm = "mpi:point-to-point"; break;
189  };
190  os << "options = ("
191  << ((options.use_reorder) ? "fft1d:contiguous" : "fft1d:strided") << ", "
192  << algorithm << ", "
193  << ((options.use_pencils) ? "decomposition:pencil" : "decomposition:slab") << ", "
194  << ((options.use_gpu_aware) ? "mpi:from-gpu" : "mpi:from-cpu") << ")";
195  return os;
196 }
197 
206 template<typename backend_tag, bool use_r2c = false>
208  if (std::is_same<backend_tag, backend::stock_cos>::value
209  or std::is_same<backend_tag, backend::fftw_cos>::value
210  or std::is_same<backend_tag, backend::mkl_cos>::value
211  or std::is_same<backend_tag, backend::cufft_cos>::value
212  or std::is_same<backend_tag, backend::rocfft_cos>::value
213  or std::is_same<backend_tag, backend::onemkl_cos>::value
214  or std::is_same<backend_tag, backend::stock_sin>::value
215  or std::is_same<backend_tag, backend::fftw_sin>::value
216  or std::is_same<backend_tag, backend::mkl_sin>::value
217  or std::is_same<backend_tag, backend::cufft_sin>::value
218  or std::is_same<backend_tag, backend::rocfft_sin>::value
219  or std::is_same<backend_tag, backend::onemkl_sin>::value
220  ){
221  // currently the cosine options work only with reorder.
222  opts.use_reorder = true;
223  return opts;
224  }else if (use_r2c and std::is_same<backend_tag, backend::rocfft>::value){
225  // the rocfft backend with r2c requires the reorder (problem with the strides)
226  opts.use_reorder = true;
227  return opts;
228  }else{
229  return opts; // all options are supported for this backend
230  }
231 }
232 
238  opts.use_reorder = true;
239  return opts;
240 }
241 
246 template<typename backend_tag>
248  return plan_options(backend_tag());
249 }
250 
274 template<typename index>
277  std::vector<box3d<index>> in_shape[4];
279  std::vector<box3d<index>> out_shape[4];
281  std::array<int, 3> fft_direction;
283  long long index_count;
287  int const mpi_rank;
288 };
289 
294 template<typename index>
295 inline std::array<bool, 3> pencil_directions(box3d<index> const world, std::vector<box3d<index>> const &boxes){
296  std::array<bool, 3> is_pencil = {true, true, true};
297  for(auto const &b : boxes){
298  for(int i=0; i<3; i++)
299  is_pencil[i] = is_pencil[i] and (world.size[i] == b.size[i]);
300  }
301  return is_pencil;
302 }
303 
315 template<typename index>
316 logic_plan3d<index> plan_operations(ioboxes<index> const &boxes, int r2c_direction, plan_options const options, int const mpi_rank);
317 
322 template<typename index>
323 std::vector<std::array<int, 3>> compute_grids(logic_plan3d<index> const &plan);
324 
325 }
326 
327 #endif
reshape_algorithm
Defines list of potential communication algorithms.
Definition: heffte_plan_logic.h:48
plan_options set_options(plan_options opts)
Adjusts the user provided options to what can be handled by the backend.
Definition: heffte_plan_logic.h:207
plan_options default_options()
Returns the default backend options associated with the given backend.
Definition: heffte_plan_logic.h:247
@ alltoall
Using the MPI_Alltoall options, with padding on the data.
@ alltoallv
Using the MPI_Alltoallv options, no padding on the data (default option).
@ p2p_plined
Using MPI_Isend and MPI_Irecv, all sending, receiving, packing, and unpacking are pipelined.
@ p2p
Using MPI_Send and MPI_Irecv, receive is pipelined with packing and sending.
std::vector< box3d< index > > reorder(std::vector< box3d< index >> const &shape, std::array< int, 3 > order)
Returns the same shape, but sets a different order for each box.
Definition: heffte_geometry.h:466
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition: heffte_geometry.h:146
std::array< bool, 3 > pencil_directions(box3d< index > const world, std::vector< box3d< index >> const &boxes)
Returns true for each direction where the boxes form pencils (i.e., where the size matches the world size).
Definition: heffte_plan_logic.h:295
std::vector< std::array< int, 3 > > compute_grids(logic_plan3d< index > const &plan)
Assuming the shapes in the plan form grids, reverse engineer the grid dimensions (used in the benchmark).
Definition: heffte_plan_logic.cpp:457
logic_plan3d< index > plan_operations(ioboxes< index > const &boxes, int r2c_direction, plan_options const options, int const mpi_rank)
Creates the logic plan with the provided user input.
Definition: heffte_plan_logic.cpp:421
plan_options force_reorder(plan_options opts)
Forces the reorder logic for the ROCM r2c variant.
Definition: heffte_plan_logic.h:237
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
std::array< index, 3 > const size
The number of indexes in each direction.
Definition: heffte_geometry.h:129
Defines a set of default plan options for a given backend.
Definition: heffte_common.h:642
The logic plan incorporates the order and types of operations in a transform.
Definition: heffte_plan_logic.h:275
std::vector< box3d< index > > in_shape[4]
Holds the input shapes for the 4 forward reshapes (backwards reverses in and out).
Definition: heffte_plan_logic.h:277
std::array< int, 3 > fft_direction
Direction of the 1-D FFT transforms.
Definition: heffte_plan_logic.h:281
int const mpi_rank
MPI rank used in the plan creation.
Definition: heffte_plan_logic.h:287
long long index_count
The total number of indexes in all directions.
Definition: heffte_plan_logic.h:283
plan_options const options
Extra options used in the plan creation.
Definition: heffte_plan_logic.h:285
std::vector< box3d< index > > out_shape[4]
Holds the output shapes for the 4 forward reshapes (backwards reverses in and out).
Definition: heffte_plan_logic.h:279
Defines a set of tweaks and options to use in the plan generation.
Definition: heffte_plan_logic.h:131
plan_options(backend_tag const)
Constructor, initializes all options with the default values for the given backend tag.
Definition: heffte_plan_logic.h:133
int get_subranks() const
Return the set number of sub-ranks.
Definition: heffte_plan_logic.h:172
void use_num_subranks(int num_subranks)
Defines the number of ranks to use for the internal reshapes; set to -1 to use all ranks.
Definition: heffte_plan_logic.h:154
reshape_algorithm algorithm
Defines the communication algorithm.
Definition: heffte_plan_logic.h:148
bool use_reorder
Defines whether to transpose the data on reshape or to use strided 1-D ffts.
Definition: heffte_plan_logic.h:146
bool use_gpu_aware
Defines whether to use MPI calls directly from the GPU or to move to the CPU first.
Definition: heffte_plan_logic.h:152
void use_subcomm(MPI_Comm comm)
Set sub-communicator to use in the intermediate reshape operations.
Definition: heffte_plan_logic.h:167
plan_options(bool reorder, reshape_algorithm alg, bool pencils)
Constructor, initializes each variable, primarily for internal use.
Definition: heffte_plan_logic.h:142
bool use_pencils
Defines whether to use pencil or slab data distribution in the reshape steps.
Definition: heffte_plan_logic.h:150