7 #ifndef HEFFTE_GEOMETRY_H
8 #define HEFFTE_GEOMETRY_H
12 #include "heffte_utils.h"
66 template<
typename index =
int>
69 box3d(std::array<index, 3> clow, std::array<index, 3> chigh) :
73 box3d(std::array<index, 3> clow, std::array<index, 3> chigh, std::array<int, 3> corder) :
77 box3d(std::array<index, 2> clow, std::array<index, 2> chigh) :
78 box3d(std::array<index, 3>{clow[0], clow[1], 0}, std::array<index, 3>{chigh[0], chigh[1], 0}){}
80 box3d(std::array<index, 2> clow, std::array<index, 2> chigh, std::array<int, 2> corder) :
81 box3d(std::array<index, 3>{clow[0], clow[1], 0}, std::array<index, 3>{chigh[0], chigh[1], 0},
82 std::array<index, 3>{corder[0], corder[1], 2}){}
87 (
static_cast<long long>(
size[0]) *
static_cast<long long>(
size[1]) *
static_cast<long long>(
size[2])); }
96 if (
empty())
return box3d({0, 0, 0}, {-1, -1, -1});
108 return not (*
this != other);
112 for(
int i=0; i<3; i++)
113 if (
low[i] != other.
low[i] or
high[i] != other.
high[i])
return true;
125 std::array<index, 3>
const low;
127 std::array<index, 3>
const high;
129 std::array<index, 3>
const size;
138 template<
typename index =
int>
145 template<
typename index>
147 for(
int i=0; i<3; i++)
148 os << box.
low[i] <<
" " << box.
high[i] <<
" (" << box.
size[i] <<
")\n";
149 os <<
"(" << box.
order[0] <<
"," << box.
order[1] <<
"," << box.
order[2] <<
")\n";
158 template<
typename index>
161 if (dimension == box.
order[1])
return box.
osize(0);
168 template<
typename index>
170 if (dimension == box.
order[0])
return 1;
171 if (dimension == box.
order[1])
return box.
osize(0);
191 map = std::vector<int>(all_ranks, -1);
192 for(
int i=0; i<sub_ranks; i++)
map[i] = i;
196 int subcomm_rank = (subcomm == MPI_COMM_NULL) ? -1 :
mpi::comm_rank(subcomm);
198 MPI_Allgather(&subcomm_rank, 1, MPI_INT,
map.data(), 1, MPI_INT, global);
212 template<
typename index =
int>
215 std::vector<box3d<index>>
in;
217 std::vector<box3d<index>>
out;
228 template<
typename index>
230 std::array<index, 3> low = boxes[0].low;
231 std::array<index, 3> high = boxes[0].high;
233 for(index i=0; i<3; i++)
234 low[i] = std::min(low[i], b.low[i]);
235 for(index i=0; i<3; i++)
236 high[i] = std::max(high[i], b.high[i]);
245 template<
typename index>
247 if (shape0.size() != shape1.size())
return false;
248 for(
size_t i=0; i<shape0.size(); i++)
249 if (shape0[i] != shape1[i])
265 template<
typename index>
268 for(
auto b : boxes) wsize += b.count();
269 if (wsize < world.
count())
270 throw std::invalid_argument(
"The provided input boxes do not fill the world box!");
272 for(
size_t i=0; i<3; i++)
273 if (world.
low[i] != 0)
274 throw std::invalid_argument(
"Global box indexing must start from 0!");
276 for(
size_t i=0; i<boxes.size(); i++)
277 for(
size_t j=0; j<boxes.size(); j++)
278 if (i != j and not boxes[i].collide(boxes[j]).empty())
279 throw std::invalid_argument(
"Input boxes cannot overlap!");
302 std::vector<std::array<int, 2>> result;
303 for(
int i=1; i<=n; i++){
305 result.push_back({i, n / i});
308 result.push_back({1, 1});
321 inline int get_area(std::array<int, 2>
const &dims){
return dims[0] * dims[1] + dims[0] + dims[1]; }
338 std::array<int, 2> min_array = factors.front();
340 for(
auto f : factors){
342 if (farea < min_area){
361 template<
typename index>
363 auto make_grid = [&](std::array<int, 2>
const &grid)
364 ->std::array<int, 3>{
365 return (direction_1d == 0) ? std::array<int, 3>{1, grid[0], grid[1]} :
366 ((direction_1d == 1) ? std::array<int, 3>{grid[0], 1, grid[1]} :
367 std::array<int, 3>{grid[0], grid[1], 1});
369 std::array<int, 3> result = make_grid(candidate_grid);
371 auto valid = [&](std::array<int, 3>
const &grid)
373 for(
int i=0; i<3; i++)
if (grid[i] > world.
size[i])
return false;
376 if (valid(result))
return result;
379 auto factors =
get_factors(candidate_grid[0] * candidate_grid[1]);
381 int min_area =
get_area(factors.front());
383 for(
auto const &g : factors) min_area = std::max(min_area,
get_area(g));
385 for(
auto const &g : factors){
386 if (valid(make_grid(g)) and
get_area(g) <= min_area){
387 result = make_grid(g);
392 if (not valid(result))
throw std::runtime_error(
"Cannot split the given number of indexes into the given set of mpi-ranks. Most liklely, the number of indexes is too small compared to the number of mpi-ranks.");
408 template<
typename index>
411 auto fast = [=](index i)->index{
return world.
low[0] + i * (world.
size[0] / proc_grid[0]) + std::min(i, (world.
size[0] % proc_grid[0])); };
412 auto mid = [=](index i)->index{
return world.
low[1] + i * (world.
size[1] / proc_grid[1]) + std::min(i, (world.
size[1] % proc_grid[1])); };
413 auto slow = [=](index i)->index{
return world.
low[2] + i * (world.
size[2] / proc_grid[2]) + std::min(i, (world.
size[2] % proc_grid[2])); };
415 std::vector<box3d<index>> result;
416 result.reserve(proc_grid[0] * proc_grid[1] * proc_grid[2]);
417 for(index k = 0; k < proc_grid[2]; k++){
418 for(index j = 0; j < proc_grid[1]; j++){
419 for(index i = 0; i < proc_grid[0]; i++){
420 result.push_back(
box3d<index>({fast(i), mid(j), slow(k)}, {fast(i+1)-1, mid(j+1)-1, slow(k+1)-1}, world.
order));
427 std::vector<box3d<index>> remapped_result;
428 for(
size_t i=0; i<remap.map.size(); i++)
429 remapped_result.push_back( (remap.map[i] == -1) ?
430 box3d<index>(std::array<index, 3>{0, 0, 0}, std::array<index, 3>{-1, -1, -1}, world.
order) :
433 return remapped_result;
441 template<
typename index>
453 template<
typename index>
456 if (s.size[direction1] != world.
size[direction1] or s.size[direction2] != world.
size[direction2])
465 template<
typename index>
466 inline std::vector<box3d<index>>
reorder(std::vector<
box3d<index>>
const &shape, std::array<int, 3> order){
467 std::vector<box3d<index>> result;
468 result.reserve(shape.size());
469 for(
auto const &b : shape)
488 template<
typename index>
491 std::array<int, 3>
const order,
493 std::vector<box3d<index>> result;
494 result.reserve(new_boxes.size());
495 std::vector<bool> taken(new_boxes.size(),
false);
496 if (not remap.
empty()){
497 for(
size_t i=0; i<remap.
map.size(); i++)
498 if (remap.
map[i] == -1) taken[i] =
true;
501 for(
size_t i=0; i<new_boxes.size(); i++){
502 if (not remap.
empty() and remap.
map[i] == -1){
503 result.push_back(
box3d<index>(new_boxes[i].low, new_boxes[i].high, order));
508 int max_overlap = -1;
509 size_t max_index = new_boxes.size();
510 for(
size_t j=0; j<new_boxes.size(); j++){
511 int overlap = old_boxes[i].collide(new_boxes[j]).count();
512 if (not taken[j] and overlap > max_overlap){
513 max_overlap = overlap;
517 assert( max_index < new_boxes.size() );
518 taken[max_index] =
true;
519 result.push_back(
box3d<index>(new_boxes[max_index].low, new_boxes[max_index].high, order));
534 template<
typename index>
538 for(
auto &nbox : new_boxes)
539 for(
auto &obox : old_boxes)
540 if (not nbox.collide(obox).empty())
567 template<
typename index>
569 std::array<int, 2>
const proc_grid,
572 std::array<int, 3>
const order,
581 std::vector<box3d<index>> pencilsA =
585 std::vector<box3d<index>> pencilsB =
587 split_world(world,
make_procgrid2d(world, dimension, std::array<int, 2>{proc_grid[1], proc_grid[0]}), remap), source, order, remap);
598 template<
typename index>
600 int const dimension1,
int const dimension2,
602 std::array<int, 3>
const order,
605 assert( dimension1 != dimension2 );
606 std::vector<box3d<index>> slabs;
607 if (dimension1 == 0){
608 if (dimension2 == 1){
609 slabs =
split_world(world, {1, 1, num_slabs}, remap);
611 slabs =
split_world(world, {1, num_slabs, 1}, remap);
613 }
else if (dimension1 == 1){
614 if (dimension2 == 0){
615 slabs =
split_world(world, {1, 1, num_slabs}, remap);
617 slabs =
split_world(world, {num_slabs, 1, 1}, remap);
620 if (dimension2 == 0){
621 slabs =
split_world(world, {1, num_slabs, 1}, remap);
623 slabs =
split_world(world, {num_slabs, 1, 1}, remap);
642 template<
typename index>
644 assert(world.
count() > 0);
648 std::valarray<index> all_indexes = {world.
size[0], world.
size[1], world.
size[2]};
650 std::valarray<index> best_grid = {1, 1, num_procs};
653 auto surface = [&](std::valarray<index>
const &proc_grid)->
655 auto box_size = all_indexes / proc_grid;
656 return ( box_size * box_size.cshift(1) ).sum();
659 index best_surface = surface({1, 1, num_procs});
661 for(
int i=1; i<=num_procs; i++){
662 if (num_procs % i == 0){
663 int const remainder = num_procs / i;
664 for(
int j=1; j<=remainder; j++){
665 if (remainder % j == 0){
666 std::valarray<index> candidate_grid = {i, j, remainder / j};
667 index
const candidate_surface = surface(candidate_grid);
668 if (candidate_surface < best_surface){
669 best_surface = candidate_surface;
670 best_grid = candidate_grid;
677 assert(best_grid[0] * best_grid[1] * best_grid[2] == num_procs);
679 return {
static_cast<int>(best_grid[0]),
static_cast<int>(best_grid[1]),
static_cast<int>(best_grid[2])};
696 template<
typename index>
698 std::array<box3d<index>, 2> my_data = {my_inbox, my_outbox};
700 MPI_Allgather(&my_data, 2 *
sizeof(
box3d<index>), MPI_BYTE, all_boxes.data(), 2 *
sizeof(
box3d<index>), MPI_BYTE, comm);
702 for(
auto i = all_boxes.begin(); i < all_boxes.end(); i += 2){
703 result.
in.push_back(*i);
704 result.
out.push_back(*(i+1));
int fft1d_get_howmany(box3d< index > const box, int const dimension)
Return the number of 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:159
int fft1d_get_stride(box3d< index > const box, int const dimension)
Return the stride of the 1-D ffts contained in the box in the given dimension.
Definition: heffte_geometry.h:169
std::vector< box3d< index > > make_slabs(box3d< index > const world, int num_slabs, int const dimension1, int const dimension2, std::vector< box3d< index >> const &source, std::array< int, 3 > const order, rank_remap const &remap)
Breaks the world into a set of slabs that span the given dimensions.
Definition: heffte_geometry.h:599
bool is_pencils(box3d< index > const world, std::vector< box3d< index >> const &shape, int direction)
Returns true if the shape forms pencils in the given direction.
Definition: heffte_geometry.h:442
bool is_slab(box3d< index > const world, std::vector< box3d< index >> const &shape, int direction1, int direction2)
Returns true if the shape forms slabs in the given directions.
Definition: heffte_geometry.h:454
bool match(std::vector< box3d< index >> const &shape0, std::vector< box3d< index >> const &shape1)
Compares two vectors of boxes, returns true if all boxes match.
Definition: heffte_geometry.h:246
std::array< int, 3 > proc_setup_min_surface(box3d< index > const world, int num_procs)
Creates a grid of mpi-ranks that will minimize the area of each of the boxes.
Definition: heffte_geometry.h:643
bool world_complete(std::vector< box3d< index >> const &boxes, box3d< index > const world)
Returns true if the geometry of the world is as expected.
Definition: heffte_geometry.h:266
std::vector< box3d< index > > maximize_overlap(std::vector< box3d< index >> const &new_boxes, std::vector< box3d< index >> const &old_boxes, std::array< int, 3 > const order, rank_remap const &remap)
Shuffle the new boxes to maximize the overlap with the old boxes.
Definition: heffte_geometry.h:489
box3d< index > find_world(std::vector< box3d< index >> const &boxes)
Returns the box that encapsulates all other boxes.
Definition: heffte_geometry.h:229
std::vector< box3d< index > > reorder(std::vector< box3d< index >> const &shape, std::array< int, 3 > order)
Returns the same shape, but sets a different order for each box.
Definition: heffte_geometry.h:466
std::vector< box3d< index > > split_world(box3d< index > const world, std::array< int, 3 > const proc_grid, rank_remap const &remap=rank_remap())
Splits the world box into a set of boxes that will be assigned to a process in the process grid.
Definition: heffte_geometry.h:409
std::vector< box3d< index > > make_pencils(box3d< index > const world, std::array< int, 2 > const proc_grid, int const dimension, std::vector< box3d< index >> const &source, std::array< int, 3 > const order, rank_remap const &remap=rank_remap())
Breaks the world into a grid of pencils and orders the pencils to the ranks that will minimize commun...
Definition: heffte_geometry.h:568
long long count_connections(std::vector< box3d< index >> const &new_boxes, std::vector< box3d< index >> const &old_boxes)
Counts the number of point-to-point connections between the old and new box geometries.
Definition: heffte_geometry.h:535
direction
Indicates the direction of the FFT (internal use only).
Definition: heffte_common.h:535
std::array< int, 2 > make_procgrid(int const num_procs)
Factorize the MPI ranks into a 2D grid.
Definition: heffte_geometry.h:336
std::array< int, 3 > make_procgrid2d(box3d< index > const world, int direction_1d, std::array< int, 2 > const candidate_grid)
Factorize the MPI ranks into a 2D grid with specific constraints.
Definition: heffte_geometry.h:362
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition: heffte_geometry.h:146
int get_area(std::array< int, 2 > const &dims)
Get the surface area of a processor grid.
Definition: heffte_geometry.h:321
int comm_rank(MPI_Comm const comm)
Returns the rank of this process within the specified comm.
Definition: heffte_utils.h:78
int comm_size(MPI_Comm const comm)
Returns the size of the specified communicator.
Definition: heffte_utils.h:135
ioboxes< index > gather_boxes(box3d< index > const my_inbox, box3d< index > const my_outbox, MPI_Comm const comm)
Gather all boxes across all ranks in the comm.
Definition: heffte_geometry.h:697
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
std::vector< std::array< int, 2 > > get_factors(int const n)
fft3dmisc
Definition: heffte_geometry.h:301
A generic container that describes a 3d box of indexes.
Definition: heffte_geometry.h:67
box3d(std::array< index, 3 > clow, std::array< index, 3 > chigh)
Constructs a box from the low and high indexes, the span in each direction includes the low and high ...
Definition: heffte_geometry.h:69
box3d collide(box3d const other) const
Creates a box that holds the intersection of this box and the other.
Definition: heffte_geometry.h:89
bool operator!=(box3d const &other) const
Compares two boxes, ignoring the order, returns true if either of the box boundaries do not match.
Definition: heffte_geometry.h:111
bool operator==(box3d const &other) const
Compares two boxes, ignoring the order, returns true if all sizes and boundaries match.
Definition: heffte_geometry.h:107
bool empty() const
Returns true if the box contains no indexes.
Definition: heffte_geometry.h:84
bool is2d() const
Returns true if either dimension contains only a single index.
Definition: heffte_geometry.h:105
box3d(std::array< index, 3 > clow, std::array< index, 3 > chigh, std::array< int, 3 > corder)
Constructs a box from the low and high indexes, also sets an order.
Definition: heffte_geometry.h:73
std::array< index, 3 > const size
The number of indexes in each direction.
Definition: heffte_geometry.h:129
box3d r2c(int dimension) const
Returns the box that is reduced in the given dimension according to the real-to-complex symmetry.
Definition: heffte_geometry.h:95
long long count() const
Counts all indexes in the box, i.e., the volume.
Definition: heffte_geometry.h:86
bool ordered_same_as(box3d const &other) const
Compares the order of two boxes, ignores the dimensions, returns true if the order is the same.
Definition: heffte_geometry.h:117
std::array< index, 3 > const low
The three lowest indexes.
Definition: heffte_geometry.h:125
box3d(std::array< index, 2 > clow, std::array< index, 2 > chigh)
Constructor for the two dimensional case.
Definition: heffte_geometry.h:77
int find_order(int dir) const
Returns the effective order of the direction (dir), 0 -> fast, 1 -> mid, 2 -> slow.
Definition: heffte_geometry.h:121
std::array< index, 3 > const high
The three highest indexes.
Definition: heffte_geometry.h:127
box3d(std::array< index, 2 > clow, std::array< index, 2 > chigh, std::array< int, 2 > corder)
Constructor for the two dimensional case, order is either (0, 1) or (1, 0).
Definition: heffte_geometry.h:80
std::array< int, 3 > const order
The order of the dimensions in the k * plane_stride + j * line_stride + i indexing.
Definition: heffte_geometry.h:131
index osize(int dimension) const
Get the ordered size of the dimension, i.e., size[order[dimension]].
Definition: heffte_geometry.h:123
Pair of lists of input-output boxes as used by the heffte::fft3d.
Definition: heffte_geometry.h:213
std::vector< box3d< index > > in
Inboxes for all ranks across the comm.
Definition: heffte_geometry.h:215
std::vector< box3d< index > > out
Outboxes for all ranks across the comm.
Definition: heffte_geometry.h:217
Keeps the local rank and the map from the global rank to the sub-ranks used in the work.
Definition: heffte_geometry.h:179
int const mpi_rank
The local rank.
Definition: heffte_geometry.h:185
void set_subranks(size_t all_ranks, int sub_ranks)
Build the map when using fixed number of sub-ranks.
Definition: heffte_geometry.h:189
void set_subranks(MPI_Comm global, MPI_Comm subcomm)
Build the map when using user provided sub-communicator.
Definition: heffte_geometry.h:195
bool empty() const
Indicates whether the remap is empty.
Definition: heffte_geometry.h:203
int size_subcomm
The number of ranks in the sub-comm.
Definition: heffte_geometry.h:201
rank_remap(int rank)
Creates a new remap and initializes only the mpi_rank.
Definition: heffte_geometry.h:183
std::vector< int > map
Global to local map.
Definition: heffte_geometry.h:187
rank_remap()
Default constructor.
Definition: heffte_geometry.h:181