7 #ifndef HEFFTE_PACK3D_H
8 #define HEFFTE_PACK3D_H
10 #include "heffte_common.h"
31 template<
typename index>
44 std::array<int, 3>
map;
51 template<
typename index>
53 os <<
"nfast = " << plan.
size[0] <<
"\n";
54 os <<
"nmid = " << plan.
size[1] <<
"\n";
55 os <<
"nslow = " << plan.
size[2] <<
"\n";
61 os <<
"map = (" << plan.
map[0] <<
", " << plan.
map[1] <<
", " << plan.
map[2] <<
")\n";
74 template<
typename backend>
91 template<
typename scalar_type,
typename index>
93 scalar_type* buffer_iterator = buffer;
94 for(index slow = 0; slow < plan.
size[2]; slow++){
95 for(index mid = 0; mid < plan.
size[1]; mid++){
101 template<
typename scalar_type,
typename index>
103 for(index slow = 0; slow < plan.
size[2]; slow++){
104 for(index mid = 0; mid < plan.
size[1]; mid++){
105 std::copy_n(&buffer[(slow * plan.
size[1] + mid) * plan.
size[0]],
124 template<
typename scalar_type,
typename index>
134 template<
typename scalar_type,
typename index>
136 constexpr index stride = 256 /
sizeof(scalar_type);
137 if (plan.
map[0] == 0 and plan.
map[1] == 1){
138 for(index i=0; i<plan.
size[2]; i++)
139 for(index j=0; j<plan.
size[1]; j++)
140 for(index k=0; k<plan.
size[0]; k++)
144 }
else if (plan.
map[0] == 0 and plan.
map[1] == 2){
145 for(index bi=0; bi<plan.
size[2]; bi+=stride)
146 for(index bj=0; bj<plan.
size[1]; bj+=stride)
147 for(index bk=0; bk<plan.
size[0]; bk+=stride)
148 for(index i=bi; i<std::min(bi + stride, plan.
size[2]); i++)
149 for(index j=bj; j<std::min(bj + stride, plan.
size[1]); j++)
150 for(index k=bk; k<std::min(bk + stride, plan.
size[0]); k++)
154 }
else if (plan.
map[0] == 1 and plan.
map[1] == 0){
155 for(index bi=0; bi<plan.
size[2]; bi+=stride)
156 for(index bj=0; bj<plan.
size[1]; bj+=stride)
157 for(index bk=0; bk<plan.
size[0]; bk+=stride)
158 for(index i=bi; i<std::min(bi + stride, plan.
size[2]); i++)
159 for(index j=bj; j<std::min(bj + stride, plan.
size[1]); j++)
160 for(index k=bk; k<std::min(bk + stride, plan.
size[0]); k++)
164 }
else if (plan.
map[0] == 1 and plan.
map[1] == 2){
165 for(index bi=0; bi<plan.
size[2]; bi+=stride)
166 for(index bj=0; bj<plan.
size[1]; bj+=stride)
167 for(index bk=0; bk<plan.
size[0]; bk+=stride)
168 for(index i=bi; i<std::min(bi + stride, plan.
size[2]); i++)
169 for(index j=bj; j<std::min(bj + stride, plan.
size[1]); j++)
170 for(index k=bk; k<std::min(bk + stride, plan.
size[0]); k++)
174 }
else if (plan.
map[0] == 2 and plan.
map[1] == 0){
175 for(index bi=0; bi<plan.
size[2]; bi+=stride)
176 for(index bj=0; bj<plan.
size[1]; bj+=stride)
177 for(index bk=0; bk<plan.
size[0]; bk+=stride)
178 for(index i=bi; i<std::min(bi + stride, plan.
size[2]); i++)
179 for(index j=bj; j<std::min(bj + stride, plan.
size[1]); j++)
180 for(index k=bk; k<std::min(bk + stride, plan.
size[0]); k++)
185 for(index bi=0; bi<plan.
size[2]; bi+=stride)
186 for(index bj=0; bj<plan.
size[1]; bj+=stride)
187 for(index bk=0; bk<plan.
size[0]; bk+=stride)
188 for(index i=bi; i<std::min(bi + stride, plan.
size[2]); i++)
189 for(index j=bj; j<std::min(bj + stride, plan.
size[1]); j++)
190 for(index k=bk; k<std::min(bk + stride, plan.
size[0]); k++)
203 namespace data_scaling {
// Scales an array in place: multiplies each of the first num_entries elements
// of data by scale_factor.
//   (unnamed void*) - accepted but never read by this overload.
//   num_entries     - number of elements of data to scale.
//   data            - array modified in place.
//   scale_factor    - multiplier, always passed as double regardless of scalar_type.
// NOTE(review): the ';' directly after the opening brace is an empty statement
// and has no effect.
208 template<
typename scalar_type,
typename index>
209 void apply(
void*, index num_entries, scalar_type *data,
double scale_factor){;
// Visit the entries in increasing index order and scale each one in place.
210 for(index i=0; i<num_entries; i++) data[i] *= scale_factor;
// Overload for std::complex<precision_type> data.
// Reinterprets the complex array as an array of precision_type values that is
// twice as long (2*num_entries) and forwards it, together with stream and
// scale_factor, to an apply overload instantiated with precision_type as the
// scalar type. Because the count is doubled, the scaling reaches both the real
// and the imaginary part of every complex entry.
// NOTE(review): presumably the call resolves to the generic void* overload of
// apply in this namespace - confirm against the other overloads in scope.
221 template<
typename precision_type,
typename index>
222 void apply(
void *stream, index num_entries, std::complex<precision_type> *data,
double scale_factor){
223 apply<precision_type>(stream, 2*num_entries,
reinterpret_cast<precision_type*
>(data), scale_factor);
// Convenience overload that takes no stream argument.
// Forwards num_entries, data and scale_factor unchanged to the four-argument
// apply, passing nullptr in the first (stream) position.
229 template<
typename scalar_type,
typename index>
230 void apply(index num_entries, scalar_type *data,
double scale_factor){
231 apply(
nullptr, num_entries, data, scale_factor);
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition: heffte_geometry.h:146
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38
Simple packer that copies sub-boxes without transposing the order of the indexes.
Definition: heffte_pack3d.h:89
void pack(void *, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition: heffte_pack3d.h:92
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition: heffte_pack3d.h:102
Defines the direct packer without implementation; use the specializations to get the CPU or GPU implementation.
Definition: heffte_pack3d.h:83
Holds the plan for a pack/unpack operation.
Definition: heffte_pack3d.h:32
index buff_plane_stride
Stride of the planes in the received buffer (transpose packing only).
Definition: heffte_pack3d.h:42
index line_stride
Stride of the lines.
Definition: heffte_pack3d.h:36
index plane_stride
Stride of the planes.
Definition: heffte_pack3d.h:38
std::array< index, 3 > size
Number of elements in the three directions.
Definition: heffte_pack3d.h:34
std::array< int, 3 > map
Maps the i,j,k indexes from input to the output (transpose packing only).
Definition: heffte_pack3d.h:44
index buff_line_stride
Stride of the lines in the received buffer (transpose packing only).
Definition: heffte_pack3d.h:40
The packer needs to know whether the data will be on the CPU or GPU devices.
Definition: heffte_pack3d.h:75
void pack(void *q, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition: heffte_pack3d.h:125
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition: heffte_pack3d.h:135
Defines the transpose packer without implementation; use the specializations to get the CPU implementation.
Definition: heffte_pack3d.h:116