heffte/heffte__pack3d_8h_source.html

 /*

     -- heFFTe --

        Univ. of Tennessee, Knoxville

        @date

 */


 #ifndef HEFFTE_PACK3D_H

 #define HEFFTE_PACK3D_H


 #include "heffte_common.h"


 namespace heffte {


 /*!
  * \brief Holds the plan for a pack/unpack operation.
  *
  * Plain aggregate parameterized by the integer type used for sizes and strides.
  * No member has a default initializer, so all values must be set by whoever builds the plan.
  * The buff_line_stride, buff_plane_stride and map members are read only by the transpose unpack;
  * the stream operator prints them only when buff_line_stride is positive.
  */
 template<typename index>

 struct pack_plan_3d{
     //! \brief Number of elements in the three directions; size[0] is the length of one contiguous line.
     std::array<index, 3> size;
     //! \brief Stride of the lines, i.e., offset in the data array between the starts of consecutive lines.
     index line_stride;
     //! \brief Stride of the planes, i.e., offset in the data array between the starts of consecutive planes.
     index plane_stride;
     //! \brief Stride of the lines in the received buffer (transpose packing only).
     index buff_line_stride;
     //! \brief Stride of the planes in the received buffer (transpose packing only).
     index buff_plane_stride;
     //! \brief Maps the i,j,k indexes from input to the output (transpose packing only); the CPU transpose unpack inspects map[0] and map[1].
     std::array<int, 3> map;

 };


 /*!
  * \brief Debugging info, writes out the pack plan to a stream.
  *
  * Prints the three sizes and the two data strides, one per line. The buffer strides
  * and the index map are printed only when plan.buff_line_stride is positive.
  * The output always ends with an empty line.
  *
  * \param os is the stream to write to
  * \param plan is the plan to print
  * \returns the same stream \b os to allow chaining
  */
 template<typename index>
 inline std::ostream & operator << (std::ostream &os, pack_plan_3d<index> const &plan){
     os << "nfast = " << plan.size[0] << "\n"
        << "nmid  = " << plan.size[1] << "\n"
        << "nslow = " << plan.size[2] << "\n"
        << "line_stride = "  << plan.line_stride << "\n"
        << "plane_stride = " << plan.plane_stride << "\n";

     bool const has_buffer_strides = (plan.buff_line_stride > 0);
     if (has_buffer_strides){
         os << "buff_line_stride = " << plan.buff_line_stride << "\n"
            << "buff_plane_stride = " << plan.buff_plane_stride << "\n"
            << "map = (" << plan.map[0] << ", " << plan.map[1] << ", " << plan.map[2] << ")\n";
     }

     return os << "\n";
 }


 //! \brief The packer needs to know whether the data will be on the CPU or GPU devices; this generic template declares no members.
 template<typename backend>

 struct packer_backend{};


 // e.g., template<> struct packer_backend<cuda>{ using mode = tag::gpu; }; // a specialization can differentiate between gpu and cpu backends


 //! \brief Defines the direct packer without implementation; the specializations (e.g., for tag::cpu) provide the pack() and unpack() methods.
 template<typename mode> struct direct_packer{};


 /*!
  * \brief Simple packer that copies sub-boxes without transposing the order of the indexes.
  */
 template<> struct direct_packer<tag::cpu>{
     /*!
      * \brief Execute the planned pack operation.
      *
      * Reads plan.size[2] planes of plan.size[1] lines from \b data, each line being
      * plan.size[0] consecutive entries located with plan.plane_stride and plan.line_stride,
      * and writes the lines back-to-back into \b buffer. The first (unnamed) argument is ignored.
      */
     template<typename scalar_type, typename index>
     void pack(void*, pack_plan_3d<index> const &plan, scalar_type const data[], scalar_type buffer[]) const{
         index const line_length = plan.size[0];
         scalar_type *write_head = buffer; // next free position in the contiguous buffer
         for(index plane = 0; plane < plan.size[2]; plane++){
             for(index line = 0; line < plan.size[1]; line++){
                 index const offset = plane * plan.plane_stride + line * plan.line_stride;
                 write_head = std::copy_n(data + offset, line_length, write_head);
             }
         }
     }
     /*!
      * \brief Execute the planned unpack operation.
      *
      * Inverse of pack(): consumes consecutive lines of plan.size[0] entries from \b buffer
      * and scatters them into \b data using plan.plane_stride and plan.line_stride.
      * The first (unnamed) argument is ignored.
      */
     template<typename scalar_type, typename index>
     void unpack(void*, pack_plan_3d<index> const &plan, scalar_type const buffer[], scalar_type data[]) const{
         index const line_length = plan.size[0];
         scalar_type const *read_head = buffer; // start of the next packed line
         for(index plane = 0; plane < plan.size[2]; plane++){
             for(index line = 0; line < plan.size[1]; line++){
                 index const offset = plane * plan.plane_stride + line * plan.line_stride;
                 std::copy_n(read_head, line_length, data + offset);
                 read_head += line_length;
             }
         }
     }
 };


 //! \brief Defines the transpose packer without implementation; the specializations (e.g., for tag::cpu) provide the pack() and unpack() methods.
 template<typename mode> struct transpose_packer{};


 template<> struct transpose_packer<tag::cpu>{

     template<typename scalar_type, typename index>

     void pack(void *q, pack_plan_3d<index> const &plan, scalar_type const data[], scalar_type buffer[]) const{

         direct_packer<tag::cpu>().pack(q, plan, data, buffer); // packing is done the same way as the direct_packer

     }

     template<typename scalar_type, typename index>

     void unpack(void*, pack_plan_3d<index> const &plan, scalar_type const buffer[], scalar_type data[]) const{

         constexpr index stride = 256 / sizeof(scalar_type);

         if (plan.map[0] == 0 and plan.map[1] == 1){

             for(index i=0; i<plan.size[2]; i++)

                 for(index j=0; j<plan.size[1]; j++)

                     for(index k=0; k<plan.size[0]; k++)

                         data[i * plan.plane_stride + j * plan.line_stride + k]

                             = buffer[ i * plan.buff_plane_stride + j * plan.buff_line_stride + k ];


         }else if (plan.map[0] == 0 and plan.map[1] == 2){

             for(index bi=0; bi<plan.size[2]; bi+=stride)

                 for(index bj=0; bj<plan.size[1]; bj+=stride)

                     for(index bk=0; bk<plan.size[0]; bk+=stride)

                         for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)

                             for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)

                                 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)

                                     data[i * plan.plane_stride + j * plan.line_stride + k]

                                         = buffer[ j * plan.buff_plane_stride + i * plan.buff_line_stride + k ];


         }else if (plan.map[0] == 1 and plan.map[1] == 0){

             for(index bi=0; bi<plan.size[2]; bi+=stride)

                 for(index bj=0; bj<plan.size[1]; bj+=stride)

                     for(index bk=0; bk<plan.size[0]; bk+=stride)

                         for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)

                             for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)

                                 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)

                                     data[i * plan.plane_stride + j * plan.line_stride + k]

                                         = buffer[ i * plan.buff_plane_stride + k * plan.buff_line_stride + j ];


         }else if (plan.map[0] == 1 and plan.map[1] == 2){

             for(index bi=0; bi<plan.size[2]; bi+=stride)

                 for(index bj=0; bj<plan.size[1]; bj+=stride)

                     for(index bk=0; bk<plan.size[0]; bk+=stride)

                         for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)

                             for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)

                                 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)

                                     data[i * plan.plane_stride + j * plan.line_stride + k]

                                         = buffer[ k * plan.buff_plane_stride + i * plan.buff_line_stride + j ];


         }else if (plan.map[0] == 2 and plan.map[1] == 0){

             for(index bi=0; bi<plan.size[2]; bi+=stride)

                 for(index bj=0; bj<plan.size[1]; bj+=stride)

                     for(index bk=0; bk<plan.size[0]; bk+=stride)

                         for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)

                             for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)

                                 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)

                                     data[i * plan.plane_stride + j * plan.line_stride + k]

                                         = buffer[ j * plan.buff_plane_stride + k * plan.buff_line_stride + i ];


         }else{ // if (plan.map[0] == 2 and plan.map[1] == 1){

             for(index bi=0; bi<plan.size[2]; bi+=stride)

                 for(index bj=0; bj<plan.size[1]; bj+=stride)

                     for(index bk=0; bk<plan.size[0]; bk+=stride)

                         for(index i=bi; i<std::min(bi + stride, plan.size[2]); i++)

                             for(index j=bj; j<std::min(bj + stride, plan.size[1]); j++)

                                 for(index k=bk; k<std::min(bk + stride, plan.size[0]); k++)

                                     data[i * plan.plane_stride + j * plan.line_stride + k]

                                         = buffer[ k * plan.buff_plane_stride + j * plan.buff_line_stride + i ];


         }


     }

 };


 /*!
  * \brief Methods that multiply the entries of an array by a constant factor, executed on the CPU.
  */
 namespace data_scaling {
     /*!
      * \brief Simply multiply the \b num_entries in the \b data by the \b scale_factor.
      *
      * The first (unnamed) argument is accepted for signature compatibility and is ignored.
      * The product is formed with the double-precision factor and then stored back as scalar_type.
      */
     template<typename scalar_type, typename index>
     void apply(void*, index num_entries, scalar_type *data, double scale_factor){
         for(index i=0; i<num_entries; i++) data[i] *= scale_factor;
     }
     /*!
      * \brief Complex by real scaling, treats the complex array as 2 * num_entries real numbers.
      *
      * The reinterpretation relies on the array-oriented access guaranteed for std::complex.
      */
     template<typename precision_type, typename index>
     void apply(void *stream, index num_entries, std::complex<precision_type> *data, double scale_factor){
         // widen before doubling, 2 * num_entries may not fit in a narrow index type such as int
         apply<precision_type>(stream, 2 * static_cast<long long>(num_entries),
                               reinterpret_cast<precision_type*>(data), scale_factor);
     }
     /*!
      * \brief Overload without the leading pointer argument, forwards a null pointer in its place.
      */
     template<typename scalar_type, typename index>
     void apply(index num_entries, scalar_type *data, double scale_factor){
         apply(nullptr, num_entries, data, scale_factor);
     }
 }


 }


 #endif

heffte::operator<<
std::ostream & operator<<(std::ostream &os, box3d< index > const box)
Debugging info, writes out the box to a stream.
Definition: heffte_geometry.h:146

heffte::data_scaling::apply
void apply(cudaStream_t stream, index num_entries, scalar_type *data, double scale_factor)
Simply multiply the num_entries in the data by the scale_factor.
Definition: heffte_backend_cuda.h:796

heffte
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38

heffte::direct_packer< tag::cpu >
Simple packer that copies sub-boxes without transposing the order of the indexes.
Definition: heffte_pack3d.h:89

heffte::direct_packer< tag::cpu >::pack
void pack(void *, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition: heffte_pack3d.h:92

heffte::direct_packer< tag::cpu >::unpack
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition: heffte_pack3d.h:102

heffte::direct_packer
Defines the direct packer without implementation, use the specializations to get the CPU or GPU implementation.
Definition: heffte_pack3d.h:83

heffte::pack_plan_3d
Holds the plan for a pack/unpack operation.
Definition: heffte_pack3d.h:32

heffte::pack_plan_3d::buff_plane_stride
index buff_plane_stride
Stride of the planes in the received buffer (transpose packing only).
Definition: heffte_pack3d.h:42

heffte::pack_plan_3d::line_stride
index line_stride
Stride of the lines.
Definition: heffte_pack3d.h:36

heffte::pack_plan_3d::plane_stride
index plane_stride
Stride of the planes.
Definition: heffte_pack3d.h:38

heffte::pack_plan_3d::size
std::array< index, 3 > size
Number of elements in the three directions.
Definition: heffte_pack3d.h:34

heffte::pack_plan_3d::map
std::array< int, 3 > map
Maps the i,j,k indexes from input to the output (transpose packing only).
Definition: heffte_pack3d.h:44

heffte::pack_plan_3d::buff_line_stride
index buff_line_stride
Stride of the lines in the received buffer (transpose packing only).
Definition: heffte_pack3d.h:40

heffte::packer_backend
The packer needs to know whether the data will be on the CPU or GPU devices.
Definition: heffte_pack3d.h:75

heffte::transpose_packer< tag::cpu >::pack
void pack(void *q, pack_plan_3d< index > const &plan, scalar_type const data[], scalar_type buffer[]) const
Execute the planned pack operation.
Definition: heffte_pack3d.h:125

heffte::transpose_packer< tag::cpu >::unpack
void unpack(void *, pack_plan_3d< index > const &plan, scalar_type const buffer[], scalar_type data[]) const
Execute the planned unpack operation.
Definition: heffte_pack3d.h:135

heffte::transpose_packer
Defines the transpose packer without implementation, use the specializations to get the CPU implementation.
Definition: heffte_pack3d.h:116