Highly Efficient FFT for Exascale: HeFFTe v2.3
All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Modules Pages
heffte_backend_data_transfer.h
1 /*
2  -- heFFTe --
3  Univ. of Tennessee, Knoxville
4  @date
5 */
6 
7 #ifndef HEFFTE_BACKEND_DATA_TRANSFER_H
8 #define HEFFTE_BACKEND_DATA_TRANSFER_H
9 
10 #ifdef Heffte_ENABLE_GPU
11 
12 namespace heffte{
13 
14 namespace gpu {
15 
20  template<typename scalar_type>
21  using vector = device_vector<scalar_type, heffte::backend::data_manipulator<heffte::tag::gpu>>;
22 
29  template<typename manipulator>
30  struct device_transfer{
32  using backend_device = typename manipulator::backend_device;
34  template<typename scalar_type>
35  static void copy(device_vector<scalar_type, backend_device> const &source, scalar_type destination[]){
36  manipulator::copy_device_to_host(source.stream(), source.data(), source.size(), destination);
37  }
39  template<typename scalar_type>
40  static void copy(scalar_type const source[], device_vector<scalar_type, backend_device> &destination){
41  manipulator::copy_device_to_device(destination.stream(), source, destination.size(), destination.data());
42  }
43 
54  template<typename scalar_type>
55  static device_vector<scalar_type, manipulator> load(typename backend_device::stream_type stream, scalar_type const *cpu_source, size_t num_entries){
56  device_vector<scalar_type, manipulator> result(stream, num_entries);
57  manipulator::copy_host_to_device(stream, cpu_source, num_entries, result.data());
58  return result;
59  }
61  template<typename scalar_type>
62  static void load(typename backend_device::stream_type stream, scalar_type const *cpu_source, size_t num_entries, scalar_type *gpu_destination){
63  manipulator::copy_host_to_device(stream, cpu_source, num_entries, gpu_destination);
64  }
66  template<typename scalar_type>
67  static void load(void*, scalar_type const *cpu_source, size_t num_entries, scalar_type *gpu_destination){
68  std::copy_n(cpu_source, num_entries, gpu_destination);
69  }
71  template<typename scalar_type>
72  static device_vector<scalar_type, manipulator> load(scalar_type const *cpu_source, size_t num_entries){
73  return load(backend_device().stream(), cpu_source, num_entries);
74  }
76  template<typename scalar_type>
77  static device_vector<scalar_type, manipulator> load(void*, scalar_type const*, size_t){
78  return device_vector<scalar_type, manipulator>();
79  }
80 
82  template<typename scalar_type>
83  static device_vector<scalar_type, manipulator> load(std::vector<scalar_type> const &cpu_source){
84  return load(cpu_source.data(), cpu_source.size());
85  }
87  template<typename scalar_type>
88  static void load(typename backend_device::stream_type stream, std::vector<scalar_type> const &cpu_source, scalar_type gpu_destination[]){
89  manipulator::copy_host_to_device(stream, cpu_source.data(), cpu_source.size(), gpu_destination);
90  }
92  template<typename scalar_type>
93  static void load(std::vector<scalar_type> const &cpu_source, scalar_type gpu_destination[]){
94  load(backend_device().stream(), cpu_source, gpu_destination);
95  }
97  template<typename scalar_type>
98  static void load(void*, std::vector<scalar_type> const&, scalar_type[]){}
99 
107  template<typename scalar_type>
108  static void load(std::vector<scalar_type> const &a, std::vector<scalar_type> &b){ b = a; }
116  template<typename scalar_type>
117  static std::vector<scalar_type> unload(std::vector<scalar_type> const &a){ return a; }
119  template<typename scalar_type>
120  static std::vector<scalar_type> unload(void*, scalar_type const[], size_t){
121  return std::vector<scalar_type>();
122  }
123 
125  template<typename scalar_type>
126  static std::vector<scalar_type> unload(typename backend_device::stream_type stream, scalar_type const gpu_source[], size_t num_entries){
127  std::vector<scalar_type> result(num_entries);
128  manipulator::copy_device_to_host(stream, gpu_source, num_entries, result.data());
129  return result;
130  }
132  template<typename scalar_type>
133  static std::vector<scalar_type> unload(scalar_type const gpu_source[], size_t num_entries){
134  return unload(backend_device().stream(), gpu_source, num_entries);
135  }
136 
145  template<typename scalar_type>
146  static void unload(device_vector<scalar_type, manipulator> const &gpu_source, scalar_type *cpu_result){
147  manipulator::copy_device_to_host(gpu_source.device_stream(), gpu_source.data(), gpu_source.size(), cpu_result);
148  }
150  template<typename scalar_type>
151  static void unload(typename backend_device::stream_type stream, scalar_type const *gpu_source, size_t num_entries, scalar_type *cpu_result){
152  manipulator::copy_device_to_host(stream, gpu_source, num_entries, cpu_result);
153  }
155  template<typename scalar_type>
156  static void unload(void*, scalar_type const *gpu_source, size_t num_entries, scalar_type *cpu_result){
157  std::copy_n(gpu_source, num_entries, cpu_result); // void* stream indicates CPU backend
158  }
159 
161  template<typename scalar_type>
162  static std::vector<scalar_type> unload(device_vector<scalar_type, manipulator> const &gpu_source){
163  std::vector<scalar_type> result(gpu_source.size());
164  unload(gpu_source, result.data());
165  return result;
166  }
173  template<typename scalar_type>
174  static device_vector<scalar_type, manipulator> capture(scalar_type* &&raw_pointer, size_t num_entries){
175  return device_vector<scalar_type, manipulator>(std::forward<scalar_type*>(raw_pointer), num_entries);
176  }
177  };
178 
183  using transfer = device_transfer<heffte::backend::data_manipulator<heffte::tag::gpu>>;
184 
/*!
 * \brief Declaration only, the definition is not part of this header.
 *
 * NOTE(review): presumably returns the number of GPU devices visible to the process,
 * confirm against the backend implementation.
 */
int device_count();
190 
/*!
 * \brief Declaration only, the definition is not part of this header.
 *
 * NOTE(review): presumably selects the GPU device used by subsequent calls;
 * the valid range of \b active_device is likely tied to gpu::device_count(),
 * confirm against the backend implementation.
 *
 * \param active_device is the index of the device
 */
void device_set(int active_device);
198 
/*!
 * \brief Declaration only, the definition is not part of this header.
 *
 * NOTE(review): going by the name, waits for the work queued on the default
 * device stream to complete, confirm against the backend implementation.
 */
void synchronize_default_stream();
204 
205 }
206 
211 template<typename scalar_type>
212 gpu::vector<scalar_type> make_buffer_container(typename gpu::vector<scalar_type>::stream_type stream, size_t size){
213  return gpu::vector<scalar_type>(stream, size);
214 }
215 
216 }
217 
218 #endif
219 
220 #endif
std::vector< scalar_type > make_buffer_container(void *, size_t size)
Factory method to create new buffer container for the CPU backends.
Definition: heffte_common.h:527
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38