Highly Efficient FFT for Exascale: HeFFTe v2.3
heffte_stock_tree.h
1 /*
2  -- heFFTe --
3  Univ. of Tennessee, Knoxville
4  @date
5 */
6 
7 #ifndef HEFFTE_STOCK_TREE_H
8 #define HEFFTE_STOCK_TREE_H
9 
10 #include "heffte_stock_algos.h"
11 #include <iostream>
12 
14 #define HEFFTE_STOCK_RADER_MIN 100000
15 
16 namespace heffte {
17 namespace stock {
18 
20 static const std::array<size_t,200> factors { 4, 2, 3, 5, 7, 11, 13, 16, 17, 19,
21  23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
22  67, 71, 73, 79, 83, 89, 97, 101, 103, 107,
23  109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
24  167, 173, 179, 181, 191, 193, 197, 199, 211, 223,
25  227, 229, 233, 239, 241, 251, 257, 263, 269, 271,
26  277, 281, 283, 293, 307, 311, 313, 317, 331, 337,
27  347, 349, 353, 359, 367, 373, 379, 383, 389, 397,
28  401, 409, 419, 421, 431, 433, 439, 443, 449, 457,
29  461, 463, 467, 479, 487, 491, 499, 503, 509, 521,
30  523, 541, 547, 557, 563, 569, 571, 577, 587, 593,
31  599, 601, 607, 613, 617, 619, 631, 641, 643, 647,
32  653, 659, 661, 673, 677, 683, 691, 701, 709, 719,
33  727, 733, 739, 743, 751, 757, 761, 769, 773, 787,
34  797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
35  859, 863, 877, 881, 883, 887, 907, 911, 919, 929,
36  937, 941, 947, 953, 967, 971, 977, 983, 991, 997,
37  1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061,
38  1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123,
39  1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213};
40 
42 inline size_t factor(const size_t f) {
43  if(f < HEFFTE_STOCK_THRESHOLD) return f;
44  size_t k = 0;
45  // Prioritize factors in the factors array
46  for(; k < factors.size(); k++) {
47  if( f % factors[k] == 0) return factors[k];
48  }
49 
50  // If none of those work, turn to odd numbers that are greater than the last index
51  for(k = factors[k - 1]; k*k < f; k+=2) {
52  if( f % k == 0 ) return k;
53  }
54 
55  // return f if no factor was found
56  return f;
57 }
58 
60 inline std::vector<size_t> findFactorRader(size_t factor) {
61  size_t f = factor;
62  std::vector<size_t> ret {};
63  if((f % 2) == 0) {
64  while((f % 2) == 0) {
65  f /= 2;
66  }
67  ret.push_back(factor/2);
68  }
69 
70  for(size_t k = 3; k <= f; k += 2) {
71  if((f % k) == 0) {
72  while((f % k) == 0) f /= k;
73  ret.push_back(factor/k);
74  }
75  }
76  return ret;
77 }
78 
80 inline size_t modPow(size_t base, size_t pow, size_t mod) {
81  size_t ret = 1;
82  while(pow > 0) {
83  if((pow & 0x1) != 0) {
84  ret = (ret*base) % mod;
85  }
86  base = (base*base) % mod;
87  pow >>= 1;
88  }
89  return ret;
90 }
91 
93 inline size_t primeRoot(size_t p) {
94  size_t phi = p - 1;
95  std::vector<size_t> f_vec = findFactorRader(phi);
96  bool ret = false;
97  size_t curr = 1;
98  while(!ret) {
99  curr++;
100  for(auto& i : f_vec) {
101  size_t pow = modPow(curr, i, p);
102  if(pow == 1) {
103  ret = false;
104  break;
105  }
106  ret = true;
107  }
108  }
109  return curr;
110 }
111 
113 inline bool power_of(const size_t n, const size_t pow) {
114  if(n == 1) return false;
115  size_t k = n;
116  if(pow == 2) {
117  unsigned char sum = 0x1 & n;
118  for(;k;k>>=1) sum += 0x1&k;
119  return sum == 1;
120  }
121  while(!(k % pow)) k/= pow;
122  return k == 1;
123 }
124 
130 inline size_t numNodesFactorHelper(const size_t N) {
131  // Check if power
132  if(power_of(N, 4) ||
133  power_of(N, 2) ||
134  power_of(N, 3)) {
135  return N;
136  }
137  // Check if divisible by a power
138  if((N % 4) == 0) {
139  size_t k = N;
140  while ((k % 4) == 0) k /= 4;
141  return N / k;
142  }
143  if((N % 2) == 0) {
144  size_t k = N;
145  while ((k % 2) == 0) k /= 2;
146  return N / k;
147  }
148  if((N % 3) == 0) {
149  size_t k = N;
150  while ((k % 3) == 0) k /= 3;
151  return N / k;
152  }
153  // Take a factor
154  size_t k = factor(N);
155  return (k == N && k > HEFFTE_STOCK_RADER_MIN) ? N-1 : k;
156 }
157 
159 inline size_t getLeftover(const size_t N, const size_t k) {
160  return (k == N-1) ? 0 : N/k;
161 }
162 
164 inline std::pair<fft_type, size_t> fptrFactorHelper(const size_t N) {
165  if(N < HEFFTE_STOCK_THRESHOLD) {
166  return std::pair<fft_type,size_t> {fft_type::discrete, N};
167  }
168  // Check if N is a power
169  if(power_of(N, 4)) {
170  return std::pair<fft_type,size_t> {fft_type::pow4, N};
171  }
172  if(power_of(N, 2)) {
173  return std::pair<fft_type,size_t> {fft_type::pow2, N};
174  }
175  if(power_of(N, 3)) {
176  return std::pair<fft_type,size_t> {fft_type::pow3, N};
177  }
178  // Check if N is divisible by a power
179  if((N % 4) == 0) {
180  size_t ell = N;
181  while ((ell % 4) == 0) ell /= 4;
182  return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
183  }
184  if((N % 2) == 0) {
185  size_t ell = N;
186  while ((ell % 2) == 0) ell /= 2;
187  return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
188  }
189  if((N % 3) == 0) {
190  size_t ell = N;
191  while ((ell % 3) == 0) ell /= 3;
192  return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
193  }
194  // Factor N
195  size_t k = factor(N);
196  if(k == N) {
197  // Worry about whether to call Rader's algorithm
198  if(N > HEFFTE_STOCK_RADER_MIN) {
199  k = N-1;
200  return std::pair<fft_type,size_t> {fft_type::rader, k};
201  }
202  return std::pair<fft_type,size_t> {fft_type::discrete, k};
203  }
204  return std::pair<fft_type,size_t> {fft_type::composite, k};
205 }
206 
208 template<typename F, int L>
209 inline size_t init_fft_tree(biFuncNode<F,L>* sRoot, const size_t N) {
210  std::pair<fft_type, size_t> pr = fptrFactorHelper(N);
211  fft_type type = pr.first; size_t k = pr.second;
212  if(type == fft_type::rader) {
213  size_t a = primeRoot(N);
214  size_t ainv = modPow(a, N-2, N);
215  *sRoot = biFuncNode<F,L>(N, a, ainv);
216  }
217  else {
218  *sRoot = biFuncNode<F,L>(N, type);
219  }
220  if(type == fft_type::discrete ||
221  type == fft_type::pow2 ||
222  type == fft_type::pow3 ||
223  type == fft_type::pow4) {
224  return 1;
225  }
226  size_t q = getLeftover(N, k);
227  size_t l = init_fft_tree(sRoot + 1, k);
228  size_t r = (type == fft_type::rader) ? 0 : init_fft_tree(sRoot + 1 + l, q);
229  sRoot->left = 1;
230  sRoot->right = 1 + l;
231  return 1 + l + r;
232 }
233 
235 inline size_t getNumNodes(const size_t N) {
236  size_t k = numNodesFactorHelper(N);
237  size_t rem = getLeftover(N, k);
238  if(k == N) return 1;
239  size_t left_nodes = getNumNodes(k);
240  size_t right_nodes = (rem == 0) ? 0 : getNumNodes(rem);
241  return 1 + left_nodes + right_nodes;
242 }
243 
244 }
245 }
246 
247 #endif // END HEFFTE_STOCK_TREE_H
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38