7 #ifndef HEFFTE_STOCK_TREE_H
8 #define HEFFTE_STOCK_TREE_H
10 #include "heffte_stock_algos.h"
14 #define HEFFTE_STOCK_RADER_MIN 100000
20 static const std::array<size_t,200> factors { 4, 2, 3, 5, 7, 11, 13, 16, 17, 19,
21 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
22 67, 71, 73, 79, 83, 89, 97, 101, 103, 107,
23 109, 113, 127, 131, 137, 139, 149, 151, 157, 163,
24 167, 173, 179, 181, 191, 193, 197, 199, 211, 223,
25 227, 229, 233, 239, 241, 251, 257, 263, 269, 271,
26 277, 281, 283, 293, 307, 311, 313, 317, 331, 337,
27 347, 349, 353, 359, 367, 373, 379, 383, 389, 397,
28 401, 409, 419, 421, 431, 433, 439, 443, 449, 457,
29 461, 463, 467, 479, 487, 491, 499, 503, 509, 521,
30 523, 541, 547, 557, 563, 569, 571, 577, 587, 593,
31 599, 601, 607, 613, 617, 619, 631, 641, 643, 647,
32 653, 659, 661, 673, 677, 683, 691, 701, 709, 719,
33 727, 733, 739, 743, 751, 757, 761, 769, 773, 787,
34 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
35 859, 863, 877, 881, 883, 887, 907, 911, 919, 929,
36 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,
37 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061,
38 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123,
39 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213};
42 inline size_t factor(
const size_t f) {
43 if(f < HEFFTE_STOCK_THRESHOLD)
return f;
46 for(; k < factors.size(); k++) {
47 if( f % factors[k] == 0)
return factors[k];
51 for(k = factors[k - 1]; k*k < f; k+=2) {
52 if( f % k == 0 )
return k;
60 inline std::vector<size_t> findFactorRader(
size_t factor) {
62 std::vector<size_t> ret {};
67 ret.push_back(factor/2);
70 for(
size_t k = 3; k <= f; k += 2) {
72 while((f % k) == 0) f /= k;
73 ret.push_back(factor/k);
80 inline size_t modPow(
size_t base,
size_t pow,
size_t mod) {
83 if((pow & 0x1) != 0) {
84 ret = (ret*base) % mod;
86 base = (base*base) % mod;
93 inline size_t primeRoot(
size_t p) {
95 std::vector<size_t> f_vec = findFactorRader(phi);
100 for(
auto& i : f_vec) {
101 size_t pow = modPow(curr, i, p);
113 inline bool power_of(
const size_t n,
const size_t pow) {
114 if(n == 1)
return false;
117 unsigned char sum = 0x1 & n;
118 for(;k;k>>=1) sum += 0x1&k;
121 while(!(k % pow)) k/= pow;
130 inline size_t numNodesFactorHelper(
const size_t N) {
140 while ((k % 4) == 0) k /= 4;
145 while ((k % 2) == 0) k /= 2;
150 while ((k % 3) == 0) k /= 3;
154 size_t k = factor(N);
155 return (k == N && k > HEFFTE_STOCK_RADER_MIN) ? N-1 : k;
159 inline size_t getLeftover(
const size_t N,
const size_t k) {
160 return (k == N-1) ? 0 : N/k;
164 inline std::pair<fft_type, size_t> fptrFactorHelper(
const size_t N) {
165 if(N < HEFFTE_STOCK_THRESHOLD) {
166 return std::pair<fft_type,size_t> {fft_type::discrete, N};
170 return std::pair<fft_type,size_t> {fft_type::pow4, N};
173 return std::pair<fft_type,size_t> {fft_type::pow2, N};
176 return std::pair<fft_type,size_t> {fft_type::pow3, N};
181 while ((ell % 4) == 0) ell /= 4;
182 return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
186 while ((ell % 2) == 0) ell /= 2;
187 return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
191 while ((ell % 3) == 0) ell /= 3;
192 return std::pair<fft_type,size_t> {fft_type::composite, N/ell};
195 size_t k = factor(N);
198 if(N > HEFFTE_STOCK_RADER_MIN) {
200 return std::pair<fft_type,size_t> {fft_type::rader, k};
202 return std::pair<fft_type,size_t> {fft_type::discrete, k};
204 return std::pair<fft_type,size_t> {fft_type::composite, k};
208 template<
typename F,
int L>
209 inline size_t init_fft_tree(biFuncNode<F,L>* sRoot,
const size_t N) {
210 std::pair<fft_type, size_t> pr = fptrFactorHelper(N);
211 fft_type type = pr.first;
size_t k = pr.second;
212 if(type == fft_type::rader) {
213 size_t a = primeRoot(N);
214 size_t ainv = modPow(a, N-2, N);
215 *sRoot = biFuncNode<F,L>(N, a, ainv);
218 *sRoot = biFuncNode<F,L>(N, type);
220 if(type == fft_type::discrete ||
221 type == fft_type::pow2 ||
222 type == fft_type::pow3 ||
223 type == fft_type::pow4) {
226 size_t q = getLeftover(N, k);
227 size_t l = init_fft_tree(sRoot + 1, k);
228 size_t r = (type == fft_type::rader) ? 0 : init_fft_tree(sRoot + 1 + l, q);
230 sRoot->right = 1 + l;
235 inline size_t getNumNodes(
const size_t N) {
236 size_t k = numNodesFactorHelper(N);
237 size_t rem = getLeftover(N, k);
239 size_t left_nodes = getNumNodes(k);
240 size_t right_nodes = (rem == 0) ? 0 : getNumNodes(rem);
241 return 1 + left_nodes + right_nodes;
Namespace containing all HeFFTe methods and classes.
Definition: heffte_backend_cuda.h:38