/** Minimum degree at the top of a cascade. */
#define K_START_TILDE(x,y) (MAX(MIN((x),(y)-2),0))

/** Maximum degree at the top of a cascade. */
#define K_END_TILDE(x,y) (MIN((x),(y)-1))

/** Index of the first block of four functions at a cascade level of width y. */
#define FIRST_L(x,y) (LRINT(floor((x)/(double)(y))))

/** Index of the last block of four functions at a cascade level of width y. */
#define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)(y)))-1)

/** Maximum degree N~ handled by a cascade of transform length y. */
#define N_TILDE(y) ((y)-1)

/** Whether block x at level z for order y may use the symmetric step kernels. */
#define IS_SYMMETRIC(x,y,z) ((x) >= (((y)-1.0)/(z)))

/** Degree below which the direct (slow) algorithm is used instead of the cascade. */
#define FPT_BREAK_EVEN 4
70 double **a11,**a12,**a21,**
a22;
108 double _Complex *temp;
109 double _Complex *work;
110 double _Complex *result;
111 double _Complex *vec3;
112 double _Complex *vec4;
/**
 * Computes u[l] = a * (b * v[l] * x[l] + w[l] * y[l]) for l = 0,...,n-1.
 *
 * Elementary 2x2-step kernel of the transposed cascade summation: x,y are
 * complex input vectors, v,w real coefficient vectors, a and b scalar weights.
 * u may alias x or y element-wise in the forward direction (pointers advance
 * in lock-step).
 */
static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = 0; l < n; l++)
    *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
/* Generator for symmetry-exploiting variants of abuvxpwy: only the first n/2
 * entries of v and w are stored; the second half of the output re-reads them
 * in reverse order with signs S1 (v-term) and S2 (w-term).
 * NOTE(review): this extraction has elided lines (braces and, presumably, the
 * pointer re-positioning between the two half-loops) — verify against the
 * original file before editing the macro body. */
138 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
139 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
140 double* v, double _Complex* y, double* w, int n) \
142 const int n2 = n>>1; \
143 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
144 double *v_ptr = v, *w_ptr = w; \
145 for (l = 0; l < n2; l++) \
146 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
148 for (l = 0; l < n2; l++) \
149 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
/* Like ABUVXPWY_SYMMETRIC, but the w coefficients are derived from v via the
 * node-dependent weight (1 + xx[l]): w[l] == v[l]*(1+xx[l]). Sign S1 applies
 * to both terms of the mirrored second half.
 * NOTE(review): elided lines in this extraction — verify against original. */
155 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
156 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
157 double* v, double _Complex* y, int n, double *xx) \
159 const int n2 = n>>1; \
160 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
161 double *v_ptr = v, *xx_ptr = xx; \
162 for (l = 0; l < n2; l++, v_ptr++) \
163 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
165 for (l = 0; l < n2; l++, v_ptr--) \
166 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
/* Dual of ABUVXPWY_SYMMETRIC_1: here the v coefficients are derived from w by
 * dividing out the node weight, v[l] == w[l]/(1+xx[l]). Sign S1 applies to
 * both terms of the mirrored second half.
 * NOTE(review): elided lines in this extraction — verify against original. */
172 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
173 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
174 double _Complex* y, double* w, int n, double *xx) \
176 const int n2 = n>>1; \
177 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
178 double *w_ptr = w, *xx_ptr = xx; \
179 for (l = 0; l < n2; l++, w_ptr++) \
180 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
182 for (l = 0; l < n2; l++, w_ptr--) \
183 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
/**
 * Computes u[l] = a * (v[l] * x[l] + w[l] * y[l]) for l = 0,...,n-1.
 *
 * Forward-cascade 2x2-step kernel (the b-weight of abuvxpwy fixed to 1).
 */
static inline void auvxpwy(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = n; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
/**
 * Symmetry-exploiting auvxpwy: only the first n/2 entries of v and w are
 * stored. The first half computes u[l] = a*(v[l]*x[l] + w[l]*y[l]); the
 * second half re-reads v,w in reverse order with the w-term negated:
 * u[n2+l] = a*(v[n2-1-l]*x[n2+l] - w[n2-1-l]*y[n2+l]).
 *
 * NOTE(review): restores the inter-loop pointer re-positioning
 * (v_ptr--; w_ptr--;) that was lost in this extraction — without it the
 * reverse pass would start one element past the stored halves.
 */
static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;

  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
  /* Step back onto the last stored coefficients for the mirrored pass. */
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
}
/**
 * Symmetric auvxpwy with node-dependent weight: each stored coefficient is
 * multiplied by (1 + xx[k]) where xx runs over all n output positions.
 * First half: u[l] = a*((v[l]*(1+xx[l]))*x[l] + (w[l]*(1+xx[l]))*y[l]);
 * second half mirrors v,w in reverse order with the v-term negated.
 *
 * NOTE(review): restores the inter-loop pointer re-positioning
 * (v_ptr--; w_ptr--;) that was lost in this extraction.
 */
static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n, double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;

  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
  /* Step back onto the last stored coefficients for the mirrored pass. */
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
}
/**
 * Dual of auvxpwy_symmetric_1: the node weight is divided out instead of
 * multiplied in. First half: u[l] = a*((v[l]/(1+xx[l]))*x[l] +
 * (w[l]/(1+xx[l]))*y[l]); second half mirrors v,w in reverse order with the
 * v-term negated.
 *
 * NOTE(review): restores the inter-loop pointer re-positioning
 * (v_ptr--; w_ptr--;) that was lost in this extraction.
 */
static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n, double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;

  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
  /* Step back onto the last stored coefficients for the mirrored pass. */
  v_ptr--; w_ptr--;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
}
/* Generator for one forward cascade step: DCT-III both halves into Chebyshev
 * node space, apply the 2x2 matrix [a11 a12; a21 a22] with the given kernel
 * functions (scaled by norm and g), then DCT-II back. When g != 0 (the upper
 * branch in the elided conditional) set->z is used as scratch so a can be
 * overwritten after b is formed.
 * NOTE(review): several lines are elided in this extraction (braces, the
 * g==0 branch) — verify against the original file before editing. */
241 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
242 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
243 double *a21, double *a22, double g, int tau, fpt_set set) \
246 int length = 1<<(tau+1); \
248 double norm = 1.0/(length<<1); \
255 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
256 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
269 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
275 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
276 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
277 memcpy(b,set->z,length*sizeof(double _Complex)); \
279 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
285 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
290 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
291 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
/* One forward cascade step for the "upper" symmetric case: only a11/a12 are
 * stored; the a21/a22 rows are recovered from them via the node weights x
 * (see auvxpwy_symmetric_1). a21/a22 parameters are unused here — presumably
 * kept for signature parity with fpt_do_step; verify against caller.
 * NOTE(review): braces and the g==0 branch are elided in this extraction. */
295 static inline
void fpt_do_step_symmetric_u(
double _Complex *a,
double _Complex *b,
296 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
297 double gam,
int tau,
fpt_set set)
/* Transform length doubles with each level tau. */
300 int length = 1<<(tau+1);
302 double norm = 1.0/(length<<1);
/* To Chebyshev node space. */
311 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
312 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
325 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
331 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
332 auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
333 memcpy(b,set->z,length*
sizeof(
double _Complex));
/* Back to coefficient space. */
335 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
341 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
/* One forward cascade step for the "lower" symmetric case: only a21/a22 are
 * stored; the a11/a12 rows are recovered by dividing out the node weights x
 * (see auvxpwy_symmetric_2). a11/a12 parameters appear unused here —
 * presumably kept for signature parity; verify against caller.
 * NOTE(review): braces and the g==0 branch are elided in this extraction. */
346 static inline void fpt_do_step_symmetric_l(
double _Complex *a,
double _Complex *b,
347 double *a11,
double *a12,
double *a21,
double *a22,
double *x,
double gam,
int tau,
fpt_set set)
350 int length = 1<<(tau+1);
352 double norm = 1.0/(length<<1);
/* To Chebyshev node space. */
361 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)a,(
double*)a);
362 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)b,(
double*)b);
375 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
381 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
382 auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
383 memcpy(b,set->z,length*
sizeof(
double _Complex));
/* Back to coefficient space. */
385 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)a,(
double*)a);
391 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)b,(
double*)b);
/* Generator for one transposed cascade step: DCT-III into node space, apply
 * the transposed 2x2 matrix via the abuvxpwy-style kernels (note each output
 * mixes a column, not a row), then DCT-II back. set->z buffers the new a
 * while the old a is still needed to form b.
 * NOTE(review): braces elided in this extraction. */
396 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
397 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
398 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
401 int length = 1<<(tau+1); \
403 double norm = 1.0/(length<<1); \
406 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
407 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
410 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
411 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
412 memcpy(a,set->z,length*sizeof(double _Complex)); \
415 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
416 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
/* Transposed cascade step, "upper" symmetric case: only a11/a12 stored, the
 * missing rows reconstructed from node weights x via the *_symmetric1_*
 * kernels. Mirrors FPT_DO_STEP_TRANSPOSED with set->z buffering the new a.
 * NOTE(review): braces elided in this extraction. */
425 static inline
void fpt_do_step_t_symmetric_u(
double _Complex *a,
426 double _Complex *b,
double *a11,
double *a12,
double *x,
427 double gam,
int tau,
fpt_set set)
430 int length = 1<<(tau+1);
432 double norm = 1.0/(length<<1);
/* To Chebyshev node space. */
435 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)a,(
double*)a);
436 fftw_execute_r2r(set->plans_dct3[tau-1],(
double*)b,(
double*)b);
439 abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
440 abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
441 memcpy(a,set->z,length*
sizeof(
double _Complex));
/* Back to coefficient space. */
444 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)a,(
double*)a);
445 fftw_execute_r2r(set->plans_dct2[tau-1],(
double*)b,(
double*)b);
/* Transposed cascade step, "lower" symmetric case: only a21/a22 stored, the
 * missing rows reconstructed by dividing out node weights x via the
 * *_symmetric2_* kernels. set->z buffers the new a.
 * NOTE(review): braces elided in this extraction. */
448 static inline void fpt_do_step_t_symmetric_l(
double _Complex *a,
449 double _Complex *b,
double *a21,
double *a22,
450 double *x,
double gam,
int tau,
fpt_set set)
453 int length = 1<<(tau+1);
455 double norm = 1.0/(length<<1);
/* To Chebyshev node space. */
458 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)a,(
double*)a);
459 fftw_execute_r2r(set->
plans_dct3[tau-1],(
double*)b,(
double*)b);
462 abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
463 abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
464 memcpy(a,set->z,length*
sizeof(
double _Complex));
/* Back to coefficient space. */
467 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)a,(
double*)a);
468 fftw_execute_r2r(set->
plans_dct2[tau-1],(
double*)b,(
double*)b);
/* Evaluates the degree-k associated polynomial (three-term recurrence given
 * by alpha, beta, gam) at the `size` nodes x[i] via the Clenshaw algorithm,
 * writing results to y. The recurrence runs downward from index k.
 * NOTE(review): loop bodies/braces and some declarations (i, j, y_act) are
 * elided in this extraction — verify against the original file. */
472 static void eval_clenshaw(
const double *x,
double *y,
int size,
int k,
const double *alpha,
473 const double *beta,
const double *gam)
479 double a,b,x_val_act,a_old;
482 const double *alpha_act, *beta_act, *gamma_act;
/* One independent Clenshaw recurrence per node. */
487 for (i = 0; i < size; i++)
499 alpha_act = &(alpha[k]);
500 beta_act = &(beta[k]);
501 gamma_act = &(gam[k]);
502 for (j = k; j > 1; j--)
505 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
506 b = a_old*(*gamma_act);
/* Final combination gives the polynomial value at this node. */
511 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* Like eval_clenshaw, but evaluates two related quantities per node: y gets
 * the degree-k polynomial, z (first size1 nodes) gets an auxiliary value from
 * the same downward recurrence — both share one Clenshaw pass.
 * NOTE(review): loop bodies/braces and index declarations are elided in this
 * extraction — verify against the original file. */
518 static void eval_clenshaw2(
const double *x,
double *z,
double *y,
int size1,
int size,
int k,
const double *alpha,
519 const double *beta,
const double *gam)
525 double a,b,x_val_act,a_old;
527 double *y_act, *z_act;
528 const double *alpha_act, *beta_act, *gamma_act;
534 for (i = 0; i < size; i++)
547 alpha_act = &(alpha[k]);
548 beta_act = &(beta[k]);
549 gamma_act = &(gam[k]);
550 for (j = k; j > 1; j--)
553 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
554 b = a_old*(*gamma_act);
561 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* As eval_clenshaw2, but additionally tracks the maximum log10-magnitude of
 * the computed values and compares it against log10(threshold); the (elided)
 * return value reports whether stabilization is needed for this block.
 * NOTE(review): the threshold parameter name and the return statement are
 * elided in this extraction — verify against the original file. */
574 static int eval_clenshaw_thresh2(
const double *x,
double *z,
double *y,
int size,
int k,
575 const double *alpha,
const double *beta,
const double *gam,
const
582 double a,b,x_val_act,a_old;
584 double *y_act, *z_act;
585 const double *alpha_act, *beta_act, *gamma_act;
586 R max = -nfft_float_property(NFFT_R_MAX);
587 const R t = LOG10(FABS(threshold));
593 for (i = 0; i < size; i++)
606 alpha_act = &(alpha[k]);
607 beta_act = &(beta[k]);
608 gamma_act = &(gam[k]);
609 for (j = k; j > 1; j--)
612 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
613 b = a_old*(*gamma_act);
619 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
/* Track the largest magnitude seen to decide on stabilization. */
620 max = FMAX(max,LOG10(FABS(*y_act)));
/* Evaluates, for each of the M+1 nodes x[j], the sum over k of a[k] times the
 * k-th recurrence polynomial, via a Clenshaw-style downward summation; the
 * result is scaled by lambda (parameter elided here) into y[j]. Separate
 * branches handle N == 0 and general N.
 * NOTE(review): braces, the lambda parameter line, and several statements are
 * elided in this extraction — verify against the original file. */
631 static inline void eval_sum_clenshaw_fast(
const int N,
const int M,
632 const double _Complex *a,
const double *x,
double _Complex *y,
633 const double *alpha,
const double *beta,
const double *gam,
637 double _Complex tmp1, tmp2, tmp3;
648 for (j = 0; j <= M; j++)
/* General case: one downward Clenshaw recurrence per node. */
652 for (j = 0; j <= M; j++)
657 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
658 for (k = N-1; k > 0; k--)
661 + (alpha[k-1] * xc + beta[k-1]) * tmp1
666 y[j] = lambda * tmp1;
671 for (k = N-1; k > 0; k--)
673 tmp3 = a[k-1] + tmp2 * gam[k];
674 tmp2 *= (alpha[k] * xc + beta[k]);
682 tmp2 *= (alpha[0] * xc + beta[0]);
684 y[j] = lambda * (tmp2 + tmp1);
/* Fragment of eval_sum_clenshaw_transposed (signature head elided in this
 * extraction): transposed Clenshaw summation — accumulates contributions of
 * the y values at each node upward through the recurrence, using it1/it2 as
 * ping-pong row buffers. Verify full signature against the original file. */
715 double _Complex *y,
double _Complex *temp,
double *alpha,
double *beta,
double *gam,
719 double _Complex* it1 = temp;
720 double _Complex* it2 = y;
/* Initialize level 0 with the lambda-scaled samples. */
725 for (j = 0; j <= M; j++)
727 it2[j] = lambda * y[j];
735 for (j = 0; j <= M; j++)
738 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
/* Upward recurrence over degrees 2..N. */
742 for (k = 2; k <= N; k++)
745 for (j = 0; j <= M; j++)
749 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
/* Allocates and initializes an fpt_set for M transforms of length 2^t:
 * Chebyshev node vectors per level, work/result buffers, and the FFTW DCT-II
 * (REDFT10) and DCT-III (REDFT01) plans used by the cascade steps. Buffers
 * needed only by the fast or direct algorithm are allocated conditionally on
 * the flags.
 * NOTE(review): many lines (allocations of set itself, lengths, plan arrays,
 * braces) are elided in this extraction — verify against the original file. */
757 fpt_set fpt_init(
const int M,
const int t,
const unsigned int flags)
767 int nthreads =
X(get_num_threads)();
784 for (m = 0; m < set->
M; m++)
/* Chebyshev nodes cos((k+1/2)pi/plength) for every cascade level. */
799 for (tau = 1; tau < t+1; tau++)
803 for (k = 0; k < plength; k++)
805 set->
xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
807 plength = plength << 1;
/* Interleaved (2-stride) complex work buffers for the r2r plans. */
811 set->work = (
double _Complex*)
nfft_malloc((2*set->
N)*
sizeof(
double _Complex));
812 set->result = (
double _Complex*)
nfft_malloc((2*set->
N)*
sizeof(
double _Complex));
/* DCT-II plans (coefficients -> samples direction for the transpose). */
817 set->
kindsr[0] = FFTW_REDFT10;
818 set->
kindsr[1] = FFTW_REDFT10;
819 for (tau = 0, plength = 4; tau < set->
t; tau++, plength<<=1)
823 #pragma omp critical (nfft_omp_critical_fftw_plan)
825 fftw_plan_with_nthreads(nthreads);
828 fftw_plan_many_r2r(1, &set->
lengths[tau], 2, (
double*)set->work, NULL,
829 2, 1, (
double*)set->result, NULL, 2, 1,set->
kindsr,
837 if (!(set->
flags & FPT_NO_FAST_ALGORITHM))
/* Scratch vectors used only by the fast cascade. */
840 set->vec3 = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
841 set->vec4 = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
842 set->z = (
double _Complex*)
nfft_malloc(set->
N*
sizeof(
double _Complex));
/* DCT-III plans for the forward cascade steps. */
847 set->
kinds[0] = FFTW_REDFT01;
848 set->
kinds[1] = FFTW_REDFT01;
849 for (tau = 0, plength = 4; tau < set->
t; tau++, plength<<=1)
853 #pragma omp critical (nfft_omp_critical_fftw_plan)
855 fftw_plan_with_nthreads(nthreads);
858 fftw_plan_many_r2r(1, &set->
lengths[tau], 2, (
double*)set->work, NULL,
859 2, 1, (
double*)set->result, NULL, 2, 1, set->
kinds,
873 if (!(set->
flags & FPT_NO_DIRECT_ALGORITHM))
/* Buffers used only by the direct (slow) evaluation. */
875 set->xc_slow = (
double*)
nfft_malloc((set->
N+1)*
sizeof(double));
876 set->temp = (
double _Complex*)
nfft_malloc((set->
N+1)*
sizeof(
double _Complex));
/* Precomputes the step matrices for transform m from the three-term
 * recurrence coefficients alpha/beta/gam: for every cascade level tau and
 * block l it evaluates the associated polynomials at the Chebyshev nodes
 * (Clenshaw), and — unless FPT_NO_STABILIZATION — switches to a longer
 * stabilized step when eval_clenshaw_thresh2 reports values above threshold.
 * For the direct algorithm the recurrence coefficients are kept (by pointer
 * with FPT_PERSISTENT_DATA, otherwise copied).
 * NOTE(review): many lines (declarations, braces, else-branches, frees) are
 * elided in this extraction — verify against the original file. */
883 void fpt_precompute(
fpt_set set,
const int m,
double *alpha,
double *beta,
884 double *gam,
int k_start,
const double threshold)
907 const double *calpha;
909 const double *cgamma;
920 data = &(set->
dpt[m]);
/* Already precomputed for this m — nothing to do. */
923 if (data->
steps != NULL)
932 if (!(set->
flags & FPT_NO_FAST_ALGORITHM))
/* Boundary coefficients needed to fold the top entry into the cascade. */
939 for (tau = 2; tau <= set->
t; tau++)
942 data->
alphaN[tau-2] = alpha[1<<tau];
943 data->
betaN[tau-2] = beta[1<<tau];
944 data->
gammaN[tau-2] = gam[1<<tau];
952 N_tilde = N_TILDE(set->
N);
/* Walk all levels and blocks of the cascade. */
959 for (tau = 1; tau < set->
t; tau++)
964 firstl =
FIRST_L(k_start_tilde,plength);
966 lastl =
LAST_L(N_tilde,plength);
974 for (l = firstl; l <= lastl; l++)
976 if (set->
flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
988 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
989 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
990 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
991 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
/* Recurrence coefficients local to this block. */
997 calpha = &(alpha[plength*l+1+1]);
998 cbeta = &(beta[plength*l+1+1]);
999 cgamma = &(gam[plength*l+1+1]);
1001 if (set->
flags & FPT_NO_STABILIZATION)
1007 eval_clenshaw2(set->
xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1009 eval_clenshaw2(set->
xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
/* With stabilization: evaluate and check magnitude thresholds. */
1019 needstab = eval_clenshaw_thresh2(set->
xcvecs[tau-1], a11, a21, clength,
1020 degree-1, calpha, cbeta, cgamma, threshold);
1024 needstab = eval_clenshaw_thresh2(set->
xcvecs[tau-1], a12, a22, clength,
1025 degree, calpha, cbeta, cgamma, threshold);
/* Stable step: store the four matrix blocks as computed. */
1039 data->
steps[tau][l].a11[0] = a11;
1040 data->
steps[tau][l].a12[0] = a12;
1041 data->
steps[tau][l].a21[0] = a21;
1043 data->
steps[tau][l].g[0] = gam[plength*l+1+1];
/* Unstable step: recompute on a longer, power-of-two stabilized length. */
1049 degree_stab = degree*(2*l+1);
1050 X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1064 plength_stab = N_stab;
1066 if (set->
flags & FPT_AL_SYMMETRY)
1071 clength_1 = plength_stab;
1072 clength_2 = plength_stab;
1074 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1075 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1076 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1077 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1079 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1080 eval_clenshaw2(set->
xcvecs[t_stab-2], a11, a21, clength_1,
1081 clength_2, degree_stab-1, calpha, cbeta, cgamma);
1082 eval_clenshaw2(set->
xcvecs[t_stab-2], a12, a22, clength_1,
1083 clength_2, degree_stab+0, calpha, cbeta, cgamma);
/* Symmetric half-length storage variant. */
1087 clength = plength_stab/2;
1090 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1091 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1094 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1095 eval_clenshaw(set->
xcvecs[t_stab-2], a11, clength,
1096 degree_stab-2, calpha, cbeta, cgamma);
1097 eval_clenshaw(set->
xcvecs[t_stab-2], a12, clength,
1098 degree_stab-1, calpha, cbeta, cgamma);
1104 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1105 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength);
1106 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1107 eval_clenshaw(set->
xcvecs[t_stab-2], a21, clength,
1108 degree_stab-1,calpha, cbeta, cgamma);
1109 eval_clenshaw(set->
xcvecs[t_stab-2], a22, clength,
1110 degree_stab+0, calpha, cbeta, cgamma);
/* Non-symmetric stabilized step: full-length matrix blocks. */
1116 clength_1 = plength_stab;
1117 clength_2 = plength_stab;
1118 a11 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1119 a12 = (
double*)
nfft_malloc(
sizeof(
double)*clength_1);
1120 a21 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1121 a22 = (
double*)
nfft_malloc(
sizeof(
double)*clength_2);
1122 calpha = &(alpha[2]);
1128 eval_clenshaw2(set->
xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1129 calpha, cbeta, cgamma);
1130 eval_clenshaw2(set->
xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1131 calpha, cbeta, cgamma);
/* Record the stabilized step and its enlarged length/level. */
1134 data->
steps[tau][l].a11[0] = a11;
1135 data->
steps[tau][l].a12[0] = a12;
1136 data->
steps[tau][l].a21[0] = a21;
1139 data->
steps[tau][l].g[0] = gam[1+1];
1141 data->
steps[tau][l].
ts = t_stab;
1142 data->
steps[tau][l].
Ns = N_stab;
1146 plength = plength << 1;
1150 if (!(set->
flags & FPT_NO_DIRECT_ALGORITHM))
/* Direct algorithm keeps coefficient arrays by pointer or by copy. */
1153 if (set->
flags & FPT_PERSISTENT_DATA)
1155 data->
_alpha = (
double*) alpha;
1156 data->
_beta = (
double*) beta;
1157 data->
_gamma = (
double*) gam;
1164 memcpy(data->
_alpha,alpha,(set->
N+1)*
sizeof(
double));
1165 memcpy(data->
_beta,beta,(set->
N+1)*
sizeof(
double));
1166 memcpy(data->
_gamma,gam,(set->
N+1)*
sizeof(
double));
/* Direct (slow) polynomial transform for transform m: pads the input with
 * data->k_start zeros, evaluates the expansion via eval_sum_clenshaw_fast
 * at either the k_end+1 slow nodes (FPT_FUNCTION_VALUES) or the Chebyshev
 * nodes followed by a normalized DCT-II to Chebyshev coefficients, and copies
 * the first k_end+1 results to y.
 * NOTE(review): declarations, braces, and some call arguments are elided in
 * this extraction — verify against the original file. */
1171 void fpt_trafo_direct(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1172 const int k_end,
const unsigned int flags)
1182 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
/* Guard: this set was built without direct-algorithm buffers. */
1187 if (set->
flags & FPT_NO_DIRECT_ALGORITHM)
1192 if (flags & FPT_FUNCTION_VALUES)
1195 for (j = 0; j <= k_end; j++)
1197 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
/* Zero-pad below k_start, then copy the coefficients. */
1201 memset(set->result,0U,data->
k_start*
sizeof(
double _Complex));
1202 memcpy(&set->result[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1207 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
/* Chebyshev-coefficient output path. */
1212 memset(set->temp,0U,data->
k_start*
sizeof(
double _Complex));
1213 memcpy(&set->temp[data->
k_start],x,(k_end-data->
k_start+1)*
sizeof(
double _Complex));
1215 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->
xcvecs[tk-2],
1219 fftw_execute_r2r(set->
plans_dct2[tk-2],(
double*)set->result,
1220 (
double*)set->result);
/* DCT-II normalization. */
1222 set->result[0] *= 0.5;
1223 for (j = 0; j < Nk; j++)
1225 set->result[j] *= norm;
1228 memcpy(y,set->result,(k_end+1)*
sizeof(
double _Complex));
/* Fast polynomial transform for transform m: loads x into the interleaved
 * work buffer, folds the top coefficient in via alphaN/betaN/gammaN, then
 * runs the cascade level by level applying the precomputed 2x2 steps
 * (symmetric / stabilized variants as recorded), accumulates stabilized
 * contributions in set->result, and finally combines everything with the
 * alpha_0/beta_0/gamma_m1 boundary recurrence into y. With
 * FPT_FUNCTION_VALUES a trailing DCT-III converts to function values.
 * Falls back to fpt_trafo_direct below FPT_BREAK_EVEN.
 * NOTE(review): declarations, braces, and whole branches are elided in this
 * extraction — verify against the original file. */
1232 void fpt_trafo(
fpt_set set,
const int m,
const double _Complex *x,
double _Complex *y,
1233 const int k_end,
const unsigned int flags)
1263 int length = k_end+1;
1264 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1269 double _Complex *work_ptr;
1270 const double _Complex *x_ptr;
/* Small problems: the direct algorithm is faster. */
1273 if (k_end < FPT_BREAK_EVEN)
1276 fpt_trafo_direct(set, m, x, y, k_end, flags);
1280 X(next_power_of_2_exp)(k_end,&Nk,&tk);
1285 if (set->
flags & FPT_NO_FAST_ALGORITHM)
/* Plan for the final coefficients-to-values DCT, if requested. */
1288 if (flags & FPT_FUNCTION_VALUES)
1291 int nthreads =
X(get_num_threads)();
1292 #pragma omp critical (nfft_omp_critical_fftw_plan)
1294 fftw_plan_with_nthreads(nthreads);
1296 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1297 (
double*)set->work, NULL, 2, 1, kinds, 0U);
1304 memset(set->result,0U,2*Nk*
sizeof(
double _Complex));
/* Interleave x into work: even slots coefficients, odd slots zero. */
1309 memset(set->work,0U,2*data->
k_start*
sizeof(
double _Complex));
1311 work_ptr = &set->work[2*data->
k_start];
1314 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1316 *work_ptr++ = *x_ptr++;
1317 *work_ptr++ = K(0.0);
1321 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*
sizeof(
double _Complex));
/* Fold the topmost coefficient into the cascade input. */
1327 set->work[2*(Nk-2)] += data->
gammaN[tk-2]*x[Nk-data->
k_start];
1328 set->work[2*(Nk-1)] += data->
betaN[tk-2]*x[Nk-data->
k_start];
1329 set->work[2*(Nk-1)+1] = data->
alphaN[tk-2]*x[Nk-data->
k_start];
/* Cascade: one pass per level tau, blocks l of width plength. */
1334 for (tau = 1; tau < tk; tau++)
1337 firstl =
FIRST_L(k_start_tilde,plength);
1339 lastl =
LAST_L(k_end_tilde,plength);
1342 for (l = firstl; l <= lastl; l++)
/* Gather the two half-blocks into vec3/vec4 and zero-pad. */
1345 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*
sizeof(
double _Complex));
1346 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*
sizeof(
double _Complex));
1347 memset(&set->vec3[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1348 memset(&set->vec4[plength/2],0U,(plength/2)*
sizeof(
double _Complex));
1351 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*
sizeof(
double _Complex));
1352 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*
sizeof(
double _Complex));
1353 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*
sizeof(
double _Complex));
1356 step = &(data->
steps[tau][l]);
/* Stable step: pick symmetric kernel when allowed. */
1362 if (set->
flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1371 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1372 step->a12[0], step->a21[0], step->
a22[0], step->g[0], tau, set);
1377 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1378 step->a21[0], step->
a22[0], step->g[0], tau, set);
1381 if (step->g[0] != 0.0)
1383 for (k = 0; k < plength; k++)
1385 set->work[plength*2*l+k] += set->vec3[k];
1388 for (k = 0; k < plength; k++)
1390 set->work[plength*(2*l+1)+k] += set->vec4[k];
/* Stabilized step: operate on the enlarged length step->Ns. */
1398 plength_stab = step->
Ns;
1411 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1412 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*
sizeof(
double _Complex));
1416 if (set->
flags & FPT_AL_SYMMETRY)
1420 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1421 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1427 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1428 step->a21[0], step->
a22[0],
1429 set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1437 fpt_do_step_symmetric_l(set->vec3, set->vec4,
1438 step->a11[0], step->a12[0],
1440 step->
a22[0], set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1447 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1448 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
/* Stabilized contributions accumulate directly into result. */
1451 if (step->g[0] != 0.0)
1453 for (k = 0; k < plength_stab; k++)
1455 set->result[k] += set->vec3[k];
1459 for (k = 0; k < plength_stab; k++)
1461 set->result[Nk+k] += set->vec4[k];
1466 plength = plength<<1;
/* Merge cascade output with stabilized accumulator. */
1480 for (k = 0; k < 2*Nk; k++)
1482 set->result[k] += set->work[k];
/* Boundary recurrence combining the two halves into y. */
1487 y[0] = data->
gamma_m1*(set->result[0] + data->
beta_0*set->result[Nk] +
1488 data->
alpha_0*set->result[Nk+1]*0.5);
1489 y[1] = data->
gamma_m1*(set->result[1] + data->
beta_0*set->result[Nk+1]+
1490 data->
alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
1491 y[k_end-1] = data->
gamma_m1*(set->result[k_end-1] +
1492 data->
beta_0*set->result[Nk+k_end-1] +
1493 data->
alpha_0*set->result[Nk+k_end-2]*0.5);
1494 y[k_end] = data->
gamma_m1*(0.5*data->
alpha_0*set->result[Nk+k_end-1]);
1495 for (k = 2; k <= k_end-2; k++)
1497 y[k] = data->
gamma_m1*(set->result[k] + data->
beta_0*set->result[Nk+k] +
1498 data->
alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
/* Optional conversion to function values. */
1501 if (flags & FPT_FUNCTION_VALUES)
1504 fftw_execute_r2r(plan,(
double*)y,(
double*)y);
1506 #pragma omp critical (nfft_omp_critical_fftw_plan)
1508 fftw_destroy_plan(plan);
1509 for (k = 0; k <= k_end; k++)
/* Direct (slow) transposed polynomial transform for transform m: converts y
 * (optionally from function values; otherwise via normalized DCT-III from
 * Chebyshev coefficients) and applies the transposed Clenshaw summation,
 * writing coefficients k_start..k_end to x.
 * NOTE(review): declarations, braces, and the eval_sum_clenshaw_transposed
 * calls are elided in this extraction — verify against the original file. */
1516 void fpt_transposed_direct(
fpt_set set,
const int m,
double _Complex *x,
1517 double _Complex *y,
const int k_end,
const unsigned int flags)
1525 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
/* Guard: this set was built without direct-algorithm buffers. */
1528 if (set->
flags & FPT_NO_DIRECT_ALGORITHM)
1533 if (flags & FPT_FUNCTION_VALUES)
1535 for (j = 0; j <= k_end; j++)
1537 set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1545 sizeof(
double _Complex));
/* Chebyshev-coefficient input path: pad, normalize, DCT-III. */
1549 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
1550 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*
sizeof(
double _Complex));
1552 for (j = 0; j < Nk; j++)
1554 set->result[j] *= norm;
1557 fftw_execute_r2r(set->
plans_dct3[tk-2],(
double*)set->result,
1558 (
double*)set->result);
/* Output the coefficient range belonging to this transform. */
1564 memcpy(x,&set->temp[data->
k_start],(k_end-data->
k_start+1)*
sizeof(
double _Complex));
/* Fast transposed polynomial transform: the adjoint of fpt_trafo. Prepares
 * the work buffer from y via the boundary recurrence (gamma_m1, alpha_0,
 * beta_0), runs the cascade top-down (tau from tk-1 to 1) with the transposed
 * 2x2 steps (symmetric / stabilized variants as recorded), and extracts the
 * coefficients k_start..k_end into x, folding the top entry back via
 * alphaN/betaN/gammaN. Falls back to fpt_transposed_direct below
 * FPT_BREAK_EVEN.
 * NOTE(review): declarations, braces, and whole branches are elided in this
 * extraction — verify against the original file. */
1568 void fpt_transposed(
fpt_set set,
const int m,
double _Complex *x,
1569 double _Complex *y,
const int k_end,
const unsigned int flags)
1598 int length = k_end+1;
1599 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
/* Small problems: the direct algorithm is faster. */
1605 if (k_end < FPT_BREAK_EVEN)
1608 fpt_transposed_direct(set, m, x, y, k_end, flags);
1612 X(next_power_of_2_exp)(k_end,&Nk,&tk);
1617 if (set->
flags & FPT_NO_FAST_ALGORITHM)
/* With function values: DCT-II the input first. */
1622 if (flags & FPT_FUNCTION_VALUES)
1625 int nthreads =
X(get_num_threads)();
1626 #pragma omp critical (nfft_omp_critical_fftw_plan)
1628 fftw_plan_with_nthreads(nthreads);
1630 plan = fftw_plan_many_r2r(1, &length, 2, (
double*)set->work, NULL, 2, 1,
1631 (
double*)set->work, NULL, 2, 1, kinds, 0U);
1635 fftw_execute_r2r(plan,(
double*)y,(
double*)set->result);
1637 #pragma omp critical (nfft_omp_critical_fftw_plan)
1639 fftw_destroy_plan(plan);
1640 for (k = 0; k <= k_end; k++)
1642 set->result[k] *= 0.5;
1647 memcpy(set->result,y,(k_end+1)*
sizeof(
double _Complex));
/* Boundary recurrence (adjoint of the fpt_trafo output combination). */
1651 memset(set->work,0U,2*Nk*
sizeof(
double _Complex));
1654 for (k = 0; k <= k_end; k++)
1656 set->work[k] = data->
gamma_m1*set->result[k];
1661 data->
alpha_0*set->result[1]);
1662 for (k = 1; k < k_end; k++)
1665 data->
alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
1669 memset(&set->work[k_end],0U,(Nk-k_end)*
sizeof(
double _Complex));
1673 memcpy(set->result,set->work,2*Nk*
sizeof(
double _Complex));
/* Transposed cascade: levels from the top down. */
1677 for (tau = tk-1; tau >= 1; tau--)
1680 firstl =
FIRST_L(k_start_tilde,plength);
1682 lastl =
LAST_L(k_end_tilde,plength);
1685 for (l = firstl; l <= lastl; l++)
/* Gather full blocks for the transposed step. */
1688 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*
sizeof(
double _Complex));
1689 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*
sizeof(
double _Complex));
1691 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1692 (plength/2)*
sizeof(
double _Complex));
1695 step = &(data->
steps[tau][l]);
/* Stable transposed step. */
1700 if (set->
flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1703 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1704 step->a21[0], step->
a22[0], step->g[0], tau, set);
1709 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1710 step->a21[0], step->
a22[0], step->g[0], tau, set);
1712 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*
sizeof(
double _Complex));
1714 for (k = 0; k < plength; k++)
1716 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
/* Stabilized transposed step on the enlarged length step->Ns. */
1722 plength_stab = step->
Ns;
1725 memcpy(set->vec3,set->result,plength_stab*
sizeof(
double _Complex));
1726 memcpy(set->vec4,&(set->result[Nk]),plength_stab*
sizeof(
double _Complex));
1729 if (set->
flags & FPT_AL_SYMMETRY)
1733 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1734 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1738 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1739 set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1743 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1744 step->a21[0], step->
a22[0], set->
xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1749 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1750 step->a21[0], step->
a22[0], step->g[0], t_stab-1, set);
1753 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*
sizeof(
double _Complex));
1755 for (k = 0; k < plength; k++)
1757 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
1762 plength = plength>>1;
/* Extract coefficients and fold the top entry back. */
1766 for (k = 0; k <= k_end_tilde-data->
k_start; k++)
1768 x[k] = set->work[2*(data->
k_start+k)];
1773 data->
gammaN[tk-2]*set->work[2*(Nk-2)]
1774 + data->
betaN[tk-2] *set->work[2*(Nk-1)]
1775 + data->
alphaN[tk-2]*set->work[2*(Nk-1)+1];
/* Releases all memory owned by an fpt_set: per-transform step matrices
 * (walking the same tau/l grid as fpt_precompute), the recurrence-coefficient
 * copies (unless FPT_PERSISTENT_DATA), the Chebyshev node vectors, the FFTW
 * plans, and the direct-algorithm buffers.
 * NOTE(review): the actual nfft_free/fftw_destroy_plan calls and braces are
 * elided in this extraction — verify against the original file. */
1779 void fpt_finalize(
fpt_set set)
1789 const int M = set->
M;
/* Free each transform's precomputed step data. */
1792 for (m = 0; m < M; m++)
1795 data = &set->
dpt[m];
1808 N_tilde = N_TILDE(set->
N);
1810 for (tau = 1; tau < set->
t; tau++)
1813 firstl =
FIRST_L(k_start_tilde,plength);
1815 lastl =
LAST_L(N_tilde,plength);
1818 for (l = firstl; l <= lastl; l++)
1825 data->
steps[tau][l].a11[0] = NULL;
1826 data->
steps[tau][l].a12[0] = NULL;
1827 data->
steps[tau][l].a21[0] = NULL;
1835 data->
steps[tau][l].a11 = NULL;
1836 data->
steps[tau][l].a12 = NULL;
1837 data->
steps[tau][l].a21 = NULL;
1839 data->
steps[tau][l].g = NULL;
1843 data->
steps[tau] = NULL;
1845 plength = plength<<1;
1852 if (!(set->
flags & FPT_NO_DIRECT_ALGORITHM))
1856 if (!(set->
flags & FPT_PERSISTENT_DATA))
/* Free the per-level Chebyshev node vectors. */
1872 for (tau = 1; tau < set->
t+1; tau++)
1875 set->
xcvecs[tau-1] = NULL;
1885 if (!(set->
flags & FPT_NO_FAST_ALGORITHM))
/* Destroy FFTW plans under the shared planner lock. */
1898 for(tau = 0; tau < set->
t; tau++)
1901 #pragma omp critical (nfft_omp_critical_fftw_plan)
1917 if (!(set->
flags & FPT_NO_DIRECT_ALGORITHM))
1921 set->xc_slow = NULL;
Holds data for a single multiplication step in the cascade summation.
double * xc
Array for Chebyshev nodes.
bool stable
Indicates if the values contained represent a fast or a slow stabilized step.
double * alphaN
TODO Add comment here.
int M
The number of DPT transforms.
double * _alpha
TODO Add comment here.
int * lengths
Transform lengths for fftw library.
double alpha_0
TODO Add comment here.
Holds data for a set of cascade summations.
#define LAST_L(x, y)
Index of last block of four functions at level.
#define K_START_TILDE(x, y)
Minimum degree at top of a cascade.
fftw_r2r_kind * kinds
Transform kinds for fftw library.
fpt_data * dpt
The DPT transform data.
fftw_plan * plans_dct3
Transform plans for the fftw library.
double * gammaN
TODO Add comment here.
#define X(name)
Include header for C99 complex datatype.
int N
The transform length.
Holds data for a single cascade summation.
fftw_plan * plans_dct2
Transform plans for the fftw library.
#define UNUSED(x)
Dummy use of unused parameters to silence compiler warnings.
struct fpt_set_s_ fpt_set_s
Holds data for a set of cascade summations.
int Ns
TODO Add comment here.
double * _beta
TODO Add comment here.
void * nfft_malloc(size_t n)
double beta_0
TODO Add comment here.
static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a, double *x, double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam, double lambda)
Clenshaw algorithm.
double * _gamma
TODO Add comment here.
double ** a22
The matrix components.
fpt_step ** steps
The cascade summation steps.
double gamma_m1
TODO Add comment here.
#define FIRST_L(x, y)
Index of first block of four functions at level.
int k_start
TODO Add comment here.
int ts
TODO Add comment here.
double * betaN
TODO Add comment here.
struct fpt_data_ fpt_data
Holds data for a single cascade summation.
struct fpt_step_ fpt_step
Holds data for a single multiplication step in the cascade summation.
#define K_END_TILDE(x, y)
Maximum degree at top of a cascade.
double ** xcvecs
Array of pointers to arrays containing the Chebyshev nodes.
fftw_r2r_kind * kindsr
Transform kinds for fftw library.