NFFT  3.3.0
fastsumS2.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id$ */
20 
26 #include "config.h"
27 
28 /* standard headers */
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <math.h>
32 #include <float.h>
33 #ifdef HAVE_COMPLEX_H
34 #include <complex.h>
35 #endif
36 
37 /* NFFT3 header */
38 #include "nfft3.h"
39 
40 /* NFFT3 utilities */
41 #include "infft.h"
42 
43 /* Fourier-Legendre coefficients for Abel-Poisson kernel */
44 #define SYMBOL_ABEL_POISSON(k,h) (pow(h,k))
45 
46 /* Fourier-Legendre coefficients for singularity kernel */
47 #define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k))
48 
49 /* Flags for the different kernel functions */
50 
52 #define KT_ABEL_POISSON (0)
53 
54 #define KT_SINGULARITY (1)
55 
56 #define KT_LOC_SUPP (2)
57 
58 #define KT_GAUSSIAN (3)
59 
61 enum pvalue {NO = 0, YES = 1, BOTH = 2};
62 
63 static inline int scaled_modified_bessel_i_series(const R x, const R alpha,
64  const int nb, const int ize, R *b)
65 {
66  const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
67  R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0);
68  int n, ncalc = nb;
69 
70  if (enmten < x)
71  halfx = x/K(2.0);
72 
73  if (alpha != K(0.0))
74  tempa = POW(halfx, alpha)/TGAMMA(empal);
75 
76  if (ize == 2)
77  tempa *= EXP(-x);
78 
79  if (K(1.0) < x + K(1.0))
80  tempb = halfx*halfx;
81 
82  b[0] = tempa + tempa*tempb/empal;
83 
84  if (x != K(0.0) && b[0] == K(0.0))
85  ncalc = 0;
86 
87  if (nb == 1)
88  return ncalc;
89 
90  if (K(0.0) < x)
91  {
92  R tempc = halfx, tover = (enmten + enmten)/x;
93 
94  if (tempb != K(0.0))
95  tover = enmten/tempb;
96 
97  for (n = 1; n < nb; n++)
98  {
99  tempa /= empal;
100  empal += K(1.0);
101  tempa *= tempc;
102 
103  if (tempa <= tover*empal)
104  tempa = K(0.0);
105 
106  b[n] = tempa + tempa*tempb/empal;
107 
108  if (b[n] == K(0.0) && n < ncalc)
109  ncalc = n;
110  }
111  }
112  else
113  for (n = 1; n < nb; n++)
114  b[n] = K(0.0);
115 
116  return ncalc;
117 }
118 
119 static inline void scaled_modified_bessel_i_normalize(const R x,
120  const R alpha, const int nb, const int ize, R *b, const R sum_)
121 {
122  const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
123  R sum = sum_, tempa;
124  int n;
125 
126  /* Normalize, i.e., divide all b[n] by sum */
127  if (alpha != K(0.0))
128  sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
129 
130  if (ize == 1)
131  sum *= EXP(-x);
132 
133  tempa = enmten;
134 
135  if (K(1.0) < sum)
136  tempa *= sum;
137 
138  for (n = 1; n <= nb; n++)
139  {
140  if (b[n-1] < tempa)
141  b[n-1] = K(0.0);
142 
143  b[n-1] /= sum;
144  }
145 }
146 
194 static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
195 {
196  /* machine dependent parameters */
197  /* NSIG - DECIMAL SIGNIFICANCE DESIRED. SHOULD BE SET TO */
198  /* IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */
199  /* BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */
200  /* SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */
201  /* WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */
202  /* WITHOUT INCREASING ACCURACY. THE TRUNCATION ERROR */
203  /* IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */
204  /* ENTEN - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */
205  /* ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */
206  /* ENSIG - 10.0 ** NSIG. */
207  /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */
208  /* K .GE. NSIG/4. */
209  /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */
210  /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2. BEAR */
211  /* IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */
212  /* OF THE BACKWARD RECURSION WILL BE EXECUTED. */
213  /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */
214  /* EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */
215  /* MAGNITUDE OF X WHEN IZE=1. */
216  const int nsig = MANT_DIG + 2;
217  const R enten = nfft_float_property(NFFT_R_MAX);
218  const R ensig = POW(K(10.0),(R)nsig);
219  const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
220  const R xlarge = K(1E4);
221  const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
222 
223  /* System generated locals */
224  int l, n, nend, magx, nbmx, ncalc, nstart;
225  R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
226  emp2al, psavel;
227 
228  magx = LRINT(FLOOR(x));
229 
230  /* return if x, nb, or ize out of range */
231  if ( nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
232  || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
233  return (MIN(nb,0) - 1);
234 
235  /* 2-term ascending series for small x */
236  if (x < rtnsig)
237  return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
238 
239  ncalc = nb;
240  /* forward sweep, Olver's p-sequence */
241 
242  nbmx = nb - magx;
243  n = magx + 1;
244 
245  en = (R) (n+n) + (alpha+alpha);
246  plast = K(1.0);
247  p = en/x;
248 
249  /* significance test */
250  test = ensig + ensig;
251 
252  if ((5*nsig) < (magx << 1))
253  test = SQRT(test*p);
254  else
255  test /= POW(K(1.585),(R)magx);
256 
257  if (3 <= nbmx)
258  {
259  /* calculate p-sequence until n = nb-1 */
260  tover = enten/ensig;
261  nstart = magx+2;
262  nend = nb - 1;
263 
264  for (n = nstart; n <= nend; n++)
265  {
266  en += K(2.0);
267  pold = plast;
268  plast = p;
269  p = en*plast/x + pold;
270  if (p > tover)
271  {
272  /* divide p-sequence by tover to avoid overflow. Calculate p-sequence
273  * until 1 <= |p| */
274  tover = enten;
275  p /= tover;
276  plast /= tover;
277  psave = p;
278  psavel = plast;
279  nstart = n + 1;
280 
281  do
282  {
283  n++;
284  en += K(2.0);
285  pold = plast;
286  plast = p;
287  p = en*plast/x + pold;
288  } while (p <= K(1.0));
289 
290  tempb = en/x;
291 
292  /* Backward test. Find ncalc as the largest n such that test is passed. */
293  test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
294  p = plast*tover;
295  n--;
296  en -= K(2.0);
297  nend = MIN(nb,n);
298 
299  for (ncalc = nstart; ncalc <= nend; ncalc++)
300  {
301  pold = psavel;
302  psavel = psave;
303  psave = en*psavel/x + pold;
304  if (test < psave * psavel)
305  break;
306  }
307 
308  ncalc--;
309  goto L80;
310  }
311  }
312 
313  n = nend;
314  en = (R) (n+n) + (alpha+alpha);
315 
316  /* special significance test for 2 <= nbmx */
317  test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
318  }
319 
320  /* calculate p-sequence until significance test is passed */
321  do
322  {
323  n++;
324  en += K(2.0);
325  pold = plast;
326  plast = p;
327  p = en*plast/x + pold;
328  } while (p < test);
329 
330  /* Initialize backward recursion and normalization sum. */
331 L80:
332  n++;
333  en += K(2.0);
334  tempb = K(0.0);
335  tempa = K(1.0)/p;
336  em = (R)(n-1);
337  empal = em + alpha;
338  emp2al = em - K(1.0) + (alpha+alpha);
339  sum = tempa*empal*emp2al/em;
340  nend = n-nb;
341 
342  if (nend < 0)
343  {
344  /* We have n <= nb. So store b[n] and set higher orders to zero */
345  b[n-1] = tempa;
346  nend = -nend;
347  for (l = 1; l <= nend; ++l)
348  b[n-1 + l] = K(0.0);
349  }
350  else
351  {
352  if (nend != 0)
353  {
354  /* recur backward via difference equation, calculating b[n] until n = nb */
355  for (l = 1; l <= nend; ++l)
356  {
357  n--;
358  en -= K(2.0);
359  tempc = tempb;
360  tempb = tempa;
361  tempa = en*tempb/x + tempc;
362  em -= K(1.0);
363  emp2al -= K(1.0);
364 
365  if (n == 1)
366  break;
367 
368  if (n == 2)
369  emp2al = K(1.0);
370 
371  empal -= K(1.0);
372  sum = (sum + tempa*empal)*emp2al/em;
373  }
374  }
375 
376  /* store b[nb] */
377  b[n-1] = tempa;
378 
379  if (nb <= 1)
380  {
381  sum = sum + sum + tempa;
382  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
383  return ncalc;
384  }
385 
386  /* calculate and store b[nb-1] */
387  n--;
388  en -= 2.0;
389  b[n-1] = en*tempa/x + tempb;
390 
391  if (n == 1)
392  {
393  sum = sum + sum + b[0];
394  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
395  return ncalc;
396  }
397 
398  em -= K(1.0);
399  emp2al -= K(1.0);
400 
401  if (n == 2)
402  emp2al = K(1.0);
403 
404  empal -= K(1.0);
405  sum = (sum + b[n-1]*empal)*emp2al/em;
406  }
407 
408  nend = n - 2;
409 
410  if (nend != 0)
411  {
412  /* Calculate and store b[n] until n = 2. */
413  for (l = 1; l <= nend; ++l)
414  {
415  n--;
416  en -= K(2.0);
417  b[n-1] = en*b[n]/x + b[n+1];
418  em -= K(1.0);
419  emp2al -= K(1.0);
420 
421  if (n == 2)
422  emp2al = K(1.0);
423 
424  empal -= K(1.0);
425  sum = (sum + b[n-1]*empal)*emp2al/em;
426  }
427  }
428 
429  /* calculate b[1] */
430  b[0] = K(2.0)*empal*b[1]/x + b[2];
431  sum = sum + sum + b[0];
432 
433  scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
434  return ncalc;
435 }
436 
451 static inline double innerProduct(const double phi1, const double theta1,
452  const double phi2, const double theta2)
453 {
454  double pi2theta1 = K2PI*theta1, pi2theta2 = K2PI*theta2;
455  return (cos(pi2theta1)*cos(pi2theta2)
456  + sin(pi2theta1)*sin(pi2theta2)*cos(K2PI*(phi1-phi2)));
457 }
458 
470 static inline double poissonKernel(const double x, const double h)
471 {
472  return (1.0/(K4PI))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0);
473 }
474 
486 static inline double singularityKernel(const double x, const double h)
487 {
488  return (1.0/(K2PI))/sqrt(1.0-2.0*h*x+h*h);
489 }
490 
504 static inline double locallySupportedKernel(const double x, const double h,
505  const double lambda)
506 {
507  return (x<=h)?(0.0):(pow((x-h),lambda));
508 }
509 
522 static inline double gaussianKernel(const double x, const double sigma)
523 {
524  return exp(2.0*sigma*(x-1.0));
525 }
526 
537 int main (int argc, char **argv)
538 {
539  double **p; /* The array containing the parameter sets *
540  * for the kernel functions */
541  int *m; /* The array containing the cut-off degrees M */
542  int **ld; /* The array containing the numbers of source *
543  * and target nodes, L and D */
544  int ip; /* Index variable for p */
545  int im; /* Index variable for m */
546  int ild; /* Index variable for l */
547  int ipp; /* Index for kernel parameters */
548  int ip_max; /* The maximum index for p */
549  int im_max; /* The maximum index for m */
550  int ild_max; /* The maximum index for l */
551  int ipp_max; /* The maximum index for ip */
552  int tc_max; /* The number of testcases */
553  int m_max; /* The maximum cut-off degree M for the *
554  * current dataset */
555  int l_max; /* The maximum number of source nodes L for *
556  * the current dataset */
557  int d_max; /* The maximum number of target nodes D for *
558  * the current dataset */
559  long ld_max_prec; /* The maximum number of source and target *
560  * nodes for precomputation multiplied */
561  long l_max_prec; /* The maximum number of source nodes for *
562  * precomputation */
563  int tc; /* Index variable for testcases */
564  int kt; /* The kernel function */
565  int cutoff; /* The current NFFT cut-off parameter */
566  double threshold; /* The current NFSFT threshold parameter */
567  double t_d; /* Time for direct algorithm in seconds */
568  double t_dp; /* Time for direct algorithm with *
569  precomputation in seconds */
570  double t_fd; /* Time for fast direct algorithm in seconds */
571  double t_f; /* Time for fast algorithm in seconds */
572  double temp; /* */
573  double err_f; /* Error E_infty for fast algorithm */
574  double err_fd; /* Error E_\infty for fast direct algorithm */
575  ticks t0, t1; /* */
576  int precompute = NO; /* */
577  fftw_complex *ptr; /* */
578  double* steed; /* */
579  fftw_complex *b; /* The weights (b_l)_{l=0}^{L-1} */
580  fftw_complex *f_hat; /* The spherical Fourier coefficients */
581  fftw_complex *a; /* The Fourier-Legendre coefficients */
582  double *xi; /* Target nodes */
583  double *eta; /* Source nodes */
584  fftw_complex *f_m; /* Approximate function values */
585  fftw_complex *f; /* Exact function values */
586  fftw_complex *prec = NULL; /* */
587  nfsft_plan plan; /* NFSFT plan */
588  nfsft_plan plan_adjoint; /* adjoint NFSFT plan */
589  int i; /* */
590  int k; /* */
591  int n; /* */
592  int d; /* */
593  int l; /* */
594  int use_nfsft; /* */
595  int use_nfft; /* */
596  int use_fpt; /* */
597  int rinc; /* */
598  double constant; /* */
599 
600  /* Read the number of testcases. */
601  fscanf(stdin,"testcases=%d\n",&tc_max);
602  fprintf(stdout,"%d\n",tc_max);
603 
604  /* Process each testcase. */
605  for (tc = 0; tc < tc_max; tc++)
606  {
607  /* Check if the fast transform shall be used. */
608  fscanf(stdin,"nfsft=%d\n",&use_nfsft);
609  fprintf(stdout,"%d\n",use_nfsft);
610  if (use_nfsft != NO)
611  {
612  /* Check if the NFFT shall be used. */
613  fscanf(stdin,"nfft=%d\n",&use_nfft);
614  fprintf(stdout,"%d\n",use_nfft);
615  if (use_nfft != NO)
616  {
617  /* Read the cut-off parameter. */
618  fscanf(stdin,"cutoff=%d\n",&cutoff);
619  fprintf(stdout,"%d\n",cutoff);
620  }
621  else
622  {
623  /* TODO remove this */
624  /* Initialize unused variable with dummy value. */
625  cutoff = 1;
626  }
627  /* Check if the fast polynomial transform shall be used. */
628  fscanf(stdin,"fpt=%d\n",&use_fpt);
629  fprintf(stdout,"%d\n",use_fpt);
630  /* Read the NFSFT threshold parameter. */
631  fscanf(stdin,"threshold=%lf\n",&threshold);
632  fprintf(stdout,"%lf\n",threshold);
633  }
634  else
635  {
636  /* TODO remove this */
637  /* Set dummy values. */
638  cutoff = 3;
639  threshold = 1000000000000.0;
640  }
641 
642  /* Initialize bandwidth bound. */
643  m_max = 0;
644  /* Initialize source nodes bound. */
645  l_max = 0;
646  /* Initialize target nodes bound. */
647  d_max = 0;
648  /* Initialize source nodes bound for precomputation. */
649  l_max_prec = 0;
650  /* Initialize source and target nodes bound for precomputation. */
651  ld_max_prec = 0;
652 
653  /* Read the kernel type. This is one of KT_ABEL_POISSON, KT_SINGULARITY,
654  * KT_LOC_SUPP and KT_GAUSSIAN. */
655  fscanf(stdin,"kernel=%d\n",&kt);
656  fprintf(stdout,"%d\n",kt);
657 
658  /* Read the number of parameter sets. */
659  fscanf(stdin,"parameter_sets=%d\n",&ip_max);
660  fprintf(stdout,"%d\n",ip_max);
661 
662  /* Allocate memory for pointers to parameter sets. */
663  p = (double**) nfft_malloc(ip_max*sizeof(double*));
664 
665  /* We now read in the parameter sets. */
666 
667  /* Read number of parameters. */
668  fscanf(stdin,"parameters=%d\n",&ipp_max);
669  fprintf(stdout,"%d\n",ipp_max);
670 
671  for (ip = 0; ip < ip_max; ip++)
672  {
673  /* Allocate memory for the parameters. */
674  p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));
675 
676  /* Read the parameters. */
677  for (ipp = 0; ipp < ipp_max; ipp++)
678  {
679  /* Read the next parameter. */
680  fscanf(stdin,"%lf\n",&p[ip][ipp]);
681  fprintf(stdout,"%lf\n",p[ip][ipp]);
682  }
683  }
684 
685  /* Read the number of cut-off degrees. */
686  fscanf(stdin,"bandwidths=%d\n",&im_max);
687  fprintf(stdout,"%d\n",im_max);
688  m = (int*) nfft_malloc(im_max*sizeof(int));
689 
690  /* Read the cut-off degrees. */
691  for (im = 0; im < im_max; im++)
692  {
693  /* Read cut-off degree. */
694  fscanf(stdin,"%d\n",&m[im]);
695  fprintf(stdout,"%d\n",m[im]);
696  m_max = MAX(m_max,m[im]);
697  }
698 
699  /* Read number of node specifications. */
700  fscanf(stdin,"node_sets=%d\n",&ild_max);
701  fprintf(stdout,"%d\n",ild_max);
702  ld = (int**) nfft_malloc(ild_max*sizeof(int*));
703 
704  /* Read the run specification. */
705  for (ild = 0; ild < ild_max; ild++)
706  {
707  /* Allocate memory for the run parameters. */
708  ld[ild] = (int*) nfft_malloc(5*sizeof(int));
709 
710  /* Read number of source nodes. */
711  fscanf(stdin,"L=%d ",&ld[ild][0]);
712  fprintf(stdout,"%d\n",ld[ild][0]);
713  l_max = MAX(l_max,ld[ild][0]);
714 
715  /* Read number of target nodes. */
716  fscanf(stdin,"D=%d ",&ld[ild][1]);
717  fprintf(stdout,"%d\n",ld[ild][1]);
718  d_max = MAX(d_max,ld[ild][1]);
719 
720  /* Determine whether direct and fast algorithm shall be compared. */
721  fscanf(stdin,"compare=%d ",&ld[ild][2]);
722  fprintf(stdout,"%d\n",ld[ild][2]);
723 
724  /* Check if precomputation for the direct algorithm is used. */
725  if (ld[ild][2] == YES)
726  {
727  /* Read whether the precomputed version shall also be used. */
728  fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
729  fprintf(stdout,"%d\n",ld[ild][3]);
730 
731  /* Read the number of repetitions over which measurements are
732  * averaged. */
733  fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
734  fprintf(stdout,"%d\n",ld[ild][4]);
735 
736  /* Update ld_max_prec and l_max_prec. */
737  if (ld[ild][3] == YES)
738  {
739  /* Update ld_max_prec. */
740  ld_max_prec = MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
741  /* Update l_max_prec. */
742  l_max_prec = MAX(l_max_prec,ld[ild][0]);
743  /* Turn on the precomputation for the direct algorithm. */
744  precompute = YES;
745  }
746  }
747  else
748  {
749  /* Set default value for the number of repetitions. */
750  ld[ild][4] = 1;
751  }
752  }
753 
754  /* Allocate memory for data structures. */
755  b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
756  eta = (double*) nfft_malloc(2*l_max*sizeof(double));
757  f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
758  a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
759  xi = (double*) nfft_malloc(2*d_max*sizeof(double));
760  f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
761  f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
762 
763  /* Allocate memory for precomputed data. */
764  if (precompute == YES)
765  {
766  prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
767  }
768 
769  /* Generate random source nodes and weights. */
770  for (l = 0; l < l_max; l++)
771  {
772  b[l] = (((double)rand())/RAND_MAX) - 0.5;
773  eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
774  eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
775  }
776 
777  /* Generate random target nodes. */
778  for (d = 0; d < d_max; d++)
779  {
780  xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
781  xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(K2PI);
782  }
783 
784  /* Do precomputation. */
785  nfsft_precompute(m_max,threshold,
786  ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U/*NFSFT_NO_DIRECT_ALGORITHM*/)), 0U);
787 
788  /* Process all parameter sets. */
789  for (ip = 0; ip < ip_max; ip++)
790  {
791  /* Compute kernel coeffcients up to the maximum cut-off degree m_max. */
792  switch (kt)
793  {
794  case KT_ABEL_POISSON:
795  /* Compute Fourier-Legendre coefficients for the Poisson kernel. */
796  for (k = 0; k <= m_max; k++)
797  a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
798  break;
799 
800  case KT_SINGULARITY:
801  /* Compute Fourier-Legendre coefficients for the singularity
802  * kernel. */
803  for (k = 0; k <= m_max; k++)
804  a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
805  break;
806 
807  case KT_LOC_SUPP:
808  /* Compute Fourier-Legendre coefficients for the locally supported
809  * kernel. */
810  a[0] = 1.0;
811  if (1 <= m_max)
812  a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
813  for (k = 2; k <= m_max; k++)
814  a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
815  (k-p[ip][1]-2)*a[k-2]);
816  break;
817 
818  case KT_GAUSSIAN:
819  /* Fourier-Legendre coefficients */
820  steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
821  smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
822  for (k = 0; k <= m_max; k++)
823  a[k] = K2PI*(sqrt(KPI/p[ip][0]))*steed[k];
824 
825  nfft_free(steed);
826  break;
827  }
828 
829  /* Normalize Fourier-Legendre coefficients. */
830  for (k = 0; k <= m_max; k++)
831  a[k] *= (2*k+1)/(K4PI);
832 
833  /* Process all node sets. */
834  for (ild = 0; ild < ild_max; ild++)
835  {
836  /* Check if the fast algorithm shall be used. */
837  if (ld[ild][2] != NO)
838  {
839  /* Check if the direct algorithm with precomputation should be
840  * tested. */
841  if (ld[ild][3] != NO)
842  {
843  /* Get pointer to start of data. */
844  ptr = prec;
845  /* Calculate increment from one row to the next. */
846  rinc = l_max_prec-ld[ild][0];
847 
848  /* Process al target nodes. */
849  for (d = 0; d < ld[ild][1]; d++)
850  {
851  /* Process all source nodes. */
852  for (l = 0; l < ld[ild][0]; l++)
853  {
854  /* Compute inner product between current source and target
855  * node. */
856  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
857 
858  /* Switch by the kernel type. */
859  switch (kt)
860  {
861  case KT_ABEL_POISSON:
862  /* Evaluate the Poisson kernel for the current value. */
863  *ptr++ = poissonKernel(temp,p[ip][0]);
864  break;
865 
866  case KT_SINGULARITY:
867  /* Evaluate the singularity kernel for the current
868  * value. */
869  *ptr++ = singularityKernel(temp,p[ip][0]);
870  break;
871 
872  case KT_LOC_SUPP:
873  /* Evaluate the localized kernel for the current
874  * value. */
875  *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
876  break;
877 
878  case KT_GAUSSIAN:
879  /* Evaluate the spherical Gaussian kernel for the current
880  * value. */
881  *ptr++ = gaussianKernel(temp,p[ip][0]);
882  break;
883  }
884  }
885  /* Increment pointer for next row. */
886  ptr += rinc;
887  }
888 
889  /* Initialize cumulative time variable. */
890  t_dp = 0.0;
891 
892  /* Initialize time measurement. */
893  t0 = getticks();
894 
895  /* Cycle through all runs. */
896  for (i = 0; i < ld[ild][4]; i++)
897  {
898 
899  /* Reset pointer to start of precomputed data. */
900  ptr = prec;
901  /* Calculate increment from one row to the next. */
902  rinc = l_max_prec-ld[ild][0];
903 
904  /* Check if the localized kernel is used. */
905  if (kt == KT_LOC_SUPP)
906  {
907  /* Perform final summation */
908 
909  /* Calculate the multiplicative constant. */
910  constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
911 
912  /* Process all target nodes. */
913  for (d = 0; d < ld[ild][1]; d++)
914  {
915  /* Initialize function value. */
916  f[d] = 0.0;
917 
918  /* Process all source nodes. */
919  for (l = 0; l < ld[ild][0]; l++)
920  f[d] += b[l]*(*ptr++);
921 
922  /* Multiply with the constant. */
923  f[d] *= constant;
924 
925  /* Proceed to next row. */
926  ptr += rinc;
927  }
928  }
929  else
930  {
931  /* Process all target nodes. */
932  for (d = 0; d < ld[ild][1]; d++)
933  {
934  /* Initialize function value. */
935  f[d] = 0.0;
936 
937  /* Process all source nodes. */
938  for (l = 0; l < ld[ild][0]; l++)
939  f[d] += b[l]*(*ptr++);
940 
941  /* Proceed to next row. */
942  ptr += rinc;
943  }
944  }
945  }
946 
947  /* Calculate the time needed. */
948  t1 = getticks();
949  t_dp = nfft_elapsed_seconds(t1,t0);
950 
951  /* Calculate average time needed. */
952  t_dp = t_dp/((double)ld[ild][4]);
953  }
954  else
955  {
956  /* Initialize cumulative time variable with dummy value. */
957  t_dp = -1.0;
958  }
959 
960  /* Initialize cumulative time variable. */
961  t_d = 0.0;
962 
963  /* Initialize time measurement. */
964  t0 = getticks();
965 
966  /* Cycle through all runs. */
967  for (i = 0; i < ld[ild][4]; i++)
968  {
969  /* Switch by the kernel type. */
970  switch (kt)
971  {
972  case KT_ABEL_POISSON:
973 
974  /* Process all target nodes. */
975  for (d = 0; d < ld[ild][1]; d++)
976  {
977  /* Initialize function value. */
978  f[d] = 0.0;
979 
980  /* Process all source nodes. */
981  for (l = 0; l < ld[ild][0]; l++)
982  {
983  /* Compute the inner product for the current source and
984  * target nodes. */
985  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
986 
987  /* Evaluate the Poisson kernel for the current value and add
988  * to the result. */
989  f[d] += b[l]*poissonKernel(temp,p[ip][0]);
990  }
991  }
992  break;
993 
994  case KT_SINGULARITY:
995  /* Process all target nodes. */
996  for (d = 0; d < ld[ild][1]; d++)
997  {
998  /* Initialize function value. */
999  f[d] = 0.0;
1000 
1001  /* Process all source nodes. */
1002  for (l = 0; l < ld[ild][0]; l++)
1003  {
1004  /* Compute the inner product for the current source and
1005  * target nodes. */
1006  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1007 
1008  /* Evaluate the Poisson kernel for the current value and add
1009  * to the result. */
1010  f[d] += b[l]*singularityKernel(temp,p[ip][0]);
1011  }
1012  }
1013  break;
1014 
1015  case KT_LOC_SUPP:
1016  /* Calculate the multiplicative constant. */
1017  constant = ((p[ip][1]+1)/(K2PI*pow(1-p[ip][0],p[ip][1]+1)));
1018 
1019  /* Process all target nodes. */
1020  for (d = 0; d < ld[ild][1]; d++)
1021  {
1022  /* Initialize function value. */
1023  f[d] = 0.0;
1024 
1025  /* Process all source nodes. */
1026  for (l = 0; l < ld[ild][0]; l++)
1027  {
1028  /* Compute the inner product for the current source and
1029  * target nodes. */
1030  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1031 
1032  /* Evaluate the Poisson kernel for the current value and add
1033  * to the result. */
1034  f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
1035  }
1036 
1037  /* Multiply result with constant. */
1038  f[d] *= constant;
1039  }
1040  break;
1041 
1042  case KT_GAUSSIAN:
1043  /* Process all target nodes. */
1044  for (d = 0; d < ld[ild][1]; d++)
1045  {
1046  /* Initialize function value. */
1047  f[d] = 0.0;
1048 
1049  /* Process all source nodes. */
1050  for (l = 0; l < ld[ild][0]; l++)
1051  {
1052  /* Compute the inner product for the current source and
1053  * target nodes. */
1054  temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
1055  /* Evaluate the Poisson kernel for the current value and add
1056  * to the result. */
1057  f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
1058  }
1059  }
1060  break;
1061  }
1062  }
1063 
1064  /* Calculate and add the time needed. */
1065  t1 = getticks();
1066  t_d = nfft_elapsed_seconds(t1,t0);
1067  /* Calculate average time needed. */
1068  t_d = t_d/((double)ld[ild][4]);
1069  }
1070  else
1071  {
1072  /* Initialize cumulative time variable with dummy value. */
1073  t_d = -1.0;
1074  t_dp = -1.0;
1075  }
1076 
1077  /* Initialize error and cumulative time variables for the fast
1078  * algorithm. */
1079  err_fd = -1.0;
1080  err_f = -1.0;
1081  t_fd = -1.0;
1082  t_f = -1.0;
1083 
1084  /* Process all cut-off bandwidths. */
1085  for (im = 0; im < im_max; im++)
1086  {
1087  /* Init transform plans. */
1088  nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
1089  ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1090  ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1091  PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
1092  FFT_OUT_OF_PLACE, cutoff);
1093  nfsft_init_guru(&plan,m[im],ld[ild][1],
1094  ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
1095  ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
1096  PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
1097  FFT_OUT_OF_PLACE,
1098  cutoff);
1099  plan_adjoint.f_hat = f_hat;
1100  plan_adjoint.x = eta;
1101  plan_adjoint.f = b;
1102  plan.f_hat = f_hat;
1103  plan.x = xi;
1104  plan.f = f_m;
1105  nfsft_precompute_x(&plan_adjoint);
1106  nfsft_precompute_x(&plan);
1107 
1108  /* Check if direct algorithm shall also be tested. */
1109  if (use_nfsft == BOTH)
1110  {
1111  /* Initialize cumulative time variable. */
1112  t_fd = 0.0;
1113 
1114  /* Initialize time measurement. */
1115  t0 = getticks();
1116 
1117  /* Cycle through all runs. */
1118  for (i = 0; i < ld[ild][4]; i++)
1119  {
1120 
1121  /* Execute adjoint direct NDSFT transformation. */
1122  nfsft_adjoint_direct(&plan_adjoint);
1123 
1124  /* Multiplication with the Fourier-Legendre coefficients. */
1125  for (k = 0; k <= m[im]; k++)
1126  for (n = -k; n <= k; n++)
1127  f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1128 
1129  /* Execute direct NDSFT transformation. */
1130  nfsft_trafo_direct(&plan);
1131 
1132  }
1133 
1134  /* Calculate and add the time needed. */
1135  t1 = getticks();
1136  t_fd = nfft_elapsed_seconds(t1,t0);
1137 
1138  /* Calculate average time needed. */
1139  t_fd = t_fd/((double)ld[ild][4]);
1140 
1141  /* Check if error E_infty should be computed. */
1142  if (ld[ild][2] != NO)
1143  {
1144  /* Compute the error E_infinity. */
1145  err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1146  ld[ild][0]);
1147  }
1148  }
1149 
1150  /* Check if the fast NFSFT algorithm shall also be tested. */
1151  if (use_nfsft != NO)
1152  {
1153  /* Initialize cumulative time variable for the NFSFT algorithm. */
1154  t_f = 0.0;
1155  }
1156  else
1157  {
1158  /* Initialize cumulative time variable for the direct NDSFT
1159  * algorithm. */
1160  t_fd = 0.0;
1161  }
1162 
1163  /* Initialize time measurement. */
1164  t0 = getticks();
1165 
1166  /* Cycle through all runs. */
1167  for (i = 0; i < ld[ild][4]; i++)
1168  {
1169  /* Check if the fast NFSFT algorithm shall also be tested. */
1170  if (use_nfsft != NO)
1171  {
1172  /* Execute the adjoint NFSFT transformation. */
1173  nfsft_adjoint(&plan_adjoint);
1174  }
1175  else
1176  {
1177  /* Execute the adjoint direct NDSFT transformation. */
1178  nfsft_adjoint_direct(&plan_adjoint);
1179  }
1180 
1181  /* Multiplication with the Fourier-Legendre coefficients. */
1182  for (k = 0; k <= m[im]; k++)
1183  for (n = -k; n <= k; n++)
1184  f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
1185 
1186  /* Check if the fast NFSFT algorithm shall also be tested. */
1187  if (use_nfsft != NO)
1188  {
1189  /* Execute the NFSFT transformation. */
1190  nfsft_trafo(&plan);
1191  }
1192  else
1193  {
1194  /* Execute the NDSFT transformation. */
1195  nfsft_trafo_direct(&plan);
1196  }
1197  }
1198 
1199  /* Check if the fast NFSFT algorithm has been used. */
1200  t1 = getticks();
1201 
1202  if (use_nfsft != NO)
1203  t_f = nfft_elapsed_seconds(t1,t0);
1204  else
1205  t_fd = nfft_elapsed_seconds(t1,t0);
1206 
1207  /* Check if the fast NFSFT algorithm has been used. */
1208  if (use_nfsft != NO)
1209  {
1210  /* Calculate average time needed. */
1211  t_f = t_f/((double)ld[ild][4]);
1212  }
1213  else
1214  {
1215  /* Calculate average time needed. */
1216  t_fd = t_fd/((double)ld[ild][4]);
1217  }
1218 
1219  /* Check if error E_infty should be computed. */
1220  if (ld[ild][2] != NO)
1221  {
1222  /* Check if the fast NFSFT algorithm has been used. */
1223  if (use_nfsft != NO)
1224  {
1225  /* Compute the error E_infinity. */
1226  err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1227  ld[ild][0]);
1228  }
1229  else
1230  {
1231  /* Compute the error E_infinity. */
1232  err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
1233  ld[ild][0]);
1234  }
1235  }
1236 
1237  /* Print out the error measurements. */
1238  fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
1239  err_f);
1240 
1241  /* Finalize the NFSFT plans */
1242  nfsft_finalize(&plan_adjoint);
1243  nfsft_finalize(&plan);
1244  } /* for (im = 0; im < im_max; im++) - Process all cut-off
1245  * bandwidths.*/
1246  } /* for (ild = 0; ild < ild_max; ild++) - Process all node sets. */
1247  } /* for (ip = 0; ip < ip_max; ip++) - Process all parameter sets. */
1248 
1249  /* Delete precomputed data. */
1250  nfsft_forget();
1251 
1252  /* Check if memory for precomputed data of the matrix K has been
1253  * allocated. */
1254  if (precompute == YES)
1255  {
1256  /* Free memory for precomputed matrix K. */
1257  nfft_free(prec);
1258  }
1259  /* Free data arrays. */
1260  nfft_free(f);
1261  nfft_free(f_m);
1262  nfft_free(xi);
1263  nfft_free(eta);
1264  nfft_free(a);
1265  nfft_free(f_hat);
1266  nfft_free(b);
1267 
1268  /* Free memory for node sets. */
1269  for (ild = 0; ild < ild_max; ild++)
1270  nfft_free(ld[ild]);
1271  nfft_free(ld);
1272 
1273  /* Free memory for cut-off bandwidths. */
1274  nfft_free(m);
1275 
1276  /* Free memory for parameter sets. */
1277  for (ip = 0; ip < ip_max; ip++)
1278  nfft_free(p[ip]);
1279  nfft_free(p);
1280  } /* for (tc = 0; tc < tc_max; tc++) - Process each testcase. */
1281 
1282  /* Return exit code for successful run. */
1283  return EXIT_SUCCESS;
1284 }
1285 /* \} */
double * x
the nodes for ,
Definition: nfft3.h:576
static double gaussianKernel(const double x, const double sigma)
Evaluates the spherical Gaussian kernel at a node .
Definition: fastsumS2.c:522
#define KT_SINGULARITY
Singularity kernel.
Definition: fastsumS2.c:54
static double poissonKernel(const double x, const double h)
Evaluates the Poisson kernel at a node .
Definition: fastsumS2.c:470
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:576
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
static int smbi(const R x, const R alpha, const int nb, const int ize, R *b)
Calculates the modified bessel function , possibly scaled by , for real non-negative with ...
Definition: fastsumS2.c:194
static double innerProduct(const double phi1, const double theta1, const double phi2, const double theta2)
Computes the standard inner product between two vectors on the unit sphere given in spherical coord...
Definition: fastsumS2.c:451
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition: nfft3.h:576
void nfft_free(void *p)
static double locallySupportedKernel(const double x, const double h, const double lambda)
Evaluates the locally supported kernel at a node .
Definition: fastsumS2.c:504
static double singularityKernel(const double x, const double h)
Evaluates the singularity kernel at a node .
Definition: fastsumS2.c:486
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:53
#define KT_ABEL_POISSON
Abel-Poisson kernel.
Definition: fastsumS2.c:52
void * nfft_malloc(size_t n)
fftw_complex * f
Samples.
Definition: nfft3.h:576
pvalue
Enumeration type for yes/no/both-type parameters.
Definition: fastsumS2.c:61
#define KT_LOC_SUPP
Locally supported kernel.
Definition: fastsumS2.c:56
#define KT_GAUSSIAN
Gaussian kernel.
Definition: fastsumS2.c:58