00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 template<typename _IIter,
00055 typename _OutputIterator,
00056 typename _BinaryOperation>
00057 _OutputIterator
00058 __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
00059 _OutputIterator __result,
00060 _BinaryOperation __bin_op,
00061 typename std::iterator_traits <_IIter>::value_type __value)
00062 {
00063 if (__begin == __end)
00064 return __result;
00065
00066 while (__begin != __end)
00067 {
00068 __value = __bin_op(__value, *__begin);
00069 *__result = __value;
00070 ++__result;
00071 ++__begin;
00072 }
00073 return __result;
00074 }
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 template<typename _IIter,
00087 typename _OutputIterator,
00088 typename _BinaryOperation>
00089 _OutputIterator
00090 __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
00091 _OutputIterator __result,
00092 _BinaryOperation __bin_op,
00093 typename std::iterator_traits<_IIter>::difference_type __n)
00094 {
00095 typedef std::iterator_traits<_IIter> _TraitsType;
00096 typedef typename _TraitsType::value_type _ValueType;
00097 typedef typename _TraitsType::difference_type _DifferenceType;
00098
00099 if (__begin == __end)
00100 return __result;
00101
00102 _ThreadIndex __num_threads =
00103 std::min<_DifferenceType>(__get_max_threads(), __n - 1);
00104
00105 if (__num_threads < 2)
00106 {
00107 *__result = *__begin;
00108 return __parallel_partial_sum_basecase(__begin + 1, __end,
00109 __result + 1, __bin_op,
00110 *__begin);
00111 }
00112
00113 _DifferenceType* __borders;
00114 _ValueType* __sums;
00115
00116 const _Settings& __s = _Settings::get();
00117
00118 # pragma omp parallel num_threads(__num_threads)
00119 {
00120 # pragma omp single
00121 {
00122 __num_threads = omp_get_num_threads();
00123
00124 __borders = new _DifferenceType[__num_threads + 2];
00125
00126 if (__s.partial_sum_dilation == 1.0f)
00127 equally_split(__n, __num_threads + 1, __borders);
00128 else
00129 {
00130 _DifferenceType __first_part_length =
00131 std::max<_DifferenceType>(1,
00132 __n / (1.0f + __s.partial_sum_dilation * __num_threads));
00133 _DifferenceType __chunk_length =
00134 (__n - __first_part_length) / __num_threads;
00135 _DifferenceType __borderstart =
00136 __n - __num_threads * __chunk_length;
00137 __borders[0] = 0;
00138 for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
00139 {
00140 __borders[__i] = __borderstart;
00141 __borderstart += __chunk_length;
00142 }
00143 __borders[__num_threads + 1] = __n;
00144 }
00145
00146 __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00147 * __num_threads));
00148 _OutputIterator __target_end;
00149 }
00150
00151 _ThreadIndex __iam = omp_get_thread_num();
00152 if (__iam == 0)
00153 {
00154 *__result = *__begin;
00155 __parallel_partial_sum_basecase(__begin + 1,
00156 __begin + __borders[1],
00157 __result + 1,
00158 __bin_op, *__begin);
00159 ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
00160 }
00161 else
00162 {
00163 ::new(&(__sums[__iam]))
00164 _ValueType(__gnu_parallel::accumulate(
00165 __begin + __borders[__iam] + 1,
00166 __begin + __borders[__iam + 1],
00167 *(__begin + __borders[__iam]),
00168 __bin_op,
00169 __gnu_parallel::sequential_tag()));
00170 }
00171
00172 # pragma omp barrier
00173
00174 # pragma omp single
00175 __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
00176 __sums + 1, __bin_op, __sums[0]);
00177
00178 # pragma omp barrier
00179
00180
00181 __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
00182 __begin + __borders[__iam + 2],
00183 __result + __borders[__iam + 1],
00184 __bin_op, __sums[__iam]);
00185 }
00186
00187 ::operator delete(__sums);
00188 delete[] __borders;
00189
00190 return __result + __n;
00191 }
00192
00193
00194
00195
00196
00197
00198
00199 template<typename _IIter,
00200 typename _OutputIterator,
00201 typename _BinaryOperation>
00202 _OutputIterator
00203 __parallel_partial_sum(_IIter __begin, _IIter __end,
00204 _OutputIterator __result, _BinaryOperation __bin_op)
00205 {
00206 _GLIBCXX_CALL(__begin - __end)
00207
00208 typedef std::iterator_traits<_IIter> _TraitsType;
00209 typedef typename _TraitsType::value_type _ValueType;
00210 typedef typename _TraitsType::difference_type _DifferenceType;
00211
00212 _DifferenceType __n = __end - __begin;
00213
00214 switch (_Settings::get().partial_sum_algorithm)
00215 {
00216 case LINEAR:
00217
00218 return __parallel_partial_sum_linear(__begin, __end, __result,
00219 __bin_op, __n);
00220 default:
00221
00222 _GLIBCXX_PARALLEL_ASSERT(0);
00223 return __result + __n;
00224 }
00225 }
00226 }
00227
00228 #endif