Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifndef _GLIBCXX_PARALLEL_WORKSTEALING_H
00041 #define _GLIBCXX_PARALLEL_WORKSTEALING_H 1
00042
00043 #include <parallel/parallel.h>
00044 #include <parallel/random_number.h>
00045 #include <parallel/compatibility.h>
00046
00047 namespace __gnu_parallel
00048 {
00049 // Qualifier put on every _Job field; expands to `volatile` because the fields are read and written by several threads.
00050 #define _GLIBCXX_JOB_VOLATILE volatile
00051
00052 /// @brief Index range describing the work currently assigned to one thread.
00053 template<typename _DifferenceTp>
00054 struct _Job
00055 {
00056 typedef _DifferenceTp _DifferenceType;
00057
00058 /// @brief Index of the next element that has not been claimed yet.
00059 ///
00060 /// In the work-stealing loop of this file it is advanced by the owning thread
00061 /// (chunk by chunk) and by other threads when they steal from this job.
00062 _GLIBCXX_JOB_VOLATILE _DifferenceType _M_first;
00063
00064 /// @brief Index of the last element of the range (inclusive).
00065 ///
00066 /// In the work-stealing loop of this file it is written by the owning thread only.
00067 _GLIBCXX_JOB_VOLATILE _DifferenceType _M_last;
00068
00069 /// @brief Remaining element count as last recorded by the owner, i.e. @c _M_last - _M_first + 1.
00070 ///
00071 /// Written by the owning thread only; thieves compare it with the other two fields as a consistency check.
00072 _GLIBCXX_JOB_VOLATILE _DifferenceType _M_load;
00073 };
00074
00075 /// @brief Applies a functor to a random-access range in parallel, balancing load by work stealing.
00076 ///
00077 /// Every OpenMP thread starts with a contiguous share of the index range (the last
00078 /// thread also takes the remainder), processes it in chunks of
00079 /// _Settings::workstealing_chunk_size and, once its share is used up, takes over part
00080 /// of the remaining range of a randomly chosen thread. Thread-local partial results
00081 /// are finally folded into @c __output with @c __r.
00082 /// @param __begin Iterator to the first element.
00083 /// @param __end Iterator past the last element; determines the length when @c __bound < 0.
00084 /// @param __op User functor; passed to @c __f on every call and returned at the end.
00085 /// @param __f Invoked as @c __f(__op, it) for each element; its @c _M_finish_iterator
00086 /// member is set to @c __begin + length before returning.
00087 /// @param __r Binary reduction applied to two @c _Result values.
00088 /// @param __base Value @c __output is initialised with before any reduction.
00089 /// @param __output Receives the reduced result.
00090 /// @param __bound Number of elements to process; a negative value selects the whole range.
00091 /// @return @c __op (the by-value parameter).
00092 /// @note A thread that never processes an element contributes @c _Result() to the reduction.
00093 template<typename _RAIter,
00094 typename _Op,
00095 typename _Fu,
00096 typename _Red,
00097 typename _Result>
00098 _Op
00099 __for_each_template_random_access_workstealing(_RAIter __begin,
00100 _RAIter __end, _Op __op,
00101 _Fu& __f, _Red __r,
00102 _Result __base,
00103 _Result& __output,
00104 typename std::iterator_traits<_RAIter>::difference_type __bound)
00105 {
00106 _GLIBCXX_CALL(__end - __begin)
00107
00108 typedef std::iterator_traits<_RAIter> _TraitsType;
00109 typedef typename _TraitsType::difference_type _DifferenceType;
00110
00111 const _Settings& __s = _Settings::get();
00112 // Number of elements a thread claims from its own job per fetch-and-add.
00113 _DifferenceType __chunk_size =
00114 static_cast<_DifferenceType>(__s.workstealing_chunk_size);
00115
00116 // Number of elements to process: the whole range unless a non-negative bound is given.
00117 _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
00118
00119 // Distance, in _Job slots, between the jobs of consecutive threads; presumably padding against false sharing.
00120 const int __stride = (__s.cache_line_size * 10
00121 / sizeof(_Job<_DifferenceType>) + 1);
00122
00123 // Number of threads that currently hold unprocessed work.
00124 _ThreadIndex __busy = 0;
00125 // Job array; allocated inside the parallel region once the team size is known.
00126 _Job<_DifferenceType> *__job;
00127 // Lock serialising the final reduction into __output.
00128 omp_lock_t __output_lock;
00129 omp_init_lock(&__output_lock);
00130
00131 // Seed the output with the base value; thread-local results are reduced into it later.
00132 __output = __base;
00133
00134 // Request at most one thread per element, and at least one thread.
00135 _ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex>
00136 (1, __gnu_parallel::min<_DifferenceType>(__length,
00137 __get_max_threads()));
00138
00139 # pragma omp parallel shared(__busy) num_threads(__num_threads)
00140 {
00141 # pragma omp single
00142 {
00143 __num_threads = omp_get_num_threads(); // team size actually granted
00144
00145 // One strided job slot per thread of the team.
00146 __job = new _Job<_DifferenceType>[__num_threads * __stride];
00147 }
00148
00149 // --- Per-thread initialisation ---
00150 // (the implicit barrier that ends 'single' means __job is allocated by now)
00151 // True while this thread is counted in __busy.
00152 bool __iam_working = false;
00153
00154 // Index of this thread within the team.
00155 _ThreadIndex __iam = omp_get_thread_num();
00156
00157 // Job slot owned by this thread.
00158 _Job<_DifferenceType>& __my_job = __job[__iam * __stride];
00159
00160 // Index of the thread chosen to steal from.
00161 _ThreadIndex __victim;
00162
00163 // Thread-local partial reduction result.
00164 _Result __result = _Result();
00165
00166 // Number of elements to take from a victim in one steal.
00167 _DifferenceType __steal;
00168
00169 // Per-thread generator used to pick victims. NOTE(review): presumably yields
00170 // values in [0, __num_threads) -- confirm in parallel/random_number.h.
00171 _RandomNumber __rand_gen(__iam, __num_threads);
00172
00173 // Register this thread as busy.
00174 # pragma omp atomic
00175 ++__busy;
00176
00177 __iam_working = true;
00178
00179 // Initial split: __length / __num_threads elements each; the last thread also takes the remainder.
00180 __my_job._M_first = static_cast<_DifferenceType>
00181 (__iam * (__length / __num_threads));
00182
00183 __my_job._M_last = (__iam == (__num_threads - 1)
00184 ? (__length - 1)
00185 : ((__iam + 1) * (__length / __num_threads) - 1));
00186 __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
00187
00188 // Process the first element up front so __result starts from a real value instead of _Result().
00189 if (__my_job._M_first <= __my_job._M_last)
00190 {
00191 // Copy the volatile field into a plain local before using it in iterator arithmetic.
00192 _DifferenceType __my_first = __my_job._M_first;
00193 __result = __f(__op, __begin + __my_first);
00194 ++__my_job._M_first;
00195 --__my_job._M_load;
00196 }
00197
00198 _RAIter __current;
00199 // Wait until every thread has set up its job before the work/steal phase begins.
00200 # pragma omp barrier
00201
00202 // --- Work phase ---
00203 // Keep going until no thread holds work any more.
00204 while (__busy > 0)
00205 {
00206 // Refresh this thread's view of __busy.
00207 # pragma omp flush(__busy)
00208
00209 // Drain the own job chunk by chunk.
00210 while (__my_job._M_first <= __my_job._M_last)
00211 {
00212 // Claim the next chunk by advancing _M_first; the value returned by
00213 // __fetch_and_add is used as the first index of the claimed chunk.
00214 _DifferenceType __current_job =
00215 __fetch_and_add<_DifferenceType>(&(__my_job._M_first),
00216 __chunk_size);
00217
00218 // Recompute _M_load so first/last/load agree again
00219 // (_M_first may also have been advanced by a stealing thread).
00220 __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
00221 for (_DifferenceType __job_counter = 0;
00222 __job_counter < __chunk_size
00223 && __current_job <= __my_job._M_last;
00224 ++__job_counter)
00225 {
00226 // Still inside the range: take this element.
00227 __current = __begin + __current_job;
00228 ++__current_job;
00229
00230 // Fold the element's value into the thread-local result.
00231 __result = __r(__result, __f(__op, __current));
00232 }
00233
00234 # pragma omp flush(__busy)
00235 }
00236
00237 // Own job exhausted: stop counting this thread as busy (only once per working period).
00238 if (__iam_working)
00239 {
00240
00241 # pragma omp atomic
00242 --__busy;
00243
00244 __iam_working = false;
00245 }
00246 // Snapshot of the victim's job fields.
00247 _DifferenceType __supposed_first, __supposed_last,
00248 __supposed_load;
00249 do
00250 {
00251 // Pick a random victim and snapshot its job; retry while some thread is busy but the snapshot is empty or inconsistent.
00252 __yield();
00253 # pragma omp flush(__busy)
00254 __victim = __rand_gen();
00255 __supposed_first = __job[__victim * __stride]._M_first;
00256 __supposed_last = __job[__victim * __stride]._M_last;
00257 __supposed_load = __job[__victim * __stride]._M_load;
00258 }
00259 while (__busy > 0
00260 && ((__supposed_load <= 0)
00261 || ((__supposed_first + __supposed_load - 1)
00262 != __supposed_last)));
00263 // No thread has work left: leave the work phase.
00264 if (__busy == 0)
00265 break;
00266
00267 if (__supposed_load > 0)
00268 {
00269
00270 // Steal half of the victim's recorded load, but at least one element.
00271 __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
00272
00273 // Advance the victim's _M_first; the returned value is used as the start of the stolen range.
00274 _DifferenceType __stolen_first =
00275 __fetch_and_add<_DifferenceType>
00276 (&(__job[__victim * __stride]._M_first), __steal);
00277 _DifferenceType __stolen_try = (__stolen_first + __steal
00278 - _DifferenceType(1));
00279 // Adopt the stolen range as own job, clipped to the victim's snapshotted last index.
00280 __my_job._M_first = __stolen_first;
00281 __my_job._M_last = __gnu_parallel::min(__stolen_try,
00282 __supposed_last);
00283 __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
00284
00285 // This thread holds work again.
00286 # pragma omp atomic
00287 ++__busy;
00288 __iam_working = true;
00289
00290 # pragma omp flush(__busy)
00291 }
00292 # pragma omp flush(__busy)
00293 }
00294 // Merge the thread-local result into the shared output under the lock.
00295 omp_set_lock(&__output_lock);
00296 __output = __r(__output, __result);
00297 omp_unset_lock(&__output_lock);
00298 }
00299
00300 delete[] __job;
00301
00302
00303 // Record the end of the processed range in the functor.
00304 __f._M_finish_iterator = __begin + __length;
00305
00306 omp_destroy_lock(&__output_lock);
00307
00308 return __op;
00309 }
00310 }
00311
00312 #endif