Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_invoke.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_invoke_H
18 #define __TBB_parallel_invoke_H
19 
20 #include "task.h"
21 #include "tbb_profiling.h"
22 
23 #if __TBB_VARIADIC_PARALLEL_INVOKE
24  #include <utility> // std::forward
25 #endif
26 
27 namespace tbb {
28 
29 #if !__TBB_TASK_GROUP_CONTEXT
30 
31  struct task_group_context {
33  };
34 #endif /* __TBB_TASK_GROUP_CONTEXT */
35 
37 namespace internal {
38  // Simple task object, executing user method
39  template<typename function>
40  class function_invoker : public task{
41  public:
42  function_invoker(const function& _function) : my_function(_function) {}
43  private:
44  const function &my_function;
46  {
47  my_function();
48  return NULL;
49  }
50  };
51 
52  // The class spawns two or three child tasks
53  template <size_t N, typename function1, typename function2, typename function3>
54  class spawner : public task {
55  private:
56  const function1& my_func1;
57  const function2& my_func2;
58  const function3& my_func3;
60 
62  if(is_recycled){
63  return NULL;
64  }else{
65  __TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
66  set_ref_count(N);
68  internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
69  __TBB_ASSERT(invoker2, "Child task allocation failed");
70  spawn(*invoker2);
71  size_t n = N; // To prevent compiler warnings
72  if (n>2) {
73  internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
74  __TBB_ASSERT(invoker3, "Child task allocation failed");
75  spawn(*invoker3);
76  }
77  my_func1();
78  is_recycled = true;
79  return NULL;
80  }
81  } // execute
82 
83  public:
84  spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
85  };
86 
87  // Creates and spawns child tasks
89  public:
90  // Dummy functor class
92  public:
93  void operator() () const {}
94  };
95  // Creates a helper object with user-defined number of children expected
96  parallel_invoke_helper(int number_of_children)
97  {
98  set_ref_count(number_of_children + 1);
99  }
100 
101 #if __TBB_VARIADIC_PARALLEL_INVOKE
102  void add_children() {}
104 
105  template <typename function>
106  void add_children(function&& _func)
107  {
108  internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(std::forward<function>(_func));
109  __TBB_ASSERT(invoker, "Child task allocation failed");
110  spawn(*invoker);
111  }
112 
113  template<typename function>
114  void add_children(function&& _func, tbb::task_group_context&)
115  {
116  add_children(std::forward<function>(_func));
117  }
118 
119  // Adds child(ren) task(s) and spawns them
120  template <typename function1, typename function2, typename... function>
121  void add_children(function1&& _func1, function2&& _func2, function&&... _func)
122  {
123  // The third argument is dummy, it is ignored actually.
124  parallel_invoke_noop noop;
125  typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type;
126  spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward<function1>(_func1), std::forward<function2>(_func2), noop);
127  spawn(sub_root);
128  add_children(std::forward<function>(_func)...);
129  }
130 #else
131  // Adds child task and spawns it
132  template <typename function>
133  void add_children (const function &_func)
134  {
135  internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
136  __TBB_ASSERT(invoker, "Child task allocation failed");
137  spawn(*invoker);
138  }
139 
140  // Adds a task with multiple child tasks and spawns it
141  // two arguments
142  template <typename function1, typename function2>
143  void add_children (const function1& _func1, const function2& _func2)
144  {
145  // The third argument is dummy, it is ignored actually.
147  internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
148  spawn(sub_root);
149  }
150  // three arguments
151  template <typename function1, typename function2, typename function3>
152  void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
153  {
154  internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
155  spawn(sub_root);
156  }
157 #endif // __TBB_VARIADIC_PARALLEL_INVOKE
158 
159  // Waits for all child tasks
160  template <typename F0>
161  void run_and_finish(const F0& f0)
162  {
163  internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
164  __TBB_ASSERT(invoker, "Child task allocation failed");
165  spawn_and_wait_for_all(*invoker);
166  }
167  };
168  // The class destroys root if exception occurred as well as in normal case
169  class parallel_invoke_cleaner: internal::no_copy {
170  public:
171 #if __TBB_TASK_GROUP_CONTEXT
172  parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
173  : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
174 #else
175  parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
176  : root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
177 #endif /* !__TBB_TASK_GROUP_CONTEXT */
178  {}
179 
181  root.destroy(root);
182  }
183  internal::parallel_invoke_helper& root;
184  };
185 
186 #if __TBB_VARIADIC_PARALLEL_INVOKE
187 // Determine whether the last parameter in a pack is task_group_context
188  template<typename... T> struct impl_selector; // to workaround a GCC bug
189 
190  template<typename T1, typename... T> struct impl_selector<T1, T...> {
191  typedef typename impl_selector<T...>::type type;
192  };
193 
194  template<typename T> struct impl_selector<T> {
195  typedef false_type type;
196  };
197  template<> struct impl_selector<task_group_context&> {
198  typedef true_type type;
199  };
200 
201  // Select task_group_context parameter from the back of a pack
202  inline task_group_context& get_context( task_group_context& tgc ) { return tgc; }
203 
204  template<typename T1, typename... T>
205  task_group_context& get_context( T1&& /*ignored*/, T&&... t )
206  { return get_context( std::forward<T>(t)... ); }
207 
208  // task_group_context is known to be at the back of the parameter pack
209  template<typename F0, typename F1, typename... F>
210  void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) {
211  __TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?");
212  // # of child tasks: f0, f1, and a task for each two elements of the pack except the last
213  const size_t number_of_children = 2 + sizeof...(F)/2;
214  parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward<F>(f)...));
215  parallel_invoke_helper& root = cleaner.root;
216 
217  root.add_children(std::forward<F>(f)...);
218  root.add_children(std::forward<F1>(f1));
219  root.run_and_finish(std::forward<F0>(f0));
220  }
221 
222  // task_group_context is not in the pack, needs to be added
223  template<typename F0, typename F1, typename... F>
224  void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) {
225  tbb::task_group_context context(PARALLEL_INVOKE);
226  // Add context to the arguments, and redirect to the other overload
227  parallel_invoke_impl(true_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)..., context);
228  }
229 #endif
230 } // namespace internal
232 
236 
239 #if __TBB_VARIADIC_PARALLEL_INVOKE
240 
241 // parallel_invoke for two or more arguments via variadic templates
242 // presence of task_group_context is defined automatically
243 template<typename F0, typename F1, typename... F>
244 void parallel_invoke(F0&& f0, F1&& f1, F&&... f) {
245  typedef typename internal::impl_selector<internal::false_type, F...>::type selector_type;
246  internal::parallel_invoke_impl(selector_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)...);
247 }
248 
249 #else
250 
251 // parallel_invoke with user-defined context
252 // two arguments
253 template<typename F0, typename F1 >
254 void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
255  internal::parallel_invoke_cleaner cleaner(2, context);
256  internal::parallel_invoke_helper& root = cleaner.root;
257 
258  root.add_children(f1);
259 
260  root.run_and_finish(f0);
261 }
262 
263 // three arguments
264 template<typename F0, typename F1, typename F2 >
265 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
266  internal::parallel_invoke_cleaner cleaner(3, context);
267  internal::parallel_invoke_helper& root = cleaner.root;
268 
269  root.add_children(f2);
270  root.add_children(f1);
271 
272  root.run_and_finish(f0);
273 }
274 
275 // four arguments
276 template<typename F0, typename F1, typename F2, typename F3>
277 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
278  tbb::task_group_context& context)
279 {
280  internal::parallel_invoke_cleaner cleaner(4, context);
281  internal::parallel_invoke_helper& root = cleaner.root;
282 
283  root.add_children(f3);
284  root.add_children(f2);
285  root.add_children(f1);
286 
287  root.run_and_finish(f0);
288 }
289 
290 // five arguments
291 template<typename F0, typename F1, typename F2, typename F3, typename F4 >
292 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
293  tbb::task_group_context& context)
294 {
295  internal::parallel_invoke_cleaner cleaner(3, context);
296  internal::parallel_invoke_helper& root = cleaner.root;
297 
298  root.add_children(f4, f3);
299  root.add_children(f2, f1);
300 
301  root.run_and_finish(f0);
302 }
303 
304 // six arguments
305 template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
306 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
307  tbb::task_group_context& context)
308 {
309  internal::parallel_invoke_cleaner cleaner(3, context);
310  internal::parallel_invoke_helper& root = cleaner.root;
311 
312  root.add_children(f5, f4, f3);
313  root.add_children(f2, f1);
314 
315  root.run_and_finish(f0);
316 }
317 
318 // seven arguments
319 template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
320 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
321  const F5& f5, const F6& f6,
322  tbb::task_group_context& context)
323 {
324  internal::parallel_invoke_cleaner cleaner(3, context);
325  internal::parallel_invoke_helper& root = cleaner.root;
326 
327  root.add_children(f6, f5, f4);
328  root.add_children(f3, f2, f1);
329 
330  root.run_and_finish(f0);
331 }
332 
333 // eight arguments
334 template<typename F0, typename F1, typename F2, typename F3, typename F4,
335  typename F5, typename F6, typename F7>
336 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
337  const F5& f5, const F6& f6, const F7& f7,
338  tbb::task_group_context& context)
339 {
340  internal::parallel_invoke_cleaner cleaner(4, context);
341  internal::parallel_invoke_helper& root = cleaner.root;
342 
343  root.add_children(f7, f6, f5);
344  root.add_children(f4, f3);
345  root.add_children(f2, f1);
346 
347  root.run_and_finish(f0);
348 }
349 
350 // nine arguments
351 template<typename F0, typename F1, typename F2, typename F3, typename F4,
352  typename F5, typename F6, typename F7, typename F8>
353 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
354  const F5& f5, const F6& f6, const F7& f7, const F8& f8,
355  tbb::task_group_context& context)
356 {
357  internal::parallel_invoke_cleaner cleaner(4, context);
358  internal::parallel_invoke_helper& root = cleaner.root;
359 
360  root.add_children(f8, f7, f6);
361  root.add_children(f5, f4, f3);
362  root.add_children(f2, f1);
363 
364  root.run_and_finish(f0);
365 }
366 
367 // ten arguments
368 template<typename F0, typename F1, typename F2, typename F3, typename F4,
369  typename F5, typename F6, typename F7, typename F8, typename F9>
370 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
371  const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
372  tbb::task_group_context& context)
373 {
374  internal::parallel_invoke_cleaner cleaner(4, context);
375  internal::parallel_invoke_helper& root = cleaner.root;
376 
377  root.add_children(f9, f8, f7);
378  root.add_children(f6, f5, f4);
379  root.add_children(f3, f2, f1);
380 
381  root.run_and_finish(f0);
382 }
383 
384 // two arguments
385 template<typename F0, typename F1>
386 void parallel_invoke(const F0& f0, const F1& f1) {
387  task_group_context context(internal::PARALLEL_INVOKE);
388  parallel_invoke<F0, F1>(f0, f1, context);
389 }
390 // three arguments
391 template<typename F0, typename F1, typename F2>
392 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
393  task_group_context context(internal::PARALLEL_INVOKE);
394  parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
395 }
396 // four arguments
397 template<typename F0, typename F1, typename F2, typename F3 >
398 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
399  task_group_context context(internal::PARALLEL_INVOKE);
400  parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
401 }
402 // five arguments
403 template<typename F0, typename F1, typename F2, typename F3, typename F4>
404 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
405  task_group_context context(internal::PARALLEL_INVOKE);
406  parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
407 }
408 // six arguments
409 template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
410 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
411  task_group_context context(internal::PARALLEL_INVOKE);
412  parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
413 }
414 // seven arguments
415 template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
416 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
417  const F5& f5, const F6& f6)
418 {
419  task_group_context context(internal::PARALLEL_INVOKE);
420  parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
421 }
422 // eight arguments
423 template<typename F0, typename F1, typename F2, typename F3, typename F4,
424  typename F5, typename F6, typename F7>
425 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
426  const F5& f5, const F6& f6, const F7& f7)
427 {
428  task_group_context context(internal::PARALLEL_INVOKE);
429  parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
430 }
431 // nine arguments
432 template<typename F0, typename F1, typename F2, typename F3, typename F4,
433  typename F5, typename F6, typename F7, typename F8>
434 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
435  const F5& f5, const F6& f6, const F7& f7, const F8& f8)
436 {
437  task_group_context context(internal::PARALLEL_INVOKE);
438  parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
439 }
440 // ten arguments
441 template<typename F0, typename F1, typename F2, typename F3, typename F4,
442  typename F5, typename F6, typename F7, typename F8, typename F9>
443 void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
444  const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
445 {
446  task_group_context context(internal::PARALLEL_INVOKE);
447  parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
448 }
449 #endif // __TBB_VARIADIC_PARALLEL_INVOKE
450 
451 
452 } // namespace
453 
454 #endif /* __TBB_parallel_invoke_H */
task * execute() __TBB_override
Should be overridden by derived classes.
internal::parallel_invoke_helper & root
#define __TBB_override
Definition: tbb_stddef.h:240
bool_constant< false > false_type
Definition: tbb_stddef.h:469
task_group_context(kind_type relation_with_parent=bound, uintptr_t t=default_traits)
Default & binding constructor.
Definition: task.h:478
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context &context)
const function3 & my_func3
const function1 & my_func1
void spawn_and_wait_for_all(task &child)
Similar to spawn followed by wait_for_all, but more efficient.
Definition: task.h:770
void parallel_invoke(const F0 &f0, const F1 &f1, tbb::task_group_context &context)
Executes a list of tasks in parallel and waits for all tasks to complete.
void add_children(const function1 &_func1, const function2 &_func2, const function3 &_func3)
The graph class.
function_invoker(const function &_function)
tbb::empty_task: task that does nothing. Useful for synchronization.
Definition: task.h:964
tbb::task_group_context: Used to form groups of tasks.
Definition: task.h:332
#define __TBB_STATIC_ASSERT(condition, msg)
Definition: tbb_stddef.h:532
internal::allocate_child_proxy & allocate_child()
Returns proxy for overloaded new that allocates a child task of *this.
Definition: task.h:651
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
void add_children(const function &_func)
void add_children(const function1 &_func1, const function2 &_func2)
void set_ref_count(int count)
Set reference count.
Definition: task.h:731
parallel_invoke_helper(int number_of_children)
tbb::task: Base class for user-defined tasks.
Definition: task.h:589
spawner(const function1 &_func1, const function2 &_func2, const function3 &_func3)
task * execute() __TBB_override
Should be overridden by derived classes.
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
const function2 & my_func2
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type type
void recycle_as_safe_continuation()
Recommended to use, safe variant of recycle_as_continuation.
Definition: task.h:689
bool_constant< true > true_type
Definition: tbb_stddef.h:468

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.