Intel® OpenMP* Runtime Library
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include "kmp.h"
36 #include "kmp_i18n.h"
37 #include "kmp_itt.h"
38 #include "kmp_wait_release.h"
39 
40 #if OMPT_SUPPORT
41 #include "ompt-specific.h"
42 #endif
43 
44 
45 
46 /* ------------------------------------------------------------------------ */
47 /* ------------------------------------------------------------------------ */
48 
49 
50 /* forward declaration */
51 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
52 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
53 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
54 
55 #ifdef OMP_41_ENABLED
56 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
57 #endif
58 
59 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
60  if (!flag) return;
61  switch (((kmp_flag_64 *)flag)->get_type()) {
62  case flag32: __kmp_resume_32(gtid, NULL); break;
63  case flag64: __kmp_resume_64(gtid, NULL); break;
64  case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
65  }
66 }
67 
68 #ifdef BUILD_TIED_TASK_STACK
69 
70 //---------------------------------------------------------------------------
71 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
72 // from top to bottom
73 //
74 // gtid: global thread identifier for thread containing stack
75 // thread_data: thread data for task team thread containing stack
76 // threshold: value above which the trace statement triggers
77 // location: string identifying call site of this function (for trace)
78 
79 static void
80 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
81 {
82  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
83  kmp_taskdata_t **stack_top = task_stack -> ts_top;
84  kmp_int32 entries = task_stack -> ts_entries;
85  kmp_taskdata_t *tied_task;
86 
87  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
88  "first_block = %p, stack_top = %p \n",
89  location, gtid, entries, & task_stack->ts_first_block, stack_top ) );
90 
91  KMP_DEBUG_ASSERT( stack_top != NULL );
92  KMP_DEBUG_ASSERT( entries > 0 );
93 
94  while ( entries != 0 )
95  {
96  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
97  // fix up ts_top if we need to pop from previous block
98  if ( (entries & TASK_STACK_INDEX_MASK) == 0 )
99  {
100  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
101 
102  stack_block = stack_block -> sb_prev;
103  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
104  }
105 
106  // finish bookkeeping
107  stack_top--;
108  entries--;
109 
110  tied_task = * stack_top;
111 
112  KMP_DEBUG_ASSERT( tied_task != NULL );
113  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
114 
115  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
116  "stack_top=%p, tied_task=%p\n",
117  location, gtid, entries, stack_top, tied_task ) );
118  }
119  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
120 
121  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
122  location, gtid ) );
123 }
124 
125 //---------------------------------------------------------------------------
126 // __kmp_init_task_stack: initialize the task stack for the first time
127 // after a thread_data structure is created.
128 // It should not be necessary to do this again (assuming the stack works).
129 //
130 // gtid: global thread identifier of calling thread
131 // thread_data: thread data for task team thread containing stack
132 
133 static void
134 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
135 {
136  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
137  kmp_stack_block_t *first_block;
138 
139  // set up the first block of the stack
140  first_block = & task_stack -> ts_first_block;
141  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
142  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
143 
144  // initialize the stack to be empty
145  task_stack -> ts_entries = TASK_STACK_EMPTY;
146  first_block -> sb_next = NULL;
147  first_block -> sb_prev = NULL;
148 }
149 
150 
151 //---------------------------------------------------------------------------
152 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
153 //
154 // gtid: global thread identifier for calling thread
155 // thread_data: thread info for thread containing stack
156 
157 static void
158 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
159 {
160  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
161  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
162 
163  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
164  // free from the second block of the stack
165  while ( stack_block != NULL ) {
166  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
167 
168  stack_block -> sb_next = NULL;
169  stack_block -> sb_prev = NULL;
170  if (stack_block != & task_stack -> ts_first_block) {
171  __kmp_thread_free( __kmp_threads[ gtid ], stack_block ); // free the block, if not the first
172  }
173  stack_block = next_block;
174  }
175  // initialize the stack to be empty
176  task_stack -> ts_entries = 0;
177  task_stack -> ts_top = NULL;
178 }
179 
180 
181 //---------------------------------------------------------------------------
182 // __kmp_push_task_stack: Push the tied task onto the task stack.
183 // Grow the stack if necessary by allocating another block.
184 //
185 // gtid: global thread identifier for calling thread
186 // thread: thread info for thread containing stack
187 // tied_task: the task to push on the stack
188 
189 static void
190 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
191 {
192  // GEH - need to consider what to do if tt_threads_data not allocated yet
193  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
194  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
195  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
196 
197  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
198  return; // Don't push anything on stack if team or team tasks are serialized
199  }
200 
201  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
202  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
203 
204  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
205  gtid, thread, tied_task ) );
206  // Store entry
207  * (task_stack -> ts_top) = tied_task;
208 
209  // Do bookkeeping for next push
210  task_stack -> ts_top++;
211  task_stack -> ts_entries++;
212 
213  if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
214  {
215  // Find beginning of this task block
216  kmp_stack_block_t *stack_block =
217  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
218 
219  // Check if we already have a block
220  if ( stack_block -> sb_next != NULL )
221  { // reset ts_top to beginning of next block
222  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
223  }
224  else
225  { // Alloc new block and link it up
226  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
227  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
228 
229  task_stack -> ts_top = & new_block -> sb_block[0];
230  stack_block -> sb_next = new_block;
231  new_block -> sb_prev = stack_block;
232  new_block -> sb_next = NULL;
233 
234  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
235  gtid, tied_task, new_block ) );
236  }
237  }
238  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
239 }
240 
241 //---------------------------------------------------------------------------
242 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
243 // the task, just check to make sure it matches the ending task passed in.
244 //
245 // gtid: global thread identifier for the calling thread
246 // thread: thread info structure containing stack
247 // tied_task: the task popped off the stack
248 // ending_task: the task that is ending (should match popped task)
249 
250 static void
251 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
252 {
253  // GEH - need to consider what to do if tt_threads_data not allocated yet
254  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
255  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
256  kmp_taskdata_t *tied_task;
257 
258  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
259  return; // Don't pop anything from stack if team or team tasks are serialized
260  }
261 
262  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
263  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
264 
265  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
266 
267  // fix up ts_top if we need to pop from previous block
268  if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
269  {
270  kmp_stack_block_t *stack_block =
271  (kmp_stack_block_t *) (task_stack -> ts_top) ;
272 
273  stack_block = stack_block -> sb_prev;
274  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
275  }
276 
277  // finish bookkeeping
278  task_stack -> ts_top--;
279  task_stack -> ts_entries--;
280 
281  tied_task = * (task_stack -> ts_top );
282 
283  KMP_DEBUG_ASSERT( tied_task != NULL );
284  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
285  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
286 
287  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
288  return;
289 }
290 #endif /* BUILD_TIED_TASK_STACK */
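
/* Editorial sketch, not part of the library source: a minimal, self-contained model of the
   block-linked stack manipulated in the BUILD_TIED_TASK_STACK code above, assuming a
   hypothetical block size of 8 entries and plain calloc in place of __kmp_thread_calloc.
   It shows why (entries & INDEX_MASK) == 0 signals a block boundary and when a new block
   is linked in. All sketch_ names are illustrative. */
#include <stdlib.h>

#define SKETCH_STACK_BLOCK_SIZE 8
#define SKETCH_STACK_INDEX_MASK (SKETCH_STACK_BLOCK_SIZE - 1)

typedef struct sketch_stack_block {
    void                       *sb_block[SKETCH_STACK_BLOCK_SIZE]; /* entry slots          */
    struct sketch_stack_block  *sb_next;                           /* newer block, if any  */
    struct sketch_stack_block  *sb_prev;                           /* older block, if any  */
} sketch_stack_block_t;

typedef struct {
    sketch_stack_block_t ts_first_block;  /* first block lives inline, as in kmp_task_stack_t */
    void               **ts_top;          /* next free slot                                   */
    int                  ts_entries;      /* total entries currently pushed                   */
} sketch_task_stack_t;

static void sketch_stack_init( sketch_task_stack_t *s ) {
    s->ts_first_block.sb_next = s->ts_first_block.sb_prev = NULL;
    s->ts_top = & s->ts_first_block.sb_block[0];
    s->ts_entries = 0;
}

static void sketch_stack_push( sketch_task_stack_t *s, void *task ) {
    *(s->ts_top) = task;                       /* store entry */
    s->ts_top++;
    s->ts_entries++;
    if ( ( s->ts_entries & SKETCH_STACK_INDEX_MASK ) == 0 ) {
        /* ts_top just ran off the end of the current block */
        sketch_stack_block_t *blk = (sketch_stack_block_t *)( s->ts_top - SKETCH_STACK_BLOCK_SIZE );
        if ( blk->sb_next == NULL ) {          /* grow: allocate and link a new block */
            sketch_stack_block_t *nb = (sketch_stack_block_t *) calloc( 1, sizeof( *nb ) );
            nb->sb_prev = blk;
            blk->sb_next = nb;
        }
        s->ts_top = & blk->sb_next->sb_block[0];
    }
}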
291 
292 //---------------------------------------------------
293 // __kmp_push_task: Add a task to the thread's deque
294 
295 static kmp_int32
296 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
297 {
298  kmp_info_t * thread = __kmp_threads[ gtid ];
299  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
300  kmp_task_team_t * task_team = thread->th.th_task_team;
301  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
302  kmp_thread_data_t * thread_data;
303 
304  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
305 
306  // The first check avoids building task_team thread data if serialized
307  if ( taskdata->td_flags.task_serial ) {
308  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
309  gtid, taskdata ) );
310  return TASK_NOT_PUSHED;
311  }
312 
313  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
314  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
315  if ( ! KMP_TASKING_ENABLED(task_team) ) {
316  __kmp_enable_tasking( task_team, thread );
317  }
318  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
319  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
320 
321  // Find tasking deque specific to encountering thread
322  thread_data = & task_team -> tt.tt_threads_data[ tid ];
323 
324  // No lock needed since only owner can allocate
325  if (thread_data -> td.td_deque == NULL ) {
326  __kmp_alloc_task_deque( thread, thread_data );
327  }
328 
329  // Check if deque is full
330  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
331  {
332  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
333  gtid, taskdata ) );
334  return TASK_NOT_PUSHED;
335  }
336 
337  // Lock the deque for the task push operation
338  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
339 
340 #if OMP_41_ENABLED
341  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
342  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
343  {
344  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
345  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
346  gtid, taskdata ) );
347  return TASK_NOT_PUSHED;
348  }
349 #else
350  // Must have room since no thread other than the calling thread can add tasks
351  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
352 #endif
353 
354  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
355  // Wrap index.
356  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
357  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
358 
359  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
360 
361  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
362  "task=%p ntasks=%d head=%u tail=%u\n",
363  gtid, taskdata, thread_data->td.td_deque_ntasks,
364  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
365 
366  return TASK_SUCCESSFULLY_PUSHED;
367 }
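
/* Editorial sketch, not part of the library source: the deque push above in isolation.
   Assumes a hypothetical fixed capacity of 256 (a power of two, so wrapping the tail index
   reduces to masking) and uses a plain pthread mutex in place of the bootstrap lock; every
   sketch_ name below is illustrative. */
#include <pthread.h>

#define SKETCH_DEQUE_SIZE 256
#define SKETCH_DEQUE_MASK (SKETCH_DEQUE_SIZE - 1)

typedef struct {
    void           *tasks[SKETCH_DEQUE_SIZE];
    int             head;    /* steal end (not used by the owner's push) */
    int             tail;    /* owner's push end                         */
    int             ntasks;  /* current number of queued tasks           */
    pthread_mutex_t lock;
} sketch_deque_t;

static sketch_deque_t sketch_deque = { { 0 }, 0, 0, 0, PTHREAD_MUTEX_INITIALIZER };

/* Returns 1 on success, 0 if the deque is full; a full deque mirrors TASK_NOT_PUSHED,
   in which case the caller executes the task itself. */
static int sketch_deque_push( sketch_deque_t *d, void *task ) {
    if ( d->ntasks >= SKETCH_DEQUE_SIZE )
        return 0;                                  /* cheap unlocked check, as above */
    pthread_mutex_lock( &d->lock );
    if ( d->ntasks >= SKETCH_DEQUE_SIZE ) {        /* recheck under the lock */
        pthread_mutex_unlock( &d->lock );
        return 0;
    }
    d->tasks[d->tail] = task;                      /* push at the tail */
    d->tail = ( d->tail + 1 ) & SKETCH_DEQUE_MASK; /* wrap index by masking */
    d->ntasks++;
    pthread_mutex_unlock( &d->lock );
    return 1;
}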
368 
369 
370 //-----------------------------------------------------------------------------------------
371 // __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
372 // this_thr: thread structure to set current_task in.
373 
374 void
375 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
376 {
377  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
378  "curtask_parent=%p\n",
379  0, this_thr, this_thr -> th.th_current_task,
380  this_thr -> th.th_current_task -> td_parent ) );
381 
382  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
383 
384  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
385  "curtask_parent=%p\n",
386  0, this_thr, this_thr -> th.th_current_task,
387  this_thr -> th.th_current_task -> td_parent ) );
388 }
389 
390 
391 //---------------------------------------------------------------------------------------
392 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
393 // this_thr: thread structure to set up
394 // team: team for implicit task data
395 // tid: thread within team to set up
396 
397 void
398 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
399 {
400  // The thread's current task is the parent of the newly created implicit tasks of the new team
401  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
402  "parent_task=%p\n",
403  tid, this_thr, this_thr->th.th_current_task,
404  team->t.t_implicit_task_taskdata[tid].td_parent ) );
405 
406  KMP_DEBUG_ASSERT (this_thr != NULL);
407 
408  if( tid == 0 ) {
409  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
410  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
411  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
412  }
413  } else {
414  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
415  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
416  }
417 
418  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
419  "parent_task=%p\n",
420  tid, this_thr, this_thr->th.th_current_task,
421  team->t.t_implicit_task_taskdata[tid].td_parent ) );
422 }
423 
424 
425 //----------------------------------------------------------------------
426 // __kmp_task_start: bookkeeping for a task starting execution
427 // GTID: global thread id of calling thread
428 // task: task starting execution
429 // current_task: task suspending
430 
431 static void
432 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
433 {
434  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
435  kmp_info_t * thread = __kmp_threads[ gtid ];
436 
437  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
438  gtid, taskdata, current_task) );
439 
440  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
441 
442  // mark currently executing task as suspended
443  // TODO: GEH - make sure root team implicit task is initialized properly.
444  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
445  current_task -> td_flags.executing = 0;
446 
447  // Add task to stack if tied
448 #ifdef BUILD_TIED_TASK_STACK
449  if ( taskdata -> td_flags.tiedness == TASK_TIED )
450  {
451  __kmp_push_task_stack( gtid, thread, taskdata );
452  }
453 #endif /* BUILD_TIED_TASK_STACK */
454 
455  // mark starting task as executing and as current task
456  thread -> th.th_current_task = taskdata;
457 
458  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
459  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
460  taskdata -> td_flags.started = 1;
461  taskdata -> td_flags.executing = 1;
462  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
463  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
464 
465  // GEH TODO: shouldn't we pass some sort of location identifier here?
466  // APT: yes, we will pass location here.
467  // need to store current thread state (in a thread or taskdata structure)
468  // before setting work_state, otherwise wrong state is set after end of task
469 
470  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
471  gtid, taskdata ) );
472 
473 #if OMPT_SUPPORT
474  if ((ompt_status == ompt_status_track_callback) &&
475  ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
476  kmp_taskdata_t *parent = taskdata->td_parent;
477  ompt_callbacks.ompt_callback(ompt_event_task_begin)(
478  parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
479  parent ? &(parent->ompt_task_info.frame) : NULL,
480  taskdata->ompt_task_info.task_id,
481  taskdata->ompt_task_info.function);
482  }
483 #endif
484 
485  return;
486 }
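
/* Editorial sketch, not part of the library source: the td_flags lifecycle driven by
   __kmp_task_start above and __kmp_task_finish further below, collapsed into two helpers
   over a stripped-down flags struct. The field names mirror td_flags; the sketch_ names
   are hypothetical. */
typedef struct {
    unsigned started   : 1;  /* set once the task has begun execution         */
    unsigned executing : 1;  /* set while the task is the current task        */
    unsigned complete  : 1;  /* set when the task body has finished           */
    unsigned freed     : 1;  /* set when the taskdata block has been released */
} sketch_td_flags_t;

static void sketch_flags_on_start( sketch_td_flags_t *f ) {
    /* as in __kmp_task_start: the task becomes the current, executing task */
    f->started   = 1;
    f->executing = 1;
}

static void sketch_flags_on_finish( sketch_td_flags_t *f ) {
    /* as in __kmp_task_finish: completion is recorded, then execution is suspended */
    f->complete  = 1;
    f->executing = 0;
}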
487 
488 
489 //----------------------------------------------------------------------
490 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
491 // loc_ref: source location information; points to beginning of task block.
492 // gtid: global thread number.
493 // task: task thunk for the started task.
494 
495 void
496 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
497 {
498  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
499  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
500 
501  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
502  gtid, loc_ref, taskdata, current_task ) );
503 
504  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
505  __kmp_task_start( gtid, task, current_task );
506 
507  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
508  gtid, loc_ref, taskdata ) );
509 
510  return;
511 }
512 
513 #ifdef TASK_UNUSED
514 //----------------------------------------------------------------------
515 // __kmpc_omp_task_begin: report that a given task has started execution
516 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
517 
518 void
519 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
520 {
521  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
522 
523  KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
524  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
525 
526  __kmp_task_start( gtid, task, current_task );
527 
528  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
529  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
530 
531  return;
532 }
533 #endif // TASK_UNUSED
534 
535 
536 //-------------------------------------------------------------------------------------
537 // __kmp_free_task: free the current task space and the space for shareds
538 // gtid: Global thread ID of calling thread
539 // taskdata: task to free
540 // thread: thread data structure of caller
541 
542 static void
543 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
544 {
545  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
546  gtid, taskdata) );
547 
548  // Check to make sure all flags and counters have the correct values
549  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
550  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
551  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
552  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
553  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
554  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
555 
556  taskdata->td_flags.freed = 1;
557  // deallocate the taskdata and shared variable blocks associated with this task
558  #if USE_FAST_MEMORY
559  __kmp_fast_free( thread, taskdata );
560  #else /* ! USE_FAST_MEMORY */
561  __kmp_thread_free( thread, taskdata );
562  #endif
563 
564  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
565  gtid, taskdata) );
566 }
567 
568 //-------------------------------------------------------------------------------------
569 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
570 //
571 // gtid: Global thread ID of calling thread
572 // taskdata: task to free
573 // thread: thread data structure of caller
574 
575 static void
576 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
577 {
578  kmp_int32 children = 0;
579  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
580 
581  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
582 
583  if ( !team_or_tasking_serialized ) {
584  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
585  KMP_DEBUG_ASSERT( children >= 0 );
586  }
587 
588  // Now, go up the ancestor tree to see if any ancestors can now be freed.
589  while ( children == 0 )
590  {
591  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
592 
593  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
594  "and freeing itself\n", gtid, taskdata) );
595 
596  // --- Deallocate my ancestor task ---
597  __kmp_free_task( gtid, taskdata, thread );
598 
599  taskdata = parent_taskdata;
600 
601  // Stop checking ancestors at an implicit task or when tasking is serialized,
602  // rather than walking further up the ancestor tree, to avoid premature deallocation of ancestors.
603  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
604  return;
605 
606  if ( !team_or_tasking_serialized ) {
607  // Predecrement simulated by "- 1" calculation
608  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
609  KMP_DEBUG_ASSERT( children >= 0 );
610  }
611  }
612 
613  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
614  "not freeing it yet\n", gtid, taskdata, children) );
615 }
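
/* Editorial sketch, not part of the library source: the ancestor-freeing walk above reduced
   to a plain reference count. Each node's count starts at 1 (for the task itself) and is
   incremented per allocated child, matching td_allocated_child_tasks; the stop conditions
   for implicit tasks and serialized teams are omitted here, and the types and free() are
   illustrative stand-ins. */
#include <stdlib.h>

typedef struct sketch_task {
    struct sketch_task *parent;
    int                 allocated_children;  /* like td_allocated_child_tasks */
} sketch_task_t;

static void sketch_release_task_and_ancestors( sketch_task_t *t ) {
    while ( t != NULL && --t->allocated_children == 0 ) {
        sketch_task_t *parent = t->parent;
        free( t );          /* analogous to __kmp_free_task() */
        t = parent;         /* the parent may now be freeable as well */
    }
}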
616 
617 //---------------------------------------------------------------------
618 // __kmp_task_finish: bookkeeping to do when a task finishes execution
619 // gtid: global thread ID for calling thread
620 // task: task to be finished
621 // resumed_task: task to be resumed. (may be NULL if task is serialized)
622 
623 static void
624 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
625 {
626  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
627  kmp_info_t * thread = __kmp_threads[ gtid ];
628  kmp_int32 children = 0;
629 
630 #if OMPT_SUPPORT
631  if ((ompt_status == ompt_status_track_callback) &&
632  ompt_callbacks.ompt_callback(ompt_event_task_end)) {
633  kmp_taskdata_t *parent = taskdata->td_parent;
634  ompt_callbacks.ompt_callback(ompt_event_task_end)(
635  taskdata->ompt_task_info.task_id);
636  }
637 #endif
638 
639  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
640  gtid, taskdata, resumed_task) );
641 
642  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
643 
644  // Pop task from stack if tied
645 #ifdef BUILD_TIED_TASK_STACK
646  if ( taskdata -> td_flags.tiedness == TASK_TIED )
647  {
648  __kmp_pop_task_stack( gtid, thread, taskdata );
649  }
650 #endif /* BUILD_TIED_TASK_STACK */
651 
652  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
653  taskdata -> td_flags.complete = 1; // mark the task as completed
654  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
655  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
656 
657  // Only need to keep track of count if team parallel and tasking not serialized
658  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
659  // Predecrement simulated by "- 1" calculation
660  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
661  KMP_DEBUG_ASSERT( children >= 0 );
662 #if OMP_40_ENABLED
663  if ( taskdata->td_taskgroup )
664  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
665  __kmp_release_deps(gtid,taskdata);
666 #endif
667  }
668 
669  // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
670  // Otherwise, if a task is executed immediately from the release_deps code
671  // the flag will be reset to 1 again by this same function
672  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
673  taskdata -> td_flags.executing = 0; // suspend the finishing task
674 
675  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
676  gtid, taskdata, children) );
677 
678 #if OMP_40_ENABLED
679  /* If the task's destructor thunk flag has been set, we need to invoke the
680  destructor thunk that has been generated by the compiler.
681  The code is placed here, since at this point other tasks might have been released
682  hence overlapping the destructor invocations with some other work in the
683  released tasks. The OpenMP spec is not specific on when the destructors are
684  invoked, so we should be free to choose.
685  */
686  if (taskdata->td_flags.destructors_thunk) {
687  kmp_routine_entry_t destr_thunk = task->destructors;
688  KMP_ASSERT(destr_thunk);
689  destr_thunk(gtid, task);
690  }
691 #endif // OMP_40_ENABLED
692 
693  // bookkeeping for resuming task:
694  // GEH - note tasking_ser => task_serial
695  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
696  taskdata->td_flags.task_serial);
697  if ( taskdata->td_flags.task_serial )
698  {
699  if (resumed_task == NULL) {
700  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
701  }
702  else {
703  // verify resumed task passed in points to parent
704  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
705  }
706  }
707  else {
708  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as argument
709  }
710 
711  // Free this task and then ancestor tasks if they have no children.
712  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
713 
714  // FIXME johnmc: It looks like this statement should be before the last one so if an
715  // asynchronous inquiry peers into the runtime system it doesn't see the freed
716  // task as the current task
717  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
718 
719  // TODO: GEH - make sure root team implicit task is initialized properly.
720  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
721  resumed_task->td_flags.executing = 1; // resume previous task
722 
723  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
724  gtid, taskdata, resumed_task) );
725 
726  return;
727 }
728 
729 //---------------------------------------------------------------------
730 // __kmpc_omp_task_complete_if0: report that a task has completed execution
731 // loc_ref: source location information; points to end of task block.
732 // gtid: global thread number.
733 // task: task thunk for the completed task.
734 
735 void
736 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
737 {
738  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
739  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
740 
741  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
742 
743  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
744  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
745 
746  return;
747 }
748 
749 #ifdef TASK_UNUSED
750 //---------------------------------------------------------------------
751 // __kmpc_omp_task_complete: report that a task has completed execution
752 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
753 
754 void
755 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
756 {
757  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
758  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
759 
760  __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
761 
762  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
763  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
764  return;
765 }
766 #endif // TASK_UNUSED
767 
768 #if OMPT_SUPPORT
769 //----------------------------------------------------------------------------------------------------
770 // __kmp_task_init_ompt:
771 // Initialize OMPT fields maintained by a task. Since the serial task is initialized before
772 // ompt_initialize is called, we do not yet know whether OMPT will be used when the serial task
773 // is initialized. This function provides the support needed to initialize OMPT for the serial
774 // task after the fact.
775 
776 void
777 __kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
778 {
779  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
780  task->ompt_task_info.function = NULL;
781  task->ompt_task_info.frame.exit_runtime_frame = NULL;
782  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
783 /* task->ompt_task_info.frame = (ompt_frame_t) {
784  .exit_runtime_frame = NULL,
785  .reenter_runtime_frame = NULL
786  };*/
787 }
788 #endif
789 
790 
791 //----------------------------------------------------------------------------------------------------
792 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
793 //
794 // loc_ref: reference to source location of parallel region
795 // this_thr: thread data structure corresponding to implicit task
796 // team: team for this_thr
797 // tid: thread id of given thread within team
798 // set_curr_task: TRUE if need to push current task to thread
799 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
800 // TODO: Get better loc_ref. Value passed in may be NULL
801 
802 void
803 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
804 {
805  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
806 
807  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
808  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
809 
810  task->td_task_id = KMP_GEN_TASK_ID();
811  task->td_team = team;
812 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
813  task->td_ident = loc_ref;
814  task->td_taskwait_ident = NULL;
815  task->td_taskwait_counter = 0;
816  task->td_taskwait_thread = 0;
817 
818  task->td_flags.tiedness = TASK_TIED;
819  task->td_flags.tasktype = TASK_IMPLICIT;
820 #if OMP_41_ENABLED
821  task->td_flags.proxy = TASK_FULL;
822 #endif
823 
824  // All implicit tasks are executed immediately, not deferred
825  task->td_flags.task_serial = 1;
826  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
827  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
828 
829  task->td_flags.started = 1;
830  task->td_flags.executing = 1;
831  task->td_flags.complete = 0;
832  task->td_flags.freed = 0;
833 
834 #if OMP_40_ENABLED
835  task->td_dephash = NULL;
836  task->td_depnode = NULL;
837 #endif
838 
839  if (set_curr_task) { // only do this initialization the first time a thread is created
840  task->td_incomplete_child_tasks = 0;
841  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
842 #if OMP_40_ENABLED
843  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
844 #endif
845  __kmp_push_current_task_to_thread( this_thr, team, tid );
846  } else {
847  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
848  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
849  }
850 
851 #if OMPT_SUPPORT
852  __kmp_task_init_ompt(task, tid);
853 #endif
854 
855  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
856  tid, team, task ) );
857 }
858 
859 // Round up a size to a multiple of val, which must be a power of two
860 // Used to insert padding between structures co-allocated using a single malloc() call
861 static size_t
862 __kmp_round_up_to_val( size_t size, size_t val ) {
863  if ( size & ( val - 1 ) ) {
864  size &= ~ ( val - 1 );
865  if ( size <= KMP_SIZE_T_MAX - val ) {
866  size += val; // Round up if there is no overflow.
867  }; // if
868  }; // if
869  return size;
870 } // __kmp_round_up_to_val
871 
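
/* Editorial sketch, not part of the library source: a few worked cases for the rounding
   helper above, assuming val is a power of two as its callers guarantee; the overflow guard
   is omitted here. sketch_round_up and sketch_round_up_examples are hypothetical names. */
#include <assert.h>
#include <stddef.h>

static size_t sketch_round_up( size_t size, size_t val ) {   /* same masking logic as above */
    if ( size & ( val - 1 ) ) {
        size &= ~( val - 1 );
        size += val;
    }
    return size;
}

static void sketch_round_up_examples( void ) {
    assert( sketch_round_up( 13, 8 ) == 16 );   /* 13 is not 8-aligned: round up to 16 */
    assert( sketch_round_up( 16, 8 ) == 16 );   /* already aligned: unchanged          */
    assert( sketch_round_up( 17, 8 ) == 24 );   /* next multiple of 8 above 17         */
}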
872 
873 //---------------------------------------------------------------------------------
874 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
875 //
876 // loc_ref: source location information
877 // gtid: global thread number.
878 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
879 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
880 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
881 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
882 // task_entry: Pointer to task code entry point generated by compiler.
883 // returns: a pointer to the allocated kmp_task_t structure (task).
884 
885 kmp_task_t *
886 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
887  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
888  kmp_routine_entry_t task_entry )
889 {
890  kmp_task_t *task;
891  kmp_taskdata_t *taskdata;
892  kmp_info_t *thread = __kmp_threads[ gtid ];
893  kmp_team_t *team = thread->th.th_team;
894  kmp_taskdata_t *parent_task = thread->th.th_current_task;
895  size_t shareds_offset;
896 
897  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
898  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
899  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
900  sizeof_shareds, task_entry) );
901 
902  if ( parent_task->td_flags.final ) {
903  if (flags->merged_if0) {
904  }
905  flags->final = 1;
906  }
907 
908 #if OMP_41_ENABLED
909  if ( flags->proxy == TASK_PROXY ) {
910  flags->tiedness = TASK_UNTIED;
911  flags->merged_if0 = 1;
912 
913  /* are we running in a serialized parallel region or in tskm_immediate_exec mode... we need tasking support enabled */
914  if ( (thread->th.th_task_team) == NULL ) {
915  /* This should only happen if the team is serialized:
916  set up a task team and propagate it to the thread
917  */
918  KMP_DEBUG_ASSERT(team->t.t_serialized);
919  KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
920  __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads
921  thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
922  }
923  kmp_task_team_t * task_team = thread->th.th_task_team;
924 
925  /* tasking must be enabled now as the task might not be pushed */
926  if ( !KMP_TASKING_ENABLED( task_team ) ) {
927  KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
928  __kmp_enable_tasking( task_team, thread );
929  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
930  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
931  // No lock needed since only owner can allocate
932  if (thread_data -> td.td_deque == NULL ) {
933  __kmp_alloc_task_deque( thread, thread_data );
934  }
935  }
936 
937  if ( task_team->tt.tt_found_proxy_tasks == FALSE )
938  TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
939  }
940 #endif
941 
942  // Calculate shared structure offset including padding after kmp_task_t struct
943  // to align pointers in shared struct
944  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
945  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
946 
947  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
948  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
949  gtid, shareds_offset) );
950  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
951  gtid, sizeof_shareds) );
952 
953  // Avoid double allocation here by combining shareds with taskdata
954  #if USE_FAST_MEMORY
955  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
956  #else /* ! USE_FAST_MEMORY */
957  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
958  #endif /* USE_FAST_MEMORY */
959 
960  task = KMP_TASKDATA_TO_TASK(taskdata);
961 
962  // Make sure task & taskdata are aligned appropriately
963 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
964  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
965  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
966 #else
967  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
968  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
969 #endif
970  if (sizeof_shareds > 0) {
971  // Avoid double allocation here by combining shareds with taskdata
972  task->shareds = & ((char *) taskdata)[ shareds_offset ];
973  // Make sure shareds struct is aligned to pointer size
974  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
975  } else {
976  task->shareds = NULL;
977  }
978  task->routine = task_entry;
979  task->part_id = 0; // AC: Always start with 0 part id
980 
981  taskdata->td_task_id = KMP_GEN_TASK_ID();
982  taskdata->td_team = team;
983  taskdata->td_alloc_thread = thread;
984  taskdata->td_parent = parent_task;
985  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
986  taskdata->td_ident = loc_ref;
987  taskdata->td_taskwait_ident = NULL;
988  taskdata->td_taskwait_counter = 0;
989  taskdata->td_taskwait_thread = 0;
990  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
991 #if OMP_41_ENABLED
992  // avoid copying icvs for proxy tasks
993  if ( flags->proxy == TASK_FULL )
994 #endif
995  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
996 
997  taskdata->td_flags.tiedness = flags->tiedness;
998  taskdata->td_flags.final = flags->final;
999  taskdata->td_flags.merged_if0 = flags->merged_if0;
1000 #if OMP_40_ENABLED
1001  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
1002 #endif // OMP_40_ENABLED
1003 #if OMP_41_ENABLED
1004  taskdata->td_flags.proxy = flags->proxy;
1005 #endif
1006  taskdata->td_flags.tasktype = TASK_EXPLICIT;
1007 
1008  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1009  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
1010 
1011  // GEH - TODO: fix this to copy parent task's value of team_serial flag
1012  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
1013 
1014  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
1015  // tasks are not left until program termination to execute. Also, it helps locality to execute
1016  // immediately.
1017 
1018  taskdata->td_flags.task_serial = ( parent_task->td_flags.final
1019  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
1020 
1021  taskdata->td_flags.started = 0;
1022  taskdata->td_flags.executing = 0;
1023  taskdata->td_flags.complete = 0;
1024  taskdata->td_flags.freed = 0;
1025 
1026  taskdata->td_flags.native = flags->native;
1027 
1028  taskdata->td_incomplete_child_tasks = 0;
1029  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
1030 #if OMP_40_ENABLED
1031  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
1032  taskdata->td_dephash = NULL;
1033  taskdata->td_depnode = NULL;
1034 #endif
1035 
1036  // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
1037 #if OMP_41_ENABLED
1038  if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1039 #else
1040  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1041 #endif
1042  {
1043  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1044 #if OMP_40_ENABLED
1045  if ( parent_task->td_taskgroup )
1046  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1047 #endif
1048  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
1049  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1050  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1051  }
1052  }
1053 
1054  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1055  gtid, taskdata, taskdata->td_parent) );
1056 
1057 #if OMPT_SUPPORT
1058  if (ompt_status & ompt_status_track) {
1059  taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1060  taskdata->ompt_task_info.function = (void*) task_entry;
1061  taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
1062  taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
1063 /* taskdata->ompt_task_info.frame = (ompt_frame_t)
1064  { .exit_runtime_frame = NULL, .reenter_runtime_frame = NULL };*/
1065  }
1066 #endif
1067 
1068  return task;
1069 }
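
/* Editorial sketch, not part of the library source: the single-allocation layout built by
   __kmp_task_alloc above, with the sizes passed in as plain parameters and malloc standing
   in for __kmp_fast_allocate / __kmp_thread_malloc. One block holds the task descriptor,
   then the kmp_task_t plus its trailing private variables, then (pointer-aligned) the
   shareds pointer block. sketch_task_layout is a hypothetical name. */
#include <stdio.h>
#include <stdlib.h>

static void sketch_task_layout( size_t sizeof_taskdata, size_t sizeof_kmp_task_t,
                                size_t sizeof_shareds ) {
    size_t shareds_offset = sizeof_taskdata + sizeof_kmp_task_t;
    if ( shareds_offset & ( sizeof( void * ) - 1 ) )   /* round up, as __kmp_round_up_to_val does */
        shareds_offset = ( shareds_offset & ~( sizeof( void * ) - 1 ) ) + sizeof( void * );

    char *block    = (char *) malloc( shareds_offset + sizeof_shareds );
    void *taskdata = block;                             /* descriptor at the front             */
    void *task     = block + sizeof_taskdata;           /* what KMP_TASKDATA_TO_TASK computes  */
    void *shareds  = sizeof_shareds ? block + shareds_offset : NULL;

    printf( "taskdata=%p task=%p shareds=%p total=%zu\n",
            taskdata, task, shareds, (size_t)( shareds_offset + sizeof_shareds ) );
    free( block );
}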
1070 
1071 
1072 kmp_task_t *
1073 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1074  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1075  kmp_routine_entry_t task_entry )
1076 {
1077  kmp_task_t *retval;
1078  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1079 
1080  input_flags->native = FALSE;
1081  // __kmp_task_alloc() sets up all other runtime flags
1082 
1083 #if OMP_41_ENABLED
1084  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1085  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1086  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1087  input_flags->proxy ? "proxy" : "",
1088  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1089 #else
1090  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1091  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1092  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1093  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1094 #endif
1095 
1096  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1097  sizeof_shareds, task_entry );
1098 
1099  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1100 
1101  return retval;
1102 }
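
/* Editorial sketch, not part of the library source: roughly the calling sequence a compiler
   emits for "#pragma omp task" against the entry points in this file, written out by hand.
   my_task_shareds_t, my_task_entry and spawn_one_task are hypothetical; the flag value 1
   (tied) is an assumption based on the kmp_tasking_flags_t bitfield layout, and the
   declarations used here come from kmp.h, which this file already includes. */
typedef struct { int *counter_ptr; } my_task_shareds_t;        /* hypothetical shareds block */

static kmp_int32 my_task_entry( kmp_int32 gtid, void *ptask ) {
    kmp_task_t *task = (kmp_task_t *) ptask;
    my_task_shareds_t *sh = (my_task_shareds_t *) task->shareds;
    *sh->counter_ptr += 1;                                     /* the outlined task body */
    return 0;
}

static void spawn_one_task( ident_t *loc, int *counter ) {
    kmp_int32 gtid = __kmpc_global_thread_num( loc );
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied */,
                                           sizeof( kmp_task_t ), sizeof( my_task_shareds_t ),
                                           my_task_entry );
    ((my_task_shareds_t *) t->shareds)->counter_ptr = counter; /* fill in the shareds block */
    __kmpc_omp_task( loc, gtid, t );                           /* queue it, or run it immediately */
}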
1103 
1104 //-----------------------------------------------------------
1105 // __kmp_invoke_task: invoke the specified task
1106 //
1107 // gtid: global thread ID of caller
1108 // task: the task to invoke
1109 // current_task: the task to resume after task invocation
1110 
1111 static void
1112 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1113 {
1114  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1115 #if OMP_40_ENABLED
1116  int discard = 0 /* false */;
1117 #endif
1118  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1119  gtid, taskdata, current_task) );
1120  KMP_DEBUG_ASSERT(task);
1121 #if OMP_41_ENABLED
1122  if ( taskdata->td_flags.proxy == TASK_PROXY &&
1123  taskdata->td_flags.complete == 1)
1124  {
1125  // This is a proxy task that was already completed but it needs to run
1126  // its bottom-half finish
1127  KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1128  gtid, taskdata) );
1129 
1130  __kmp_bottom_half_finish_proxy(gtid,task);
1131 
1132  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1133 
1134  return;
1135  }
1136 #endif
1137 
1138 #if OMP_41_ENABLED
1139  // Proxy tasks are not handled by the runtime
1140  if ( taskdata->td_flags.proxy != TASK_PROXY )
1141 #endif
1142  __kmp_task_start( gtid, task, current_task );
1143 
1144 #if OMPT_SUPPORT
1145  ompt_thread_info_t oldInfo;
1146  kmp_info_t * thread;
1147  if (ompt_status & ompt_status_track) {
1148  // Store the threads states and restore them after the task
1149  thread = __kmp_threads[ gtid ];
1150  oldInfo = thread->th.ompt_thread_info;
1151  thread->th.ompt_thread_info.wait_id = 0;
1152  thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1153  taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1154  }
1155 #endif
1156 
1157 #if OMP_40_ENABLED
1158  // TODO: cancel tasks if the parallel region has also been cancelled
1159  // TODO: check if this sequence can be hoisted above __kmp_task_start
1160  // if cancellation has been enabled for this run ...
1161  if (__kmp_omp_cancellation) {
1162  kmp_info_t *this_thr = __kmp_threads [ gtid ];
1163  kmp_team_t * this_team = this_thr->th.th_team;
1164  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1165  if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1166  // this task belongs to a task group and we need to cancel it
1167  discard = 1 /* true */;
1168  }
1169  }
1170 
1171  //
1172  // Invoke the task routine and pass in relevant data.
1173  // Thunks generated by gcc take a different argument list.
1174  //
1175  if (!discard) {
1176 #endif // OMP_40_ENABLED
1177 #ifdef KMP_GOMP_COMPAT
1178  if (taskdata->td_flags.native) {
1179  ((void (*)(void *))(*(task->routine)))(task->shareds);
1180  }
1181  else
1182 #endif /* KMP_GOMP_COMPAT */
1183  {
1184  (*(task->routine))(gtid, task);
1185  }
1186 #if OMP_40_ENABLED
1187  }
1188 #endif // OMP_40_ENABLED
1189 
1190 
1191 #if OMPT_SUPPORT
1192  if (ompt_status & ompt_status_track) {
1193  thread->th.ompt_thread_info = oldInfo;
1194  taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1195  }
1196 #endif
1197 
1198 #if OMP_41_ENABLED
1199  // Proxy tasks are not handled by the runtime
1200  if ( taskdata->td_flags.proxy != TASK_PROXY )
1201 #endif
1202  __kmp_task_finish( gtid, task, current_task );
1203 
1204  KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1205  gtid, taskdata, current_task) );
1206  return;
1207 }
1208 
1209 //-----------------------------------------------------------------------
1210 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1211 //
1212 // loc_ref: location of original task pragma (ignored)
1213 // gtid: Global Thread ID of encountering thread
1214 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1215 // Returns:
1216 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1217 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1218 
1219 kmp_int32
1220 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1221 {
1222  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1223 
1224  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1225  gtid, loc_ref, new_taskdata ) );
1226 
1227  /* Should we execute the new task or queue it? For now, let's just always try to
1228  queue it. If the queue fills up, then we'll execute it. */
1229 
1230  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1231  { // Execute this task immediately
1232  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1233  new_taskdata->td_flags.task_serial = 1;
1234  __kmp_invoke_task( gtid, new_task, current_task );
1235  }
1236 
1237  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1238  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1239  new_taskdata ) );
1240 
1241  return TASK_CURRENT_NOT_QUEUED;
1242 }
1243 
1244 //---------------------------------------------------------------------
1245 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
1246 // gtid: Global Thread ID of encountering thread
1247 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1248 // serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
1249 // returns:
1250 //
1251 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1252 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1253 kmp_int32
1254 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1255 {
1256  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1257 
1258 #if OMPT_SUPPORT
1259  if (ompt_status & ompt_status_track) {
1260  new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1261  __builtin_frame_address(0);
1262  }
1263 #endif
1264 
1265  /* Should we execute the new task or queue it? For now, let's just always try to
1266  queue it. If the queue fills up, then we'll execute it. */
1267 #if OMP_41_ENABLED
1268  if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1269 #else
1270  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1271 #endif
1272  { // Execute this task immediately
1273  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1274  if ( serialize_immediate )
1275  new_taskdata -> td_flags.task_serial = 1;
1276  __kmp_invoke_task( gtid, new_task, current_task );
1277  }
1278 
1279 #if OMPT_SUPPORT
1280  if (ompt_status & ompt_status_track) {
1281  new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1282  }
1283 #endif
1284 
1285  return TASK_CURRENT_NOT_QUEUED;
1286 }
1287 
1288 //---------------------------------------------------------------------
1289 // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
1290 // the parent thread only!
1291 // loc_ref: location of original task pragma (ignored)
1292 // gtid: Global Thread ID of encountering thread
1293 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1294 // returns:
1295 //
1296 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1297 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1298 
1299 kmp_int32
1300 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1301 {
1302  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1303  kmp_int32 res;
1304 
1305  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1306  gtid, loc_ref, new_taskdata ) );
1307 
1308  res = __kmp_omp_task(gtid,new_task,true);
1309 
1310  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1311  gtid, loc_ref, new_taskdata ) );
1312  return res;
1313 }
1314 
1315 //-------------------------------------------------------------------------------------
1316 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1317 
1318 kmp_int32
1319 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1320 {
1321  kmp_taskdata_t * taskdata;
1322  kmp_info_t * thread;
1323  int thread_finished = FALSE;
1324 
1325  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1326  gtid, loc_ref) );
1327 
1328  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1329  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1330  thread = __kmp_threads[ gtid ];
1331  taskdata = thread -> th.th_current_task;
1332 #if USE_ITT_BUILD
1333  // Note: These values are used by ITT events as well.
1334 #endif /* USE_ITT_BUILD */
1335  taskdata->td_taskwait_counter += 1;
1336  taskdata->td_taskwait_ident = loc_ref;
1337  taskdata->td_taskwait_thread = gtid + 1;
1338 
1339 #if USE_ITT_BUILD
1340  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1341  if ( itt_sync_obj != NULL )
1342  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1343 #endif /* USE_ITT_BUILD */
1344 
1345 #if OMP_41_ENABLED
1346  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1347 #else
1348  if ( ! taskdata->td_flags.team_serial )
1349 #endif
1350  {
1351  // GEH: if team serialized, avoid reading the volatile variable below.
1352  kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1353  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1354  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1355  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1356  }
1357  }
1358 #if USE_ITT_BUILD
1359  if ( itt_sync_obj != NULL )
1360  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1361 #endif /* USE_ITT_BUILD */
1362 
1363  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1364  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1365  }
1366 
1367  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1368  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1369 
1370  return TASK_CURRENT_NOT_QUEUED;
1371 }
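
/* Editorial sketch, not part of the library source: user-level OpenMP code whose taskwait is
   lowered onto the entry point above. The encountering task blocks in __kmpc_omp_taskwait,
   executing queued tasks until its td_incomplete_child_tasks count drops to zero.
   sketch_taskwait_sum is hypothetical and only illustrates the mapping noted in its comments. */
static int sketch_taskwait_sum( void ) {
    int x = 0, y = 0, sum = 0;
    #pragma omp parallel shared(x, y, sum)
    #pragma omp single
    {
        #pragma omp task shared(x)   /* lowered to __kmpc_omp_task_alloc + __kmpc_omp_task */
        x = 1;
        #pragma omp task shared(y)
        y = 2;
        #pragma omp taskwait         /* lowered to __kmpc_omp_taskwait(loc, gtid) */
        sum = x + y;                 /* safe: both child tasks have completed */
    }
    return sum;
}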
1372 
1373 
1374 //-------------------------------------------------
1375 // __kmpc_omp_taskyield: switch to a different task
1376 
1377 kmp_int32
1378 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1379 {
1380  kmp_taskdata_t * taskdata;
1381  kmp_info_t * thread;
1382  int thread_finished = FALSE;
1383 
1384  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1385  gtid, loc_ref, end_part) );
1386 
1387  if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1388  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1389 
1390  thread = __kmp_threads[ gtid ];
1391  taskdata = thread -> th.th_current_task;
1392  // Should we model this as a task wait or not?
1393 #if USE_ITT_BUILD
1394  // Note: These values are used by ITT events as well.
1395 #endif /* USE_ITT_BUILD */
1396  taskdata->td_taskwait_counter += 1;
1397  taskdata->td_taskwait_ident = loc_ref;
1398  taskdata->td_taskwait_thread = gtid + 1;
1399 
1400 #if USE_ITT_BUILD
1401  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1402  if ( itt_sync_obj != NULL )
1403  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1404 #endif /* USE_ITT_BUILD */
1405  if ( ! taskdata->td_flags.team_serial ) {
1406  kmp_task_team_t * task_team = thread->th.th_task_team;
1407  if (task_team != NULL) {
1408  if (KMP_TASKING_ENABLED(task_team)) {
1409  __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1410  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1411  }
1412  }
1413  }
1414 #if USE_ITT_BUILD
1415  if ( itt_sync_obj != NULL )
1416  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1417 #endif /* USE_ITT_BUILD */
1418 
1419  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1420  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1421  }
1422 
1423  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1424  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1425 
1426  return TASK_CURRENT_NOT_QUEUED;
1427 }
1428 
1429 
1430 #if OMP_40_ENABLED
1431 //-------------------------------------------------------------------------------------
1432 // __kmpc_taskgroup: Start a new taskgroup
1433 
1434 void
1435 __kmpc_taskgroup( ident_t* loc, int gtid )
1436 {
1437  kmp_info_t * thread = __kmp_threads[ gtid ];
1438  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1439  kmp_taskgroup_t * tg_new =
1440  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1441  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1442  tg_new->count = 0;
1443  tg_new->cancel_request = cancel_noreq;
1444  tg_new->parent = taskdata->td_taskgroup;
1445  taskdata->td_taskgroup = tg_new;
1446 }
1447 
1448 
1449 //-------------------------------------------------------------------------------------
1450 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1451 // and its descendants are complete
1452 
1453 void
1454 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1455 {
1456  kmp_info_t * thread = __kmp_threads[ gtid ];
1457  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1458  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1459  int thread_finished = FALSE;
1460 
1461  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1462  KMP_DEBUG_ASSERT( taskgroup != NULL );
1463 
1464  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1465 #if USE_ITT_BUILD
1466  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1467  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1468  if ( itt_sync_obj != NULL )
1469  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1470 #endif /* USE_ITT_BUILD */
1471 
1472 #if OMP_41_ENABLED
1473  if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1474 #else
1475  if ( ! taskdata->td_flags.team_serial )
1476 #endif
1477  {
1478  kmp_flag_32 flag(&(taskgroup->count), 0U);
1479  while ( TCR_4(taskgroup->count) != 0 ) {
1480  flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1481  USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1482  }
1483  }
1484 
1485 #if USE_ITT_BUILD
1486  if ( itt_sync_obj != NULL )
1487  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1488 #endif /* USE_ITT_BUILD */
1489  }
1490  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1491 
1492  // Restore parent taskgroup for the current task
1493  taskdata->td_taskgroup = taskgroup->parent;
1494  __kmp_thread_free( thread, taskgroup );
1495 
1496  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1497 }
1498 #endif
1499 
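/* Minimal model of the taskgroup bookkeeping above (an assumption used only
   for illustration): each taskgroup pushes a record holding a live-task count
   onto a per-task parent chain, and the matching end waits for the count to
   drain and then pops the record, mirroring the push in __kmpc_taskgroup and
   the pop in __kmpc_end_taskgroup. The my_taskgroup_t type is hypothetical. */
#include <stdlib.h>

typedef struct my_taskgroup {
    volatile int          count;    /* tasks still registered with this group */
    struct my_taskgroup * parent;   /* enclosing taskgroup, or NULL            */
} my_taskgroup_t;

static my_taskgroup_t *
taskgroup_begin( my_taskgroup_t *current )
{
    my_taskgroup_t *tg = (my_taskgroup_t *) malloc( sizeof( *tg ) );
    tg->count  = 0;
    tg->parent = current;           /* nest inside the enclosing group */
    return tg;                      /* caller makes this the current group */
}

static my_taskgroup_t *
taskgroup_end( my_taskgroup_t *current )
{
    /* a real runtime helps execute tasks here until current->count reaches 0 */
    my_taskgroup_t *parent = current->parent;
    free( current );
    return parent;                  /* restore the enclosing group */
}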
1500 
1501 //------------------------------------------------------
1502 // __kmp_remove_my_task: remove a task from my own deque
1503 
1504 static kmp_task_t *
1505 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1506  kmp_int32 is_constrained )
1507 {
1508  kmp_task_t * task;
1509  kmp_taskdata_t * taskdata;
1510  kmp_thread_data_t *thread_data;
1511  kmp_uint32 tail;
1512 
1513  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1514  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1515 
1516  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1517 
1518  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1519  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1520  thread_data->td.td_deque_tail) );
1521 
1522  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1523  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1524  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1525  thread_data->td.td_deque_tail) );
1526  return NULL;
1527  }
1528 
1529  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1530 
1531  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1532  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1533  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1534  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1535  thread_data->td.td_deque_tail) );
1536  return NULL;
1537  }
1538 
1539  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1540  taskdata = thread_data -> td.td_deque[ tail ];
1541 
1542  if (is_constrained) {
1543  // we need to check if the candidate obeys the task scheduling constraint:
1544  // only a child of the current task can be scheduled
1545  kmp_taskdata_t * current = thread->th.th_current_task;
1546  kmp_int32 level = current->td_level;
1547  kmp_taskdata_t * parent = taskdata->td_parent;
1548  while ( parent != current && parent->td_level > level ) {
1549  parent = parent->td_parent; // check generation up to the level of the current task
1550  KMP_DEBUG_ASSERT(parent != NULL);
1551  }
1552  if ( parent != current ) {
1553  // If the tail task is not a child, then no other children can appear in the deque.
1554  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1555  KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1556  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1557  thread_data->td.td_deque_tail) );
1558  return NULL;
1559  }
1560  }
1561 
1562  thread_data -> td.td_deque_tail = tail;
1563  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1564 
1565  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1566 
1567  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1568  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1569  thread_data->td.td_deque_tail) );
1570 
1571  task = KMP_TASKDATA_TO_TASK( taskdata );
1572  return task;
1573 }
1574 
1575 
1576 //-----------------------------------------------------------
1577 // __kmp_steal_task: remove a task from another thread's deque
1578 // Assumes that the calling thread has already checked the existence of the
1579 // task_team thread_data before calling this routine.
1580 
1581 static kmp_task_t *
1582 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1583  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1584  kmp_int32 is_constrained
1585  )
1586 {
1587  kmp_task_t * task;
1588  kmp_taskdata_t * taskdata;
1589  kmp_thread_data_t *victim_td, *threads_data;
1590  kmp_int32 victim_tid, thread_tid;
1591 
1592  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1593 
1594  threads_data = task_team -> tt.tt_threads_data;
1595  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1596 
1597  victim_tid = victim->th.th_info.ds.ds_tid;
1598  victim_td = & threads_data[ victim_tid ];
1599 
1600  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1601  "head=%u tail=%u\n",
1602  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1603  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1604 
1605  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1606  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1607  {
1608  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1609  "ntasks=%d head=%u tail=%u\n",
1610  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1611  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1612  return NULL;
1613  }
1614 
1615  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1616 
1617  // Check again after we acquire the lock
1618  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1619  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1620  {
1621  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1622  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1623  "ntasks=%d head=%u tail=%u\n",
1624  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1625  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1626  return NULL;
1627  }
1628 
1629  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1630 
1631  if ( !is_constrained ) {
1632  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1633  // Bump head pointer and Wrap.
1634  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1635  } else {
1636  // While we have postponed tasks, steal from the tail of the deque (smaller tasks)
1637  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1638  taskdata = victim_td -> td.td_deque[ tail ];
1639  // we need to check if the candidate obeys the task scheduling constraint:
1640  // only a child of the current task can be scheduled
1641  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1642  kmp_int32 level = current->td_level;
1643  kmp_taskdata_t * parent = taskdata->td_parent;
1644  while ( parent != current && parent->td_level > level ) {
1645  parent = parent->td_parent; // check generation up to the level of the current task
1646  KMP_DEBUG_ASSERT(parent != NULL);
1647  }
1648  if ( parent != current ) {
1649  // If the tail task is not a child, then no other children can appear in the deque (?).
1650  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1651  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
1652  "ntasks=%d head=%u tail=%u\n",
1653  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1654  task_team, victim_td->td.td_deque_ntasks,
1655  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1656  return NULL;
1657  }
1658  victim_td -> td.td_deque_tail = tail;
1659  }
1660  if (*thread_finished) {
1661  // We need to un-mark this victim as a finished victim. This must be done before
1662  // releasing the lock, or else other threads (starting with the master victim)
1663  // might be prematurely released from the barrier!!!
1664  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1665 
1666  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1667  gtid, count + 1, task_team) );
1668 
1669  *thread_finished = FALSE;
1670  }
1671  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1672 
1673 
1674  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1675 
1676  KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
1677  "ntasks=%d head=%u tail=%u\n",
1678  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1679  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1680  victim_td->td.td_deque_tail) );
1681 
1682  task = KMP_TASKDATA_TO_TASK( taskdata );
1683  return task;
1684 }
1685 
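/* Simplified model of the deque discipline used by the two routines above
   (illustration only, assuming a power-of-two capacity like TASK_DEQUE_SIZE):
   the owning thread pushes and pops at the tail, so it runs its newest tasks
   first, while a thief normally removes from the head, taking the oldest work
   (the constrained path above steals from the tail instead). Indices wrap with
   a mask the way TASK_DEQUE_MASK is used here, and a single lock guards both
   ends. model_deque_t is a hypothetical name. */
#include <pthread.h>

#define MODEL_DEQUE_SIZE 256u                    /* must be a power of two */
#define MODEL_DEQUE_MASK ( MODEL_DEQUE_SIZE - 1u )

typedef struct {
    void           * slots[ MODEL_DEQUE_SIZE ];
    unsigned         head, tail, ntasks;
    pthread_mutex_t  lock;                       /* init with pthread_mutex_init() */
} model_deque_t;

static void *
model_pop_tail( model_deque_t *d )               /* owner: LIFO end */
{
    void *task = NULL;
    pthread_mutex_lock( &d->lock );
    if ( d->ntasks != 0 ) {
        d->tail = ( d->tail - 1 ) & MODEL_DEQUE_MASK;   /* wrap index */
        task = d->slots[ d->tail ];
        d->ntasks--;
    }
    pthread_mutex_unlock( &d->lock );
    return task;
}

static void *
model_steal_head( model_deque_t *d )             /* thief: FIFO end */
{
    void *task = NULL;
    pthread_mutex_lock( &d->lock );
    if ( d->ntasks != 0 ) {
        task = d->slots[ d->head ];
        d->head = ( d->head + 1 ) & MODEL_DEQUE_MASK;   /* wrap index */
        d->ntasks--;
    }
    pthread_mutex_unlock( &d->lock );
    return task;
}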
1686 
1687 //-----------------------------------------------------------------------------
1688 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition
1689 // is satisfied (return true) or there are none left (return false).
1690 // final_spin is TRUE if this is the spin at the release barrier.
1691 // thread_finished indicates whether the thread is finished executing all
1692 // the tasks it has on its deque, and is at the release barrier.
1693 // spinner is the location on which to spin.
1694 // spinner == NULL means only execute a single task and return.
1695 // checker is the value to check to terminate the spin.
1696 template <class C>
1697 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1698  int *thread_finished
1699  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1700 {
1701  kmp_task_team_t * task_team;
1702  kmp_team_t * team;
1703  kmp_thread_data_t * threads_data;
1704  kmp_task_t * task;
1705  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1706  volatile kmp_uint32 * unfinished_threads;
1707  kmp_int32 nthreads, last_stolen, k, tid;
1708 
1709  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1710  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1711 
1712  task_team = thread -> th.th_task_team;
1713  KMP_DEBUG_ASSERT( task_team != NULL );
1714 
1715  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1716  gtid, final_spin, *thread_finished) );
1717 
1718  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1719  KMP_DEBUG_ASSERT( threads_data != NULL );
1720 
1721  nthreads = task_team -> tt.tt_nproc;
1722  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1723 #if OMP_41_ENABLED
1724  KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1725 #else
1726  KMP_DEBUG_ASSERT( nthreads > 1 );
1727 #endif
1728  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1729 
1730  // Choose tasks from our own work queue.
1731  start:
1732  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1733 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1734  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1735  if ( itt_sync_obj == NULL ) {
1736  // we are at fork barrier where we could not get the object reliably
1737  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1738  }
1739  __kmp_itt_task_starting( itt_sync_obj );
1740  }
1741 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1742  __kmp_invoke_task( gtid, task, current_task );
1743 #if USE_ITT_BUILD
1744  if ( itt_sync_obj != NULL )
1745  __kmp_itt_task_finished( itt_sync_obj );
1746 #endif /* USE_ITT_BUILD */
1747 
1748  // If this thread is only partway through the barrier and the condition
1749  // is met, then return now, so that the barrier gather/release pattern can proceed.
1750  // If this thread is in the last spin loop in the barrier, waiting to be
1751  // released, we know that the termination condition will not be satisfied,
1752  // so don't waste any cycles checking it.
1753  if (flag == NULL || (!final_spin && flag->done_check())) {
1754  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1755  return TRUE;
1756  }
1757  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1758  }
1759 
1760  // This thread's work queue is empty. If we are in the final spin loop
1761  // of the barrier, check and see if the termination condition is satisfied.
1762 #if OMP_41_ENABLED
1763  // The work queue may be empty but there might be proxy tasks still executing
1764  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1765 #else
1766  if (final_spin)
1767 #endif
1768  {
1769  // First, decrement the #unfinished threads, if that has not already
1770  // been done. This decrement might be to the spin location, and
1771  // result in the termination condition being satisfied.
1772  if (! *thread_finished) {
1773  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1774  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1775  gtid, count, task_team) );
1776  *thread_finished = TRUE;
1777  }
1778 
1779  // It is now unsafe to reference thread->th.th_team !!!
1780  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1781  // thread to pass through the barrier, where it might reset each thread's
1782  // th.th_team field for the next parallel region.
1783  // If we can steal more work, we know that this has not happened yet.
1784  if (flag != NULL && flag->done_check()) {
1785  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1786  return TRUE;
1787  }
1788  }
1789 
1790 #if OMP_41_ENABLED
1791  // check if there are other threads to steal from, otherwise go back
1792  if ( nthreads == 1 )
1793  goto start;
1794 #endif
1795 
1796  // Try to steal from the last place I stole from successfully.
1797  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1798  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1799 
1800  if (last_stolen != -1) {
1801  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1802 
1803  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1804  thread_finished, is_constrained
1805  )) != NULL)
1806  {
1807 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1808  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1809  if ( itt_sync_obj == NULL ) {
1810  // we are at fork barrier where we could not get the object reliably
1811  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1812  }
1813  __kmp_itt_task_starting( itt_sync_obj );
1814  }
1815 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1816  __kmp_invoke_task( gtid, task, current_task );
1817 #if USE_ITT_BUILD
1818  if ( itt_sync_obj != NULL )
1819  __kmp_itt_task_finished( itt_sync_obj );
1820 #endif /* USE_ITT_BUILD */
1821 
1822  // Check to see if this thread can proceed.
1823  if (flag == NULL || (!final_spin && flag->done_check())) {
1824  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1825  gtid) );
1826  return TRUE;
1827  }
1828 
1829  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1830  // If the execution of the stolen task resulted in more tasks being
1831  // placed on our run queue, then restart the whole process.
1832  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1833  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1834  gtid) );
1835  goto start;
1836  }
1837  }
1838 
1839  // Don't give priority to stealing from this thread anymore.
1840  threads_data[ tid ].td.td_deque_last_stolen = -1;
1841 
1842  // The victim's work queue is empty. If we are in the final spin loop
1843  // of the barrier, check and see if the termination condition is satisfied.
1844 #if OMP_41_ENABLED
1845  // The work queue may be empty but there might be proxy tasks still executing
1846  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1847 #else
1848  if (final_spin)
1849 #endif
1850  {
1851  // First, decrement the #unfinished threads, if that has not already
1852  // been done. This decrement might be to the spin location, and
1853  // result in the termination condition being satisfied.
1854  if (! *thread_finished) {
1855  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1856  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1857  "task_team=%p\n", gtid, count, task_team) );
1858  *thread_finished = TRUE;
1859  }
1860 
1861  // If __kmp_tasking_mode != tskm_immediate_exec
1862  // then it is now unsafe to reference thread->th.th_team !!!
1863  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1864  // thread to pass through the barrier, where it might reset each thread's
1865  // th.th_team field for the next parallel region.
1866  // If we can steal more work, we know that this has not happened yet.
1867  if (flag != NULL && flag->done_check()) {
1868  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1869  gtid) );
1870  return TRUE;
1871  }
1872  }
1873  }
1874 
1875  // Find a different thread to steal work from. Pick a random thread.
1876  // My initial plan was to cycle through all the threads, and only return
1877  // if we tried to steal from every thread, and failed. Arch says that's
1878  // not such a great idea.
1879  // GEH - need yield code in this loop for throughput library mode?
1880  new_victim:
1881  k = __kmp_get_random( thread ) % (nthreads - 1);
1882  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1883  ++k; // Adjusts random distribution to exclude self
1884  }
1885  {
1886  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1887  int first;
1888 
1889  // There is a slight chance that __kmp_enable_tasking() did not wake up
1890  // all threads waiting at the barrier. If this thread is sleeping, then
1891  // wake it up. Since we were going to pay the cache miss penalty
1892  // for referencing another thread's kmp_info_t struct anyway, the check
1893  // shouldn't cost too much performance at this point.
1894  // In extra barrier mode, tasks do not sleep at the separate tasking
1895  // barrier, so this isn't a problem.
1896  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1897  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1898  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1899  {
1900  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1901  // A sleeping thread should not have any tasks on its queue.
1902  // There is a slight possibility that it resumes, steals a task from
1903  // another thread, which spawns more tasks, all in the time that it takes
1904  // this thread to check => don't write an assertion that the victim's
1905  // queue is empty. Try stealing from a different thread.
1906  goto new_victim;
1907  }
1908 
1909  // Now try to steal work from the selected thread
1910  first = TRUE;
1911  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1912  thread_finished, is_constrained
1913  )) != NULL)
1914  {
1915 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1916  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1917  if ( itt_sync_obj == NULL ) {
1918  // we are at fork barrier where we could not get the object reliably
1919  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1920  }
1921  __kmp_itt_task_starting( itt_sync_obj );
1922  }
1923 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1924  __kmp_invoke_task( gtid, task, current_task );
1925 #if USE_ITT_BUILD
1926  if ( itt_sync_obj != NULL )
1927  __kmp_itt_task_finished( itt_sync_obj );
1928 #endif /* USE_ITT_BUILD */
1929 
1930  // Try stealing from this victim again, in the future.
1931  if (first) {
1932  threads_data[ tid ].td.td_deque_last_stolen = k;
1933  first = FALSE;
1934  }
1935 
1936  // Check to see if this thread can proceed.
1937  if (flag == NULL || (!final_spin && flag->done_check())) {
1938  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1939  gtid) );
1940  return TRUE;
1941  }
1942  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1943 
1944  // If the execution of the stolen task resulted in more tasks being
1945  // placed on our run queue, then restart the whole process.
1946  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1947  KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1948  gtid) );
1949  goto start;
1950  }
1951  }
1952 
1953  // The victim's work queue is empty. If we are in the final spin loop
1954  // of the barrier, check and see if the termination condition is satisfied.
1955  // Going on and finding a new victim to steal from is expensive, as it
1956  // involves a lot of cache misses, so we definitely want to re-check the
1957  // termination condition before doing that.
1958 #if OMP_41_ENABLED
1959  // The work queue may be empty but there might be proxy tasks still executing
1960  if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1961 #else
1962  if (final_spin)
1963 #endif
1964  {
1965  // First, decrement the #unfinished threads, if that has not already
1966  // been done. This decrement might be to the spin location, and
1967  // result in the termination condition being satisfied.
1968  if (! *thread_finished) {
1969  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1970  KA_TRACE(20, ("__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1971  "task_team=%p\n",
1972  gtid, count, task_team) );
1973  *thread_finished = TRUE;
1974  }
1975 
1976  // If __kmp_tasking_mode != tskm_immediate_exec,
1977  // then it is now unsafe to reference thread->th.th_team !!!
1978  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1979  // thread to pass through the barrier, where it might reset each thread's
1980  // th.th_team field for the next parallel region.
1981  // If we can steal more work, we know that this has not happened yet.
1982  if (flag != NULL && flag->done_check()) {
1983  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1984  return TRUE;
1985  }
1986  }
1987  }
1988 
1989  KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
1990  return FALSE;
1991 }
1992 
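/* Self-contained illustration of the victim-selection trick used inside the
   template above: draw from the nthreads-1 *other* thread ids by sampling
   0..nthreads-2 and shifting values at or above our own tid up by one, so a
   thread never picks itself. pick_victim is a hypothetical name; the runtime
   uses its own per-thread generator via __kmp_get_random(). */
#include <assert.h>
#include <stdlib.h>

static int
pick_victim( int my_tid, int nthreads )          /* requires nthreads > 1 */
{
    int k = rand() % ( nthreads - 1 );
    if ( k >= my_tid )
        ++k;                                     /* skip over our own id */
    assert( k != my_tid && k >= 0 && k < nthreads );
    return k;
}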
1993 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1994  int *thread_finished
1995  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1996 {
1997  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1998  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1999 }
2000 
2001 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
2002  int *thread_finished
2003  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2004 {
2005  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2006  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2007 }
2008 
2009 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
2010  int *thread_finished
2011  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
2012 {
2013  return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
2014  USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2015 }
2016 
2017 //-----------------------------------------------------------------------------
2018 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
2019 // next barrier so they can assist in executing enqueued tasks.
2020 // First thread in allocates the task team atomically.
2021 
2022 static void
2023 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2024 {
2025  kmp_team_t *team = this_thr->th.th_team;
2026  kmp_thread_data_t *threads_data;
2027  int nthreads, i, is_init_thread;
2028 
2029  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
2030  __kmp_gtid_from_thread( this_thr ) ) );
2031 
2032  KMP_DEBUG_ASSERT(task_team != NULL);
2033  KMP_DEBUG_ASSERT(team != NULL);
2034 
2035  nthreads = task_team->tt.tt_nproc;
2036  KMP_DEBUG_ASSERT(nthreads > 0);
2037  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
2038 
2039  // Allocate or increase the size of threads_data if necessary
2040  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2041 
2042  if (!is_init_thread) {
2043  // Some other thread already set up the array.
2044  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2045  __kmp_gtid_from_thread( this_thr ) ) );
2046  return;
2047  }
2048  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2049  KMP_DEBUG_ASSERT( threads_data != NULL );
2050 
2051  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2052  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2053  {
2054  // Release any threads sleeping at the barrier, so that they can steal
2055  // tasks and execute them. In extra barrier mode, tasks do not sleep
2056  // at the separate tasking barrier, so this isn't a problem.
2057  for (i = 0; i < nthreads; i++) {
2058  volatile void *sleep_loc;
2059  kmp_info_t *thread = threads_data[i].td.td_thr;
2060 
2061  if (i == this_thr->th.th_info.ds.ds_tid) {
2062  continue;
2063  }
2064  // Since we haven't locked the thread's suspend mutex lock at this
2065  // point, there is a small window where a thread might be putting
2066  // itself to sleep, but hasn't set the th_sleep_loc field yet.
2067  // To work around this, __kmp_execute_tasks_template() periodically checks
2068  // to see if other threads are sleeping (using the same random
2069  // mechanism that is used for task stealing) and awakens them if
2070  // they are.
2071  if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
2072  {
2073  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2074  __kmp_gtid_from_thread( this_thr ),
2075  __kmp_gtid_from_thread( thread ) ) );
2076  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2077  }
2078  else {
2079  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2080  __kmp_gtid_from_thread( this_thr ),
2081  __kmp_gtid_from_thread( thread ) ) );
2082  }
2083  }
2084  }
2085 
2086  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
2087  __kmp_gtid_from_thread( this_thr ) ) );
2088 }
2089 
2090 
2091 /* ------------------------------------------------------------------------ */
2092 /* // TODO: Check the comment consistency
2093  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
2094  * like a shadow of the kmp_team_t data struct, with a different lifetime.
2095  * After a child thread checks into a barrier and calls __kmp_release() from
2096  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
2097  * longer assume that the kmp_team_t structure is intact (at any moment, the
2098  * master thread may exit the barrier code and free the team data structure,
2099  * and return the threads to the thread pool).
2100  *
2101  * This does not work with the tasking code, as the thread is still
2102  * expected to participate in the execution of any tasks that may have been
2103  * spawned by a member of the team, and the thread still needs access to
2104  * each thread in the team, so that it can steal work from it.
2105  *
2106  * Enter the existence of the kmp_task_team_t struct. It employs a reference
2107  * counting mechanism, and is allocated by the master thread before calling
2108  * __kmp_<barrier_kind>_release, and then is released by the last thread to
2109  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
2110  * of the kmp_task_team_t structs for consecutive barriers can overlap
2111  * (and will, unless the master thread is the last thread to exit the barrier
2112  * release phase, which is not typical).
2113  *
2114  * The existence of such a struct is useful outside the context of tasking,
2115  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
2116  * so that any performance differences show up when comparing the 2.5 vs. 3.0
2117  * libraries.
2118  *
2119  * We currently use the existence of the threads array as an indicator that
2120  * tasks were spawned since the last barrier. If the structure is to be
2121  * useful outside the context of tasking, then this will have to change, but
2122  * not setting the field minimizes the performance impact of tasking on
2123  * barriers, when no explicit tasks were spawned (pushed, actually).
2124  */
2125 
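/* Minimal sketch of the reference-counting release described above (an
   assumption for illustration, not the library's code): each worker drops its
   reference on the way out of the barrier release phase, and whichever thread
   performs the decrement that reaches zero frees the structure. The real
   runtime does this in __kmp_unref_task_team() with KMP_TEST_THEN_DEC32.
   model_task_team_t and model_unref_task_team are hypothetical names. */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct model_task_team {
    atomic_int ref_ct;                   /* worker threads still holding a reference */
    /* ... task bookkeeping would live here ... */
} model_task_team_t;

static void
model_unref_task_team( model_task_team_t *tt )
{
    /* fetch_sub returns the previous value; 1 means we held the last reference */
    if ( atomic_fetch_sub_explicit( &tt->ref_ct, 1, memory_order_acq_rel ) == 1 ) {
        free( tt );                      /* last thread out releases the struct */
    }
}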
2126 
2127 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
2128 // Lock for task team data structures
2129 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
2130 
2131 
2132 //------------------------------------------------------------------------------
2133 // __kmp_alloc_task_deque:
2134 // Allocates a task deque for a particular thread, and initializes the necessary
2135 // data structures relating to the deque. This only happens once per thread
2136 // per task team since task teams are recycled.
2137 // No lock is needed during allocation since each thread allocates its own
2138 // deque.
2139 
2140 static void
2141 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2142 {
2143  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2144  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2145 
2146  // Initialize last stolen task field to "none"
2147  thread_data -> td.td_deque_last_stolen = -1;
2148 
2149  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2150  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2151  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2152 
2153  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2154  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2155  // Allocate space for task deque, and zero the deque
2156  // Cannot use __kmp_thread_calloc() because threads not around for
2157  // kmp_reap_task_team( ).
2158  thread_data -> td.td_deque = (kmp_taskdata_t **)
2159  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2160 }
2161 
2162 
2163 //------------------------------------------------------------------------------
2164 // __kmp_free_task_deque:
2165 // Deallocates a task deque for a particular thread.
2166 // Happens at library deallocation so don't need to reset all thread data fields.
2167 
2168 static void
2169 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2170 {
2171  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2172 
2173  if ( thread_data -> td.td_deque != NULL ) {
2174  TCW_4(thread_data -> td.td_deque_ntasks, 0);
2175  __kmp_free( thread_data -> td.td_deque );
2176  thread_data -> td.td_deque = NULL;
2177  }
2178  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2179 
2180 #ifdef BUILD_TIED_TASK_STACK
2181  // GEH: Figure out what to do here for td_susp_tied_tasks
2182  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2183  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2184  }
2185 #endif // BUILD_TIED_TASK_STACK
2186 }
2187 
2188 
2189 //------------------------------------------------------------------------------
2190 // __kmp_realloc_task_threads_data:
2191 // Allocates a threads_data array for a task team, either by allocating an initial
2192 // array or enlarging an existing array. Only the first thread to get the lock
2193 // allocates or enlarges the array and re-initializes the array elements.
2194 // That thread returns "TRUE", the rest return "FALSE".
2195 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
2196 // The current size is given by task_team -> tt.tt_max_threads.
2197 
2198 static int
2199 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2200 {
2201  kmp_thread_data_t ** threads_data_p;
2202  kmp_int32 nthreads, maxthreads;
2203  int is_init_thread = FALSE;
2204 
2205  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2206  // Already reallocated and initialized.
2207  return FALSE;
2208  }
2209 
2210  threads_data_p = & task_team -> tt.tt_threads_data;
2211  nthreads = task_team -> tt.tt_nproc;
2212  maxthreads = task_team -> tt.tt_max_threads;
2213 
2214  // All threads must lock when they encounter the first task of the implicit task
2215  // region to make sure threads_data fields are (re)initialized before used.
2216  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2217 
2218  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2219  // first thread to enable tasking
2220  kmp_team_t *team = thread -> th.th_team;
2221  int i;
2222 
2223  is_init_thread = TRUE;
2224  if ( maxthreads < nthreads ) {
2225 
2226  if ( *threads_data_p != NULL ) {
2227  kmp_thread_data_t *old_data = *threads_data_p;
2228  kmp_thread_data_t *new_data = NULL;
2229 
2230  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
2231  "threads data for task_team %p, new_size = %d, old_size = %d\n",
2232  __kmp_gtid_from_thread( thread ), task_team,
2233  nthreads, maxthreads ) );
2234  // Reallocate threads_data to have more elements than current array
2235  // Cannot use __kmp_thread_realloc() because threads not around for
2236  // kmp_reap_task_team( ). Note all new array entries are initialized
2237  // to zero by __kmp_allocate().
2238  new_data = (kmp_thread_data_t *)
2239  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2240  // copy old data to new data
2241  KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
2242  (void *) old_data,
2243  maxthreads * sizeof(kmp_thread_data_t) );
2244 
2245 #ifdef BUILD_TIED_TASK_STACK
2246  // GEH: Figure out if this is the right thing to do
2247  for (i = maxthreads; i < nthreads; i++) {
2248  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2249  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2250  }
2251 #endif // BUILD_TIED_TASK_STACK
2252  // Install the new data and free the old data
2253  (*threads_data_p) = new_data;
2254  __kmp_free( old_data );
2255  }
2256  else {
2257  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
2258  "threads data for task_team %p, size = %d\n",
2259  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2260  // Make the initial allocate for threads_data array, and zero entries
2261  // Cannot use __kmp_thread_calloc() because threads not around for
2262  // kmp_reap_task_team( ).
2263  *threads_data_p = (kmp_thread_data_t *)
2264  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2265 #ifdef BUILD_TIED_TASK_STACK
2266  // GEH: Figure out if this is the right thing to do
2267  for (i = 0; i < nthreads; i++) {
2268  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2269  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2270  }
2271 #endif // BUILD_TIED_TASK_STACK
2272  }
2273  task_team -> tt.tt_max_threads = nthreads;
2274  }
2275  else {
2276  // If array has (more than) enough elements, go ahead and use it
2277  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2278  }
2279 
2280  // initialize threads_data pointers back to thread_info structures
2281  for (i = 0; i < nthreads; i++) {
2282  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2283  thread_data -> td.td_thr = team -> t.t_threads[i];
2284 
2285  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2286  // The last stolen field survives across teams / barrier, and the number
2287  // of threads may have changed. It's possible (likely?) that a new
2288  // parallel region will exhibit the same behavior as the previous region.
2289  thread_data -> td.td_deque_last_stolen = -1;
2290  }
2291  }
2292 
2293  KMP_MB();
2294  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2295  }
2296 
2297  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2298  return is_init_thread;
2299 }
2300 
2301 
2302 //------------------------------------------------------------------------------
2303 // __kmp_free_task_threads_data:
2304 // Deallocates a threads_data array for a task team, including any attached
2305 // tasking deques. Only occurs at library shutdown.
2306 
2307 static void
2308 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2309 {
2310  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2311  if ( task_team -> tt.tt_threads_data != NULL ) {
2312  int i;
2313  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2314  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2315  }
2316  __kmp_free( task_team -> tt.tt_threads_data );
2317  task_team -> tt.tt_threads_data = NULL;
2318  }
2319  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2320 }
2321 
2322 
2323 //------------------------------------------------------------------------------
2324 // __kmp_allocate_task_team:
2325 // Allocates a task team associated with a specific team, taking it from
2326 // the global task team free list if possible. Also initializes data structures.
2327 
2328 static kmp_task_team_t *
2329 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2330 {
2331  kmp_task_team_t *task_team = NULL;
2332  int nthreads;
2333 
2334  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2335  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2336 
2337  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2338  // Take a task team from the task team pool
2339  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2340  if (__kmp_free_task_teams != NULL) {
2341  task_team = __kmp_free_task_teams;
2342  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2343  task_team -> tt.tt_next = NULL;
2344  }
2345  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2346  }
2347 
2348  if (task_team == NULL) {
2349  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2350  "task team for team %p\n",
2351  __kmp_gtid_from_thread( thread ), team ) );
2352  // Allocate a new task team if one is not available.
2353  // Cannot use __kmp_thread_malloc() because threads not around for
2354  // kmp_reap_task_team( ).
2355  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2356  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2357  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2358  //task_team -> tt.tt_max_threads = 0;
2359  //task_team -> tt.tt_next = NULL;
2360  }
2361 
2362  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2363 #if OMP_41_ENABLED
2364  TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2365 #endif
2366  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2367 
2368  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2369  TCW_4( task_team -> tt.tt_active, TRUE );
2370  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2371 
2372  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2373  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2374  return task_team;
2375 }
2376 
2377 
2378 //------------------------------------------------------------------------------
2379 // __kmp_free_task_team:
2380 // Frees the task team associated with a specific thread, and adds it
2381 // to the global task team free list.
2382 //
2383 
2384 static void
2385 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2386 {
2387  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2388  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2389 
2390  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2391 
2392  // Put task team back on free list
2393  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2394 
2395  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2396  task_team -> tt.tt_next = __kmp_free_task_teams;
2397  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2398  TCW_PTR(__kmp_free_task_teams, task_team);
2399 
2400  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2401 }
2402 
2403 
2404 //------------------------------------------------------------------------------
2405 // __kmp_reap_task_teams:
2406 // Free all the task teams on the task team free list.
2407 // Should only be done during library shutdown.
2408 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2409 
2410 void
2411 __kmp_reap_task_teams( void )
2412 {
2413  kmp_task_team_t *task_team;
2414 
2415  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2416  // Free all task_teams on the free list
2417  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2418  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2419  __kmp_free_task_teams = task_team -> tt.tt_next;
2420  task_team -> tt.tt_next = NULL;
2421 
2422  // Free threads_data if necessary
2423  if ( task_team -> tt.tt_threads_data != NULL ) {
2424  __kmp_free_task_threads_data( task_team );
2425  }
2426  __kmp_free( task_team );
2427  }
2428  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2429  }
2430 }
2431 
2432 
2433 //------------------------------------------------------------------------------
2434 // __kmp_unref_task_team:
2435 // Remove one thread from referencing the task team structure by
2436 // decreasing the reference count, and deallocate the task team if there are
2437 // no more references to it.
2438 //
2439 void
2440 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2441 {
2442  kmp_uint ref_ct;
2443 
2444  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2445 
2446  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2447  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2448 
2449 
2450  if ( ref_ct == 0 ) {
2451  __kmp_free_task_team( thread, task_team );
2452  }
2453 
2454  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2455 }
2456 
2457 
2458 //------------------------------------------------------------------------------
2459 // __kmp_wait_to_unref_task_teams:
2460 // Some threads could still be in the fork barrier release code, possibly
2461 // trying to steal tasks. Wait for each thread to unreference its task team.
2462 //
2463 void
2464 __kmp_wait_to_unref_task_teams(void)
2465 {
2466  kmp_info_t *thread;
2467  kmp_uint32 spins;
2468  int done;
2469 
2470  KMP_INIT_YIELD( spins );
2471 
2472 
2473  for (;;) {
2474  done = TRUE;
2475 
2476  // TODO: GEH - this may be wrong because some sync would be necessary
2477  // in case threads are added to the pool during the traversal.
2478  // Need to verify that lock for thread pool is held when calling
2479  // this routine.
2480  for (thread = (kmp_info_t *)__kmp_thread_pool;
2481  thread != NULL;
2482  thread = thread->th.th_next_pool)
2483  {
2484 #if KMP_OS_WINDOWS
2485  DWORD exit_val;
2486 #endif
2487  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2488  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2489  __kmp_gtid_from_thread( thread ) ) );
2490  continue;
2491  }
2492 #if KMP_OS_WINDOWS
2493  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2494  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2495  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2496  __kmp_unref_task_team( thread->th.th_task_team, thread );
2497  }
2498  continue;
2499  }
2500 #endif
2501 
2502  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2503 
2504  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2505  __kmp_gtid_from_thread( thread ) ) );
2506 
2507  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2508  volatile void *sleep_loc;
2509  // If the thread is sleeping, awaken it.
2510  if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2511  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2512  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2513  __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2514  }
2515  }
2516  }
2517  if (done) {
2518  break;
2519  }
2520 
2521  // If we are oversubscribed,
2522  // or have waited a bit (and library mode is throughput), yield.
2523  // Pause is in the following code.
2524  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2525  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2526  }
2527 
2528 
2529 }
2530 
2531 
2532 //------------------------------------------------------------------------------
2533 // __kmp_task_team_setup: Create a task_team for the current team, but use
2534 // an already created, unused one if it already exists.
2535 // This may be called by any thread, but only for teams with # threads >1.
2536 void
2537 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
2538 {
2539  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2540 
2541  if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
2542  // Allocate a new task team, which will be propagated to
2543  // all of the worker threads after the barrier. As they
2544  // spin in the barrier release phase, they will continue
2545  // to use the previous task team struct, until they receive
2546  // the signal to stop checking for tasks (they can't safely
2547  // reference the kmp_team_t struct, which could be reallocated
2548  // by the master thread).
2549  team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
2550  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
2551  __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
2552  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2553  }
2554  // else: Either all threads have reported in, and no tasks were spawned for this release->gather region
2555  // Leave the old task team struct in place for the upcoming region.
2556  // No task teams are formed for serialized teams.
2557  if (both) {
2558  int other_team = 1 - this_thr->th.th_task_state;
2559  if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well
2560  team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
2561  KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
2562  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
2563  ((team != NULL) ? team->t.t_id : -1), other_team ));
2564  }
2565  }
2566 }
2567 
2568 
2569 //------------------------------------------------------------------------------
2570 // __kmp_task_team_sync: Propagation of task team data from team to threads
2571 // which happens just after the release phase of a team barrier. This may be
2572 // called by any thread, but only for teams with # threads > 1.
2573 void
2574 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2575 {
2576  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2577 
2578  // In case this thread never saw that the task team was no longer active, unref/deallocate it now.
2579  if ( this_thr->th.th_task_team != NULL ) {
2580  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2581  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2582  KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team (%p) is not active, unrefing\n",
2583  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team));
2584  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2585  }
2586 #if KMP_DEBUG
2587  else { // We are re-using a task team that was never enabled.
2588  KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
2589  }
2590 #endif
2591  }
2592 
2593  // Toggle the th_task_state field, to switch which task_team this thread refers to
2594  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2595  // It is now safe to propagate the task team pointer from the team struct to the current thread.
2596  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
2597  KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to %p from Team #%d task team (parity=%d)\n",
2598  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
2599  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
2600 }
2601 
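/* Tiny model of the parity toggle above (illustration only): the team keeps two
   task-team slots, and every thread flips a 0/1 task state at each barrier, so
   the slot for the finishing region can still be drained while the other slot
   is already set up for the next region. model_team_t is a hypothetical name. */
typedef struct model_team {
    void *task_team[ 2 ];                        /* one slot per parity */
} model_team_t;

static void *
model_task_team_sync( model_team_t *team, int *task_state )
{
    *task_state = 1 - *task_state;               /* switch parity at the barrier */
    return team->task_team[ *task_state ];       /* adopt the slot for the new region */
}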
2602 
2603 //--------------------------------------------------------------------------------------------
2604 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
2605 // phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created
2606 void
2607 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
2608  USE_ITT_BUILD_ARG(void * itt_sync_obj)
2609  )
2610 {
2611  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
2612 
2613  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2614  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2615 
2616  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
2617  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
2618  __kmp_gtid_from_thread(this_thr), task_team));
2619  // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
2620  // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
2621  kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
2622  flag.wait(this_thr, TRUE
2623  USE_ITT_BUILD_ARG(itt_sync_obj));
2624 
2625  // Kill the old task team, so that the worker threads will stop referencing it while spinning.
2626  // They will deallocate it when the reference count reaches zero.
2627  // The master thread is not included in the ref count.
2628  KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: setting active to false, setting local and team's pointer to NULL\n",
2629  __kmp_gtid_from_thread(this_thr), task_team));
2630 #if OMP_41_ENABLED
2631  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
2632  TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
2633 #else
2634  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2635 #endif
2636  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2637  KMP_MB();
2638 
2639  TCW_PTR(this_thr->th.th_task_team, NULL);
2640  team->t.t_task_team[this_thr->th.th_task_state] = NULL;
2641  }
2642 }
2643 
2644 
2645 //------------------------------------------------------------------------------
2646 // __kmp_tasking_barrier:
2647 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2648 // Internal function to execute all tasks prior to a regular barrier or a
2649 // join barrier. It is a full barrier itself, which unfortunately turns
2650 // regular barriers into double barriers and join barriers into 1 1/2
2651 // barriers.
2652 void
2653 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2654 {
2655  volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
2656  int flag = FALSE;
2657  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2658 
2659 #if USE_ITT_BUILD
2660  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2661 #endif /* USE_ITT_BUILD */
2662  kmp_flag_32 spin_flag(spin, 0U);
2663  while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
2664  USE_ITT_BUILD_ARG(NULL), 0 ) ) {
2665 #if USE_ITT_BUILD
2666  // TODO: What about itt_sync_obj??
2667  KMP_FSYNC_SPIN_PREPARE( spin );
2668 #endif /* USE_ITT_BUILD */
2669 
2670  if( TCR_4(__kmp_global.g.g_done) ) {
2671  if( __kmp_global.g.g_abort )
2672  __kmp_abort_thread( );
2673  break;
2674  }
2675  KMP_YIELD( TRUE ); // GH: We always yield here
2676  }
2677 #if USE_ITT_BUILD
2678  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2679 #endif /* USE_ITT_BUILD */
2680 }
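/*
 * Greatly simplified, standalone sketch of the "help while waiting" loop above: instead
 * of spinning idly, a thread keeps executing queued tasks until the team-wide count of
 * unfinished threads reaches zero, so the loop doubles as a full barrier.  try_task_fn
 * and tasking_barrier_sketch are illustrative names; the real protocol (exactly when
 * tt_unfinished_threads is decremented, how a thread re-arms the count after stealing
 * new work, abort handling, ITT hooks) lives inside spin_flag.execute_tasks() and is
 * deliberately omitted here.
 */
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef bool (*try_task_fn)(int gtid);   /* returns true if it found and ran one task */

void tasking_barrier_sketch(atomic_uint *unfinished_threads, int gtid, try_task_fn try_one) {
    bool counted_out = false;                         /* decremented the count already?   */
    while (atomic_load(unfinished_threads) != 0u) {
        if (try_one(gtid))                            /* found a task: run it, keep going */
            continue;
        if (!counted_out) {                           /* first time running dry: report   */
            atomic_fetch_sub(unfinished_threads, 1u); /* it exactly once                  */
            counted_out = true;
        }
        sched_yield();                                /* nothing to do: yield and re-poll */
    }
}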
2681 
2682 
2683 #if OMP_41_ENABLED
2684 
2685 /* __kmp_give_task puts a task into a given thread's queue if:
2686  - the queue for that thread has been created
2687  - there's space in that queue
2688 
2689  Because of this, __kmp_push_task needs to check whether there's still space after getting the lock
2690  */
2691 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
2692 {
2693  kmp_task_team_t * task_team = thread->th.th_task_team;
2694  kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2695  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
2696  bool result = false;
2697 
2698  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
2699 
2700  // assert tasking is enabled? what if not?
2701  KMP_DEBUG_ASSERT( task_team != NULL );
2702 
2703  if (thread_data -> td.td_deque == NULL ) {
2704  // There's no queue in this thread, go find another one
2705  // We're guaranteed that at least one thread has a queue
2706  KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2707  return result;
2708  }
2709 
2710  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2711  {
2712  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2713  return result;
2714  }
2715 
2716  __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2717 
2718  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
2719  {
2720  KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2721  goto release_and_exit;
2722  }
2723 
2724  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2725  // Wrap index.
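 // (TASK_DEQUE_SIZE is a power of two and TASK_DEQUE_MASK is TASK_DEQUE_SIZE - 1,
 //  so the bitwise AND below wraps the tail index back to 0 without a modulo.)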
2726  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
2727  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2728 
2729  result = true;
2730  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2731 
2732 release_and_exit:
2733  __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2734 
2735  return result;
2736 }
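/*
 * Standalone sketch of the "check, lock, re-check" pattern used by __kmp_give_task above:
 * the fullness test runs once without the lock (cheap rejection) and once more after
 * acquiring it, because another thread may have filled the deque in between.  The unlocked
 * read of the counter mirrors the TCR_4() read above.  bounded_deque_t, deque_try_push and
 * SKETCH_DEQUE_* are illustrative names only, not part of the runtime.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

#define SKETCH_DEQUE_SIZE 256u                           /* power of two, like TASK_DEQUE_SIZE */
#define SKETCH_DEQUE_MASK (SKETCH_DEQUE_SIZE - 1u)

typedef struct {
    void           *slots[SKETCH_DEQUE_SIZE];
    atomic_uint     ntasks;                              /* queued items; readable without lock */
    unsigned        tail;                                /* next free slot; protected by lock   */
    pthread_mutex_t lock;                                /* initialize with pthread_mutex_init  */
} bounded_deque_t;

static bool deque_try_push(bounded_deque_t *dq, void *task) {
    if (atomic_load(&dq->ntasks) >= SKETCH_DEQUE_SIZE)   /* unlocked fast-path rejection   */
        return false;

    pthread_mutex_lock(&dq->lock);
    if (atomic_load(&dq->ntasks) >= SKETCH_DEQUE_SIZE) { /* re-check: the deque may have   */
        pthread_mutex_unlock(&dq->lock);                 /* filled up between the tests    */
        return false;
    }
    dq->slots[dq->tail] = task;
    dq->tail = (dq->tail + 1u) & SKETCH_DEQUE_MASK;      /* wrap, as TASK_DEQUE_MASK does  */
    atomic_fetch_add(&dq->ntasks, 1u);
    pthread_mutex_unlock(&dq->lock);
    return true;
}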
2737 
2738 
2739 /* The finish of a proxy task is divided in two pieces:
2740  - the top half is the one that can be done from a thread outside the team
2741  - the bottom half must be run from a thread within the team
2742 
2743  In order to run the bottom half the task gets queued back into one of the threads of the team.
2744  Once the td_incomplete_child_tasks counter of the parent is decremented the threads can leave the barriers.
2745  So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
2746  - things that can be run before queuing the bottom half
2747  - things that must be run after queuing the bottom half
2748 
2749  This creates a second race, as the bottom half can free the task before the second top half is executed. To avoid this,
2750  we use the td_incomplete_child_tasks counter of the proxy task itself to synchronize the top and bottom halves.
2751 */
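/*
 * Standalone sketch (illustrative names only; compile separately) of the synchronization
 * just described: a "guard" count on the proxy task plays the role of the extra
 * td_incomplete_child_tasks reference.  It is taken in the first top half, before the
 * bottom half is queued, and dropped only in the second top half, so the bottom half
 * cannot free the task while a top half may still touch it.
 */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct {
    atomic_int guard;                       /* >0 while a top half may still touch the task */
    atomic_int parent_incomplete_children;  /* parent's count of unfinished children        */
} proxy_sketch_t;

static void first_top_half(proxy_sketch_t *t) {
    /* mark the task complete, update the taskgroup, ... */
    atomic_fetch_add(&t->guard, 1);         /* taken BEFORE the bottom half is queued */
}

static void second_top_half(proxy_sketch_t *t) {
    atomic_fetch_sub(&t->parent_incomplete_children, 1); /* siblings may now leave barriers */
    atomic_fetch_sub(&t->guard, 1);         /* only now may the bottom half free the task   */
}

static void bottom_half(proxy_sketch_t *t) {
    while (atomic_load(&t->guard) > 0)      /* wait until the second top half is done */
        ;                                   /* expected to be a very short spin       */
    /* release dependences, then free the task */
    free(t);
}

int main(void) {
    proxy_sketch_t *t = calloc(1, sizeof *t);
    if (t == NULL) return 1;
    atomic_store(&t->guard, 0);
    atomic_store(&t->parent_incomplete_children, 1);
    first_top_half(t);    /* before the bottom half is (conceptually) queued     */
    second_top_half(t);   /* after queuing it                                    */
    bottom_half(t);       /* normally executed later by a thread inside the team */
    return 0;
}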
2752 
2753 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2754 {
2755  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2756  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2757  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2758  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2759 
2760  taskdata -> td_flags.complete = 1; // mark the task as completed
2761 
2762  if ( taskdata->td_taskgroup )
2763  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2764 
2765  // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2766  TCR_4(taskdata->td_incomplete_child_tasks++);
2767 }
2768 
2769 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2770 {
2771  kmp_int32 children = 0;
2772 
2773  // Predecrement simulated by "- 1" calculation
2774  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2775  KMP_DEBUG_ASSERT( children >= 0 );
2776 
2777  // Remove the imaginary child
2778  TCR_4(taskdata->td_incomplete_child_tasks--);
2779 }
2780 
2781 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2782 {
2783  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2784  kmp_info_t * thread = __kmp_threads[ gtid ];
2785 
2786  KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2787  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2788 
2789  // We need to wait to make sure the top half is finished
2790  // Spinning here should be ok as this should happen quickly
2791  while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2792 
2793  __kmp_release_deps(gtid,taskdata);
2794  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2795 }
2796 
2804 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2805 {
2806  KMP_DEBUG_ASSERT( ptask != NULL );
2807  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2808  KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2809 
2810  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2811 
2812  __kmp_first_top_half_finish_proxy(taskdata);
2813  __kmp_second_top_half_finish_proxy(taskdata);
2814  __kmp_bottom_half_finish_proxy(gtid,ptask);
2815 
2816  KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2817 }
2818 
2825 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2826 {
2827  KMP_DEBUG_ASSERT( ptask != NULL );
2828  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2829 
2830  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2831 
2832  KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2833 
2834  __kmp_first_top_half_finish_proxy(taskdata);
2835 
2836  // Enqueue the task so that a thread within the corresponding team completes the bottom half
2837  kmp_team_t * team = taskdata->td_team;
2838  kmp_int32 nthreads = team->t.t_nproc;
2839  kmp_info_t *thread;
2840  kmp_int32 k = 0;
2841 
2842  do {
2843  // This should be similar to k = __kmp_get_random( thread ) % nthreads, but we cannot use __kmp_get_random here
2844  // For now we just probe threads linearly until one accepts the task
2845  k = (k+1) % nthreads;
2846  thread = team->t.t_threads[k];
2847  } while ( !__kmp_give_task( thread, k, ptask ) );
2848 
2849  __kmp_second_top_half_finish_proxy(taskdata);
2850 
2851  KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2852 }
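/*
 * Hypothetical usage sketch: a completion callback running on a thread that does NOT
 * belong to the OpenMP team (for example an asynchronous I/O or offload engine) tells the
 * runtime that the work backing a proxy task has finished.  How the proxy task was created
 * and how its kmp_task_t handle reached the callback are outside this excerpt;
 * async_completion_t and on_async_done are illustrative names, not runtime API.
 */
typedef struct {
    kmp_task_t *proxy_task;   /* handle captured when the proxy task was set up */
} async_completion_t;

static void on_async_done(async_completion_t *c) {
    /* Out-of-order variant: legal from a thread outside the team.  The top halves run
       here, and the bottom half is queued to a team thread's deque via __kmp_give_task.
       A caller that knows it is running on a team thread would instead use
       __kmpc_proxy_task_completed(gtid, c->proxy_task). */
    __kmpc_proxy_task_completed_ooo(c->proxy_task);
}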
2853 
2854 #endif