#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
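/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lower level thread
synchronizations of suspending a thread and awaking it. They are used
to build higher level operations such as barriers and fork/join.
*/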
/*! The flag_type describes the storage used for the flag. */
enum flag_type {
    flag32,      /**< 32-bit flags */
    flag64,      /**< 64-bit flags */
    flag_oncore  /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*! Base class for wait/release volatile flags. */
template <typename P>
class kmp_flag {
    volatile P *loc; /**< Pointer to the flag storage that is modified by another thread. */
    flag_type t;     /**< "Type" of the flag in loc. */
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    volatile P *get() { return loc; }                 // pointer to the actual flag
    void set(volatile P *new_loc) { loc = new_loc; }  // point loc at new_loc
    flag_type get_type() { return t; }                // the flag_type
};
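/* Spin wait loop that first does fast spinning, then gradually backs off to yielding, and
   finally (once the cached blocktime interval elapses) suspends the waiting thread. */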
template <class C>
static inline void
__kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                    USE_ITT_BUILD_ARG(void * itt_sync_obj))
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return; // flag was already released
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
        }
    }
#endif
    // Set up spin-then-yield counters for the wait.
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Workers cannot rely on the team struct existing here; use the cached blocktime values.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            hibernate = 0; // force immediate suspend if not set by user and oversubscribed
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */
        /* Spin for the full #intervals plus up to one interval more so this thread
           doesn't go to sleep too soon, then add in the current time value. */
        if (hibernate != 0)
            hibernate++;
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
    KMP_MB();
    // Main wait spin loop
    while (flag->notdone_check()) {
        /* If task_team is NULL, either the team has not been set up yet, all tasks have
           completed and the team was deallocated, or tasking is off for this region. */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                }
                else if (KMP_TASKING_ENABLED(task_team)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            }
        }
        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If oversubscribed, or waited a bit (when KMP_LIBRARY=throughput), then yield.
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        KMP_YIELD_SPIN(spins);
        // Check if this thread was transferred between a team and the thread pool while spinning.
        int in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
            }
            else { // recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }
        // Don't suspend if KMP_BLOCKTIME is set to "infinite".
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;
        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;
        // Keep spinning until the full blocktime has elapsed.
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;
        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
        flag->suspend(th_gtid);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
    } // while (flag->notdone_check())
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_status == ompt_status_track_callback) {
        if (this_thr->th.ompt_thread_info.state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
            KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
                             this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
        }
    }
#endif
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}
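/* Release any threads specified as waiting on the flag by modifying the flag to its released
   state, then resume any waiters that have set the flag's sleep bit. A thread parked in
   __kmp_wait_template must eventually be released this way, or it may sleep forever. */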
template <class C>
static inline void __kmp_release_template(C *flag)
{
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
    KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n",
                   gtid, flag->get(), (int)(*(flag->get()))));
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep bits if infinite blocktime is not set.
        if (flag->is_any_sleeping()) { // are any waiters sleeping on this flag?
            for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                if (waiter) {
                    int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                    // Wake up the thread sleeping on the flag.
                    KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
                                  gtid, wait_gtid, flag->get()));
                    flag->resume(wait_gtid);
                }
            }
        }
    }
}
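/* flag_traits maps a flag word type to the matching atomic primitives (TCR load and
   test-then-add4/or/and) so the flag classes below can share one implementation for
   32-bit and 64-bit flag words. */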
template <typename FlagType>
struct flag_traits {};
template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};
template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                /**< Value to compare flag to, to check if flag has been released. */
    kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this flag. */
    kmp_uint32 num_waiting_threads;  /**< Number of threads sleeping on this flag. */
 public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    /*! @param i in   index into waiting_threads
     *  @result the thread that is waiting at index i */
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    /*! @result number of threads waiting on this flag */
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    /*! @param thr in   register thr as the (single) thread waiting on this flag */
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    /*! @result true if the flag object has been released. */
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    /*! @param old_loc in   old value of flag
     *  @result true if the flag's old value indicates it was released. */
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    /*! @result true if the flag object is not yet released. */
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    /*! Release waiting threads by bumping the flag past the checker value. */
    void internal_release() {
        (void) traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    /*! @result Actual flag value before sleep bit(s) were set.
     *  Notes that at least one thread is sleeping on the flag by setting sleep bit(s). */
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    /*! @result Actual flag value before sleep bit(s) were cleared. */
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    /*! @param old_loc in   old value of flag
     *  Test whether old_loc has the sleep bit(s) set. */
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    /*! Test whether there are threads sleeping on the flag. */
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
    bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
    kmp_uint8 *get_stolen() { return NULL; }
    enum barrier_type get_bt() { return bs_last_barrier; }
};
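/* Concrete 32-bit and 64-bit flags: thin wrappers that bind kmp_basic_flag to the
   width-specific suspend/resume and task-execution entry points. */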
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
 public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
 public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
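/* Illustrative wait/release pairing (a sketch only -- real call sites live in the barrier
   and fork/join code, and the exact fields and arguments vary by barrier pattern):
   @code
   // Waiter side: spin, yield, then sleep until b_go is bumped past KMP_BARRIER_STATE_BUMP.
   kmp_flag_64 flag(&this_thr->th.th_bar[bs_plain_barrier].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
   // Releaser side: bump the same location and wake the waiter if it is sleeping.
   kmp_flag_64 go_flag(&waiter_thr->th.th_bar[bs_plain_barrier].bb.b_go, waiter_thr);
   go_flag.release();
   @endcode */

/* kmp_flag_oncore treats one 64-bit flag word as an array of bytes so that several threads
   on a core can each wait on, or release, their own byte of the shared word; byteref()
   selects the byte at a given offset. */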
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
    kmp_info_t * waiting_threads[1];
    kmp_uint32 num_waiting_threads;
    kmp_uint32 offset;      /**< Portion of the flag of interest for an operation. */
    bool flag_switch;       /**< Indicates a switch in flag location. */
    enum barrier_type bt;   /**< Barrier type. */
    kmp_info_t * this_thr;  /**< Thread to possibly redirect to a different flag location. */
#if USE_ITT_BUILD
    void *itt_sync_obj;     /**< ITT object to pass to new flag location. */
#endif
    unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
 public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    void internal_release() {
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            byteref(get(),offset) = 1;
        }
        else {
            kmp_uint64 mask = 0;
            byteref(&mask,offset) = 1;
            (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    bool is_any_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin) {
        __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
                                             USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    kmp_uint8 *get_stolen() { return NULL; }
    enum barrier_type get_bt() { return bt; }
};
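/* Minimal oncore sketch (hypothetical 'parent_bar' and 'idx'; the hierarchical barrier
   derives the byte offset from the thread's position in the barrier tree):
   @code
   kmp_flag_oncore flag(&parent_bar->b_arrived, idx); // operate on byte 'idx' of b_arrived
   flag.release(); // sets byte 'idx' directly, or via atomic OR under finite blocktime
   @endcode */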
#endif // KMP_WAIT_RELEASE_H