// kmp_itt.h — Intel(R) OpenMP* Runtime Library: ITT Notify interface.
1 #if USE_ITT_BUILD
2 /*
3  * kmp_itt.h -- ITT Notify interface.
4  */
5 
6 /* <copyright>
7  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
8 
9  Redistribution and use in source and binary forms, with or without
10  modification, are permitted provided that the following conditions
11  are met:
12 
13  * Redistributions of source code must retain the above copyright
14  notice, this list of conditions and the following disclaimer.
15  * Redistributions in binary form must reproduce the above copyright
16  notice, this list of conditions and the following disclaimer in the
17  documentation and/or other materials provided with the distribution.
18  * Neither the name of Intel Corporation nor the names of its
19  contributors may be used to endorse or promote products derived
20  from this software without specific prior written permission.
21 
22  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 </copyright> */
35 
36 #ifndef KMP_ITT_H
37 #define KMP_ITT_H
38 
39 #include "kmp_lock.h"
40 
41 #define INTEL_ITTNOTIFY_API_PRIVATE
42 #include "ittnotify.h"
43 #include "legacy/ittnotify.h"
44 
#if KMP_DEBUG
  // Debug build: turn off inlining so the __kmp_itt_* functions are real,
  // breakpoint-able symbols (their definitions are compiled separately;
  // see the "#include kmp_itt.inl" at the bottom of this header, which is
  // only done in release mode).
  #define __kmp_inline // Turn off inlining in debug mode.
#else
  // Release build: definitions from kmp_itt.inl are included into each
  // translation unit and inlined.
  #define __kmp_inline static inline
#endif
50 
#if USE_ITT_NOTIFY
  // Number of spin-loop iterations to wait before reporting KMP_FSYNC_PREPARE
  // to ITT (see KMP_FSYNC_SPIN_PREPARE below); short waits are not reported.
  extern kmp_int32 __kmp_itt_prepare_delay;
# ifdef __cplusplus
  // Shuts down the ITT notification library; C linkage so it can be
  // referenced from C and C++ translation units alike.
  extern "C" void __kmp_itt_fini_ittlib(void);
# else
  extern void __kmp_itt_fini_ittlib(void);
# endif
#endif

// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
// Expands to ", x" here; expands to nothing in the !USE_ITT_BUILD branch at the
// end of this file, so call sites can write f(a USE_ITT_BUILD_ARG(b)).
#define USE_ITT_BUILD_ARG(x) ,x
62 
// One-time setup/teardown of the ITT notification machinery.
void __kmp_itt_initialize();
void __kmp_itt_destroy();

// -------------------------------------------------------------------------------------------------
// New stuff for reporting high-level constructs.
// -------------------------------------------------------------------------------------------------

// Note the naming convention:
// __kmp_itt_xxxing() function should be called before action, while
// __kmp_itt_xxxed() function should be called after action.

// --- Parallel region reporting ---
__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads.
__kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads.
  // (*) Note: A thread may execute tasks after this point, though.

// --- Frame reporting ---
// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
__kmp_inline void __kmp_itt_metadata_single( ident_t * loc );

// --- Barrier reporting ---
// Returns the sync object to pass to the starting/middle/finished calls below.
__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );

// --- Taskwait reporting ---
__kmp_inline void * __kmp_itt_taskwait_object( int gtid );
__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );

// --- Lock reporting ---
// With dynamic locks, creation also carries the source location for naming.
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start( int gtid );
__kmp_inline void __kmp_itt_single_end( int gtid );

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init( int gtid );
__kmp_inline void __kmp_itt_ordered_prep( int gtid );
__kmp_inline void __kmp_itt_ordered_start( int gtid );
__kmp_inline void __kmp_itt_ordered_end( int gtid );

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name( int gtid );

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
150 
151 // -------------------------------------------------------------------------------------------------
152 // Old stuff for reporting low-level internal synchronization.
153 // -------------------------------------------------------------------------------------------------
154 
#if USE_ITT_NOTIFY

  /*
   * Support for SSC marks, which are used by SDE
   * http://software.intel.com/en-us/articles/intel-software-development-emulator
   * to mark points in instruction traces that represent spin-loops and are
   * therefore uninteresting when collecting traces for architecture simulation.
   */
  #ifndef INCLUDE_SSC_MARKS
  # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
  #endif

  /* Linux 64 only for now */
  #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
  // Portable (at least for gcc and icc) code to insert the necessary instructions
  // to set %ebx and execute the unlikely no-op.
  #if defined( __INTEL_COMPILER )
  # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
  #else
  // The 0x64,0x67,0x90 byte sequence is the SSC marker no-op recognized by SDE;
  // %ebx carries the tag and is declared clobbered.
  # define INSERT_SSC_MARK(tag) \
    __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
  #endif
  #else
  # define INSERT_SSC_MARK(tag) ((void)0)
  #endif

  /* Markers for the start and end of regions that represent polling and
   * are therefore uninteresting to architectural simulations 0x4376 and
   * 0x4377 are arbitrary numbers that should be unique in the space of
   * SSC tags, but there is no central issuing authority rather
   * randomness is expected to work.
   */
  #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
  #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

  // Markers for architecture simulation.
  // FORKING : Before the master thread forks.
  // JOINING : At the start of the join.
  // INVOKING : Before the threads invoke microtasks.
  // DISPATCH_INIT: At the start of dynamically scheduled loop.
  // DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
  #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
  #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
  #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
  #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
  #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
201 
  // The object is an address that associates a specific set of the prepare, acquire, release,
  // and cancel operations.

  /* Sync prepare indicates a thread is going to start waiting for another thread
     to send a release event. This operation should be done just before the thread
     begins checking for the existence of the release event */

  /* Sync cancel indicates a thread is cancelling a wait on another thread and
     continuing execution without waiting for the other thread to release it */

  /* Sync acquired indicates a thread has received a release event from another
     thread and has stopped waiting. This operation must occur only after the release
     event is received. */

  /* Sync release indicates a thread is going to send a release event to another thread
     so it will stop waiting and continue execution. This operation must just happen before
     the release event. */

  // Thin wrappers over the ITT fsync API; obj is any address identifying the
  // sync object.
  #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) )
  #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) )
  #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) )
  #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )
224 
  /*
     In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay
     (and not called at all if waiting time is small). So, in spin loops, do not use
     KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop),
     KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED().
     See KMP_WAIT_YIELD() for example.
  */

  #undef KMP_FSYNC_SPIN_INIT
  // NOTE: declares a local counter `sync_iters` in the caller's scope; the
  // SPIN_PREPARE/SPIN_ACQUIRED macros below rely on it, so SPIN_INIT must
  // appear first in the same scope. If obj is NULL it is defaulted to spin.
  #define KMP_FSYNC_SPIN_INIT( obj, spin ) \
    int sync_iters = 0; \
    if ( __itt_fsync_prepare_ptr ) { \
      if ( obj == NULL ) { \
        obj = spin; \
      } /* if */ \
    } /* if */ \
    SSC_MARK_SPIN_START()

  #undef KMP_FSYNC_SPIN_PREPARE
  // Counts spin iterations; reports PREPARE exactly once, on the iteration
  // where the count reaches __kmp_itt_prepare_delay.
  #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \
      if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
        ++ sync_iters; \
        if ( sync_iters >= __kmp_itt_prepare_delay ) { \
          KMP_FSYNC_PREPARE( (void*) obj ); \
        } /* if */ \
      } /* if */ \
    } while (0)
  #undef KMP_FSYNC_SPIN_ACQUIRED
  // Reports ACQUIRED only if the wait was long enough for PREPARE to have
  // been reported; always closes the SSC spin region.
  #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \
      SSC_MARK_SPIN_END(); \
      if ( sync_iters >= __kmp_itt_prepare_delay ) { \
        KMP_FSYNC_ACQUIRED( (void*) obj ); \
      } /* if */ \
    } while (0)
259 
  /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
         KMP_ITT_IGNORE(
             ptr = malloc( size );
         );
  */
  // Saves the current ITT state, sets "ignore" object mode for the duration of
  // the statement, then restores the saved state.
  #define KMP_ITT_IGNORE( statement ) do { \
      __itt_state_t __itt_state_; \
      if ( __itt_state_get_ptr ) { \
        __itt_state_ = __itt_state_get(); \
        __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
      } /* if */ \
      { statement } \
      if ( __itt_state_get_ptr ) { \
        __itt_state_set( __itt_state_ ); \
      } /* if */ \
    } while (0)

  const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
  // different OpenMP regions in the user source code).
  // Global tables of ITT domains used for frame/metadata reporting; defined in
  // the corresponding .c/.cpp file, indexed by region, bounded by
  // KMP_MAX_FRAME_DOMAINS. The *_count variables track how many slots are used.
  extern kmp_int32 __kmp_barrier_domain_count;
  extern kmp_int32 __kmp_region_domain_count;
  extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
  extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
  extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
  extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
  extern __itt_domain * metadata_domain;
286 
#else

// Null definitions of the synchronization tracing functions.
// (USE_ITT_NOTIFY is off: all sync tracing compiles away to no-ops.)
# define KMP_FSYNC_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_CANCEL( obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

// Statement is still executed, just without the ignore-state bracketing.
# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

#endif // USE_ITT_NOTIFY
302 
#if ! KMP_DEBUG
  // In release mode include definitions of inline functions.
  // (Matches the __kmp_inline definition above: release builds inline the
  // __kmp_itt_* bodies; debug builds compile them separately.)
  #include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H
309 
#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
# define KMP_FSYNC_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_CANCEL( obj ) ((void)0)
# define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
# define KMP_FSYNC_RELEASING( obj ) ((void)0)

# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)

// Statement is still executed, just without the ignore-state bracketing.
# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)

// Extra-argument helper expands to nothing in this configuration
// (see the ",x" definition in the USE_ITT_BUILD branch).
# define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */
// (Doxygen cross-references: ident_t — kmp.h:218; sched_type — kmp.h:320)