Intel® OpenMP* Runtime Library
kmp_atomic.h
1 /*
2  * kmp_atomic.h - ATOMIC header file
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #ifndef KMP_ATOMIC_H
36 #define KMP_ATOMIC_H
37 
38 #include "kmp_os.h"
39 #include "kmp_lock.h"
40 
41 #if OMPT_SUPPORT
42 #include "ompt-specific.h"
43 #endif
44 
45 // C++ build port.
46 // The Intel compiler does not support the _Complex datatype on Windows,
47 // but does support it on Linux and Mac.
48 // On the other hand, there is a stack-alignment problem on lin_32 and mac_32
49 // if the rhs is a cmplx80 or cmplx128 typedef'ed datatype.
50 // The decision: use the compiler-supported _Complex type on Linux and Mac,
51 // and use the typedef'ed types on Windows.
52 // The condition for WIN64 was modified in anticipation of the 10.1 build compiler.
53 
54 #if defined( __cplusplus ) && ( KMP_OS_WINDOWS )
55  // create shortcuts for c99 complex types
56 
57  #if (_MSC_VER < 1600) && defined(_DEBUG)
58  // Workaround for the problem of an unresolved _DebugHeapTag external.
59  // This problem prevented the use of our static debug library for C tests
60  // compiled with the /MDd option (the library itself is built with /MTd).
61  #undef _DEBUG
62  #define _DEBUG_TEMPORARILY_UNSET_
63  #endif
64 
65  #include <complex>
66 
67  template< typename type_lhs, typename type_rhs >
68  std::complex< type_lhs > __kmp_lhs_div_rhs(
69  const std::complex< type_lhs >& lhs,
70  const std::complex< type_rhs >& rhs ) {
71  type_lhs a = lhs.real();
72  type_lhs b = lhs.imag();
73  type_rhs c = rhs.real();
74  type_rhs d = rhs.imag();
75  type_rhs den = c*c + d*d;
76  type_rhs r = ( a*c + b*d );
77  type_rhs i = ( b*c - a*d );
78  std::complex< type_lhs > ret( r/den, i/den );
79  return ret;
80  }
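 // Illustrative note (not part of the original header): the helper implements the
 // textbook complex division
 //     (a + bi) / (c + di) = ( (a*c + b*d) + (b*c - a*d)i ) / (c*c + d*d)
 // and returns the quotient as std::complex< type_lhs >, which lets the wrapper
 // classes below divide by a right-hand side of a different precision, e.g.
 //
 //     std::complex< float >  x( 1.0f, 2.0f );
 //     std::complex< double > y( 3.0, 4.0 );
 //     std::complex< float >  q = __kmp_lhs_div_rhs( x, y );  // x / y as complex<float>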
81 
82  // complex8
83  struct __kmp_cmplx64_t : std::complex< double > {
84 
85  __kmp_cmplx64_t() : std::complex< double > () {}
86 
87  __kmp_cmplx64_t( const std::complex< double >& cd )
88  : std::complex< double > ( cd ) {}
89 
90  void operator /= ( const __kmp_cmplx64_t& rhs ) {
91  std::complex< double > lhs = *this;
92  *this = __kmp_lhs_div_rhs( lhs, rhs );
93  }
94 
95  __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) {
96  std::complex< double > lhs = *this;
97  return __kmp_lhs_div_rhs( lhs, rhs );
98  }
99 
100  };
101  typedef struct __kmp_cmplx64_t kmp_cmplx64;
102 
103  // complex4
104  struct __kmp_cmplx32_t : std::complex< float > {
105 
106  __kmp_cmplx32_t() : std::complex< float > () {}
107 
108  __kmp_cmplx32_t( const std::complex<float>& cf )
109  : std::complex< float > ( cf ) {}
110 
111  __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) {
112  std::complex< float > lhs = *this;
113  std::complex< float > rhs = b;
114  return ( lhs + rhs );
115  }
116  __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) {
117  std::complex< float > lhs = *this;
118  std::complex< float > rhs = b;
119  return ( lhs - rhs );
120  }
121  __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) {
122  std::complex< float > lhs = *this;
123  std::complex< float > rhs = b;
124  return ( lhs * rhs );
125  }
126 
127  __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) {
128  kmp_cmplx64 t = kmp_cmplx64( *this ) + b;
129  std::complex< double > d( t );
130  std::complex< float > f( d );
131  __kmp_cmplx32_t r( f );
132  return r;
133  }
134  __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) {
135  kmp_cmplx64 t = kmp_cmplx64( *this ) - b;
136  std::complex< double > d( t );
137  std::complex< float > f( d );
138  __kmp_cmplx32_t r( f );
139  return r;
140  }
141  __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) {
142  kmp_cmplx64 t = kmp_cmplx64( *this ) * b;
143  std::complex< double > d( t );
144  std::complex< float > f( d );
145  __kmp_cmplx32_t r( f );
146  return r;
147  }
148 
149  void operator /= ( const __kmp_cmplx32_t& rhs ) {
150  std::complex< float > lhs = *this;
151  *this = __kmp_lhs_div_rhs( lhs, rhs );
152  }
153 
154  __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) {
155  std::complex< float > lhs = *this;
156  return __kmp_lhs_div_rhs( lhs, rhs );
157  }
158 
159  void operator /= ( const kmp_cmplx64& rhs ) {
160  std::complex< float > lhs = *this;
161  *this = __kmp_lhs_div_rhs( lhs, rhs );
162  }
163 
164  __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) {
165  std::complex< float > lhs = *this;
166  return __kmp_lhs_div_rhs( lhs, rhs );
167  }
168  };
169  typedef struct __kmp_cmplx32_t kmp_cmplx32;
170 
171  // complex10
172  struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > {
173 
174  __kmp_cmplx80_t() : std::complex< long double > () {}
175 
176  __kmp_cmplx80_t( const std::complex< long double >& cld )
177  : std::complex< long double > ( cld ) {}
178 
179  void operator /= ( const __kmp_cmplx80_t& rhs ) {
180  std::complex< long double > lhs = *this;
181  *this = __kmp_lhs_div_rhs( lhs, rhs );
182  }
183 
184  __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) {
185  std::complex< long double > lhs = *this;
186  return __kmp_lhs_div_rhs( lhs, rhs );
187  }
188 
189  };
190  typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80;
191 
192  // complex16
193  #if KMP_HAVE_QUAD
194  struct __kmp_cmplx128_t : std::complex< _Quad > {
195 
196  __kmp_cmplx128_t() : std::complex< _Quad > () {}
197 
198  __kmp_cmplx128_t( const std::complex< _Quad >& cq )
199  : std::complex< _Quad > ( cq ) {}
200 
201  void operator /= ( const __kmp_cmplx128_t& rhs ) {
202  std::complex< _Quad > lhs = *this;
203  *this = __kmp_lhs_div_rhs( lhs, rhs );
204  }
205 
206  __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) {
207  std::complex< _Quad > lhs = *this;
208  return __kmp_lhs_div_rhs( lhs, rhs );
209  }
210 
211  };
212  typedef struct __kmp_cmplx128_t kmp_cmplx128;
213  #endif /* KMP_HAVE_QUAD */
214 
215  #ifdef _DEBUG_TEMPORARILY_UNSET_
216  #undef _DEBUG_TEMPORARILY_UNSET_
217  // Set it back now
218  #define _DEBUG 1
219  #endif
220 
221 #else
222  // create shortcuts for c99 complex types
223  typedef float _Complex kmp_cmplx32;
224  typedef double _Complex kmp_cmplx64;
225  typedef long double _Complex kmp_cmplx80;
226  #if KMP_HAVE_QUAD
227  typedef _Quad _Complex kmp_cmplx128;
228  #endif
229 #endif
230 
231 // Compiler 12.0 changed the alignment of 16- and 32-byte arguments (such as _Quad
232 // and kmp_cmplx128) on the IA-32 architecture. The following aligned structures
233 // are implemented to support the old alignment in 10.1, 11.0 and 11.1, and to
234 // introduce the new alignment in 12.0. See CQ88405.
235 #if KMP_ARCH_X86 && KMP_HAVE_QUAD
236 
237  // 4-byte aligned structures for backward compatibility.
238 
239  #pragma pack( push, 4 )
240 
241 
242  struct KMP_DO_ALIGN( 4 ) Quad_a4_t {
243  _Quad q;
244 
245  Quad_a4_t( ) : q( ) {}
246  Quad_a4_t( const _Quad & cq ) : q ( cq ) {}
247 
248  Quad_a4_t operator + ( const Quad_a4_t& b ) {
249  _Quad lhs = (*this).q;
250  _Quad rhs = b.q;
251  return (Quad_a4_t)( lhs + rhs );
252  }
253 
254  Quad_a4_t operator - ( const Quad_a4_t& b ) {
255  _Quad lhs = (*this).q;
256  _Quad rhs = b.q;
257  return (Quad_a4_t)( lhs - rhs );
258  }
259  Quad_a4_t operator * ( const Quad_a4_t& b ) {
260  _Quad lhs = (*this).q;
261  _Quad rhs = b.q;
262  return (Quad_a4_t)( lhs * rhs );
263  }
264 
265  Quad_a4_t operator / ( const Quad_a4_t& b ) {
266  _Quad lhs = (*this).q;
267  _Quad rhs = b.q;
268  return (Quad_a4_t)( lhs / rhs );
269  }
270 
271  };
272 
273  struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t {
274  kmp_cmplx128 q;
275 
276  kmp_cmplx128_a4_t() : q () {}
277 
278  kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
279 
280  kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) {
281  kmp_cmplx128 lhs = (*this).q;
282  kmp_cmplx128 rhs = b.q;
283  return (kmp_cmplx128_a4_t)( lhs + rhs );
284  }
285  kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) {
286  kmp_cmplx128 lhs = (*this).q;
287  kmp_cmplx128 rhs = b.q;
288  return (kmp_cmplx128_a4_t)( lhs - rhs );
289  }
290  kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) {
291  kmp_cmplx128 lhs = (*this).q;
292  kmp_cmplx128 rhs = b.q;
293  return (kmp_cmplx128_a4_t)( lhs * rhs );
294  }
295 
296  kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) {
297  kmp_cmplx128 lhs = (*this).q;
298  kmp_cmplx128 rhs = b.q;
299  return (kmp_cmplx128_a4_t)( lhs / rhs );
300  }
301 
302  };
303 
304  #pragma pack( pop )
305 
306  // New 16-byte aligned structures for 12.0 compiler.
307  struct KMP_DO_ALIGN( 16 ) Quad_a16_t {
308  _Quad q;
309 
310  Quad_a16_t( ) : q( ) {}
311  Quad_a16_t( const _Quad & cq ) : q ( cq ) {}
312 
313  Quad_a16_t operator + ( const Quad_a16_t& b ) {
314  _Quad lhs = (*this).q;
315  _Quad rhs = b.q;
316  return (Quad_a16_t)( lhs + rhs );
317  }
318 
319  Quad_a16_t operator - ( const Quad_a16_t& b ) {
320  _Quad lhs = (*this).q;
321  _Quad rhs = b.q;
322  return (Quad_a16_t)( lhs - rhs );
323  }
324  Quad_a16_t operator * ( const Quad_a16_t& b ) {
325  _Quad lhs = (*this).q;
326  _Quad rhs = b.q;
327  return (Quad_a16_t)( lhs * rhs );
328  }
329 
330  Quad_a16_t operator / ( const Quad_a16_t& b ) {
331  _Quad lhs = (*this).q;
332  _Quad rhs = b.q;
333  return (Quad_a16_t)( lhs / rhs );
334  }
335  };
336 
337  struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t {
338  kmp_cmplx128 q;
339 
340  kmp_cmplx128_a16_t() : q () {}
341 
342  kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {}
343 
344  kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) {
345  kmp_cmplx128 lhs = (*this).q;
346  kmp_cmplx128 rhs = b.q;
347  return (kmp_cmplx128_a16_t)( lhs + rhs );
348  }
349  kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) {
350  kmp_cmplx128 lhs = (*this).q;
351  kmp_cmplx128 rhs = b.q;
352  return (kmp_cmplx128_a16_t)( lhs - rhs );
353  }
354  kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) {
355  kmp_cmplx128 lhs = (*this).q;
356  kmp_cmplx128 rhs = b.q;
357  return (kmp_cmplx128_a16_t)( lhs * rhs );
358  }
359 
360  kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) {
361  kmp_cmplx128 lhs = (*this).q;
362  kmp_cmplx128 rhs = b.q;
363  return (kmp_cmplx128_a16_t)( lhs / rhs );
364  }
365  };
366 
367 #endif
368 
369 #if ( KMP_ARCH_X86 )
370  #define QUAD_LEGACY Quad_a4_t
371  #define CPLX128_LEG kmp_cmplx128_a4_t
372 #else
373  #define QUAD_LEGACY _Quad
374  #define CPLX128_LEG kmp_cmplx128
375 #endif
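// Illustrative note (not part of the original header): with these macros, a prototype
// written with QUAD_LEGACY, such as
//     void __kmpc_atomic_float16_add( ident_t *, int, QUAD_LEGACY *, QUAD_LEGACY );
// takes the 4-byte-aligned wrapper Quad_a4_t on IA-32 (preserving the pre-12.0 ABI)
// and plain _Quad on other architectures; CPLX128_LEG behaves analogously for
// kmp_cmplx128.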
376 
377 #ifdef __cplusplus
378  extern "C" {
379 #endif
380 
381 extern int __kmp_atomic_mode;
382 
383 //
384 // Atomic locks can easily become contended, so we use queuing locks for them.
385 //
386 
387 typedef kmp_queuing_lock_t kmp_atomic_lock_t;
388 
389 static inline void
390 __kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
391 {
392 #if OMPT_SUPPORT && OMPT_TRACE
393  if ((ompt_status == ompt_status_track_callback) &&
394  ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
395  ompt_callbacks.ompt_callback(ompt_event_wait_atomic)(
396  (ompt_wait_id_t) lck);
397  }
398 #endif
399 
400  __kmp_acquire_queuing_lock( lck, gtid );
401 
402 #if OMPT_SUPPORT && OMPT_TRACE
403  if ((ompt_status == ompt_status_track_callback) &&
404  ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
405  ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
406  (ompt_wait_id_t) lck);
407  }
408 #endif
409 }
410 
411 static inline int
412 __kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
413 {
414  return __kmp_test_queuing_lock( lck, gtid );
415 }
416 
417 static inline void
418 __kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
419 {
420  __kmp_release_queuing_lock( lck, gtid );
421 #if OMPT_SUPPORT && OMPT_BLAME
422  if ((ompt_status == ompt_status_track_callback) &&
423  ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
424  ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
425  (ompt_wait_id_t) lck);
426  }
427 #endif
428 }
429 
430 static inline void
431 __kmp_init_atomic_lock( kmp_atomic_lock_t *lck )
432 {
433  __kmp_init_queuing_lock( lck );
434 }
435 
436 static inline void
437 __kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck )
438 {
439  __kmp_destroy_queuing_lock( lck );
440 }
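// Usage sketch (illustrative, not part of this header): a lock-based fallback for an
// atomic update brackets the operation with the wrappers above, e.g.
//
//     static void example_locked_update( kmp_atomic_lock_t *lck, kmp_int32 gtid,
//                                        long double *lhs, long double rhs )
//     {
//         __kmp_acquire_atomic_lock( lck, gtid );
//         *lhs += rhs;                             // the guarded update
//         __kmp_release_atomic_lock( lck, gtid );
//     }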
441 
442 // Global Locks
443 
444 extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
445 extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
446 extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
447 extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
448 extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
449 extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
450 extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
451 extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex data type */
452 extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
453 extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
454 extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
455 extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
456 extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
457 
458 //
459 // The routines below implement atomic UPDATE operations.
460 //
461 
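// Usage sketch (illustrative, not part of this header): under "#pragma omp atomic", an
// update such as "x += v" on a 32-bit signed integer may be lowered by the compiler into
// a call of the matching routine below, e.g.
//
//     void example_atomic_update( ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 v ) {
//         __kmpc_atomic_fixed4_add( loc, gtid, x, v );   // performs *x += v atomically
//     }
//
// where "loc" identifies the source location and "gtid" is the caller's global thread id.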
462 // 1-byte
463 void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs );
464 void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs );
465 void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs );
466 void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
467 void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs );
468 void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs );
469 void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs );
470 void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs );
471 void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
472 void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs );
473 void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs );
474 // 2-byte
475 void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs );
476 void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs );
477 void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs );
478 void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
479 void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs );
480 void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs );
481 void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs );
482 void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs );
483 void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
484 void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs );
485 void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs );
486 // 4-byte add / sub fixed
487 void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
488 void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
489 // 4-byte add / sub float
490 void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
491 void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
492 // 8-byte add / sub fixed
493 void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
494 void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
495 // 8-byte add / sub float
496 void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
497 void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
498 // 4-byte fixed
499 void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
500 void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
501 void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
502 void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
503 void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
504 void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
505 void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
506 void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
507 void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
508 // 8-byte fixed
509 void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
510 void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
511 void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
512 void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
513 void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
514 void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
515 void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
516 void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
517 void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
518 // 4-byte float
519 void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
520 void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
521 // 8-byte float
522 void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
523 void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
524 // 1-, 2-, 4-, 8-byte logical (&&, ||)
525 void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs );
526 void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs );
527 void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs );
528 void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs );
529 void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
530 void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
531 void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
532 void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
533 // MIN / MAX
534 void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs );
535 void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs );
536 void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs );
537 void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs );
538 void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
539 void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
540 void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
541 void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
542 void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
543 void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
544 void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
545 void __kmpc_atomic_float8_min( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
546 #if KMP_HAVE_QUAD
547 void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
548 void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
549 #if ( KMP_ARCH_X86 )
550  // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only
551  void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
552  void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
553 #endif
554 #endif
555 // .NEQV. (same as xor)
556 void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
557 void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
558 void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
559 void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
560 // .EQV. (same as ~xor)
561 void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs );
562 void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs );
563 void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
564 void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
565 // long double type
566 void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
567 void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
568 void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
569 void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
570 // _Quad type
571 #if KMP_HAVE_QUAD
572 void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
573 void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
574 void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
575 void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
576 #if ( KMP_ARCH_X86 )
577  // Routines with 16-byte arguments aligned to 16-byte boundary
578  void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
579  void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
580  void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
581  void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
582 #endif
583 #endif
584 // routines for complex types
585 void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
586 void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
587 void __kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
588 void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
589 void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
590 void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
591 void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
592 void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
593 void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
594 void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
595 void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
596 void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
597 #if KMP_HAVE_QUAD
598 void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
599 void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
600 void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
601 void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
602 #if ( KMP_ARCH_X86 )
603  // Routines with 16-byte arguments aligned to 16-byte boundary
604  void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
605  void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
606  void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
607  void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
608 #endif
609 #endif
610 
611 #if OMP_40_ENABLED
612 
613 // OpenMP 4.0: x = expr binop x for non-commutative operations.
614 // Supported only on the IA-32 architecture and Intel(R) 64.
615 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
616 
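// Usage sketch (illustrative, not part of this header): the "_rev" routines handle the
// reversed-operand form "x = expr binop x", e.g. "x = v - x" on a 32-bit signed integer
// may be lowered as
//
//     __kmpc_atomic_fixed4_sub_rev( loc, gtid, &x, v );   // x = v - x, atomically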
617 void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
618 void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
619 void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
620 void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
621 void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs );
622 void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs );
623 void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
624 void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
625 void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
626 void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
627 void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs );
628 void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs );
629 void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
630 void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
631 void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
632 void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
633 void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
634 void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs );
635 void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
636 void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
637 void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
638 void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
639 void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
640 void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs );
641 void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
642 void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs );
643 void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
644 void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs );
645 void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
646 void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
647 #if KMP_HAVE_QUAD
648 void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
649 void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
650 #endif
651 void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
652 void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
653 void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
654 void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
655 void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
656 void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
657 #if KMP_HAVE_QUAD
658 void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
659 void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
660 #if ( KMP_ARCH_X86 )
661  // Routines with 16-byte arguments aligned to 16-byte boundary
662  void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
663  void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
664  void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
665  void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
666 #endif
667 #endif // KMP_HAVE_QUAD
668 
669 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
670 
671 #endif //OMP_40_ENABLED
672 
673 // routines for mixed types
674 
675 // RHS=float8
676 void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
677 void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs );
678 void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
679 void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs );
680 void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
681 void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs );
682 void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
683 void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs );
684 void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
685 void __kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
686 void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
687 void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs );
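// Usage sketch (illustrative, not part of this header): these mixed-type entry points let
// the compiler pass the rhs in its original precision; e.g. "f /= d" with float f and
// double d may be lowered as
//
//     __kmpc_atomic_float4_div_float8( loc, gtid, &f, d );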
688 
689 // RHS=float16 (deprecated; to be removed once we are sure the compiler no longer uses these routines)
690 #if KMP_HAVE_QUAD
691 void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
692 void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
693 void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
694 void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs );
695 void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs );
696 
697 void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
698 void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
699 void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
700 void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs );
701 void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs );
702 
703 void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
704 void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
705 void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
706 void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs );
707 void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs );
708 
709 void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
710 void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
711 void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
712 void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs );
713 void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs );
714 
715 void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
716 void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
717 void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
718 void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs );
719 
720 void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
721 void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
722 void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
723 void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs );
724 
725 void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
726 void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
727 void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
728 void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs );
729 #endif // KMP_HAVE_QUAD
730 
731 // RHS=cmplx8
732 void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
733 void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
734 void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
735 void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs );
736 
737 // generic atomic routines
738 void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
739 void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
740 void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
741 void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
742 void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
743 void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
744 void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
745 void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) );
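// Illustrative note (an assumption, not stated in this header): the generic routines
// above are keyed by operand size (1..32 bytes) rather than by operation; the caller
// supplies the operation as the callback "f", which the runtime invokes while it
// serializes access to the location, so one entry point can serve arbitrary types of
// that size.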
746 
747 // READ, WRITE and CAPTURE are supported only on the IA-32 architecture and Intel(R) 64.
748 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
749 
750 //
751 // The routines below implement atomic READ operations.
752 //
753 
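// Usage sketch (illustrative, not part of this header): "#pragma omp atomic read", e.g.
// "v = x;" on a 64-bit signed integer, may be lowered as
//
//     kmp_int64 v = __kmpc_atomic_fixed8_rd( loc, gtid, &x );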
754 char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc );
755 short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc );
756 kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc );
757 kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc );
758 kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc );
759 kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc );
760 long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc );
761 #if KMP_HAVE_QUAD
762 QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc );
763 #endif
764 // Fix for CQ220361: cmplx4 READ returns void on Windows* OS; the read value is
765 // returned through an additional parameter.
766 #if ( KMP_OS_WINDOWS )
767  void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
768 #else
769  kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc );
770 #endif
771 kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc );
772 kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc );
773 #if KMP_HAVE_QUAD
774 CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc );
775 #if ( KMP_ARCH_X86 )
776  // Routines with 16-byte arguments aligned to 16-byte boundary
777  Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc );
778  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc );
779 #endif
780 #endif
781 
782 
783 //
784 // The routines below implement atomic WRITE operations.
785 //
786 
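// Usage sketch (illustrative, not part of this header): "#pragma omp atomic write", e.g.
// "x = v;" on a double, may be lowered as
//
//     __kmpc_atomic_float8_wr( loc, gtid, &x, v );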
787 void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs );
788 void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs );
789 void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
790 void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
791 void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs );
792 void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs );
793 void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
794 #if KMP_HAVE_QUAD
795 void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
796 #endif
797 void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
798 void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
799 void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
800 #if KMP_HAVE_QUAD
801 void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
802 #if ( KMP_ARCH_X86 )
803  // Routines with 16-byte arguments aligned to 16-byte boundary
804  void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
805  void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
806 #endif
807 #endif
808 
809 //
810 // The routines below implement atomic CAPTURE operations.
811 //
812 
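// Usage sketch (illustrative, not part of this header): "#pragma omp atomic capture",
// e.g. "v = x += r;" or "{ v = x; x += r; }" on a 32-bit signed integer, may be lowered as
//
//     v = __kmpc_atomic_fixed4_add_cpt( loc, gtid, &x, r, flag );
//
// where "flag" selects whether the captured value is taken before or after the update
// (the two forms of the capture construct).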
813 // 1-byte
814 char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
815 char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
816 char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
817 unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
818 char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
819 char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
820 char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
821 char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
822 unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag);
823 char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
824 char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
825 // 2-byte
826 short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
827 short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
828 short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
829 unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
830 short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
831 short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
832 short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
833 short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
834 unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag);
835 short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
836 short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
837 // 4-byte add / sub fixed
838 kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
839 kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
840 // 4-byte add / sub float
841 kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
842 kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
843 // 8-byte add / sub fixed
844 kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
845 kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
846 // 8-byte add / sub float
847 kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
848 kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
849 // 4-byte fixed
850 kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
851 kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
852 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
853 kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
854 kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
855 kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
856 kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
857 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag);
858 kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
859 // 8-byte fixed
860 kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
861 kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
862 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
863 kmp_int64 __kmpc_atomic_fixed8_mul_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
864 kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
865 kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
866 kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
867 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag);
868 kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
869 // 4-byte float
870 kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
871 kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
872 // 8-byte float
873 kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
874 kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
875 // 1-, 2-, 4-, 8-byte logical (&&, ||)
876 char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
877 char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
878 short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
879 short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
880 kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
881 kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
882 kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
883 kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
884 // MIN / MAX
885 char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
886 char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
887 short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
888 short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
889 kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
890 kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
891 kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
892 kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
893 kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
894 kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag);
895 kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
896 kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag);
897 #if KMP_HAVE_QUAD
898 QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
899 QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
900 #endif
901 // .NEQV. (same as xor)
902 char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
903 short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
904 kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
905 kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
906 // .EQV. (same as ~xor)
907 char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag);
908 short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag);
909 kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag);
910 kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag);
911 // long double type
912 long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
913 long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
914 long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
915 long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag);
916 #if KMP_HAVE_QUAD
917 // _Quad type
918 QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
919 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
920 QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
921 QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag);
922 #endif
923 // routines for complex types
924 // Workaround for cmplx4 routines - they return void; the captured value is returned via the "out" argument
925 void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
926 void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
927 void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
928 void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag);
929 
930 kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
931 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
932 kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
933 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag);
934 kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
935 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
936 kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
937 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag);
938 #if KMP_HAVE_QUAD
939 CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
940 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
941 CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
942 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag);
943 #if ( KMP_ARCH_X86 )
944  // Routines with 16-byte arguments aligned to 16-byte boundary
945  Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
946  Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
947  Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
948  Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
949  Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
950  Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag);
951  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
952  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
953  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
954  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag);
955 #endif
956 #endif
957 
958 void __kmpc_atomic_start(void);
959 void __kmpc_atomic_end(void);
960 
961 #if OMP_40_ENABLED
962 
963 // OpenMP 4.0 capture-reverse forms for non-commutative operations: v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }
964 
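// Usage sketch (illustrative, not part of this header): e.g. "v = x = r / x;" on a double
// may be lowered as
//
//     v = __kmpc_atomic_float8_div_cpt_rev( loc, gtid, &x, r, flag );
//
// with "flag" again selecting capture-before vs. capture-after.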
965 char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
966 char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
967 unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
968 char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag);
969 char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag );
970 unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag );
971 short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
972 short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
973 unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
974 short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
975 short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag );
976 unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag );
977 kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
978 kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
979 kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
980 kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
981 kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag );
982 kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag );
983 kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
984 kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
985 kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
986 kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
987 kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag );
988 kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag );
989 float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
990 float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag );
991 double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
992 double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag );
993 long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
994 long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag );
995 #if KMP_HAVE_QUAD
996 QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
997 QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag );
998 #endif
999 // Workaround for the cmplx4 routines: they return void, and the captured value is returned via the extra "out" argument (see the illustrative sketch after this block).
1000 void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
1001 void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
1002 kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
1003 kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag );
1004 kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
1005 kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag );
1006 #if KMP_HAVE_QUAD
1007 CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
1008 CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag );
1009 #if ( KMP_ARCH_X86 )
1010  Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
1011  Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag );
1012  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
1013  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag );
1014 #endif
1015 #endif
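// Illustrative sketch only, for the cmplx4 workaround noted above: the cmplx4
// capture-reverse routines return the captured value through the extra "out"
// pointer rather than as the return value.  Variable names are assumptions
// for illustration.
//
//     kmp_cmplx32 v;
//     __kmpc_atomic_cmplx4_div_cpt_rev( &loc, gtid, &x, expr, &v, 1 );
//     // v now holds the updated value of x (flag == 1)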
1016 
1017 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
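// Illustrative sketch only: the swap entry points below store the new value
// and return the value the location held immediately before the store
// ("loc" and "gtid" are assumptions for illustration):
//
//     // user source:  #pragma omp atomic capture
//     //               { v = x; x = expr; }
//     kmp_int64 v = __kmpc_atomic_fixed8_swp( &loc, gtid, &x, expr );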
1018 char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs );
1019 short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs );
1020 kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs );
1021 kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs );
1022 float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs );
1023 double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs );
1024 long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs );
1025 #if KMP_HAVE_QUAD
1026 QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs );
1027 #endif
1028 // !!! TODO: check if we need a workaround here
1029 void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out );
1030 //kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs );
1031 
1032 kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs );
1033 kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs );
1034 #if KMP_HAVE_QUAD
1035 CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs );
1036 #if ( KMP_ARCH_X86 )
1037  Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs );
1038  kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs );
1039 #endif
1040 #endif
1041 
1042 // End of OpenMP 4.0 capture
1043 
1044 #endif //OMP_40_ENABLED
1045 
1046 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1047 
1048 /* ------------------------------------------------------------------------ */
1049 /* ------------------------------------------------------------------------ */
1050 
1051 #ifdef __cplusplus
1052  } // extern "C"
1053 #endif
1054 
1055 #endif /* KMP_ATOMIC_H */
1056 
1057 // end of file