#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

extern int __kmp_affinity_compact; /* Affinity 'compact' value */
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];    // Physical IDs, outermost hierarchy level first
    unsigned childNums[maxDepth]; // Child number within each level
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    // Addresses are "close" at a given level if all labels above that level match.
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const { return !operator==(b); }
};
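// Illustrative sketch, not part of the original header: on a hypothetical
// machine with 2 packages x 4 cores x 2 hardware threads, the second hardware
// thread of core 3 on package 1 could be encoded, outermost level first, as:
//
//   Address addr(3);    // depth 3: package, core, hardware thread
//   addr.labels[0] = 1; // package id
//   addr.labels[1] = 3; // core id within the package
//   addr.labels[2] = 1; // hardware thread id within the core
//
// With that encoding, addr.isClose(b, 1) ignores the innermost level, so it
// holds for any b on the same core regardless of hardware thread.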
class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second) : first(_first), second(_second) {}
    AddrUnsPair &operator=(const AddrUnsPair &b) {
        first = b.first;
        second = b.second;
        return *this;
    }
};
// qsort() comparator: orders AddrUnsPair entries lexicographically by their
// labels, outermost hierarchy level first.
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}
// qsort() comparator: orders entries by child numbers, rotated by
// __kmp_affinity_compact so the innermost 'compact' levels become the most
// significant sort keys.
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
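// A worked ordering example under assumed values (illustrative only): with
// depth == 3 and __kmp_affinity_compact == 1, the first loop compares
// childNums[2] (the innermost level, rotated to most-significant), then the
// second loop compares childNums[0] and childNums[1]; with compact == 0 this
// degenerates to a plain outermost-first comparison.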
// Machine hierarchy info, computed once at init time: maps threads onto the
// machine topology (or a best-guess tree when none is known) so barriers can
// be performed hierarchically.
class hierarchy_info {
public:
    // Defaults when no affinity information is available; behaves much like
    // a hyper barrier.
    static const kmp_uint32 maxLeaves = 4;
    static const kmp_uint32 minBranch = 4;
    kmp_uint32 maxLevels; // Levels in the hierarchy; grows under oversubscription
    kmp_uint32 depth;     // Levels actually in use (numPerLevel entries, sans trailing 1s)
    kmp_uint32 base_num_threads;
    enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
    volatile kmp_int8 uninitialized; // One of init_status
    volatile kmp_int8 resizing;      // 0 = not resizing, 1 = resizing
    kmp_uint32 *numPerLevel;  // numPerLevel[i] = children per parent at level i (0 = leaves)
    kmp_uint32 *skipPerLevel; // skipPerLevel[i] = leaves under one node at level i
    // Derive numPerLevel from a sorted address table: walk the hierarchy from
    // the innermost level outward, recording the maximum child number seen at
    // each level, plus one.
    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info()
        : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
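    // A hypothetical deriveLevels() result (illustrative only): for a sorted
    // table describing 4 packages x 4 cores/package x 2 threads/core, the
    // maxima of the child numbers give numPerLevel = {2, 4, 4, 1, 1, 1, 1}.
    // Level 0 holds the leaves (threads per core); unused levels stay at 1.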
    void fini() {
        if (!uninitialized && numPerLevel) __kmp_free(numPerLevel);
    }
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        // Only one thread initializes; latecomers spin until it finishes.
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
            &uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        // Re-initialize fields explicitly in case the library is initialized
        // multiple times, which could otherwise leave dirty values behind.
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // Init both arrays to 1
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort the table by physical ID and derive levels from it; with no
        // affinity information, fall back to the default leaves/branch shape.
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        } else {
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs / maxLeaves;
            if (num_addrs % maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i) // Count non-trivial levels
            if (numPerLevel[i] != 1 || depth > 1) // Only count one top-level '1'
                depth++;

        // Optimize hierarchy width: halve any over-wide level, pushing the
        // excess up to the next level, and cap level 0 at maxLeaves.
        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs / maxLeaves;
        if (branch < minBranch) branch = minBranch;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) {
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > maxLeaves)) {
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d + 1] == 1) depth++;
                numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = minBranch;
            }
        }

        // skipPerLevel[i] = number of leaves spanned by one node at level i.
        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
        // Fill the remaining levels for the oversubscription case.
        for (kmp_uint32 i = depth; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];

        uninitialized = initialized; // One writer
    }
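    // A worked init() example under assumed inputs (illustrative only): with
    // adr2os == NULL and num_addrs == 16, the fallback shape is numPerLevel =
    // {4, 4, 1, ...} and depth == 3; no level exceeds branch == 4, so the
    // width optimization changes nothing and skipPerLevel ends up as
    // {1, 4, 16, 32, 64, 128, 256}, the tail doubling for oversubscription.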
    // Resize the hierarchy when nproc grows beyond base_num_threads.
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing; wait for them
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // First see whether the current maxLevels can contain the new size.
        kmp_uint32 old_sz = skipPerLevel[depth - 1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];
            numPerLevel[i - 1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space; expand the hierarchy
            while (nproc > old_sz) {
                old_sz *= 2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Allocate new arrays and copy the old contents over.
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);
            for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }
            for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) { // Init new entries
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }
            __kmp_free(old_numPerLevel);
        }

        // Fill in the oversubscription levels of the hierarchy.
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i - 1];

        base_num_threads = nproc;
        resizing = 0; // One writer
    }
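    // A resize() walk-through under assumed values (illustrative only):
    // starting from the 16-thread init() example above (depth == 3,
    // skipPerLevel == {1, 4, 16, 32, 64, 128, 256}), resize(40) doubles
    // capacity within the existing maxLevels: numPerLevel becomes
    // {4, 4, 2, 2, 1, 1, 1} and depth grows to 5 with no reallocation. The
    // array-expansion branch runs only when nproc still exceeds what doubling
    // the existing levels can reach.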
};
#endif // KMP_AFFINITY_H