Bug #1559: fixed cpuaffinity.c build errors
[charm.git] / src / conv-core / cpuaffinity.c
blob07a3a5865ca6093cf2552c788f67eafc9a6efe38
2 /*
3 This scheme relies on using IP address to identify nodes and assigning
4 cpu affinity.
6 when CMK_NO_SOCKETS, which is typically on cray xt3 and bluegene/L.
7 There is no hostname for the compute nodes.
9 * last updated 3/20/2010 Gengbin Zheng
10 * new options +pemap +commmap takes complex pattern of a list of cores
13 #define _GNU_SOURCE
15 #include "converse.h"
16 #include "sockRoutines.h"
18 #define DEBUGP(x) /* CmiPrintf x; */
19 CpvDeclare(int, myCPUAffToCore);
20 #if CMK_OS_IS_LINUX
21 /*
22 * /proc/<PID>/[task/<TID>]/stat file descriptor
23 * Used to retrieve the info about which physical
24 * core this process or thread is on.
25 **/
26 CpvDeclare(void *, myProcStatFP);
27 #endif
29 #if CMK_HAS_SETAFFINITY || defined (_WIN32) || CMK_HAS_BINDPROCESSOR
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <unistd.h>
34 #include <errno.h>
36 #ifdef _WIN32
37 #include <windows.h>
38 #include <winbase.h>
39 #else
40 #define _GNU_SOURCE
41 #include <sched.h>
42 //long sched_setaffinity(pid_t pid, unsigned int len, unsigned long *user_mask_ptr);
43 //long sched_getaffinity(pid_t pid, unsigned int len, unsigned long *user_mask_ptr);
44 #endif
46 #if CMK_OS_IS_LINUX
47 #include <sys/syscall.h>
48 #endif
50 #if defined(__APPLE__)
51 #include <Carbon/Carbon.h> /* Carbon APIs for Multiprocessing */
52 #endif
55 #define MAX_EXCLUDE 64
56 static int excludecore[MAX_EXCLUDE] = {-1};
57 static int excludecount = 0;
59 static int affinity_doneflag = 0;
61 #ifndef _WIN32
62 static int affMsgsRecvd = 1; // number of affinity messages received at PE0
63 static cpu_set_t core_usage; // used to record union of CPUs used by every PE in physical node
64 static int aff_is_set = 0;
65 #endif
67 static int in_exclude(int core)
69 int i;
70 for (i=0; i<excludecount; i++) if (core == excludecore[i]) return 1;
71 return 0;
74 static void add_exclude(int core)
76 if (in_exclude(core)) return;
77 CmiAssert(excludecount < MAX_EXCLUDE);
78 excludecore[excludecount++] = core;
81 #if CMK_HAS_BINDPROCESSOR
82 #include <sys/processor.h>
83 #endif
/*
 * SET_MASK(cpuid): build a single-CPU affinity bit mask.
 *
 * Expects two variables in the enclosing scope: `mask` (unsigned long, the
 * result) and `len` (size of `mask` in bytes). When `cpuid` does not fit into
 * the mask, a message is printed and the ENCLOSING FUNCTION returns -1.
 *
 * The range test compares against the number of bits in the mask, and the
 * shift is done on an unsigned long, so no shift ever reaches or exceeds the
 * width of the operand.
 */
#define SET_MASK(cpuid) \
  /* set the affinity mask if possible */ \
  if ((cpuid) >= len * 8) { \
    printf("Mask size too small to handle requested CPU ID\n"); \
    return -1; \
  } else { \
    mask = 1UL << (cpuid);   /* set the affinity mask exclusively to one CPU */ \
  }
/*
 * Pin the calling process to the single CPU `cpuid`.
 *
 * Three platform paths, selected at compile time:
 *   - _WIN32:                 SetProcessAffinityMask with a one-bit mask built
 *                             by SET_MASK (limited to the bits of an unsigned long).
 *   - CMK_HAS_BINDPROCESSOR:  bindprocessor(BINDPROCESS, ...).
 *   - otherwise:              sched_setaffinity; when CPU_ALLOC is available and
 *                             cpuid >= CPU_SETSIZE a dynamically sized set is used.
 *
 * Returns 0 on success and -1 on failure.
 */
int set_cpu_affinity(unsigned int cpuid) {
#ifdef _WIN32
  unsigned long mask = 0xffffffff;
  unsigned int len = sizeof(mask);
  HANDLE hProcess;

  SET_MASK(cpuid)
  hProcess = GetCurrentProcess();
  if (SetProcessAffinityMask(hProcess, mask) == 0) {
    return -1;
  }
  return 0;
#elif CMK_HAS_BINDPROCESSOR
  int pid = getpid();

  if (bindprocessor(BINDPROCESS, pid, cpuid) == -1) {
    return -1;
  }
  return 0;
#else
#ifdef CPU_ALLOC
  if (cpuid >= CPU_SETSIZE) {
    /* The fixed-size cpu_set_t cannot represent this CPU: size one to fit. */
    int ncpus = cpuid + 1;
    cpu_set_t *dynset = CPU_ALLOC(ncpus);
    size_t nbytes;

    if (dynset == NULL) {
      perror("set_cpu_affinity CPU_ALLOC");
      return -1;
    }
    nbytes = CPU_ALLOC_SIZE(ncpus);
    CPU_ZERO_S(nbytes, dynset);
    CPU_SET_S(cpuid, nbytes, dynset);
    /* PID 0 refers to the current process */
    if (sched_setaffinity(0, nbytes, dynset) < 0) {
      perror("sched_setaffinity dynamically allocated");
      CPU_FREE(dynset);
      return -1;
    }
    CPU_FREE(dynset);
    return 0;
  }
#endif
  {
    cpu_set_t fixedset;

    CPU_ZERO(&fixedset);
    CPU_SET(cpuid, &fixedset);
    /* PID 0 refers to the current process */
    if (sched_setaffinity(0, sizeof(fixedset), &fixedset) < 0) {
      perror("sched_setaffinity");
      return -1;
    }
  }
  return 0;
#endif
}
158 #if CMK_SMP
/*
 * Pin the calling thread to the single CPU `cpuid`.
 *
 * Platform paths, selected at compile time:
 *   - _WIN32:                      SetThreadAffinityMask with a SET_MASK mask.
 *   - CMK_HAS_PTHREAD_SETAFFINITY: pthread_setaffinity_np; a dynamically sized
 *                                  set is used when CPU_ALLOC exists and
 *                                  cpuid >= CPU_SETSIZE.
 *   - CMK_HAS_BINDPROCESSOR:       bindprocessor(BINDTHREAD, ...).
 *   - otherwise:                   falls back to set_cpu_affinity().
 *
 * Returns 0 on success and -1 on failure. On the pthread path the error code
 * returned by pthread_setaffinity_np is stored in errno before perror().
 */
int set_thread_affinity(int cpuid) {
#ifdef _WIN32
  unsigned long mask = 0xffffffff;
  unsigned int len = sizeof(mask);
  HANDLE hThread;

  SET_MASK(cpuid)
  hThread = GetCurrentThread();
  if (SetThreadAffinityMask(hThread, mask) == 0) {
    return -1;
  }
  return 0;
#elif CMK_HAS_PTHREAD_SETAFFINITY
#ifdef CPU_ALLOC
  if (cpuid >= CPU_SETSIZE) {
    /* The fixed-size cpu_set_t cannot represent this CPU: size one to fit. */
    int ncpus = cpuid + 1;
    cpu_set_t *dynset = CPU_ALLOC(ncpus);
    size_t nbytes;

    if (dynset == NULL) {
      perror("set_thread_affinity CPU_ALLOC");
      return -1;
    }
    nbytes = CPU_ALLOC_SIZE(ncpus);
    CPU_ZERO_S(nbytes, dynset);
    CPU_SET_S(cpuid, nbytes, dynset);
    errno = pthread_setaffinity_np(pthread_self(), nbytes, dynset);
    if (errno != 0) {
      perror("pthread_setaffinity dynamically allocated");
      CPU_FREE(dynset);
      return -1;
    }
    CPU_FREE(dynset);
    return 0;
  }
#endif
  {
    cpu_set_t fixedset;

    CPU_ZERO(&fixedset);
    CPU_SET(cpuid, &fixedset);
    errno = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &fixedset);
    if (errno != 0) {
      perror("pthread_setaffinity");
      return -1;
    }
  }
  return 0;
#elif CMK_HAS_BINDPROCESSOR
  if (bindprocessor(BINDTHREAD, thread_self(), cpuid) != 0) {
    return -1;
  }
  return 0;
#else
  return set_cpu_affinity(cpuid);
#endif
}
222 #endif
225 int CmiSetCPUAffinity(int mycore)
227 int core = mycore;
228 if (core < 0) {
229 core = CmiNumCores() + core;
231 if (core < 0) {
232 CmiError("Error: Invalid cpu affinity core number: %d\n", mycore);
233 CmiAbort("CmiSetCPUAffinity failed");
236 CpvAccess(myCPUAffToCore) = core;
238 /* set cpu affinity */
239 #if CMK_SMP
240 return set_thread_affinity(core);
241 #else
242 return set_cpu_affinity(core);
243 /* print_cpu_affinity(); */
244 #endif
/*
 * Print the CPU affinity of the calling process through CmiPrintf.
 *
 *   - _WIN32:                the process affinity mask is printed in hex.
 *   - CMK_HAS_BINDPROCESSOR: only a notice is printed (mask unknown).
 *   - otherwise:             every CPU set in the sched_getaffinity() result
 *                            (up to CPU_SETSIZE) is listed.
 *
 * Returns 0 on success and -1 when the affinity query fails.
 */
int print_cpu_affinity() {
#ifdef _WIN32
  /* The API writes pointer-sized values, so the out-params must be DWORD_PTR. */
  DWORD_PTR pMask, sMask;
  HANDLE hProcess = GetCurrentProcess();
  /* GetProcessAffinityMask returns nonzero on success, zero on failure. */
  if (!GetProcessAffinityMask(hProcess, &pMask, &sMask)) {
    perror("On Windows: GetProcessAffinityMask");
    return -1;
  }

  CmiPrintf("[%d] CPU affinity mask is: 0x%08llx\n", CmiMyPe(), (unsigned long long)pMask);

#elif CMK_HAS_BINDPROCESSOR
  printf("[%d] CPU affinity mask is unknown for AIX. \n", CmiMyPe());
#else
  cpu_set_t cpuset;
  /* Room for the prefix plus one " <id> " entry per possible CPU; every
   * append below is bounded, so a full buffer truncates the list instead of
   * overflowing. */
  char str[64 + CPU_SETSIZE * 8];
  size_t used;
  int i, n;
  CPU_ZERO(&cpuset);

  /* PID 0 refers to the current process */
  if (sched_getaffinity(0, sizeof(cpuset), &cpuset) < 0) {
    perror("sched_getaffinity");
    return -1;
  }

  n = snprintf(str, sizeof(str), "[%d] CPU affinity mask is: ", CmiMyPe());
  if (n < 0) {
    str[0] = '\0';
    used = 0;
  } else if ((size_t)n >= sizeof(str)) {
    used = sizeof(str) - 1;
  } else {
    used = (size_t)n;
  }
  for (i = 0; i < CPU_SETSIZE; i++) {
    if (!CPU_ISSET(i, &cpuset)) continue;
    n = snprintf(str + used, sizeof(str) - used, " %d ", i);
    if (n < 0 || (size_t)n >= sizeof(str) - used) break;   /* buffer full */
    used += (size_t)n;
  }
  CmiPrintf("%s\n", str);
#endif
  return 0;
}
289 #if CMK_SMP
/*
 * Print the CPU affinity of the calling thread through CmiPrintf.
 *
 * Only does work when CMK_HAS_PTHREAD_SETAFFINITY is set: the set returned by
 * pthread_getaffinity_np() is listed CPU by CPU (up to CPU_SETSIZE). The line
 * is labelled "communication pthread" when CmiMyPe() >= CmiNumPes() and
 * "pthread" otherwise.
 *
 * Returns 0 on success (or when unsupported) and -1 when the query fails; in
 * that case errno holds the pthread error code.
 */
int print_thread_affinity() {
#if CMK_HAS_PTHREAD_SETAFFINITY
  cpu_set_t cpuset;
  /* Room for the prefix plus one " <id> " entry per possible CPU; every
   * append below is bounded, so a full buffer truncates the list instead of
   * overflowing. */
  char str[64 + CPU_SETSIZE * 8];
  size_t used;
  int j, n;

  errno = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
  if (errno != 0) {
    perror("pthread_getaffinity");
    return -1;
  }

  n = snprintf(str, sizeof(str), "[%d] %s affinity is: ", CmiMyPe(), CmiMyPe()>=CmiNumPes()?"communication pthread":"pthread");
  if (n < 0) {
    str[0] = '\0';
    used = 0;
  } else if ((size_t)n >= sizeof(str)) {
    used = sizeof(str) - 1;
  } else {
    used = (size_t)n;
  }
  for (j = 0; j < CPU_SETSIZE; j++) {
    if (!CPU_ISSET(j, &cpuset)) continue;
    n = snprintf(str + used, sizeof(str) - used, " %d ", j);
    if (n < 0 || (size_t)n >= sizeof(str) - used) break;   /* buffer full */
    used += (size_t)n;
  }
  CmiPrintf("%s\n", str);
#endif
  return 0;
}
317 #endif
/*
 * Print the affinity of the calling PE: the thread affinity in CMK_SMP
 * builds, the process affinity otherwise. Returns the helper's result.
 */
int CmiPrintCPUAffinity()
{
  int status;

#if CMK_SMP
  status = print_thread_affinity();
#else
  status = print_cpu_affinity();
#endif
  return status;
}
328 #ifndef _WIN32
/*
 * Fetch the CPU affinity set of the calling process into `*cpuset`.
 *
 * The size handed to sched_getaffinity() is the size of the set itself;
 * `cpuset` is a pointer, so sizeof(cpuset) would only be the pointer size.
 *
 * Returns 0 on success and -1 (after perror) on failure.
 */
int get_cpu_affinity(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
  /* PID 0 refers to the current process */
  if (sched_getaffinity(0, sizeof(*cpuset), cpuset) < 0) {
    perror("sched_getaffinity");
    return -1;
  }
  return 0;
}
338 #if CMK_SMP
/*
 * Fetch the CPU affinity set of the calling thread into `*cpuset`.
 *
 * With CMK_HAS_PTHREAD_SETAFFINITY the set is cleared and then filled by
 * pthread_getaffinity_np(); its return code is stored in errno. Without that
 * feature the function does nothing and reports failure.
 *
 * Returns 0 on success and -1 on failure or when unsupported.
 */
int get_thread_affinity(cpu_set_t *cpuset) {
#if CMK_HAS_PTHREAD_SETAFFINITY
  int rc;

  CPU_ZERO(cpuset);
  rc = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), cpuset);
  errno = rc;
  if (rc != 0) {
    perror("pthread_getaffinity");
    return -1;
  }
  return 0;
#else
  return -1;
#endif
}
351 #endif
/*
 * Fetch the affinity set of the calling PE into `*cpuset`: per-thread in
 * CMK_SMP builds, per-process otherwise. Returns the helper's result
 * (0 on success, -1 on failure).
 */
int get_affinity(cpu_set_t *cpuset) {
  int status;

#if CMK_SMP
  status = get_thread_affinity(cpuset);
#else
  status = get_cpu_affinity(cpuset);
#endif
  return status;
}
360 #endif
/*
 * Report the core the calling process/thread last ran on.
 *
 * On Linux the value is read from the stat file whose FILE* is kept in the
 * per-PE variable myProcStatFP. Returns the core number, or -1 when the file
 * is unavailable, unreadable, or does not have the expected layout. On other
 * platforms a warning is printed and -1 is returned.
 */
int CmiOnCore() {
#if CMK_OS_IS_LINUX
  /*
   * The info (task_cpu) is read from the Linux /proc virtual file system.
   * The /proc/<PID>/[task/<TID>]/stat is explained in the Linux
   * kernel documentation. The online one could be found in:
   * http://www.mjmwired.net/kernel/Documentation/filesystems/proc.txt
   * Based on the documentation, task_cpu is found at the 39th field in
   * the stat file.
   */
#define TASK_CPU_POS (39)
  char line[1024];
  char *p;
  char *comm_end = NULL;
  int field;
  FILE *fp = (FILE *)CpvAccess(myProcStatFP);
  if (fp == NULL){
    printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
    return -1;
  }
  /* Re-read the file from the start on every call. */
  if (fseek(fp, 0, SEEK_SET) != 0 || fgets(line, sizeof(line), fp) == NULL) {
    return -1;
  }
  /* Field 2 is the command name in parentheses and may itself contain
   * spaces, so fields are counted from the LAST ')' rather than from the
   * start of the line. */
  for (p = line; *p != '\0'; p++) {
    if (*p == ')') comm_end = p;
  }
  if (comm_end == NULL) {
    return -1;
  }
  p = comm_end + 1;
  /* The token right after the command name is field 3. */
  for (field = 3; ; field++) {
    while (*p == ' ') p++;
    if (*p == '\0' || *p == '\n') {
      return -1;               /* line ended before the wanted field */
    }
    if (field == TASK_CPU_POS) {
      return atoi(p);
    }
    while (*p != ' ' && *p != '\0' && *p != '\n') p++;
  }
#else
  printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
  return -1;
#endif
}
392 static int cpuAffinityHandlerIdx;
393 static int cpuAffinityRecvHandlerIdx;
394 static int cpuPhyNodeAffinityRecvHandlerIdx;
/*
 * Message each PE sends to PE 0 (handled by cpuAffinityHandler) so that PE 0
 * can group PEs by host address and hand out core ranks. PE 0 also keeps one
 * such message per distinct host in hostTable as that host's record.
 */
396 typedef struct _hostnameMsg {
/* leading header area; presumably the Converse message header - confirm */
397 char core[CmiMsgHeaderSizeBytes];
/* sender's PE number (set from CmiMyPe()) */
398 int pe;
/* sender's host IP address; its first int is used as the hostTable tag */
399 skt_ip_t ip;
/* number of cores on the sender's host (set from CmiNumCores()) */
400 int ncores;
/* sent as 0; in the per-host record it counts the core slots handed out so far */
401 int rank;
/* per-host record only: sequence number assigned when the host is first seen */
402 int seq;
403 } hostnameMsg;
/*
 * Broadcast from PE 0 carrying the computed placement for every PE. The two
 * int arrays are stored right after the struct in the same allocation
 * (CmiNumPes() ints each); the receiver recomputes both pointers from the
 * message address before using them.
 */
405 typedef struct _rankMsg {
/* leading header area; presumably the Converse message header - confirm */
406 char core[CmiMsgHeaderSizeBytes];
407 int *ranks; /* PE => core rank mapping */
408 int *nodes; /* PE => node number mapping */
409 } rankMsg;
/*
 * Message a PE sends to PE 0 from CmiCheckAffinity, carrying the sender's
 * current affinity set; PE 0 ORs it into core_usage. On _WIN32 the message
 * has no payload.
 */
411 typedef struct _affMsg {
/* leading header area; presumably the Converse message header - confirm */
412 char core[CmiMsgHeaderSizeBytes];
413 #ifndef _WIN32
/* sender's affinity set, filled by get_affinity() */
414 cpu_set_t affinity;
415 #endif
416 } affMsg;
418 static rankMsg *rankmsg = NULL;
419 static CmmTable hostTable;
420 static CmiNodeLock affLock = 0;
422 /* called on PE 0 */
423 static void cpuAffinityHandler(void *m)
425 static int count = 0;
426 static int nodecount = 0;
427 hostnameMsg *rec;
428 hostnameMsg *msg = (hostnameMsg *)m;
429 hostnameMsg *tmpm;
430 int tag, tag1, pe, myrank;
431 int npes = CmiNumPes();
433 /* for debug
434 char str[128];
435 skt_print_ip(str, msg->ip);
436 printf("hostname: %d %s\n", msg->pe, str);
438 CmiAssert(CmiMyPe()==0 && rankmsg != NULL);
439 tag = *(int*)&msg->ip;
440 pe = msg->pe;
441 if ((rec = (hostnameMsg *)CmmProbe(hostTable, 1, &tag, &tag1)) != NULL) {
442 CmiFree(msg);
444 else {
445 rec = msg;
446 rec->seq = nodecount;
447 nodecount++; /* a new node record */
448 CmmPut(hostTable, 1, &tag, msg);
450 myrank = rec->rank%rec->ncores;
451 while (in_exclude(myrank)) { /* skip excluded core */
452 myrank = (myrank+1)%rec->ncores;
453 rec->rank ++;
455 rankmsg->ranks[pe] = myrank; /* core rank */
456 rankmsg->nodes[pe] = rec->seq; /* on which node */
457 rec->rank ++;
458 count ++;
459 if (count == CmiNumPes()) {
460 /* CmiPrintf("Cpuaffinity> %d unique compute nodes detected! \n", CmmEntries(hostTable)); */
461 tag = CmmWildCard;
462 while ((tmpm = CmmGet(hostTable, 1, &tag, &tag1))) CmiFree(tmpm);
463 CmmFree(hostTable);
464 #if 1
465 /* bubble sort ranks on each node according to the PE number */
467 int i,j;
468 for (i=0; i<npes-1; i++)
469 for(j=i+1; j<npes; j++) {
470 if (rankmsg->nodes[i] == rankmsg->nodes[j] &&
471 rankmsg->ranks[i] > rankmsg->ranks[j])
473 int tmp = rankmsg->ranks[i];
474 rankmsg->ranks[i] = rankmsg->ranks[j];
475 rankmsg->ranks[j] = tmp;
479 #endif
480 CmiSyncBroadcastAllAndFree(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2, (void *)rankmsg);
484 /* called on each processor */
485 static void cpuAffinityRecvHandler(void *msg)
487 int myrank, mynode;
488 rankMsg *m = (rankMsg *)msg;
489 m->ranks = (int *)((char*)m + sizeof(rankMsg));
490 m->nodes = (int *)((char*)m + sizeof(rankMsg) + CmiNumPes()*sizeof(int));
491 myrank = m->ranks[CmiMyPe()];
492 mynode = m->nodes[CmiMyPe()];
494 /*CmiPrintf("[%d %d] set to core #: %d\n", CmiMyNode(), CmiMyPe(), myrank);*/
496 if (-1 != CmiSetCPUAffinity(myrank)) {
497 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
499 else{
500 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
501 CmiAbort("set cpu affinity abort!\n");
503 CmiFree(m);
506 /* called on first PE in physical node, receive affinity set from other PEs in phy node */
507 static void cpuPhyNodeAffinityRecvHandler(void *msg)
509 affMsg *m = (affMsg *)msg;
510 #if !defined(_WIN32) && defined(CPU_OR)
511 CPU_OR(&core_usage, &core_usage, &m->affinity);
512 affMsgsRecvd++;
513 #endif
514 CmiFree(m);
517 #if defined(_WIN32)
518 /* strtok is thread safe in VC++ */
519 #define strtok_r(x,y,z) strtok(x,y)
520 #endif
/*
 * Expand a core-map string and return the core assigned to `pe`.
 *
 * `pecoremap` is a comma separated list of items. As parsed below, an item is
 *     [N x|X] start [- end [: stride [. block]]] [+ offset]...
 * meaning: repeat N times; walk start..end in steps of `stride`, taking
 * `block` consecutive cores at each step; and emit every such core once
 * plain and once per "+offset". The expansion stops after CmiNumPesGlobal()
 * entries. The result is entry (pe % number_of_entries).
 *
 * The input string is not modified (a private copy is tokenised).
 *
 * Malformed items produce a warning and contribute no entries. Aborts when
 * memory cannot be allocated or when the map yields no entries at all.
 */
static int search_pemap(char *pecoremap, int pe)
{
  enum { MAX_PLUS = 128 };                  /* capacity of plusarr */
  const int npes = CmiNumPesGlobal();
  int *map = (int *)malloc(npes*sizeof(int));
  char *ptr = NULL;
  int h, i, j, k, count;
  int plusarr[MAX_PLUS];
  char *str;
  int core;

  char *mapstr = (char*)malloc(strlen(pecoremap)+1);
  if (map == NULL || mapstr == NULL) {
    free(map);
    free(mapstr);
    CmiAbort("Charm++> out of memory while parsing the core map.\n");
    return -1;   /* not reached if CmiAbort terminates the program */
  }
  strcpy(mapstr, pecoremap);

  str = strtok_r(mapstr, ",", &ptr);
  count = 0;
  while (str && count < npes)
  {
      int hasdash=0, hascolon=0, hasdot=0, hasstar1=0, hasstar2=0, numplus=0;
      /* start > end is an empty range: used when the item cannot be parsed */
      int start=0, end=-1, stride=1, block=1;
      int iter=1;
      int len = (int)strlen(str);
      plusarr[0] = 0;                       /* slot 0 is the un-offset core */
      for (i=0; i<len; i++) {
          if (str[i] == '-' && i!=0) hasdash=1;
          else if (str[i] == ':') hascolon=1;
          else if (str[i] == '.') hasdot=1;
          else if (str[i] == 'x') hasstar1=1;
          else if (str[i] == 'X') hasstar2=1;
          else if (str[i] == '+') {
            if (str[i+1] == '+' || str[i+1] == '-') {
              printf("Warning: Check the format of \"%s\".\n", str);
            } else if (numplus+1 >= MAX_PLUS) {
              /* never write past the end of plusarr */
              printf("Warning: Check the format of \"%s\".\n", str);
            } else if (sscanf(&str[i], "+%d", &plusarr[numplus+1]) != 1) {
              printf("Warning: Check the format of \"%s\".\n", str);
            } else {
              numplus++;
            }
          }
      }
      if (hasstar1 || hasstar2) {
          if (hasstar1) sscanf(str, "%dx", &iter);
          if (hasstar2) sscanf(str, "%dX", &iter);
          /* skip the repeat prefix; an 'x'/'X' is known to be present */
          while (*str!='x' && *str!='X') str++;
          str++;
      }
      if (hasdash) {
          int nparsed;
          if (hascolon) {
            if (hasdot) {
              nparsed = sscanf(str, "%d-%d:%d.%d", &start, &end, &stride, &block);
              if (nparsed != 4)
                 printf("Warning: Check the format of \"%s\".\n", str);
            }
            else {
              nparsed = sscanf(str, "%d-%d:%d", &start, &end, &stride);
              if (nparsed != 3)
                 printf("Warning: Check the format of \"%s\".\n", str);
            }
          }
          else {
            nparsed = sscanf(str, "%d-%d", &start, &end);
            if (nparsed != 2)
                 printf("Warning: Check the format of \"%s\".\n", str);
          }
          /* without both bounds there is no range to expand */
          if (nparsed < 2) { start = 0; end = -1; }
      }
      else {
          if (sscanf(str, "%d", &start) == 1)
            end = start;
          else
            printf("Warning: Check the format of \"%s\".\n", str);
      }
      if (stride < 1) {
        /* a non-positive stride would never advance towards `end` */
        printf("Warning: invalid stride in \"%s\" ignored.\n", str);
        stride=1;
      }
      if (block > stride) {
        printf("Warning: invalid block size in \"%s\" ignored.\n", str);
        block=1;
      }
      //if (CmiMyPe() == 0) printf("iter: %d start: %d end: %d stride: %d, block: %d. plus %d \n", iter, start, end, stride, block, numplus);
      for (k = 0; k<iter; k++) {
        for (i = start; i<=end; i+=stride) {
          for (j=0; j<block; j++) {
            if (i+j>end) break;
            for (h=0; h<=numplus; h++) {
              map[count++] = i+j+plusarr[h];
              if (count == npes) break;
            }
            if (count == npes) break;
          }
          if (count == npes) break;
        }
        if (count == npes) break;
      }
      str = strtok_r(NULL, ",", &ptr);
  }
  if (count == 0) {
    /* nothing usable in the map: `pe % count` would divide by zero */
    free(map);
    free(mapstr);
    CmiAbort("Charm++> no usable entries in the core map.\n");
    return -1;   /* not reached if CmiAbort terminates the program */
  }
  core = map[pe % count];

  free(map);
  free(mapstr);
  return core;
}
610 #if CMK_CRAYXE || CMK_CRAYXC
611 extern int getXTNodeID(int mpirank, int nummpiranks);
612 #endif
/**
 * Check that there are not multiple PEs assigned to the same core.
 *
 * Only active on non-Windows SMP builds with pthread affinity support and
 * CPU_OR, and only when cpu topology is enabled. Every worker PE on physical
 * node 0 other than PE 0 sends its affinity set to PE 0. PE 0 merges them
 * with its own and compares the number of distinct CPUs with the number of
 * PEs on that node. If fewer CPUs than PEs are in use:
 *   - when this module set affinity (aff_is_set), a warning is printed;
 *   - otherwise the run is aborted.
 */
void CmiCheckAffinity()
{
#if !defined(_WIN32) && CMK_SMP && CMK_HAS_PTHREAD_SETAFFINITY && defined(CPU_OR)

  if (!CmiCpuTopologyEnabled()) return;  // only works if cpu topology enabled

  if (CmiMyPe() != 0) {
    if ((CmiPhysicalNodeID(CmiMyPe()) == 0) && (CmiMyPe() < CmiNumPes())) {
      // send my affinity to first PE on physical node (only done on phy node 0 for now)
      affMsg *report = (affMsg*)CmiAlloc(sizeof(affMsg));
      CmiSetHandler((char *)report, cpuPhyNodeAffinityRecvHandlerIdx);
      if (get_affinity(&report->affinity) == -1) { // put my affinity in msg
        CmiFree(report);
        CmiAbort("get_affinity failed\n");
      }
      CmiSyncSendAndFree(0, sizeof(affMsg), (void *)report);
    }
    return;
  }

  // PE 0: wait for every PE affinity from my physical node (for now only done on phy node 0)
  {
    cpu_set_t own;
    int expected;

    if (get_affinity(&own) == -1) CmiAbort("get_affinity failed\n");
    CPU_OR(&core_usage, &core_usage, &own); // add my affinity (pe0)
    expected = CmiNumPesOnPhysicalNode(0);
    while (affMsgsRecvd < expected)
      CmiDeliverSpecificMsg(cpuPhyNodeAffinityRecvHandlerIdx);

    // NOTE this test is simple and may not detect every possible case of
    // oversubscription
    if (CPU_COUNT(&core_usage) >= expected) return;

    // TODO suggest command line arguments?
    if (aff_is_set) {
      CmiPrintf("WARNING: Multiple PEs assigned to same core, recommend "
      "adjusting processor affinity or passing +CmiSleepOnIdle to reduce "
      "interference.\n");
    } else {
      CmiAbort("Multiple PEs assigned to same core. Set affinity "
      "options to correct or lower the number of threads.\n");
    }
  }
#endif
}
663 void CmiInitCPUAffinity(char **argv)
665 static skt_ip_t myip;
666 int ret, i, exclude;
667 hostnameMsg *msg;
668 char *pemap = NULL;
669 char *commap = NULL;
670 char *pemapfile = NULL;
672 int show_affinity_flag;
673 int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity",
674 "set cpu affinity");
676 while (CmiGetArgIntDesc(argv,"+excludecore", &exclude, "avoid core when setting cpuaffinity")) {
677 if (CmiMyRank() == 0) add_exclude(exclude);
678 affinity_flag = 1;
681 if (CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file")) {
682 FILE *fp;
683 char buf[128];
684 pemap = (char*)malloc(1024);
685 fp = fopen(pemapfile, "r");
686 if (fp == NULL) CmiAbort("pemapfile does not exist");
687 while (!feof(fp)) {
688 if (fgets(buf, 128, fp)) {
689 if (buf[strlen(buf)-1] == '\n') buf[strlen(buf)-1] = 0;
690 strcat(pemap, buf);
693 fclose(fp);
694 if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile, pemap);
697 CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping");
698 if (pemap!=NULL && excludecount>0)
699 CmiAbort("Charm++> +pemap can not be used with +excludecore.\n");
701 CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping");
703 if (pemap!=NULL || commap!=NULL) affinity_flag = 1;
705 #if CMK_PAMI_LINUX_PPC8
706 affinity_flag = 1;
707 #endif
709 show_affinity_flag = CmiGetArgFlagDesc(argv,"+showcpuaffinity",
710 "print cpu affinity");
712 cpuAffinityHandlerIdx =
713 CmiRegisterHandler((CmiHandler)cpuAffinityHandler);
714 cpuAffinityRecvHandlerIdx =
715 CmiRegisterHandler((CmiHandler)cpuAffinityRecvHandler);
716 cpuPhyNodeAffinityRecvHandlerIdx =
717 CmiRegisterHandler((CmiHandler)cpuPhyNodeAffinityRecvHandler);
719 if (CmiMyRank() ==0) {
720 affLock = CmiCreateLock();
721 #ifndef _WIN32
722 aff_is_set = affinity_flag;
723 CPU_ZERO(&core_usage);
724 #endif
727 #if CMK_BLUEGENEQ
728 if(affinity_flag){
729 affinity_flag = 0;
730 if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene/Q, thus ignored.\n");
732 if(show_affinity_flag){
733 show_affinity_flag = 0;
734 if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene/Q.\n");
736 #endif
738 if (!affinity_flag) {
739 if (show_affinity_flag) CmiPrintCPUAffinity();
740 return;
743 if (CmiMyPe() == 0) {
744 CmiPrintf("Charm++> cpu affinity enabled. \n");
745 if (excludecount > 0) {
746 CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore[0]);
747 for (i=1; i<excludecount; i++) CmiPrintf(" %d", excludecore[i]);
748 CmiPrintf(".\n");
750 if (pemap!=NULL)
751 CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap);
754 if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */
755 /* comm thread either can float around, or pin down to the last rank.
756 however it seems to be reportedly slower if it is floating */
757 CmiNodeAllBarrier();
758 if (commap != NULL) {
759 int mycore = search_pemap(commap, CmiMyPeGlobal()-CmiNumPesGlobal());
760 if(CmiMyPe()-CmiNumPes()==0) printf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore);
761 if (-1 == CmiSetCPUAffinity(mycore))
762 CmiAbort("set_cpu_affinity abort!");
763 CmiNodeAllBarrier();
764 if (show_affinity_flag) CmiPrintCPUAffinity();
765 return; /* comm thread return */
767 else {
768 /* if (CmiSetCPUAffinity(CmiNumCores()-1) == -1) CmiAbort("set_cpu_affinity abort!"); */
769 #if !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ && !CMK_PAMI_LINUX_PPC8
770 if (pemap == NULL) {
771 #if CMK_MACHINE_PROGRESS_DEFINED
772 while (affinity_doneflag < CmiMyNodeSize()) CmiNetworkProgress();
773 #else
774 #if CMK_SMP
775 #error "Machine progress call needs to be implemented for cpu affinity!"
776 #endif
777 #endif
779 #endif
780 #if CMK_CRAYXE || CMK_CRAYXC
781 /* if both pemap and commmap are NULL, will compute one */
782 if (pemap != NULL)
783 #endif
785 CmiNodeAllBarrier();
786 if (show_affinity_flag) CmiPrintCPUAffinity();
787 return; /* comm thread return */
792 if (pemap != NULL && CmiMyPe()<CmiNumPes()) { /* work thread */
793 int mycore = search_pemap(pemap, CmiMyPeGlobal());
794 if(show_affinity_flag) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore);
795 if (mycore >= CmiNumCores()) {
796 CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. \n", mycore, CmiNumCores(), CmiNumCores()-1);
797 CmiAbort("Invalid core number");
799 if (CmiSetCPUAffinity(mycore) == -1) CmiAbort("set_cpu_affinity abort!");
800 CmiNodeAllBarrier();
801 CmiNodeAllBarrier();
802 /* if (show_affinity_flag) CmiPrintCPUAffinity(); */
803 return;
806 #if CMK_CRAYXE || CMK_CRAYXC
808 int numCores = CmiNumCores();
810 int myid = getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal());
811 int myrank;
812 int pe, mype = CmiMyPeGlobal();
813 int node = CmiMyNodeGlobal();
814 int nnodes = 0;
815 #if CMK_SMP
816 if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */
817 int node = CmiMyPe() - CmiNumPes();
818 mype = CmiGetPeGlobal(CmiNodeFirst(node) + CmiMyNodeSize() - 1, CmiMyPartition()); /* last pe on SMP node */
819 node = CmiGetNodeGlobal(node, CmiMyPartition());
821 #endif
822 pe = mype - 1;
823 while (pe >= 0) {
824 int n = CmiNodeOf(pe);
825 if (n != node) { nnodes++; node = n; }
826 if (getXTNodeID(n, CmiNumNodesGlobal()) != myid) break;
827 pe --;
829 CmiAssert(numCores > 0);
830 myrank = (mype - pe - 1 + nnodes)%numCores;
831 #if CMK_SMP
832 if (CmiMyPe() >= CmiNumPes())
833 myrank = (myrank + 1)%numCores;
834 #endif
836 if (-1 != CmiSetCPUAffinity(myrank)) {
837 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank, mynode));
839 else{
840 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
841 CmiAbort("set cpu affinity abort!\n");
844 if (CmiMyPe() < CmiNumPes())
845 CmiNodeAllBarrier();
846 CmiNodeAllBarrier();
847 #elif CMK_SMP && CMK_PAMI_LINUX_PPC8
848 #define CMK_PAMI_LINUX_PPC8_CORES_PER_NODE 20
849 #define CMK_PAMI_LINUX_PPC8_THREADS_PER_CORE 8
850 #define CMK_PAMI_LINUX_PPC8_SKIP_CORE_0 0
851 int cores_per_node = CMK_PAMI_LINUX_PPC8_CORES_PER_NODE;
852 int threads_per_core = CMK_PAMI_LINUX_PPC8_THREADS_PER_CORE;
854 CmiGetArgInt(argv,"+cores_per_node", &cores_per_node);
855 CmiGetArgInt(argv,"+threads_per_core", &threads_per_core);
857 int my_core = CmiMyPe() % cores_per_node;
858 int my_core_2 = CmiMyPe() % (cores_per_node/2);
859 #if CMK_PAMI_LINUX_PPC8_SKIP_CORE_0
860 my_core_2 = (my_core_2 + 1) % (CMK_PAMI_LINUX_PPC8_CORES_PER_NODE/2);
861 #endif
863 int cpu = 0;
864 if (my_core < (cores_per_node/2))
865 cpu = my_core_2 * threads_per_core;
866 else
867 cpu = (my_core_2 + CMK_PAMI_LINUX_PPC8_CORES_PER_NODE/2) * threads_per_core;
869 cpu_set_t cset;
870 CPU_ZERO(&cset);
871 CPU_SET(cpu, &cset);
872 CPU_SET(cpu+1, &cset);
873 if(sched_setaffinity(0, sizeof(cpu_set_t), &cset) < 0)
874 perror("sched_setaffinity");
876 CPU_ZERO(&cset);
877 if (sched_getaffinity(0, sizeof(cset), &cset) < 0)
878 perror("sched_getaffinity");
880 sched_yield();
881 if(CmiMyPe() == 0)
882 printf("Setting default affinity\n");
883 return;
884 #else
885 /* get my ip address */
886 if (CmiMyRank() == 0)
888 #if CMK_HAS_GETHOSTNAME
889 myip = skt_my_ip(); /* not thread safe, so only calls on rank 0 */
890 #else
891 CmiAbort("Can not get unique name for the compute nodes. \n");
892 #endif
894 CmiNodeAllBarrier();
896 /* prepare a msg to send */
897 msg = (hostnameMsg *)CmiAlloc(sizeof(hostnameMsg));
898 CmiSetHandler((char *)msg, cpuAffinityHandlerIdx);
899 msg->pe = CmiMyPe();
900 msg->ip = myip;
901 msg->ncores = CmiNumCores();
902 DEBUGP(("PE %d's node has %d number of cores. \n", CmiMyPe(), msg->ncores));
903 msg->rank = 0;
904 CmiSyncSendAndFree(0, sizeof(hostnameMsg), (void *)msg);
906 if (CmiMyPe() == 0) {
907 int i;
908 hostTable = CmmNew();
909 rankmsg = (rankMsg *)CmiAlloc(sizeof(rankMsg)+CmiNumPes()*sizeof(int)*2);
910 CmiSetHandler((char *)rankmsg, cpuAffinityRecvHandlerIdx);
911 rankmsg->ranks = (int *)((char*)rankmsg + sizeof(rankMsg));
912 rankmsg->nodes = (int *)((char*)rankmsg + sizeof(rankMsg) + CmiNumPes()*sizeof(int));
913 for (i=0; i<CmiNumPes(); i++) {
914 rankmsg->ranks[i] = 0;
915 rankmsg->nodes[i] = -1;
918 for (i=0; i<CmiNumPes(); i++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx);
921 /* receive broadcast from PE 0 */
922 CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx);
923 CmiLock(affLock);
924 affinity_doneflag++;
925 CmiUnlock(affLock);
926 CmiNodeAllBarrier();
927 #endif
929 if (show_affinity_flag) CmiPrintCPUAffinity();
932 /* called in ConverseCommonInit to initialize basic variables */
933 void CmiInitCPUAffinityUtil(){
934 char fname[64];
935 CpvInitialize(int, myCPUAffToCore);
936 CpvAccess(myCPUAffToCore) = -1;
937 #if CMK_OS_IS_LINUX
938 CpvInitialize(void *, myProcStatFP);
939 CmiLock(_smp_mutex);
940 #if CMK_SMP
941 sprintf(fname, "/proc/%d/task/%d/stat", getpid(), syscall(SYS_gettid));
942 #else
943 sprintf(fname, "/proc/%d/stat", getpid());
944 #endif
945 CpvAccess(myProcStatFP) = (void *)fopen(fname, "r");
946 CmiUnlock(_smp_mutex);
948 if(CmiMyPe()==0 && CpvAccess(myProcStatFP) == NULL){
949 CmiPrintf("WARNING: ERROR IN OPENING FILE %s on PROC %d, CmiOnCore() SHOULDN'T BE CALLED\n", fname, CmiMyPe());
952 #endif
955 #else /* not supporting affinity */
/*
 * Stub for platforms without affinity support: nothing is bound and failure
 * (-1) is always reported.
 */
int CmiSetCPUAffinity(int mycore)
{
  const int unsupported = -1;

  (void)mycore;   /* intentionally unused */
  return unsupported;
}
/*
 * Stub for platforms without affinity support: prints a warning and reports
 * failure (-1).
 */
int CmiPrintCPUAffinity()
{
  const int unsupported = -1;

  CmiPrintf("Warning: CmiPrintCPUAffinity not supported.\n");
  return unsupported;
}
/* Stub for platforms without affinity support: there is nothing to check. */
void CmiCheckAffinity()
{
  return;
}
971 void CmiInitCPUAffinity(char **argv)
973 char *pemap = NULL;
974 char *pemapfile = NULL;
975 char *commap = NULL;
976 int excludecore = -1;
977 int affinity_flag = CmiGetArgFlagDesc(argv,"+setcpuaffinity",
978 "set cpu affinity");
979 while (CmiGetArgIntDesc(argv,"+excludecore",&excludecore, "avoid core when setting cpuaffinity"));
980 CmiGetArgStringDesc(argv, "+pemap", &pemap, "define pe to core mapping");
981 CmiGetArgStringDesc(argv, "+pemapfile", &pemapfile, "define pe to core mapping file");
982 CmiGetArgStringDesc(argv, "+commap", &commap, "define comm threads to core mapping");
983 if (affinity_flag && CmiMyPe()==0)
984 CmiPrintf("sched_setaffinity() is not supported, +setcpuaffinity disabled.\n");
985 if (excludecore != -1 && CmiMyPe()==0)
986 CmiPrintf("sched_setaffinity() is not supported, +excludecore disabled.\n");
987 if (pemap && CmiMyPe()==0)
988 CmiPrintf("sched_setaffinity() is not supported, +pemap disabled.\n");
989 if (pemapfile && CmiMyPe()==0)
990 CmiPrintf("sched_setaffinity() is not supported, +pemapfile disabled.\n");
991 if (commap && CmiMyPe()==0)
992 CmiPrintf("sched_setaffinity() is not supported, +commap disabled.\n");
995 /* called in ConverseCommonInit to initialize basic variables */
996 void CmiInitCPUAffinityUtil(){
997 CpvInitialize(int, myCPUAffToCore);
998 CpvAccess(myCPUAffToCore) = -1;
999 #if CMK_OS_IS_LINUX
1000 CpvInitialize(void *, myProcStatFP);
1001 CpvAccess(myProcStatFP) = NULL;
1002 #endif
/*
 * Stub for platforms without affinity support: prints a warning and reports
 * that the current core is unknown (-1).
 */
int CmiOnCore()
{
  const int unknown_core = -1;

  printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
  return unknown_core;
}
1009 #endif