3 This scheme relies on using IP address to identify nodes and assigning
6 when CMK_NO_SOCKETS, which is typically on cray xt3 and bluegene/L.
7 There is no hostname for the compute nodes.
9 * last updated 3/20/2010 Gengbin Zheng
10 * new options +pemap and +commap take a complex pattern describing a list of cores
16 #include "sockRoutines.h"
/* Debug-print hook: the body is commented out, so DEBUGP((...)) expands to nothing. */
18 #define DEBUGP(x) /* CmiPrintf x; */
/* Converse Cpv variable holding the core chosen for this PE: set to -1 by
   CmiInitCPUAffinityUtil and overwritten by CmiSetCPUAffinity. */
19 CpvDeclare(int, myCPUAffToCore
);
22 * /proc/<PID>/[task/<TID>]/stat file descriptor
23 * Used to retrieve the info about which physical
24 * core this process or thread is on.
26 CpvDeclare(void *, myProcStatFP
);
29 #if CMK_HAS_SETAFFINITY || defined (_WIN32) || CMK_HAS_BINDPROCESSOR
42 //long sched_setaffinity(pid_t pid, unsigned int len, unsigned long *user_mask_ptr);
43 //long sched_getaffinity(pid_t pid, unsigned int len, unsigned long *user_mask_ptr);
47 #include <sys/syscall.h>
50 #if defined(__APPLE__)
51 #include <Carbon/Carbon.h> /* Carbon APIs for Multiprocessing */
/* Capacity of the core-exclusion list (enforced by the assert in add_exclude). */
55 #define MAX_EXCLUDE 64
/* Core ids that must not be used for pinning; only the first excludecount
   entries are consulted by in_exclude. */
56 static int excludecore
[MAX_EXCLUDE
] = {-1};
/* Number of valid entries in excludecore. */
57 static int excludecount
= 0;
/* Counter polled in CmiInitCPUAffinity until it reaches CmiMyNodeSize(). */
59 static int affinity_doneflag
= 0;
62 static int affMsgsRecvd
= 1; // number of affinity messages received at PE0
63 static cpu_set_t core_usage
; // used to record union of CPUs used by every PE in physical node
/* Copy of the affinity flag, stored by rank 0 in CmiInitCPUAffinity. */
64 static int aff_is_set
= 0;
/*
 * Test whether a core id is on the exclusion list.
 * Scans the first excludecount entries of excludecore and returns 1 as soon
 * as one of them equals core.
 */
67 static int in_exclude(int core
)
70 for (i
=0; i
<excludecount
; i
++) if (core
== excludecore
[i
]) return 1;
/*
 * Append a core id to the exclusion list.
 * A core that is already listed is ignored, so the list holds no duplicates.
 */
74 static void add_exclude(int core
)
76 if (in_exclude(core
)) return;
/* the list is a fixed array: refuse to grow past MAX_EXCLUDE entries */
77 CmiAssert(excludecount
< MAX_EXCLUDE
);
78 excludecore
[excludecount
++] = core
;
81 #if CMK_HAS_BINDPROCESSOR
82 #include <sys/processor.h>
/*
 * SET_MASK(cpuid): build a single-CPU affinity bit mask.
 *
 * Expands inside a function that declares `unsigned long mask` and
 * `unsigned int len` (the size of mask in bytes) and that returns int.
 * If cpuid does not fit in the mask, a diagnostic is printed and the
 * enclosing function returns -1; otherwise mask is set to the one bit
 * that selects cpuid.
 *
 * The bound test uses >= so that cpuid == 8*len (one past the last bit) is
 * rejected, and the shift is done on an unsigned long so that bit 31 and
 * above are produced correctly instead of overflowing a signed int.
 */
#define SET_MASK(cpuid) \
  /* set the affinity mask if possible */ \
  if (((cpuid) / 8) >= len) { \
    printf("Mask size too small to handle requested CPU ID\n"); \
    return -1; \
  } else { \
    mask = 1UL << (cpuid); /* set the affinity mask exclusively to one CPU */ \
  }
/*
 * Bind the calling process to the single CPU `cpuid`.
 * The visible code has three platform paths:
 *   - SetProcessAffinityMask on the current process handle, using `mask`;
 *   - bindprocessor(BINDPROCESS, pid, cpuid) under CMK_HAS_BINDPROCESSOR,
 *     returning -1 when the call fails;
 *   - sched_setaffinity on PID 0, with a CPU_ALLOC'ed set sized cpuid + 1
 *     when cpuid >= CPU_SETSIZE and a fixed cpu_set_t otherwise.
 */
95 /* This implementation assumes the default x86 CPU mask size used by Linux */
96 /* For a large SMP machine, this code should be changed to use a variable sized */
97 /* CPU affinity mask buffer instead, as the present code will fail beyond 32 CPUs */
98 int set_cpu_affinity(unsigned int cpuid
) {
99 unsigned long mask
= 0xffffffff;
100 unsigned int len
= sizeof(mask
);
110 hProcess
= GetCurrentProcess();
111 if (SetProcessAffinityMask(hProcess
, mask
) == 0) {
114 #elif CMK_HAS_BINDPROCESSOR
116 if (bindprocessor(BINDPROCESS
, pid
, cpuid
) == -1) return -1;
/* cpuid does not fit in a fixed cpu_set_t: use a dynamically sized set */
119 if ( cpuid
>= CPU_SETSIZE
) {
123 num_cpus
= cpuid
+ 1;
124 cpusetp
= CPU_ALLOC(num_cpus
);
125 if (cpusetp
== NULL
) {
126 perror("set_cpu_affinity CPU_ALLOC");
129 size
= CPU_ALLOC_SIZE(num_cpus
);
130 CPU_ZERO_S(size
, cpusetp
);
131 CPU_SET_S(cpuid
, size
, cpusetp
);
132 if (sched_setaffinity(0, size
, cpusetp
) < 0) {
133 perror("sched_setaffinity dynamically allocated");
/* fixed-size path: select only cpuid in cpuset */
143 CPU_SET(cpuid
, &cpuset
);
146 /* PID 0 refers to the current process */
147 /*if (sched_setaffinity(0, len, &mask) < 0) {*/
148 if (sched_setaffinity(0, sizeof(cpuset
), &cpuset
) < 0) {
149 perror("sched_setaffinity");
/*
 * Bind the calling thread to the single CPU `cpuid`.
 * Visible paths: SetThreadAffinityMask on the current thread handle;
 * pthread_setaffinity_np under CMK_HAS_PTHREAD_SETAFFINITY (dynamically
 * sized set when cpuid >= CPU_SETSIZE, fixed cpu_set_t otherwise);
 * bindprocessor(BINDTHREAD, ...) under CMK_HAS_BINDPROCESSOR; and finally a
 * fallback to set_cpu_affinity(cpuid).
 */
159 int set_thread_affinity(int cpuid
) {
160 unsigned long mask
= 0xffffffff;
161 unsigned int len
= sizeof(mask
);
169 hThread
= GetCurrentThread();
170 if (SetThreadAffinityMask(hThread
, mask
) == 0) {
173 #elif CMK_HAS_PTHREAD_SETAFFINITY
175 if ( cpuid
>= CPU_SETSIZE
) {
180 num_cpus
= cpuid
+ 1;
181 cpusetp
= CPU_ALLOC(num_cpus
);
182 if (cpusetp
== NULL
) {
183 perror("set_thread_affinity CPU_ALLOC");
186 size
= CPU_ALLOC_SIZE(num_cpus
);
187 thread
= pthread_self();
188 CPU_ZERO_S(size
, cpusetp
);
189 CPU_SET_S(cpuid
, size
, cpusetp
);
/* the pthread call returns its error code; storing it in errno lets
   perror() report it */
190 if (errno
= pthread_setaffinity_np(thread
, size
, cpusetp
)) {
191 perror("pthread_setaffinity dynamically allocated");
203 thread
= pthread_self();
206 CPU_SET(cpuid
, &cpuset
);
/* same errno hand-off as in the dynamically sized path */
208 if (errno
= pthread_setaffinity_np(thread
, sizeof(cpu_set_t
), &cpuset
)) {
209 perror("pthread_setaffinity");
213 #elif CMK_HAS_BINDPROCESSOR
214 if (bindprocessor(BINDTHREAD
, thread_self(), cpuid
) != 0)
/* no thread-level mechanism: bind the whole process instead */
217 return set_cpu_affinity(cpuid
);
/*
 * Pin the caller to core `mycore` and remember the choice.
 * The visible code offsets the core by CmiNumCores() in one branch
 * (presumably to let negative values count from the last core -- the guard
 * is not visible here, confirm), aborts with an error message on an invalid
 * core number, stores the core in the Cpv variable myCPUAffToCore, and then
 * returns the result of set_thread_affinity or set_cpu_affinity.
 */
225 int CmiSetCPUAffinity(int mycore
)
229 core
= CmiNumCores() + core
;
232 CmiError("Error: Invalid cpu affinity core number: %d\n", mycore
);
233 CmiAbort("CmiSetCPUAffinity failed");
236 CpvAccess(myCPUAffToCore
) = core
;
238 /* set cpu affinity */
240 return set_thread_affinity(core
);
242 return set_cpu_affinity(core
);
243 /* print_cpu_affinity(); */
/*
 * Print the CPU affinity of the calling process, prefixed with the PE number.
 * Visible paths: GetProcessAffinityMask printed as a hex mask; a fixed
 * "unknown for AIX" message under CMK_HAS_BINDPROCESSOR; and
 * sched_getaffinity followed by a listing of every CPU index set in the mask.
 */
247 /* This implementation assumes the default x86 CPU mask size used by Linux */
248 /* For a large SMP machine, this code should be changed to use a variable sized */
249 /* CPU affinity mask buffer instead, as the present code will fail beyond 32 CPUs */
250 int print_cpu_affinity() {
252 unsigned long pMask
, sMask
;
253 HANDLE hProcess
= GetCurrentProcess();
/* NOTE(review): GetProcessAffinityMask is documented to return nonzero on
   success, so this condition appears to report an error on the success
   path -- confirm and invert if so. */
254 if(GetProcessAffinityMask(hProcess
, &pMask
, &sMask
)){
255 perror("On Windows: GetProcessAffinityMask");
259 CmiPrintf("[%d] CPU affinity mask is: 0x%08lx\n", CmiMyPe(), pMask
);
261 #elif CMK_HAS_BINDPROCESSOR
262 printf("[%d] CPU affinity mask is unknown for AIX. \n", CmiMyPe());
264 /*unsigned long mask;
265 unsigned int len = sizeof(mask);*/
/* str accumulates the whole output line; pe holds one formatted CPU index */
267 char str
[256], pe
[16];
271 /* PID 0 refers to the current process */
272 /*if (sched_getaffinity(0, len, &mask) < 0) {*/
273 if (sched_getaffinity(0, sizeof(cpuset
), &cpuset
) < 0) {
274 perror("sched_getaffinity");
278 sprintf(str
, "[%d] CPU affinity mask is: ", CmiMyPe());
/* walk every possible CPU index and format the ones present in the mask */
279 for (i
= 0; i
< CPU_SETSIZE
; i
++)
280 if (CPU_ISSET(i
, &cpuset
)) {
281 sprintf(pe
, " %d ", i
);
284 CmiPrintf("%s\n", str
);
/*
 * Print the CPU affinity of the calling thread.
 * Under CMK_HAS_PTHREAD_SETAFFINITY the mask is read with
 * pthread_getaffinity_np and every CPU index set in it is listed. The line is
 * labelled "communication pthread" when CmiMyPe() >= CmiNumPes() and
 * "pthread" otherwise.
 */
290 int print_thread_affinity() {
292 size_t len
= sizeof(mask
);
294 #if CMK_HAS_PTHREAD_SETAFFINITY
298 char str
[256], pe
[16];
300 thread
= pthread_self();
/* the pthread call returns its error code; storing it in errno lets
   perror() report it */
302 if (errno
= pthread_getaffinity_np(thread
, sizeof(cpu_set_t
), &cpuset
)) {
303 perror("pthread_getaffinity");
307 sprintf(str
, "[%d] %s affinity is: ", CmiMyPe(), CmiMyPe()>=CmiNumPes()?"communication pthread":"pthread");
308 for (j
= 0; j
< CPU_SETSIZE
; j
++)
309 if (CPU_ISSET(j
, &cpuset
)) {
310 sprintf(pe
, " %d ", j
);
313 CmiPrintf("%s\n", str
);
/*
 * Print the caller's affinity: dispatches to print_thread_affinity or
 * print_cpu_affinity (the condition choosing between them is not visible
 * in this view).
 */
319 int CmiPrintCPUAffinity()
322 return print_thread_affinity();
324 return print_cpu_affinity();
/*
 * Read the CPU affinity mask of the calling process into *cpuset.
 *
 * cpuset: caller-provided set that receives the mask.
 * Returns 0 on success; on failure prints the system error with perror()
 * and returns -1 (callers compare the result against -1).
 *
 * The size handed to sched_getaffinity must be the size of the set itself.
 * sizeof(cpuset) would be the size of the pointer, which describes a mask
 * of only a few bytes and makes the call fail on machines whose kernel
 * mask is larger.
 */
int get_cpu_affinity(cpu_set_t *cpuset) {
  /* PID 0 refers to the current process */
  if (sched_getaffinity(0, sizeof(cpu_set_t), cpuset) < 0) {
    perror("sched_getaffinity");
    return -1;
  }
  return 0;
}
/*
 * Read the CPU affinity mask of the calling thread into *cpuset.
 * Under CMK_HAS_PTHREAD_SETAFFINITY this uses pthread_getaffinity_np on
 * pthread_self(); a nonzero result is stored in errno so perror() can
 * report it.
 */
339 int get_thread_affinity(cpu_set_t
*cpuset
) {
340 #if CMK_HAS_PTHREAD_SETAFFINITY
342 if (errno
= pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t
), cpuset
)) {
343 perror("pthread_getaffinity");
/*
 * Read the caller's affinity mask into *cpuset by dispatching to
 * get_thread_affinity or get_cpu_affinity (the condition choosing between
 * them is not visible in this view). Callers treat -1 as failure.
 */
353 int get_affinity(cpu_set_t
*cpuset
) {
355 return get_thread_affinity(cpuset
);
357 return get_cpu_affinity(cpuset
);
365 * The info (task_cpu) is read from the Linux /proc virtual file system.
366 * The /proc/<PID>/[task/<TID>]/stat is explained in the Linux
367 * kernel documentation. The online one could be found in:
368 * http://www.mjmwired.net/kernel/Documentation/filesystems/proc.txt
369 * Based on the documentation, task_cpu is found at the 39th field in
/* Number of whitespace-separated tokens read from the stat file; the last
   one read is the field the surrounding comment identifies as task_cpu. */
372 #define TASK_CPU_POS (39)
/* stat stream stored in the Cpv variable myProcStatFP by CmiInitCPUAffinityUtil */
375 FILE *fp
= (FILE *)CpvAccess(myProcStatFP
);
377 printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
/* rewind so the fields are counted from the start of the file on every call */
380 fseek(fp
, 0, SEEK_SET
);
/* NOTE(review): "%s" has no field width; the size of str is not visible
   here -- confirm it cannot be overrun. */
381 for (n
=0; n
<TASK_CPU_POS
; n
++) {
382 fscanf(fp
, "%s", str
);
386 printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");
/* Handler indices assigned by CmiRegisterHandler in CmiInitCPUAffinity. */
392 static int cpuAffinityHandlerIdx
;
393 static int cpuAffinityRecvHandlerIdx
;
394 static int cpuPhyNodeAffinityRecvHandlerIdx
;
/* Message each PE sends to PE 0; cpuAffinityHandler reads its pe, ip, ncores,
   rank and seq fields (only the header member is visible here). */
396 typedef struct _hostnameMsg
{
397 char core
[CmiMsgHeaderSizeBytes
];
/* Message broadcast from PE 0 carrying the per-PE results. The two arrays
   live directly after the struct in the same allocation, and the receiver
   recomputes both pointers before using them. */
405 typedef struct _rankMsg
{
406 char core
[CmiMsgHeaderSizeBytes
];
407 int *ranks
; /* PE => core rank mapping */
408 int *nodes
; /* PE => node number mapping */
/* Message carrying one PE's affinity set to PE 0 (its `affinity` member is
   used by cpuPhyNodeAffinityRecvHandler and CmiCheckAffinity). */
411 typedef struct _affMsg
{
412 char core
[CmiMsgHeaderSizeBytes
];
/* Reply under construction on PE 0; allocated in CmiInitCPUAffinity. */
418 static rankMsg
*rankmsg
= NULL
;
/* Table of the first hostnameMsg seen per node, keyed by a tag derived from the IP. */
419 static CmmTable hostTable
;
/* Lock created by rank 0 in CmiInitCPUAffinity. */
420 static CmiNodeLock affLock
= 0;
/*
 * PE 0 handler for the hostnameMsg sent by every PE.
 * Groups senders by node using a tag taken from the message's IP field,
 * picks a core rank for the sender (skipping excluded cores), and records
 * the rank and node sequence number in rankmsg. Once CmiNumPes() messages
 * have been counted it frees the table entries, reorders the ranks within
 * each node, and broadcasts rankmsg to all PEs.
 */
423 static void cpuAffinityHandler(void *m
)
/* function-static counters: they persist across handler invocations */
425 static int count
= 0;
426 static int nodecount
= 0;
428 hostnameMsg
*msg
= (hostnameMsg
*)m
;
430 int tag
, tag1
, pe
, myrank
;
431 int npes
= CmiNumPes();
435 skt_print_ip(str, msg->ip);
436 printf("hostname: %d %s\n", msg->pe, str);
438 CmiAssert(CmiMyPe()==0 && rankmsg
!= NULL
);
/* the first int-sized bytes of the IP serve as the table lookup key */
439 tag
= *(int*)&msg
->ip
;
/* look for an existing record for this node */
441 if ((rec
= (hostnameMsg
*)CmmProbe(hostTable
, 1, &tag
, &tag1
)) != NULL
) {
446 rec
->seq
= nodecount
;
447 nodecount
++; /* a new node record */
448 CmmPut(hostTable
, 1, &tag
, msg
);
/* wrap the record's rank into the node's core count */
450 myrank
= rec
->rank
%rec
->ncores
;
/* NOTE(review): if every core of the node were excluded this loop would not
   terminate -- confirm that the option parsing prevents it. */
451 while (in_exclude(myrank
)) { /* skip excluded core */
452 myrank
= (myrank
+1)%rec
->ncores
;
455 rankmsg
->ranks
[pe
] = myrank
; /* core rank */
456 rankmsg
->nodes
[pe
] = rec
->seq
; /* on which node */
/* all PEs have reported: finish up and reply */
459 if (count
== CmiNumPes()) {
460 /* CmiPrintf("Cpuaffinity> %d unique compute nodes detected! \n", CmmEntries(hostTable)); */
/* drain the table, releasing the stored messages */
462 while ((tmpm
= CmmGet(hostTable
, 1, &tag
, &tag1
))) CmiFree(tmpm
);
465 /* bubble sort ranks on each node according to the PE number */
468 for (i
=0; i
<npes
-1; i
++)
469 for(j
=i
+1; j
<npes
; j
++) {
/* only PEs on the same node are compared; swap when the lower PE holds the larger rank */
470 if (rankmsg
->nodes
[i
] == rankmsg
->nodes
[j
] &&
471 rankmsg
->ranks
[i
] > rankmsg
->ranks
[j
])
473 int tmp
= rankmsg
->ranks
[i
];
474 rankmsg
->ranks
[i
] = rankmsg
->ranks
[j
];
475 rankmsg
->ranks
[j
] = tmp
;
/*
 * Handler for the rankMsg broadcast from PE 0.
 * Re-derives the ranks/nodes array pointers from the message's own address
 * (the pointers stored by the sender refer to the sender's buffer), looks up
 * this PE's entries, and pins the PE to its core rank. Failure to set the
 * affinity is fatal.
 */
484 /* called on each processor */
485 static void cpuAffinityRecvHandler(void *msg
)
488 rankMsg
*m
= (rankMsg
*)msg
;
/* the arrays follow the struct: ranks first, then nodes, CmiNumPes() ints each */
489 m
->ranks
= (int *)((char*)m
+ sizeof(rankMsg
));
490 m
->nodes
= (int *)((char*)m
+ sizeof(rankMsg
) + CmiNumPes()*sizeof(int));
491 myrank
= m
->ranks
[CmiMyPe()];
492 mynode
= m
->nodes
[CmiMyPe()];
494 /*CmiPrintf("[%d %d] set to core #: %d\n", CmiMyNode(), CmiMyPe(), myrank);*/
496 if (-1 != CmiSetCPUAffinity(myrank
)) {
497 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank
, mynode
));
500 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
501 CmiAbort("set cpu affinity abort!\n");
/*
 * Handler for an affMsg: merges the sender's affinity set into core_usage.
 * The merge is compiled only when not on Windows and CPU_OR is available.
 */
506 /* called on first PE in physical node, receive affinity set from other PEs in phy node */
507 static void cpuPhyNodeAffinityRecvHandler(void *msg
)
509 affMsg
*m
= (affMsg
*)msg
;
510 #if !defined(_WIN32) && defined(CPU_OR)
/* core_usage = core_usage | sender's affinity */
511 CPU_OR(&core_usage
, &core_usage
, &m
->affinity
);
518 /* strtok is thread safe in VC++ */
519 #define strtok_r(x,y,z) strtok(x,y)
/*
 * Expand a PE-to-core map string and look up the entry for `pe`.
 * The string is a comma-separated list of items. An item may contain
 *   start-end        a range of cores,
 *   :stride          a step between range elements,
 *   .block           a count of consecutive cores taken at each step,
 *   <n>x or <n>X     a repeat count for the item,
 *   +<k>             additional offsets added to each generated core.
 * Generated core ids are written into map until it holds
 * CmiNumPesGlobal() entries.
 */
522 static int search_pemap(char *pecoremap
, int pe
)
/* one slot per global PE */
524 int *map
= (int *)malloc(CmiNumPesGlobal()*sizeof(int));
526 int h
, i
, j
, k
, count
;
/* the tokenizer modifies its input, so work on a private copy */
530 char *mapstr
= (char*)malloc(strlen(pecoremap
)+1);
531 strcpy(mapstr
, pecoremap
);
533 str
= strtok_r(mapstr
, ",", &ptr
);
/* one iteration per comma-separated item, until the map is full */
535 while (str
&& count
< CmiNumPesGlobal())
537 int hasdash
=0, hascolon
=0, hasdot
=0, hasstar1
=0, hasstar2
=0, numplus
=0;
538 int start
, end
, stride
=1, block
=1;
/* first pass over the item: note which separators it contains */
541 for (i
=0; i
<strlen(str
); i
++) {
/* a leading '-' is not treated as a range separator */
542 if (str
[i
] == '-' && i
!=0) hasdash
=1;
543 else if (str
[i
] == ':') hascolon
=1;
544 else if (str
[i
] == '.') hasdot
=1;
545 else if (str
[i
] == 'x') hasstar1
=1;
546 else if (str
[i
] == 'X') hasstar2
=1;
547 else if (str
[i
] == '+') {
548 if (str
[i
+1] == '+' || str
[i
+1] == '-') {
549 printf("Warning: Check the format of \"%s\".\n", str
);
/* NOTE(review): numplus is advanced before the store and no upper bound is
   visible here -- confirm plusarr cannot be overrun by many '+' terms. */
550 } else if (sscanf(&str
[i
], "+%d", &plusarr
[++numplus
]) != 1) {
551 printf("Warning: Check the format of \"%s\".\n", str
);
/* repeat prefix: read the count, then advance str to the 'x'/'X' */
556 if (hasstar1
|| hasstar2
) {
557 if (hasstar1
) sscanf(str
, "%dx", &iter
);
558 if (hasstar2
) sscanf(str
, "%dX", &iter
);
559 while (*str
!='x' && *str
!='X') str
++;
/* parse the range with the richest format the item contains */
565 if (sscanf(str
, "%d-%d:%d.%d", &start
, &end
, &stride
, &block
) != 4)
566 printf("Warning: Check the format of \"%s\".\n", str
);
569 if (sscanf(str
, "%d-%d:%d", &start
, &end
, &stride
) != 3)
570 printf("Warning: Check the format of \"%s\".\n", str
);
574 if (sscanf(str
, "%d-%d", &start
, &end
) != 2)
575 printf("Warning: Check the format of \"%s\".\n", str
);
579 sscanf(str
, "%d", &start
);
582 if (block
> stride
) {
583 printf("Warning: invalid block size in \"%s\" ignored.\n", str
);
586 //if (CmiMyPe() == 0) printf("iter: %d start: %d end: %d stride: %d, block: %d. plus %d \n", iter, start, end, stride, block, numplus);
/* expansion: repeat count, range steps, block members, '+' offsets; each
   level stops as soon as the map is full */
587 for (k
= 0; k
<iter
; k
++) {
588 for (i
= start
; i
<=end
; i
+=stride
) {
589 for (j
=0; j
<block
; j
++) {
591 for (h
=0; h
<=numplus
; h
++) {
592 map
[count
++] = i
+j
+plusarr
[h
];
593 if (count
== CmiNumPesGlobal()) break;
595 if (count
== CmiNumPesGlobal()) break;
597 if (count
== CmiNumPesGlobal()) break;
599 if (count
== CmiNumPesGlobal()) break;
/* next comma-separated item */
601 str
= strtok_r(NULL
, ",", &ptr
);
610 #if CMK_CRAYXE || CMK_CRAYXC
/* Defined elsewhere; CmiInitCPUAffinity compares its results to decide
   whether two nodes share the same id. */
611 extern int getXTNodeID(int mpirank
, int nummpiranks
);
615 * Check that there are not multiple PEs assigned to the same core.
616 * If a pemap has been computed by this module (or passed by the user) this
617 * function will print a warning if oversubscription detected. If no affinity
618 * has been set explicitly by this module, it will print error and abort if
619 * oversubscription detected.
/*
 * Detect oversubscription on physical node 0.
 * PE 0 merges its own affinity with the sets sent by the other PEs of the
 * node and compares the number of distinct CPUs against the number of PEs.
 * The other PEs of physical node 0 send their affinity set to PE 0.
 * The body is compiled only when the listed pthread/CPU_OR features exist.
 */
621 void CmiCheckAffinity()
623 #if !defined(_WIN32) && CMK_SMP && CMK_HAS_PTHREAD_SETAFFINITY && defined(CPU_OR)
625 if (!CmiCpuTopologyEnabled()) return; // only works if cpu topology enabled
627 if (CmiMyPe() == 0) {
628 // wait for every PE affinity from my physical node (for now only done on phy node 0)
631 if (get_affinity(&my_aff
) == -1) CmiAbort("get_affinity failed\n");
632 CPU_OR(&core_usage
, &core_usage
, &my_aff
); // add my affinity (pe0)
633 int N
= CmiNumPesOnPhysicalNode(0);
/* affMsgsRecvd starts at 1, which accounts for PE 0 itself */
634 while (affMsgsRecvd
< N
)
635 CmiDeliverSpecificMsg(cpuPhyNodeAffinityRecvHandlerIdx
);
637 // NOTE this test is simple and may not detect every possible case of
/* fewer distinct CPUs than PEs means at least two PEs share a CPU */
639 if (CPU_COUNT(&core_usage
) < N
) {
640 // TODO suggest command line arguments?
642 CmiAbort("Multiple PEs assigned to same core. Set affinity "
643 "options to correct or lower the number of threads.\n");
645 CmiPrintf("WARNING: Multiple PEs assigned to same core, recommend "
646 "adjusting processor affinity or passing +CmiSleepOnIdle to reduce "
650 } else if ((CmiPhysicalNodeID(CmiMyPe()) == 0) && (CmiMyPe() < CmiNumPes())) {
651 // send my affinity to first PE on physical node (only done on phy node 0 for now)
652 affMsg
*m
= (affMsg
*)CmiAlloc(sizeof(affMsg
));
653 CmiSetHandler((char *)m
, cpuPhyNodeAffinityRecvHandlerIdx
);
654 if (get_affinity(&m
->affinity
) == -1) { // put my affinity in msg
656 CmiAbort("get_affinity failed\n");
658 CmiSyncSendAndFree(0, sizeof(affMsg
), (void *)m
);
/*
 * Parse the affinity command-line options and pin the calling PE or
 * communication thread.
 * Visible stages:
 *   1. read +setcpuaffinity, +excludecore, +pemapfile, +pemap, +commap and
 *      +showcpuaffinity from argv;
 *   2. register the three message handlers and initialise the shared state;
 *   3. pin communication threads (from +commap when given);
 *   4. pin worker threads from +pemap when given;
 *   5. otherwise use a platform-specific default (Cray XE/XC, PPC8), or the
 *      generic protocol in which every PE reports to PE 0 and waits for the
 *      broadcast reply.
 */
663 void CmiInitCPUAffinity(char **argv
)
665 static skt_ip_t myip
;
670 char *pemapfile
= NULL
;
672 int show_affinity_flag
;
673 int affinity_flag
= CmiGetArgFlagDesc(argv
,"+setcpuaffinity",
/* every +excludecore occurrence is consumed; only rank 0 records it */
676 while (CmiGetArgIntDesc(argv
,"+excludecore", &exclude
, "avoid core when setting cpuaffinity")) {
677 if (CmiMyRank() == 0) add_exclude(exclude
);
/* +pemapfile: take the map from the first line of the named file */
681 if (CmiGetArgStringDesc(argv
, "+pemapfile", &pemapfile
, "define pe to core mapping file")) {
684 pemap
= (char*)malloc(1024);
685 fp
= fopen(pemapfile
, "r");
686 if (fp
== NULL
) CmiAbort("pemapfile does not exist");
688 if (fgets(buf
, 128, fp
)) {
/* strip the trailing newline kept by fgets */
689 if (buf
[strlen(buf
)-1] == '\n') buf
[strlen(buf
)-1] = 0;
694 if (CmiMyPe()==0) CmiPrintf("Charm++> read from pemap file '%s': %s\n", pemapfile
, pemap
);
697 CmiGetArgStringDesc(argv
, "+pemap", &pemap
, "define pe to core mapping");
/* an explicit map and an exclusion list are mutually exclusive */
698 if (pemap
!=NULL
&& excludecount
>0)
699 CmiAbort("Charm++> +pemap can not be used with +excludecore.\n");
701 CmiGetArgStringDesc(argv
, "+commap", &commap
, "define comm threads to core mapping");
/* giving either map implies that affinity should be set */
703 if (pemap
!=NULL
|| commap
!=NULL
) affinity_flag
= 1;
705 #if CMK_PAMI_LINUX_PPC8
709 show_affinity_flag
= CmiGetArgFlagDesc(argv
,"+showcpuaffinity",
710 "print cpu affinity");
712 cpuAffinityHandlerIdx
=
713 CmiRegisterHandler((CmiHandler
)cpuAffinityHandler
);
714 cpuAffinityRecvHandlerIdx
=
715 CmiRegisterHandler((CmiHandler
)cpuAffinityRecvHandler
);
716 cpuPhyNodeAffinityRecvHandlerIdx
=
717 CmiRegisterHandler((CmiHandler
)cpuPhyNodeAffinityRecvHandler
);
/* shared state is initialised by rank 0 only */
719 if (CmiMyRank() ==0) {
720 affLock
= CmiCreateLock();
722 aff_is_set
= affinity_flag
;
723 CPU_ZERO(&core_usage
);
730 if(CmiMyPe()==0) CmiPrintf("Charm++> cpu affinity setting is not needed on Blue Gene/Q, thus ignored.\n");
732 if(show_affinity_flag
){
733 show_affinity_flag
= 0;
734 if(CmiMyPe()==0) CmiPrintf("Charm++> printing cpu affinity is not supported on Blue Gene/Q.\n");
/* affinity not requested: at most report the current binding */
738 if (!affinity_flag
) {
739 if (show_affinity_flag
) CmiPrintCPUAffinity();
743 if (CmiMyPe() == 0) {
744 CmiPrintf("Charm++> cpu affinity enabled. \n");
745 if (excludecount
> 0) {
746 CmiPrintf("Charm++> cpuaffinity excludes core: %d", excludecore
[0]);
747 for (i
=1; i
<excludecount
; i
++) CmiPrintf(" %d", excludecore
[i
]);
751 CmiPrintf("Charm++> cpuaffinity PE-core map : %s\n", pemap
);
754 if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */
755 /* comm thread either can float around, or pin down to the last rank.
756 however it seems to be reportedly slower if it is floating */
758 if (commap
!= NULL
) {
/* comm threads are indexed from 0 within +commap */
759 int mycore
= search_pemap(commap
, CmiMyPeGlobal()-CmiNumPesGlobal());
760 if(CmiMyPe()-CmiNumPes()==0) printf("Charm++> set comm %d on node %d to core #%d\n", CmiMyPe()-CmiNumPes(), CmiMyNode(), mycore
);
761 if (-1 == CmiSetCPUAffinity(mycore
))
762 CmiAbort("set_cpu_affinity abort!");
764 if (show_affinity_flag
) CmiPrintCPUAffinity();
765 return; /* comm thread return */
768 /* if (CmiSetCPUAffinity(CmiNumCores()-1) == -1) CmiAbort("set_cpu_affinity abort!"); */
769 #if !CMK_CRAYXE && !CMK_CRAYXC && !CMK_BLUEGENEQ && !CMK_PAMI_LINUX_PPC8
771 #if CMK_MACHINE_PROGRESS_DEFINED
/* drive network progress until affinity_doneflag reaches the node size */
772 while (affinity_doneflag
< CmiMyNodeSize()) CmiNetworkProgress();
775 #error "Machine progress call needs to be implemented for cpu affinity!"
780 #if CMK_CRAYXE || CMK_CRAYXC
781 /* if both pemap and commmap are NULL, will compute one */
786 if (show_affinity_flag
) CmiPrintCPUAffinity();
787 return; /* comm thread return */
792 if (pemap
!= NULL
&& CmiMyPe()<CmiNumPes()) { /* work thread */
793 int mycore
= search_pemap(pemap
, CmiMyPeGlobal());
794 if(show_affinity_flag
) CmiPrintf("Charm++> set PE %d on node %d to core #%d\n", CmiMyPe(), CmiMyNode(), mycore
);
/* reject core ids beyond the cores reported for this node */
795 if (mycore
>= CmiNumCores()) {
796 CmiPrintf("Error> Invalid core number %d, only have %d cores (0-%d) on the node. \n", mycore
, CmiNumCores(), CmiNumCores()-1);
797 CmiAbort("Invalid core number");
799 if (CmiSetCPUAffinity(mycore
) == -1) CmiAbort("set_cpu_affinity abort!");
802 /* if (show_affinity_flag) CmiPrintCPUAffinity(); */
806 #if CMK_CRAYXE || CMK_CRAYXC
/* Cray path: derive the core rank from the PE's position among the PEs that
   share the same getXTNodeID value */
808 int numCores
= CmiNumCores();
810 int myid
= getXTNodeID(CmiMyNodeGlobal(), CmiNumNodesGlobal());
812 int pe
, mype
= CmiMyPeGlobal();
813 int node
= CmiMyNodeGlobal();
816 if (CmiMyPe() >= CmiNumPes()) { /* this is comm thread */
817 int node
= CmiMyPe() - CmiNumPes();
818 mype
= CmiGetPeGlobal(CmiNodeFirst(node
) + CmiMyNodeSize() - 1, CmiMyPartition()); /* last pe on SMP node */
819 node
= CmiGetNodeGlobal(node
, CmiMyPartition());
824 int n
= CmiNodeOf(pe
);
825 if (n
!= node
) { nnodes
++; node
= n
; }
826 if (getXTNodeID(n
, CmiNumNodesGlobal()) != myid
) break;
829 CmiAssert(numCores
> 0);
830 myrank
= (mype
- pe
- 1 + nnodes
)%numCores
;
/* a comm thread takes the core after the one computed for its last worker */
832 if (CmiMyPe() >= CmiNumPes())
833 myrank
= (myrank
+ 1)%numCores
;
836 if (-1 != CmiSetCPUAffinity(myrank
)) {
837 DEBUGP(("Processor %d is bound to core #%d on node #%d\n", CmiMyPe(), myrank
, mynode
));
840 CmiPrintf("Processor %d set affinity failed!\n", CmiMyPe());
841 CmiAbort("set cpu affinity abort!\n");
844 if (CmiMyPe() < CmiNumPes())
847 #elif CMK_SMP && CMK_PAMI_LINUX_PPC8
/* PPC8 path: defaults below can be overridden with +cores_per_node and
   +threads_per_core */
848 #define CMK_PAMI_LINUX_PPC8_CORES_PER_NODE 20
849 #define CMK_PAMI_LINUX_PPC8_THREADS_PER_CORE 8
850 #define CMK_PAMI_LINUX_PPC8_SKIP_CORE_0 0
851 int cores_per_node
= CMK_PAMI_LINUX_PPC8_CORES_PER_NODE
;
852 int threads_per_core
= CMK_PAMI_LINUX_PPC8_THREADS_PER_CORE
;
854 CmiGetArgInt(argv
,"+cores_per_node", &cores_per_node
);
855 CmiGetArgInt(argv
,"+threads_per_core", &threads_per_core
);
857 int my_core
= CmiMyPe() % cores_per_node
;
858 int my_core_2
= CmiMyPe() % (cores_per_node
/2);
859 #if CMK_PAMI_LINUX_PPC8_SKIP_CORE_0
860 my_core_2
= (my_core_2
+ 1) % (CMK_PAMI_LINUX_PPC8_CORES_PER_NODE
/2);
/* PEs in the first half of the node map to the first half of the cores;
   the rest are offset by half of the compile-time core count */
864 if (my_core
< (cores_per_node
/2))
865 cpu
= my_core_2
* threads_per_core
;
867 cpu
= (my_core_2
+ CMK_PAMI_LINUX_PPC8_CORES_PER_NODE
/2) * threads_per_core
;
872 CPU_SET(cpu
+1, &cset
);
873 if(sched_setaffinity(0, sizeof(cpu_set_t
), &cset
) < 0)
874 perror("sched_setaffinity");
877 if (sched_getaffinity(0, sizeof(cset
), &cset
) < 0)
878 perror("sched_getaffinity");
882 printf("Setting default affinity\n");
885 /* get my ip address */
886 if (CmiMyRank() == 0)
888 #if CMK_HAS_GETHOSTNAME
889 myip
= skt_my_ip(); /* not thread safe, so only calls on rank 0 */
891 CmiAbort("Can not get unique name for the compute nodes. \n");
896 /* prepare a msg to send */
897 msg
= (hostnameMsg
*)CmiAlloc(sizeof(hostnameMsg
));
898 CmiSetHandler((char *)msg
, cpuAffinityHandlerIdx
);
901 msg
->ncores
= CmiNumCores();
902 DEBUGP(("PE %d's node has %d number of cores. \n", CmiMyPe(), msg
->ncores
));
/* report to PE 0, which computes the assignment for everyone */
904 CmiSyncSendAndFree(0, sizeof(hostnameMsg
), (void *)msg
);
906 if (CmiMyPe() == 0) {
908 hostTable
= CmmNew();
/* one allocation holds the struct followed by the ranks and nodes arrays */
909 rankmsg
= (rankMsg
*)CmiAlloc(sizeof(rankMsg
)+CmiNumPes()*sizeof(int)*2);
910 CmiSetHandler((char *)rankmsg
, cpuAffinityRecvHandlerIdx
);
911 rankmsg
->ranks
= (int *)((char*)rankmsg
+ sizeof(rankMsg
));
912 rankmsg
->nodes
= (int *)((char*)rankmsg
+ sizeof(rankMsg
) + CmiNumPes()*sizeof(int));
913 for (i
=0; i
<CmiNumPes(); i
++) {
914 rankmsg
->ranks
[i
] = 0;
915 rankmsg
->nodes
[i
] = -1;
/* process exactly one report per PE */
918 for (i
=0; i
<CmiNumPes(); i
++) CmiDeliverSpecificMsg(cpuAffinityHandlerIdx
);
921 /* receive broadcast from PE 0 */
922 CmiDeliverSpecificMsg(cpuAffinityRecvHandlerIdx
);
929 if (show_affinity_flag
) CmiPrintCPUAffinity();
/*
 * Initialise the per-PE affinity variables: myCPUAffToCore is set to -1 and
 * myProcStatFP to a read stream on this process's (or thread's) /proc stat
 * file. PE 0 prints a warning when the file cannot be opened.
 */
932 /* called in ConverseCommonInit to initialize basic variables */
933 void CmiInitCPUAffinityUtil(){
935 CpvInitialize(int, myCPUAffToCore
);
936 CpvAccess(myCPUAffToCore
) = -1;
938 CpvInitialize(void *, myProcStatFP
);
/* per-thread stat file, addressed by the kernel thread id.
   NOTE(review): syscall() returns long but is formatted with %d -- confirm. */
941 sprintf(fname
, "/proc/%d/task/%d/stat", getpid(), syscall(SYS_gettid
));
/* per-process stat file */
943 sprintf(fname
, "/proc/%d/stat", getpid());
945 CpvAccess(myProcStatFP
) = (void *)fopen(fname
, "r");
946 CmiUnlock(_smp_mutex
);
948 if(CmiMyPe()==0 && CpvAccess(myProcStatFP) == NULL){
949 CmiPrintf("WARNING: ERROR IN OPENING FILE %s on PROC %d, CmiOnCore() SHOULDN'T BE CALLED\n", fname, CmiMyPe());
955 #else /* not supporting affinity */
/* Variant of CmiSetCPUAffinity for builds without affinity support (the
   "#else" branch); its body is not visible in this view. */
957 int CmiSetCPUAffinity(int mycore
)
/* Variant of CmiPrintCPUAffinity for builds without affinity support: only
   prints a warning. */
962 int CmiPrintCPUAffinity()
964 CmiPrintf("Warning: CmiPrintCPUAffinity not supported.\n");
/* Variant of CmiCheckAffinity for builds without affinity support; no
   statements are visible in its body here. */
968 void CmiCheckAffinity() {
/*
 * Variant of CmiInitCPUAffinity for builds without affinity support.
 * It still consumes the affinity options from argv, and PE 0 prints one
 * "not supported" message for each option that was actually given.
 */
971 void CmiInitCPUAffinity(char **argv
)
974 char *pemapfile
= NULL
;
/* -1 means that no +excludecore was given */
976 int excludecore
= -1;
977 int affinity_flag
= CmiGetArgFlagDesc(argv
,"+setcpuaffinity",
/* empty loop body: consume every +excludecore occurrence */
979 while (CmiGetArgIntDesc(argv
,"+excludecore",&excludecore
, "avoid core when setting cpuaffinity"));
980 CmiGetArgStringDesc(argv
, "+pemap", &pemap
, "define pe to core mapping");
981 CmiGetArgStringDesc(argv
, "+pemapfile", &pemapfile
, "define pe to core mapping file");
982 CmiGetArgStringDesc(argv
, "+commap", &commap
, "define comm threads to core mapping");
983 if (affinity_flag
&& CmiMyPe()==0)
984 CmiPrintf("sched_setaffinity() is not supported, +setcpuaffinity disabled.\n");
985 if (excludecore
!= -1 && CmiMyPe()==0)
986 CmiPrintf("sched_setaffinity() is not supported, +excludecore disabled.\n");
987 if (pemap
&& CmiMyPe()==0)
988 CmiPrintf("sched_setaffinity() is not supported, +pemap disabled.\n");
989 if (pemapfile
&& CmiMyPe()==0)
990 CmiPrintf("sched_setaffinity() is not supported, +pemapfile disabled.\n");
991 if (commap
&& CmiMyPe()==0)
992 CmiPrintf("sched_setaffinity() is not supported, +commap disabled.\n");
/*
 * Variant of CmiInitCPUAffinityUtil for builds without affinity support:
 * initialises the same two Cpv variables, with the core set to -1 and no
 * stat stream (NULL).
 */
995 /* called in ConverseCommonInit to initialize basic variables */
996 void CmiInitCPUAffinityUtil(){
997 CpvInitialize(int, myCPUAffToCore
);
998 CpvAccess(myCPUAffToCore
) = -1;
1000 CpvInitialize(void *, myProcStatFP
);
1001 CpvAccess(myProcStatFP
) = NULL
;
1006 printf("WARNING: CmiOnCore IS NOT SUPPORTED ON THIS PLATFORM\n");