13 #include <hwi/include/bqc/A2_inlines.h>
14 #include "spi/include/kernel/process.h"
15 #include "spi/include/kernel/memory.h"
20 #if CMK_ENABLE_ASYNC_PROGRESS
21 #error "async progress non supported with non-smp"
/* Branch-prediction hints for the compiler.  The argument is fully
   parenthesized and normalized with !! so that any nonzero expression
   compares equal to the expected value 1 (the original passed `x`
   through unparenthesized and unnormalized). */
#define CMI_LIKELY(x) (__builtin_expect(!!(x),1))
#define CMI_UNLIKELY(x) (__builtin_expect(!!(x),0))
/* Round a pointer up to the next 32-byte boundary (BG/Q cache-line /
   QPX alignment).  A pointer that is already 32-byte aligned is
   returned unchanged.  (The closing brace was missing in the source.) */
char *ALIGN_32(char *p) {
  return (char *)(((unsigned long)p + 0x1FUL) & ~0x1FUL);
}
33 #define CMI_MAGIC(msg) ((CmiMsgHeaderBasic *)msg)->magic
34 /* FIXME: need a random number that everyone agrees ! */
35 #define CHARM_MAGIC_NUMBER 126
/* Broadcast-root encoding in the header: >0 marks a PE-rooted broadcast,
   <0 a node-rooted one (0 is a plain point-to-point message). */
37 #define CMI_IS_BCAST_ON_CORES(msg) (CMI_BROADCAST_ROOT(msg) > 0)
38 #define CMI_IS_BCAST_ON_NODES(msg) (CMI_BROADCAST_ROOT(msg) < 0)
/* PAMI dispatch ids under which the packet handlers are registered. */
40 #define CMI_PAMI_SHORT_DISPATCH 7
41 #define CMI_PAMI_RZV_DISPATCH 8
42 #define CMI_PAMI_ACK_DISPATCH 9
43 #define CMI_PAMI_DISPATCH 10
/* Size thresholds for the send protocol: below SHORT_CUTOFF use
   PAMI_Send_immediate, below EAGER_CUTOFF use eager PAMI_Send, and
   larger messages go through the rendezvous (rget) path. */
45 #define SHORT_CUTOFF 128
46 #define EAGER_CUTOFF 4096
48 #if CMK_PERSISTENT_COMM
49 #include "machine-persistent.h"
52 #if CMK_ERROR_CHECKING
/* Optional end-to-end message checksumming (error-checking builds only).
   checksum_flag is turned on by the "+checksum" command-line option. */
53 static int checksum_flag
= 0;
54 extern unsigned char computeCheckSum(unsigned char *data
, int len
);
/* Zero the header's cksum field first so it does not perturb the sum,
   then store a checksum over the whole message. */
56 #define CMI_SET_CHECKSUM(msg, len) \
57 if (checksum_flag) { \
58 ((CmiMsgHeaderBasic *)msg)->cksum = 0; \
59 ((CmiMsgHeaderBasic *)msg)->cksum = computeCheckSum((unsigned char*)msg, len); \
62 #define CMI_CHECK_CHECKSUM(msg, len) \
65 if (computeCheckSum((unsigned char*)msg, len) != 0) { \
66 printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), len); \
67 for(count = 0; count < len; count++) { \
68 printf("%2x", msg[count]); \
70 printf("------------------------------\n\n"); \
71 CmiAbort("Fatal error: checksum doesn't agree!\n"); \
74 #define CMI_SET_CHECKSUM(msg, len)
75 #define CMI_CHECK_CHECKSUM(msg, len)
78 #if CMK_SMP && !CMK_MULTICORE
79 //static volatile int commThdExit = 0;
80 //static CmiNodeLock commThdExitLock = 0;
82 //The random seed to pick destination context
83 __thread
uint32_t r_seed
= 0xdeadbeef;
/* Set to 1 on a communication thread (see init_comm_thread below),
   0 on worker threads. */
84 __thread
int32_t _cmi_bgq_incommthread
= 0;
87 //int CmiInCommThread () {
88 // //if (_cmi_bgq_incommthread)
89 // //printf ("CmiInCommThread: %d\n", _cmi_bgq_incommthread);
90 // return _cmi_bgq_incommthread;
93 static void CmiNetworkBarrier(int async
);
94 #if SPECIFIC_PCQUEUE && CMK_SMP
/* Number of L2-atomic queues to reserve: one per PE in this node plus
   three extra.  Parenthesized so the expansion stays correct inside
   larger expressions (e.g. QUEUE_NUMS * sizeof(...)). */
#define QUEUE_NUMS (_Cmi_mynodesize + 3)
96 #include "lrtsqueue.h"
99 #include "machine-lrts.h"
100 #include "machine-common-core.c"
102 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
103 CpvDeclare(int, uselock
);
106 #if CMK_ENABLE_ASYNC_PROGRESS
107 //Immediate messages not supported yet
108 void LrtsAdvanceCommunication(int whenidle
) {}
111 void _alias_rank (int rank
);
113 #define MAX_NUM_CONTEXTS 64
116 #define CMK_PAMI_MULTI_CONTEXT 1
118 #define CMK_PAMI_MULTI_CONTEXT 0
121 #if CMK_PAMI_MULTI_CONTEXT
/* Per-context counters of queued sends and outstanding receives. */
122 volatile int msgQueueLen
[MAX_NUM_CONTEXTS
];
123 volatile int outstanding_recvs
[MAX_NUM_CONTEXTS
];
125 //#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
126 //#define THREADS_PER_CONTEXT 2
127 //#define LTPS 1 //Log Threads Per Context (TPS)
129 #define THREADS_PER_CONTEXT 4
130 #define LTPS 2 //Log Threads Per Context (TPS)
/* Map a worker thread to its PAMI context: THREADS_PER_CONTEXT (=1<<LTPS)
   consecutive ranks share one context. */
133 #define MY_CONTEXT_ID() (CmiMyRank() >> LTPS)
134 #define MY_CONTEXT() (cmi_pami_contexts[CmiMyRank() >> LTPS])
/* Counter updates are compiled out (no-ops) in the multi-context build;
   MSGQLEN()/ORECVS() read as constants. */
136 #define INCR_MSGQLEN() //(msgQueueLen[CmiMyRank() >> LTPS] ++)
137 #define DECR_MSGQLEN() //(msgQueueLen[CmiMyRank() >> LTPS] --)
138 #define MSGQLEN() 0 //(msgQueueLen[CmiMyRank() >> LTPS])
139 #define INCR_ORECVS() //(outstanding_recvs[CmiMyRank() >> LTPS] ++)
140 #define DECR_ORECVS() //(outstanding_recvs[CmiMyRank() >> LTPS] --)
141 #define ORECVS() 0 //(outstanding_recvs[CmiMyRank() >> LTPS])
/* Single-context build: one global pair of counters and context 0. */
144 volatile int msgQueueLen
;
145 volatile int outstanding_recvs
;
146 #define MY_CONTEXT_ID() (0)
147 #define MY_CONTEXT() (cmi_pami_contexts[0])
149 #define INCR_MSGQLEN() (msgQueueLen ++)
150 #define DECR_MSGQLEN() (msgQueueLen --)
151 #define MSGQLEN() (msgQueueLen)
152 #define INCR_ORECVS() (outstanding_recvs ++)
153 #define DECR_ORECVS() (outstanding_recvs --)
154 #define ORECVS() (outstanding_recvs)
/* In SMP mode without async progress, worker threads share contexts and
   must lock around PAMI calls; otherwise the lock macros are no-ops. */
157 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
158 #define PAMIX_CONTEXT_LOCK_INIT(x)
159 #define PAMIX_CONTEXT_LOCK(x) if(LTPS) PAMI_Context_lock(x)
160 #define PAMIX_CONTEXT_UNLOCK(x) if(LTPS) {ppc_msync(); PAMI_Context_unlock(x);}
161 #define PAMIX_CONTEXT_TRYLOCK(x) ((LTPS)?(PAMI_Context_trylock(x) == PAMI_SUCCESS):(1))
163 #define PAMIX_CONTEXT_LOCK_INIT(x)
164 #define PAMIX_CONTEXT_LOCK(x)
165 #define PAMIX_CONTEXT_UNLOCK(x)
166 #define PAMIX_CONTEXT_TRYLOCK(x) 1
172 static INLINE_KEYWORD
unsigned myrand (unsigned *seed
) {
/* Linear-congruential PRNG step: advances *seed in place.
   NOTE(review): the return statement is not visible in this fragment;
   presumably it returns the updated seed -- confirm against the full
   source before relying on the return value. */
173 *seed
= A_PRIME
* (*seed
) + B_PRIME
;
177 static void send_done(pami_context_t ctxt
, void *data
, pami_result_t result
)
/* Deliver a message to another PE within the same (SMP) node by pushing
   it directly onto that rank's local receive queue -- no network send.
   `size` is unused here; the queue takes ownership of `msg`.
   (The closing brace was missing in the source.) */
static void CmiSendPeer (int rank, int size, char *msg) {
  //fprintf(stderr, "%d Send messages to peer\n", CmiMyPe());
  CmiPushPE (rank, msg);
}
191 static void recv_done(pami_context_t ctxt
, void *clientdata
, pami_result_t result
)
/* Completion callback for an incoming message: validate it and hand it
   to the Converse/Charm++ message handler.  clientdata is the received
   message buffer. */
192 /* recv done callback: push the recved msg to recv queue */
194 char *msg
= (char *) clientdata
;
/* The true message length is carried in the Converse header. */
195 int sndlen
= ((CmiMsgHeaderBasic
*) msg
)->size
;
196 //int rank = *(int *) (msg + sndlen); //get rank from bottom of the message
197 //CMI_DEST_RANK(msg) = rank;
/* Verify integrity (no-op unless "+checksum" is enabled), then check the
   magic byte to catch corrupted / non-Charm++ packets. */
199 CMI_CHECK_CHECKSUM(msg
, sndlen
);
200 if (CMI_MAGIC(msg
) != CHARM_MAGIC_NUMBER
) {
201 CmiAbort("Charm++ Warning: Non Charm++ Message Received. If your application has a large number of messages, this may be because of overflow in the low-level FIFOs. Please set the environment variable MUSPI_INJFIFOSIZE if the application has large number of small messages (<=4K bytes), and/or PAMI_RGETINJFIFOSIZE if the application has a large number of large messages. The default value of these variable is 65536 which is sufficient for 1000 messages in flight; please try a larger value. Please note that the memory used for these FIFOs eats up the memory = 10*FIFO_SIZE per core. Please contact Charm++ developers for further information. \n");
/* uselock is cleared while the handler runs.  NOTE(review): presumably so
   nested sends from the handler skip re-locking the context -- confirm. */
205 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
206 CpvAccess(uselock
) = 0;
208 handleOneRecvedMsg(sndlen
,msg
);
209 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
210 CpvAccess(uselock
) = 1;
216 typedef struct _cmi_pami_rzv
{
223 typedef struct _cmi_pami_rzv_recv
{
229 static void pkt_dispatch (pami_context_t context
,
231 const void * header_addr
,
233 const void * pipe_addr
,
235 pami_endpoint_t origin
,
/* PAMI point-to-point dispatch handler for regular (eager) messages. */
238 //fprintf (stderr, "Received Message of size %d %p\n", pipe_size, recv);
/* Allocate a Converse message buffer to hold the payload. */
240 int alloc_size
= pipe_size
;
241 char * buffer
= (char *)CmiAlloc(alloc_size
);
/* Asynchronous arrival path: fill in the pami_recv_t so PAMI streams the
   payload into `buffer` and invokes recv_done on completion.
   NOTE(review): the branch structure around the two paths is not fully
   visible in this fragment -- confirm against the full source. */
244 recv
->local_fn
= recv_done
;
245 recv
->cookie
= buffer
;
246 recv
->type
= PAMI_TYPE_BYTE
;
249 recv
->data_fn
= PAMI_DATA_COPY
;
/* Immediate arrival path: the payload is already in pipe_addr; copy it
   and complete the receive inline. */
252 memcpy (buffer
, pipe_addr
, pipe_size
);
253 recv_done (NULL
, buffer
, PAMI_SUCCESS
);
257 static void short_pkt_dispatch (pami_context_t context
,
259 const void * header_addr
,
261 const void * pipe_addr
,
263 pami_endpoint_t origin
,
/* Dispatch handler for short messages (< SHORT_CUTOFF): the payload is
   always delivered in-line, so copy it out, validate, and dispatch. */
266 int alloc_size
= pipe_size
;
267 char * buffer
= (char *)CmiAlloc(alloc_size
);
269 memcpy (buffer
, pipe_addr
, pipe_size
);
270 char *smsg
= (char *)pipe_addr
;
271 char *msg
= (char *)buffer
;
/* Validate against the original (pipe) copy before dispatching the
   private copy. */
273 CMI_CHECK_CHECKSUM(smsg
, pipe_size
);
274 if (CMI_MAGIC(smsg
) != CHARM_MAGIC_NUMBER
) {
275 /* received a non-charm msg */
276 CmiAbort("Charm++ Warning: Non Charm++ Message Received. If your application has a large number of messages, this may be because of overflow in the low-level FIFOs. Please set the environment variable MUSPI_INJFIFOSIZE if the application has large number of small messages (<=4K bytes), and/or PAMI_RGETINJFIFOSIZE if the application has a large number of large messages. The default value of these variable is 65536 which is sufficient for 1000 messages in flight; please try a larger value. Please note that the memory used for these FIFOs eats up the memory = 10*FIFO_SIZE per core. Please contact Charm++ developers for further information. \n");
/* uselock is cleared while the handler runs (same pattern as recv_done). */
278 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
279 CpvAccess(uselock
) = 0;
281 handleOneRecvedMsg(pipe_size
,msg
);
282 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
283 CpvAccess(uselock
) = 1;
287 void rzv_pkt_dispatch (pami_context_t context
,
289 const void * header_addr
,
291 const void * pipe_addr
,
293 pami_endpoint_t origin
,
296 void ack_pkt_dispatch (pami_context_t context
,
298 const void * header_addr
,
300 const void * pipe_addr
,
302 pami_endpoint_t origin
,
305 void rzv_recv_done (pami_context_t ctxt
,
307 pami_result_t result
);
309 //approx sleep command
310 size_t mysleep_iter
= 0;
311 void mysleep (unsigned long cycles
) {
312 unsigned long start
= GetTimeBase();
313 unsigned long end
= start
+ cycles
;
315 while (start
< end
) {
317 start
= GetTimeBase();
323 static void * test_buf
;
/* Set to 1 before issuing the network-barrier collective; cleared by its
   completion callback (pami_barrier_done) when the barrier finishes. */
324 volatile int pami_barrier_flag
= 0;
325 //typedef pami_result_t (*pamix_proc_memalign_fn) (void**, size_t, size_t, const char*);
/* Barrier-collective completion callback.  NOTE(review): the body after
   the cast is not visible in this fragment; presumably it clears *active
   (the flag passed as the collective's cookie) -- confirm. */
327 void pami_barrier_done (void *ctxt
, void * clientdata
, pami_result_t err
)
329 int * active
= (int *) clientdata
;
/* Process-wide PAMI state: client, its contexts, world geometry, and the
   prepared barrier collective descriptor. */
333 pami_client_t cmi_pami_client
;
334 pami_context_t
* cmi_pami_contexts
;
335 size_t cmi_pami_numcontexts
;
336 pami_geometry_t world_geometry
;
337 pami_xfer_t pami_barrier
;
338 char clientname
[] = "Converse";
/* A registered memory region used by the rendezvous (rget) path. */
341 typedef struct _cmi_pami_mregion_t
{
342 pami_memregion_t mregion
;
344 } CmiPAMIMemRegion_t
;
346 //one for each of the 64 possible contexts
347 CmiPAMIMemRegion_t cmi_pami_memregion
[64];
353 void _alias_rank (int rank
) {
354 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
356 CmiState cs
= CmiGetState();
357 CmiState cs_r
= CmiGetStateN(rank
);
359 cs
->rank
= cs_r
->rank
;
364 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
366 pami_result_t
init_comm_thread (pami_context_t context
,
369 CmiState cs
= CmiGetState();
370 CmiState cs0
= CmiGetStateN(0);
371 *cs
= *cs0
; //Alias comm thread to rank 0
372 //printf("Initialized comm thread, my rank %d, my pe %d\n",
376 //Notify main thread comm thread has been initialized
380 //set the seed to choose destination context
381 uint64_t rseedl
= r_seed
;
382 rseedl
|= (uint64_t)context
;
383 r_seed
= ((uint32_t)rseedl
)^((uint32_t)(rseedl
>> 32));
386 _cmi_bgq_incommthread
= 1;
391 typedef void (*pamix_progress_function
) (pami_context_t context
, void *cookie
);
392 typedef pami_result_t (*pamix_progress_register_fn
)
393 (pami_context_t context
,
394 pamix_progress_function progress_fn
,
395 pamix_progress_function suspend_fn
,
396 pamix_progress_function resume_fn
,
398 typedef pami_result_t (*pamix_progress_enable_fn
)(pami_context_t context
,
400 typedef pami_result_t (*pamix_progress_disable_fn
)(pami_context_t context
,
402 #define PAMI_EXTENSION_OPEN(client, name, ext) \
405 rc = PAMI_Extension_open(client, name, ext); \
406 CmiAssert (rc == PAMI_SUCCESS); \
408 #define PAMI_EXTENSION_FUNCTION(type, name, ext) \
411 fn = PAMI_Extension_symbol(ext, name); \
412 CmiAssert (fn != NULL); \
416 pami_extension_t cmi_ext_progress
;
417 pamix_progress_register_fn cmi_progress_register
;
418 pamix_progress_enable_fn cmi_progress_enable
;
419 pamix_progress_disable_fn cmi_progress_disable
;
421 extern int quietMode
;
423 int CMI_Progress_init(int start
, int ncontexts
) {
424 if ((CmiMyPe() == 0) && (!quietMode
))
425 printf("Enabling communication threads\n");
427 PAMI_EXTENSION_OPEN(cmi_pami_client
,"EXT_async_progress",&cmi_ext_progress
);
428 cmi_progress_register
= PAMI_EXTENSION_FUNCTION(pamix_progress_register_fn
, "register", cmi_ext_progress
);
429 cmi_progress_enable
= PAMI_EXTENSION_FUNCTION(pamix_progress_enable_fn
, "enable", cmi_ext_progress
);
430 cmi_progress_disable
= PAMI_EXTENSION_FUNCTION(pamix_progress_disable_fn
, "disable", cmi_ext_progress
);
433 for (i
= start
; i
< start
+ncontexts
; ++i
) {
434 //fprintf(stderr, "Enabling progress on context %d\n", i);
435 cmi_progress_register (cmi_pami_contexts
[i
],
439 cmi_progress_enable (cmi_pami_contexts
[i
], 0 /*progress all*/);
444 for (i
= start
; i
< start
+ncontexts
; ++i
) {
446 PAMI_Context_post(cmi_pami_contexts
[i
], &work
,
447 init_comm_thread
, (void*)&x
);
454 int CMI_Progress_finalize(int start
, int ncontexts
) {
456 for (i
= start
; i
< start
+ncontexts
; ++i
) {
457 cmi_progress_disable (cmi_pami_contexts
[i
], 0 /*progress all*/);
459 PAMI_Extension_close (cmi_ext_progress
);
463 #include "manytomany.c"
465 void LrtsInit(int *argc
, char ***argv
, int *numNodes
, int *myNodeID
)
469 PAMI_Client_create (clientname
, &cmi_pami_client
, NULL
, 0);
471 #if CMK_PAMI_MULTI_CONTEXT
472 if ((_Cmi_mynodesize
% THREADS_PER_CONTEXT
) == 0)
473 _n
= _Cmi_mynodesize
/ THREADS_PER_CONTEXT
; //have a context for each four threads
475 _n
= 1 + (_Cmi_mynodesize
/ THREADS_PER_CONTEXT
); //have a context for each four threads
478 cmi_pami_contexts
= (pami_context_t
*) malloc (sizeof(pami_context_t
) * _n
);
479 pami_result_t rc
= PAMI_Context_createv (cmi_pami_client
, NULL
, 0, cmi_pami_contexts
, _n
);
480 if (rc
!= PAMI_SUCCESS
) {
481 fprintf(stderr
, "PAMI_Context_createv failed for %d contexts\n", _n
);
484 cmi_pami_numcontexts
= _n
;
486 //fprintf(stderr,"Creating %d pami contexts\n", _n);
488 pami_configuration_t configuration
;
489 pami_result_t result
;
491 configuration
.name
= PAMI_CLIENT_TASK_ID
;
492 result
= PAMI_Client_query(cmi_pami_client
, &configuration
, 1);
493 *myNodeID
= configuration
.value
.intval
;
495 configuration
.name
= PAMI_CLIENT_NUM_TASKS
;
496 result
= PAMI_Client_query(cmi_pami_client
, &configuration
, 1);
497 *numNodes
= configuration
.value
.intval
;
499 pami_dispatch_hint_t options
= (pami_dispatch_hint_t
) {0};
500 pami_dispatch_callback_function pfn
;
501 for (i
= 0; i
< _n
; ++i
) {
502 pfn
.p2p
= pkt_dispatch
;
503 PAMI_Dispatch_set (cmi_pami_contexts
[i
],
509 pfn
.p2p
= ack_pkt_dispatch
;
510 PAMI_Dispatch_set (cmi_pami_contexts
[i
],
511 CMI_PAMI_ACK_DISPATCH
,
516 pfn
.p2p
= rzv_pkt_dispatch
;
517 PAMI_Dispatch_set (cmi_pami_contexts
[i
],
518 CMI_PAMI_RZV_DISPATCH
,
523 pfn
.p2p
= short_pkt_dispatch
;
524 PAMI_Dispatch_set (cmi_pami_contexts
[i
],
525 CMI_PAMI_SHORT_DISPATCH
,
533 void * buf
= malloc(sizeof(long));
535 Kernel_MemoryRegion_t k_mregion
;
536 retval
= Kernel_CreateMemoryRegion (&k_mregion
, buf
, sizeof(long));
538 for (i
= 0; i
< _n
; ++i
) {
539 cmi_pami_memregion
[i
].baseVA
= k_mregion
.BaseVa
;
540 PAMI_Memregion_create (cmi_pami_contexts
[i
],
544 &cmi_pami_memregion
[i
].mregion
);
549 //fprintf(stderr, "%d Initializing Converse PAMI machine Layer on %d tasks\n", _Cmi_mynode, _Cmi_numnodes);
551 ///////////---------------------------------/////////////////////
552 //////////----------- Initialize Barrier -------////////////////
553 size_t num_algorithm
[2];
554 pami_algorithm_t
*always_works_algo
= NULL
;
555 pami_metadata_t
*always_works_md
= NULL
;
556 pami_algorithm_t
*must_query_algo
= NULL
;
557 pami_metadata_t
*must_query_md
= NULL
;
558 pami_xfer_type_t xfer_type
= PAMI_XFER_BARRIER
;
560 /* Docs01: Get the World Geometry */
561 result
= PAMI_Geometry_world (cmi_pami_client
,&world_geometry
);
562 if (result
!= PAMI_SUCCESS
)
564 fprintf (stderr
, "Error. Unable to get world geometry: result = %d\n", result
);
568 result
= PAMI_Geometry_algorithms_num(world_geometry
,
570 (size_t*)num_algorithm
);
572 if (result
!= PAMI_SUCCESS
|| num_algorithm
[0]==0)
575 "Error. Unable to query algorithm, or no algorithms available result = %d\n",
580 always_works_algo
= (pami_algorithm_t
*)malloc(sizeof(pami_algorithm_t
)*num_algorithm
[0]);
581 always_works_md
= (pami_metadata_t
*)malloc(sizeof(pami_metadata_t
)*num_algorithm
[0]);
582 must_query_algo
= (pami_algorithm_t
*)malloc(sizeof(pami_algorithm_t
)*num_algorithm
[1]);
583 must_query_md
= (pami_metadata_t
*)malloc(sizeof(pami_metadata_t
)*num_algorithm
[1]);
585 /* Docs05: Query the algorithm lists */
586 result
= PAMI_Geometry_algorithms_query(world_geometry
,
595 int opt_alg
= 0, nalg
= 0;
596 for (nalg
= 0; nalg
< num_algorithm
[0]; ++nalg
)
597 if (strstr(always_works_md
[nalg
].name
, "GI") != NULL
) {
602 if ((_Cmi_mynode
== 0) && (!quietMode
))
603 printf ("Choosing optimized barrier algorithm name %s\n",
604 always_works_md
[opt_alg
]);
606 pami_barrier
.cb_done
= pami_barrier_done
;
607 pami_barrier
.cookie
= (void*) & pami_barrier_flag
;
608 pami_barrier
.algorithm
= always_works_algo
[opt_alg
];
610 /* Docs06: Query the algorithm lists */
611 if (result
!= PAMI_SUCCESS
)
613 fprintf (stderr
, "Error. Unable to get query algorithm. result = %d\n", result
);
617 CmiNetworkBarrier(0);
618 CmiNetworkBarrier(0);
619 CmiNetworkBarrier(0);
622 if (CmiGetArgFlag(*argv
,"+checksum")) {
623 #if CMK_ERROR_CHECKING
625 if (_Cmi_mynode
== 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
627 if (_Cmi_mynode
== 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
630 #if SPECIFIC_PCQUEUE && CMK_SMP
632 // printf(" in L2Atomic Queue\n");
634 //reserve for pe queues and node queue first
635 int actualNodeSize
= 64/Kernel_ProcessCount();
636 CmiMemAllocInit_bgq ((char*)l2atomicbuf
+
637 (QUEUE_NUMS
)*sizeof(L2AtomicState
),
638 2*actualNodeSize
*sizeof(L2AtomicState
));
641 //Initialize the manytomany api
642 #if CMK_PERSISTENT_COMM
643 _initPersistent(cmi_pami_contexts
, _n
);
646 _cmidirect_m2m_initialize (cmi_pami_contexts
, _n
);
649 void LrtsPreCommonInit(int everReturn
)
651 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
652 CpvInitialize(int, uselock
);
653 CpvAccess(uselock
) = 1;
655 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
656 if(CmiMyRank() == 0) {
657 CMI_Progress_init(0, cmi_pami_numcontexts
);
662 void LrtsPostCommonInit(int everReturn
)
664 //printf ("before calling CmiBarrier() \n");
668 void LrtsPostNonLocal() {} /* empty stub: no machine-specific post-non-local work in this layer */
670 void LrtsDrainResources()
672 while (MSGQLEN() > 0 || ORECVS() > 0) {
673 LrtsAdvanceCommunication(0);
682 if (CmiMyRank() == 0) {
684 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
685 CMI_Progress_finalize(0, cmi_pami_numcontexts
);
687 PAMI_Context_destroyv(cmi_pami_contexts
, cmi_pami_numcontexts
);
688 PAMI_Client_destroy(&cmi_pami_client
);
692 if(!CharmLibInterOperate
) {
706 void LrtsAbort(const char *message
) {
/* Idle-notification hooks required by the LRTS interface: no-op stubs. */
710 INLINE_KEYWORD
void LrtsBeginIdle() {}
712 INLINE_KEYWORD
void LrtsStillIdle() {}
714 void LrtsNotifyIdle()
716 #if CMK_SMP && CMK_PAMI_MULTI_CONTEXT
717 #if !CMK_ENABLE_ASYNC_PROGRESS && SPECIFIC_QUEUE
718 //Wait on the atomic queue to get a message with very low core
719 //overheads. One thread calls advance more frequently
720 ////spin wait for 2-4us when idle
721 ////process node queue messages every 10us
722 ////Idle cores will only use one LMQ slot and an int sum
723 CmiState cs
= CmiGetStateN(rank
);
724 if ((CmiMyRank()% THREADS_PER_CONTEXT
) == 0)
725 {LRTSQueueSpinWait(CmiMyRecvQueue(),
729 #if 0 && SPECIFIC_QUEUE && CMK_NODE_QUEUE_AVAILABLE
730 { LRTSQueueSpinWait(CmiMyRecvQueue(),
736 pami_result_t
machine_send_handoff (pami_context_t context
, void *msg
);
737 void machine_send (pami_context_t context
,
742 int to_lock
)__attribute__((always_inline
));
744 CmiCommHandle
LrtsSendFunc(int node
, int destPE
, int size
, char *msg
, int to_lock
)
746 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
747 //int c = myrand(&r_seed) % cmi_pami_numcontexts;
748 int c
= node
% cmi_pami_numcontexts
;
749 pami_context_t my_context
= cmi_pami_contexts
[c
];
750 CmiMsgHeaderBasic
*hdr
= (CmiMsgHeaderBasic
*)msg
;
754 PAMI_Context_post(my_context
, (pami_work_t
*)hdr
->work
,
755 machine_send_handoff
, msg
);
757 pami_context_t my_context
= MY_CONTEXT();
758 machine_send (my_context
, node
, CMI_DEST_RANK(msg
), size
, msg
, to_lock
);
763 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
764 pami_result_t
machine_send_handoff (pami_context_t context
, void *msg
) {
765 CmiMsgHeaderBasic
*hdr
= (CmiMsgHeaderBasic
*)msg
;
766 int node
= hdr
->dstnode
;
767 int rank
= hdr
->rank
;
768 int size
= hdr
->size
;
770 //As this is executed on the comm thread no locking is necessary
771 machine_send(context
, node
, rank
, size
, msg
, 0);
776 void machine_send (pami_context_t context
,
783 CMI_MAGIC(msg
) = CHARM_MAGIC_NUMBER
;
784 CMI_MSG_SIZE(msg
) = size
;
785 CMI_SET_CHECKSUM(msg
, size
);
787 pami_endpoint_t target
;
789 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
790 to_lock
= CpvAccess(uselock
);
793 #if CMK_PAMI_MULTI_CONTEXT && CMK_NODE_QUEUE_AVAILABLE
794 size_t dst_context
= (rank
!= DGRAM_NODEMESSAGE
) ? (rank
>>LTPS
) : (rand_r(&r_seed
) % cmi_pami_numcontexts
);
795 //Choose a context at random
796 //size_t dst_context = myrand(&r_seed) % cmi_pami_numcontexts;
798 size_t dst_context
= 0;
800 PAMI_Endpoint_create (cmi_pami_client
, (pami_task_t
)node
, dst_context
, &target
);
802 //fprintf (stderr, "Calling PAMI Send to %d magic %d size %d\n", node, CMI_MAGIC(msg), size);
803 if (CMI_LIKELY(size
< SHORT_CUTOFF
)) {
804 pami_send_immediate_t parameters
;
806 parameters
.dispatch
= CMI_PAMI_DISPATCH
;
807 if ( CMI_LIKELY(CMI_BROADCAST_ROOT(msg
) == 0))
808 #if CMK_NODE_QUEUE_AVAILABLE
809 if ( CMI_LIKELY(rank
!= DGRAM_NODEMESSAGE
) )
811 //use short callback if not a bcast and not an SMP node message
812 parameters
.dispatch
= CMI_PAMI_SHORT_DISPATCH
;
814 parameters
.header
.iov_base
= NULL
; //&rank;
815 parameters
.header
.iov_len
= 0; //sizeof(int);
816 parameters
.data
.iov_base
= msg
;
817 parameters
.data
.iov_len
= size
;
818 parameters
.dest
= target
;
821 PAMIX_CONTEXT_LOCK(context
);
823 PAMI_Send_immediate (context
, ¶meters
);
826 PAMIX_CONTEXT_UNLOCK(context
);
829 else if (size
< EAGER_CUTOFF
) {
830 pami_send_t parameters
;
831 parameters
.send
.dispatch
= CMI_PAMI_DISPATCH
;
832 parameters
.send
.header
.iov_base
= NULL
; //&rank;
833 parameters
.send
.header
.iov_len
= 0; //sizeof(int);
834 parameters
.send
.data
.iov_base
= msg
;
835 parameters
.send
.data
.iov_len
= size
;
836 parameters
.events
.cookie
= msg
;
837 parameters
.events
.local_fn
= send_done
;
838 parameters
.events
.remote_fn
= NULL
;
839 memset(¶meters
.send
.hints
, 0, sizeof(parameters
.send
.hints
));
840 parameters
.send
.dest
= target
;
843 PAMIX_CONTEXT_LOCK(context
);
845 PAMI_Send (context
, ¶meters
);
847 PAMIX_CONTEXT_UNLOCK(context
);
853 rzv
.offset
= (size_t)msg
- (size_t)cmi_pami_memregion
[0].baseVA
;
854 rzv
.dst_context
= dst_context
;
856 pami_send_immediate_t parameters
;
857 parameters
.dispatch
= CMI_PAMI_RZV_DISPATCH
;
858 parameters
.header
.iov_base
= &rzv
;
859 parameters
.header
.iov_len
= sizeof(rzv
);
860 parameters
.data
.iov_base
= &cmi_pami_memregion
[0].mregion
;
861 parameters
.data
.iov_len
= sizeof(pami_memregion_t
);
862 parameters
.dest
= target
;
865 PAMIX_CONTEXT_LOCK(context
);
867 PAMI_Send_immediate (context
, ¶meters
);
870 PAMIX_CONTEXT_UNLOCK(context
);
874 #if !CMK_ENABLE_ASYNC_PROGRESS
875 //threads have to progress contexts themselves
876 void LrtsAdvanceCommunication(int whenidle
) {
877 pami_context_t my_context
= MY_CONTEXT();
880 if (PAMIX_CONTEXT_TRYLOCK(my_context
))
882 PAMI_Context_advance(my_context
, 1);
883 PAMIX_CONTEXT_UNLOCK(my_context
);
886 PAMI_Context_advance(my_context
, 1);
891 static pami_result_t
machine_network_barrier(pami_context_t my_context
,
894 pami_result_t result
= PAMI_SUCCESS
;
896 PAMIX_CONTEXT_LOCK(my_context
);
897 result
= PAMI_Collective(my_context
, &pami_barrier
);
899 PAMIX_CONTEXT_UNLOCK(my_context
);
901 if (result
!= PAMI_SUCCESS
)
902 fprintf (stderr
, "Error. Unable to issue collective. result = %d\n", result
);
907 pami_result_t
network_barrier_handoff(pami_context_t context
, void *msg
)
909 return machine_network_barrier(context
, 0);
914 CmiNetworkBarrier(1);
916 static void CmiNetworkBarrier(int async
) {
917 pami_context_t my_context
= cmi_pami_contexts
[0];
918 pami_barrier_flag
= 1;
919 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
922 PAMI_Context_post(my_context
, &work
, network_barrier_handoff
, NULL
);
923 while (pami_barrier_flag
);
924 //fprintf (stderr, "After Network Barrier\n");
929 machine_network_barrier(my_context
, 1);
930 PAMIX_CONTEXT_LOCK(my_context
);
931 while (pami_barrier_flag
)
932 PAMI_Context_advance (my_context
, 100);
933 PAMIX_CONTEXT_UNLOCK(my_context
);
937 static void sendAck (pami_context_t context
,
938 CmiPAMIRzvRecv_t
*recv
)
940 pami_send_immediate_t parameters
;
941 parameters
.dispatch
= CMI_PAMI_ACK_DISPATCH
;
942 parameters
.header
.iov_base
= &recv
->src_buffer
;
943 parameters
.header
.iov_len
= sizeof(void *);
944 parameters
.data
.iov_base
= NULL
;
945 parameters
.data
.iov_len
= 0;
946 parameters
.dest
= recv
->src_ep
;
948 //Called from advance and hence we dont need a mutex
949 PAMI_Send_immediate (context
, ¶meters
);
953 void rzv_recv_done (pami_context_t ctxt
,
955 pami_result_t result
)
957 CmiPAMIRzvRecv_t recv
= *(CmiPAMIRzvRecv_t
*)clientdata
;
958 recv_done(ctxt
, recv
.msg
, PAMI_SUCCESS
);
959 sendAck(ctxt
, &recv
);
962 void rzv_pkt_dispatch (pami_context_t context
,
964 const void * header_addr
,
966 const void * pipe_addr
,
968 pami_endpoint_t origin
,
973 CmiPAMIRzv_t
*rzv_hdr
= (CmiPAMIRzv_t
*) header_addr
;
974 CmiAssert (header_size
== sizeof(CmiPAMIRzv_t
));
975 int alloc_size
= rzv_hdr
->bytes
;
976 char * buffer
= (char *)CmiAlloc(alloc_size
+ sizeof(CmiPAMIRzvRecv_t
));
977 //char *buffer=(char*)CmiAlloc(alloc_size+sizeof(CmiPAMIRzvRecv_t)+sizeof(int))
978 //*(int *)(buffer+alloc_size) = *(int *)header_addr;
979 CmiAssert (recv
== NULL
);
981 CmiPAMIRzvRecv_t
*rzv_recv
= (CmiPAMIRzvRecv_t
*)(buffer
+alloc_size
);
982 rzv_recv
->msg
= buffer
;
983 rzv_recv
->src_ep
= origin
;
984 rzv_recv
->src_buffer
= rzv_hdr
->buffer
;
986 CmiAssert (pipe_addr
!= NULL
);
987 pami_memregion_t
*mregion
= (pami_memregion_t
*) pipe_addr
;
988 CmiAssert (pipe_size
== sizeof(pami_memregion_t
));
990 //Rzv inj fifos are on the 17th core shared by all contexts
991 pami_rget_simple_t rget
;
992 rget
.rma
.dest
= origin
;
993 rget
.rma
.bytes
= rzv_hdr
->bytes
;
994 rget
.rma
.cookie
= rzv_recv
;
995 rget
.rma
.done_fn
= rzv_recv_done
;
996 rget
.rma
.hints
.buffer_registered
= PAMI_HINT_ENABLE
;
997 rget
.rma
.hints
.use_rdma
= PAMI_HINT_ENABLE
;
998 rget
.rdma
.local
.mr
= &cmi_pami_memregion
[rzv_hdr
->dst_context
].mregion
;
999 rget
.rdma
.local
.offset
= (size_t)buffer
-
1000 (size_t)cmi_pami_memregion
[rzv_hdr
->dst_context
].baseVA
;
1001 rget
.rdma
.remote
.mr
= mregion
; //from message payload
1002 rget
.rdma
.remote
.offset
= rzv_hdr
->offset
;
1004 //printf ("starting rget\n");
1005 PAMI_Rget (context
, &rget
);
1008 void ack_pkt_dispatch (pami_context_t context
,
1010 const void * header_addr
,
1012 const void * pipe_addr
,
1014 pami_endpoint_t origin
,
1017 char **buf
= (char **)header_addr
;
1022 /*==========================================================*/
1024 /* Optional routines which could use common code which is shared with
1025 other machine layer implementations. */
1027 /* MULTICAST/VECTOR SENDING FUNCTIONS
1029 * In relations to some flags, some other delivery functions may be needed.
1032 #if ! CMK_MULTICAST_LIST_USE_COMMON_CODE
1034 void LrtsSyncListSendFn(int npes
, int *pes
, int size
, char *msg
) {
1036 copymsg
= (char *)CmiAlloc(size
);
1037 CmiMemcpy(copymsg
,msg
,size
);
1038 CmiFreeListSendFn(npes
, pes
, size
, msg
);
1041 typedef struct ListMulticastVec_t
{
1048 void machineFreeListSendFn(pami_context_t context
,
1054 pami_result_t
machineFreeList_handoff(pami_context_t context
, void *cookie
)
1056 ListMulticastVec
*lvec
= (ListMulticastVec
*) cookie
;
1057 machineFreeListSendFn(context
, lvec
->npes
, lvec
->pes
, lvec
->size
, lvec
->msg
);
1061 void LrtsFreeListSendFn(int npes
, int *pes
, int size
, char *msg
) {
1062 //printf("%d: In Free List Send Fn imm %d\n", CmiMyPe(), CmiIsImmediate(msg));
1064 CMI_SET_BROADCAST_ROOT(msg
,0);
1065 CMI_MAGIC(msg
) = CHARM_MAGIC_NUMBER
;
1066 CmiMsgHeaderBasic
*hdr
= (CmiMsgHeaderBasic
*)msg
;
1071 CMI_DEST_RANK(msg
) = CmiRankOf(pes
[0]);
1072 LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes
[0]),CmiMyPartition()), pes
[0], size
, msg
, 1);
1076 pami_context_t my_context
= MY_CONTEXT();
1077 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
1078 ListMulticastVec
*lvec
= (ListMulticastVec
*)
1079 CmiAlloc(sizeof(ListMulticastVec
) + sizeof(int)*npes
);
1080 lvec
->pes
= (int*)((char*)lvec
+ sizeof(ListMulticastVec
));
1082 for (i
=0; i
<npes
; i
++)
1083 lvec
->pes
[i
] = pes
[i
];
1087 PAMI_Context_post(my_context
, (pami_work_t
*)hdr
->work
,
1088 machineFreeList_handoff
, lvec
);
1090 machineFreeListSendFn(my_context
, npes
, pes
, size
, msg
);
1094 void machineFreeListSendFn(pami_context_t my_context
, int npes
, int *pes
, int size
, char *msg
) {
1098 for (i
=0; i
<npes
; i
++) {
1099 if (CmiNodeOf(pes
[i
]) == CmiMyNode()) {
1100 copymsg
= (char *)CmiAlloc(size
);
1101 CmiAssert(copymsg
!= NULL
);
1102 CmiMemcpy(copymsg
,msg
,size
);
1103 CmiSendPeer(CmiRankOf(pes
[i
]), size
, copymsg
);
1107 for (i
=0; i
<npes
; i
++) {
1108 if (CmiNodeOf(pes
[i
]) == CmiMyNode()) {
1109 CmiSyncSend(pes
[i
], size
, msg
);
1114 PAMIX_CONTEXT_LOCK(my_context
);
1115 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
1116 CpvAccess(uselock
) = 0;
1119 for (i
=0;i
<npes
;i
++) {
1120 if (CmiNodeOf(pes
[i
]) == CmiMyNode());
1121 else if (i
< npes
- 1) {
1126 copymsg
= (char *)CmiAlloc(size
);
1127 CmiAssert(copymsg
!= NULL
);
1128 CmiMemcpy(copymsg
,msg
,size
);
1130 CMI_DEST_RANK(copymsg
) = CmiRankOf(pes
[i
]);
1131 LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes
[i
]),CmiMyPartition()), pes
[i
], size
, copymsg
, 0);
1135 if (npes
&& CmiNodeOf(pes
[npes
-1]) != CmiMyNode()) {
1136 CMI_DEST_RANK(msg
) = CmiRankOf(pes
[npes
-1]);
1137 LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes
[npes
-1]),CmiMyPartition()), pes
[npes
-1], size
, msg
, 0);
1142 PAMIX_CONTEXT_UNLOCK(my_context
);
1143 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
1144 CpvAccess(uselock
) = 1;
1148 CmiCommHandle
LrtsAsyncListSendFn(int npes
, int *pes
, int size
, char *msg
) {
1149 CmiAbort("CmiAsyncListSendFn not implemented.");
1150 return (CmiCommHandle
) 0;
1155 #include "cmimemcpy_qpx.h"
1157 #if CMK_PERSISTENT_COMM
1158 #include "machine-persistent.c"