Feature #963: Added ++quiet option to omit charmrun and charm++ non-error messages
[charm.git] / src / arch / pamilrts / machine.c
blob 8f59316c9d4629f0f1ac69cfb395411faff5e622
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include "machine.h"
#include "converse.h"
#include "pcqueue.h"
#include "assert.h"
#include "malloc.h"

#include <hwi/include/bqc/A2_inlines.h>
#include "spi/include/kernel/process.h"
#include "spi/include/kernel/memory.h"
#include "pami.h"
#include "pami_sys.h"

#if !CMK_SMP
#if CMK_ENABLE_ASYNC_PROGRESS
#error "async progress is not supported in non-SMP builds"
#endif
#endif

#define CMI_LIKELY(x)    (__builtin_expect(x,1))
#define CMI_UNLIKELY(x)  (__builtin_expect(x,0))

char *ALIGN_32(char *p) {
  return (char *)((((unsigned long)p) + 0x1f) & (~0x1FUL));
}
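/* Illustrative sketch (not part of the layer): ALIGN_32 rounds a pointer up
 * to the next 32-byte boundary, e.g. ALIGN_32((char *)0x1001) == (char *)0x1020,
 * while an already-aligned pointer is unchanged:
 * ALIGN_32((char *)0x1020) == (char *)0x1020. */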
#define CMI_MAGIC(msg) ((CmiMsgHeaderBasic *)msg)->magic

/* FIXME: need a random number that everyone agrees on! */
#define CHARM_MAGIC_NUMBER 126

#define CMI_IS_BCAST_ON_CORES(msg) (CMI_BROADCAST_ROOT(msg) > 0)
#define CMI_IS_BCAST_ON_NODES(msg) (CMI_BROADCAST_ROOT(msg) < 0)

#define CMI_PAMI_SHORT_DISPATCH   7
#define CMI_PAMI_RZV_DISPATCH     8
#define CMI_PAMI_ACK_DISPATCH     9
#define CMI_PAMI_DISPATCH        10

#define SHORT_CUTOFF  128
#define EAGER_CUTOFF 4096

#if CMK_PERSISTENT_COMM
#include "machine-persistent.h"
#endif

#if CMK_ERROR_CHECKING
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

#define CMI_SET_CHECKSUM(msg, len)                                      \
  if (checksum_flag) {                                                  \
    ((CmiMsgHeaderBasic *)msg)->cksum = 0;                              \
    ((CmiMsgHeaderBasic *)msg)->cksum = computeCheckSum((unsigned char*)msg, len); \
  }

#define CMI_CHECK_CHECKSUM(msg, len)                                    \
  int count;                                                            \
  if (checksum_flag)                                                    \
    if (computeCheckSum((unsigned char*)msg, len) != 0) {               \
      printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), len); \
      for (count = 0; count < len; count++) {                           \
        printf("%2x", msg[count]);                                      \
      }                                                                 \
      printf("------------------------------\n\n");                    \
      CmiAbort("Fatal error: checksum doesn't agree!\n");               \
    }
#else
#define CMI_SET_CHECKSUM(msg, len)
#define CMI_CHECK_CHECKSUM(msg, len)
#endif
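/* Note (an assumption inferred from the checks above, not stated in this file):
 * computeCheckSum appears to accumulate (e.g. XOR) all bytes of the message,
 * including the cksum field itself. CMI_SET_CHECKSUM zeroes cksum first and
 * then stores the checksum of the rest, so a later checksum over the *whole*
 * message yields 0 -- which is exactly what CMI_CHECK_CHECKSUM tests for. */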
#if CMK_SMP && !CMK_MULTICORE
//static volatile int commThdExit = 0;
//static CmiNodeLock commThdExitLock = 0;

//The random seed used to pick the destination context
__thread uint32_t r_seed = 0xdeadbeef;
__thread int32_t _cmi_bgq_incommthread = 0;
#endif

//int CmiInCommThread () {
//  //if (_cmi_bgq_incommthread)
//  //printf ("CmiInCommThread: %d\n", _cmi_bgq_incommthread);
//  return _cmi_bgq_incommthread;
//}

static void CmiNetworkBarrier(int async);

#if SPECIFIC_PCQUEUE && CMK_SMP
#define QUEUE_NUMS (_Cmi_mynodesize + 3)
#include "lrtsqueue.h"
#include "memalloc.c"
#endif

#include "machine-lrts.h"
#include "machine-common-core.c"

#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
CpvDeclare(int, uselock);
#endif

#if CMK_ENABLE_ASYNC_PROGRESS
//Immediate messages are not supported yet
void LrtsAdvanceCommunication(int whenidle) {}
#endif

void _alias_rank (int rank);

#define MAX_NUM_CONTEXTS 64

#if CMK_SMP
#define CMK_PAMI_MULTI_CONTEXT 1
#else
#define CMK_PAMI_MULTI_CONTEXT 0
#endif
#if CMK_PAMI_MULTI_CONTEXT
volatile int msgQueueLen       [MAX_NUM_CONTEXTS];
volatile int outstanding_recvs [MAX_NUM_CONTEXTS];

//#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
//#define THREADS_PER_CONTEXT 2
//#define LTPS                1 //Log2 of THREADS_PER_CONTEXT
//#else
#define THREADS_PER_CONTEXT 4
#define LTPS                2 //Log2 of THREADS_PER_CONTEXT
//#endif

#define MY_CONTEXT_ID() (CmiMyRank() >> LTPS)
#define MY_CONTEXT()    (cmi_pami_contexts[CmiMyRank() >> LTPS])

#define INCR_MSGQLEN()  //(msgQueueLen[CmiMyRank() >> LTPS] ++)
#define DECR_MSGQLEN()  //(msgQueueLen[CmiMyRank() >> LTPS] --)
#define MSGQLEN()       0 //(msgQueueLen[CmiMyRank() >> LTPS])
#define INCR_ORECVS()   //(outstanding_recvs[CmiMyRank() >> LTPS] ++)
#define DECR_ORECVS()   //(outstanding_recvs[CmiMyRank() >> LTPS] --)
#define ORECVS()        0 //(outstanding_recvs[CmiMyRank() >> LTPS])
#else
#define LTPS 1
volatile int msgQueueLen;
volatile int outstanding_recvs;
#define MY_CONTEXT_ID() (0)
#define MY_CONTEXT()    (cmi_pami_contexts[0])

#define INCR_MSGQLEN()  (msgQueueLen ++)
#define DECR_MSGQLEN()  (msgQueueLen --)
#define MSGQLEN()       (msgQueueLen)
#define INCR_ORECVS()   (outstanding_recvs ++)
#define DECR_ORECVS()   (outstanding_recvs --)
#define ORECVS()        (outstanding_recvs)
#endif
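/* Example of the rank-to-context mapping above (multi-context case, LTPS == 2,
 * i.e. THREADS_PER_CONTEXT == 4): ranks 0-3 share context 0, ranks 4-7 share
 * context 1, and so on, up to MAX_NUM_CONTEXTS contexts per node. */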
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
#define PAMIX_CONTEXT_LOCK_INIT(x)
#define PAMIX_CONTEXT_LOCK(x)        if(LTPS) PAMI_Context_lock(x)
#define PAMIX_CONTEXT_UNLOCK(x)      if(LTPS) {ppc_msync(); PAMI_Context_unlock(x);}
#define PAMIX_CONTEXT_TRYLOCK(x)     ((LTPS)?(PAMI_Context_trylock(x) == PAMI_SUCCESS):(1))
#else
#define PAMIX_CONTEXT_LOCK_INIT(x)
#define PAMIX_CONTEXT_LOCK(x)
#define PAMIX_CONTEXT_UNLOCK(x)
#define PAMIX_CONTEXT_TRYLOCK(x)     1
#endif

#define A_PRIME 13
#define B_PRIME 19
static INLINE_KEYWORD unsigned myrand (unsigned *seed) {
  *seed = A_PRIME * (*seed) + B_PRIME;
  return *seed;
}
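/* myrand is a tiny linear congruential generator: cheap, deterministic, and
 * good enough to spread traffic across contexts. A hypothetical usage sketch
 * (the variable names below are illustrative, not from this file):
 *   unsigned seed = 0xdeadbeef;
 *   size_t ctx = myrand(&seed) % cmi_pami_numcontexts;  //pick a context
 */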
static void send_done(pami_context_t ctxt, void *data, pami_result_t result)
{
  CmiFree(data);
  DECR_MSGQLEN();
}

#if CMK_SMP
static void CmiSendPeer (int rank, int size, char *msg) {
  //fprintf(stderr, "%d Send messages to peer\n", CmiMyPe());
  CmiPushPE (rank, msg);
}
#endif

static void recv_done(pami_context_t ctxt, void *clientdata, pami_result_t result)
/* recv done callback: push the received msg onto the recv queue */
{
  char *msg = (char *) clientdata;
  int sndlen = ((CmiMsgHeaderBasic *) msg)->size;
  //int rank = *(int *) (msg + sndlen); //get rank from the bottom of the message
  //CMI_DEST_RANK(msg) = rank;

  CMI_CHECK_CHECKSUM(msg, sndlen);
  if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) {
    CmiAbort("Charm++ Error: Non-Charm++ message received. If your application sends a very large number of messages, this may be caused by overflow in the low-level FIFOs. Please set the environment variable MUSPI_INJFIFOSIZE if the application sends a large number of small messages (<=4K bytes), and/or PAMI_RGETINJFIFOSIZE if it sends a large number of large messages. The default value of these variables is 65536, which is sufficient for 1000 messages in flight; please try a larger value. Note that these FIFOs consume memory: 10*FIFO_SIZE per core. Please contact the Charm++ developers for further information.\n");
    return;
  }

#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 0;
#endif
  handleOneRecvedMsg(sndlen, msg);
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 1;
#endif

  DECR_ORECVS();
}
typedef struct _cmi_pami_rzv {
  void      *buffer;
  size_t     offset;
  int        bytes;
  int        dst_context;
} CmiPAMIRzv_t;

typedef struct _cmi_pami_rzv_recv {
  void      *msg;
  void      *src_buffer;
  int        src_ep;
} CmiPAMIRzvRecv_t;
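/* Rendezvous protocol sketch (as implemented below): for large messages the
 * sender ships only a CmiPAMIRzv_t header (buffer address, offset into its
 * registered memory region, length) plus its pami_memregion_t as the payload.
 * The receiver allocates a buffer, issues a PAMI_Rget to pull the data, and
 * replies on CMI_PAMI_ACK_DISPATCH so the sender can CmiFree its copy
 * (see rzv_pkt_dispatch, rzv_recv_done, and ack_pkt_dispatch). */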
static void pkt_dispatch (pami_context_t       context,
                          void               * clientdata,
                          const void         * header_addr,
                          size_t               header_size,
                          const void         * pipe_addr,
                          size_t               pipe_size,
                          pami_endpoint_t      origin,
                          pami_recv_t        * recv)
{
  //fprintf (stderr, "Received message of size %zu %p\n", pipe_size, recv);
  INCR_ORECVS();
  int alloc_size = pipe_size;
  char *buffer = (char *)CmiAlloc(alloc_size);

  if (recv) {
    recv->local_fn = recv_done;
    recv->cookie   = buffer;
    recv->type     = PAMI_TYPE_BYTE;
    recv->addr     = buffer;
    recv->offset   = 0;
    recv->data_fn  = PAMI_DATA_COPY;
  }
  else {
    memcpy (buffer, pipe_addr, pipe_size);
    recv_done (NULL, buffer, PAMI_SUCCESS);
  }
}

static void short_pkt_dispatch (pami_context_t       context,
                                void               * clientdata,
                                const void         * header_addr,
                                size_t               header_size,
                                const void         * pipe_addr,
                                size_t               pipe_size,
                                pami_endpoint_t      origin,
                                pami_recv_t        * recv)
{
  int alloc_size = pipe_size;
  char *buffer = (char *)CmiAlloc(alloc_size);

  memcpy (buffer, pipe_addr, pipe_size);
  char *smsg = (char *)pipe_addr;
  char *msg  = (char *)buffer;

  CMI_CHECK_CHECKSUM(smsg, pipe_size);
  if (CMI_MAGIC(smsg) != CHARM_MAGIC_NUMBER) {
    /* received a non-charm msg */
    CmiAbort("Charm++ Error: Non-Charm++ message received. If your application sends a very large number of messages, this may be caused by overflow in the low-level FIFOs. Please set the environment variable MUSPI_INJFIFOSIZE if the application sends a large number of small messages (<=4K bytes), and/or PAMI_RGETINJFIFOSIZE if it sends a large number of large messages. The default value of these variables is 65536, which is sufficient for 1000 messages in flight; please try a larger value. Note that these FIFOs consume memory: 10*FIFO_SIZE per core. Please contact the Charm++ developers for further information.\n");
  }

#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 0;
#endif
  handleOneRecvedMsg(pipe_size, msg);
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 1;
#endif
}
void rzv_pkt_dispatch (pami_context_t       context,
                       void               * clientdata,
                       const void         * header_addr,
                       size_t               header_size,
                       const void         * pipe_addr,
                       size_t               pipe_size,
                       pami_endpoint_t      origin,
                       pami_recv_t        * recv);

void ack_pkt_dispatch (pami_context_t       context,
                       void               * clientdata,
                       const void         * header_addr,
                       size_t               header_size,
                       const void         * pipe_addr,
                       size_t               pipe_size,
                       pami_endpoint_t      origin,
                       pami_recv_t        * recv);

void rzv_recv_done (pami_context_t     ctxt,
                    void             * clientdata,
                    pami_result_t      result);
//approximate sleep: spin for the given number of timebase cycles
size_t mysleep_iter = 0;
void mysleep (unsigned long cycles) {
  unsigned long start = GetTimeBase();
  unsigned long end = start + cycles;

  while (start < end) {
    mysleep_iter ++;
    start = GetTimeBase();
  }

  return;
}
static void  *test_buf;
volatile int  pami_barrier_flag = 0;
//typedef pami_result_t (*pamix_proc_memalign_fn) (void**, size_t, size_t, const char*);

void pami_barrier_done (void *ctxt, void *clientdata, pami_result_t err)
{
  int *active = (int *) clientdata;
  (*active)--;
}

pami_client_t      cmi_pami_client;
pami_context_t   * cmi_pami_contexts;
size_t             cmi_pami_numcontexts;
pami_geometry_t    world_geometry;
pami_xfer_t        pami_barrier;
char clientname[] = "Converse";

#if 1
typedef struct _cmi_pami_mregion_t {
  pami_memregion_t   mregion;
  void             * baseVA;
} CmiPAMIMemRegion_t;

//one for each of the 64 possible contexts
CmiPAMIMemRegion_t  cmi_pami_memregion[MAX_NUM_CONTEXTS];
#endif

#include "malloc.h"
//void *l2atomicbuf;
void _alias_rank (int rank) {
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
  CmiState cs   = CmiGetState();
  CmiState cs_r = CmiGetStateN(rank);

  cs->rank = cs_r->rank;
  cs->pe   = cs_r->pe;
#endif
}

#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS

pami_result_t init_comm_thread (pami_context_t context, void *cookie)
{
  CmiState cs  = CmiGetState();
  CmiState cs0 = CmiGetStateN(0);
  *cs = *cs0; //Alias the comm thread to rank 0
  //printf("Initialized comm thread, my rank %d, my pe %d\n",
  //       CmiMyRank(), CmiMyPe());

  //Notify the main thread that this comm thread has been initialized
  *(int*)cookie = 0;

#if 1
  //set the seed used to choose the destination context
  uint64_t rseedl = r_seed;
  rseedl |= (uint64_t)context;
  r_seed = ((uint32_t)rseedl) ^ ((uint32_t)(rseedl >> 32));
#endif

  _cmi_bgq_incommthread = 1;

  return PAMI_SUCCESS;
}
typedef void (*pamix_progress_function) (pami_context_t context, void *cookie);
typedef pami_result_t (*pamix_progress_register_fn)
  (pami_context_t            context,
   pamix_progress_function   progress_fn,
   pamix_progress_function   suspend_fn,
   pamix_progress_function   resume_fn,
   void                    * cookie);
typedef pami_result_t (*pamix_progress_enable_fn) (pami_context_t context,
                                                   int            event_type);
typedef pami_result_t (*pamix_progress_disable_fn) (pami_context_t context,
                                                    int            event_type);

#define PAMI_EXTENSION_OPEN(client, name, ext)    \
  ({                                              \
    pami_result_t rc;                             \
    rc = PAMI_Extension_open(client, name, ext);  \
    CmiAssert (rc == PAMI_SUCCESS);               \
  })

#define PAMI_EXTENSION_FUNCTION(type, name, ext)  \
  ({                                              \
    void *fn;                                     \
    fn = PAMI_Extension_symbol(ext, name);        \
    CmiAssert (fn != NULL);                       \
    (type)fn;                                     \
  })
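/* Both macros above rely on GCC statement expressions -- ({ ... }) -- so the
 * last expression is the macro's value; PAMI_EXTENSION_FUNCTION evaluates to
 * the casted symbol pointer. A minimal usage sketch, mirroring
 * CMI_Progress_init below (`ext` here is an illustrative placeholder):
 *   pamix_progress_register_fn reg =
 *     PAMI_EXTENSION_FUNCTION(pamix_progress_register_fn, "register", ext);
 */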
pami_extension_t            cmi_ext_progress;
pamix_progress_register_fn  cmi_progress_register;
pamix_progress_enable_fn    cmi_progress_enable;
pamix_progress_disable_fn   cmi_progress_disable;

extern int quietMode;

int CMI_Progress_init(int start, int ncontexts) {
  if ((CmiMyPe() == 0) && (!quietMode))
    printf("Enabling communication threads\n");

  PAMI_EXTENSION_OPEN(cmi_pami_client, "EXT_async_progress", &cmi_ext_progress);
  cmi_progress_register = PAMI_EXTENSION_FUNCTION(pamix_progress_register_fn, "register", cmi_ext_progress);
  cmi_progress_enable   = PAMI_EXTENSION_FUNCTION(pamix_progress_enable_fn,   "enable",   cmi_ext_progress);
  cmi_progress_disable  = PAMI_EXTENSION_FUNCTION(pamix_progress_disable_fn,  "disable",  cmi_ext_progress);

  int i = 0;
  for (i = start; i < start + ncontexts; ++i) {
    //fprintf(stderr, "Enabling progress on context %d\n", i);
    cmi_progress_register (cmi_pami_contexts[i], NULL, NULL, NULL, NULL);
    cmi_progress_enable   (cmi_pami_contexts[i], 0 /*progress all*/);
  }

  pami_work_t  work;
  volatile int x;
  for (i = start; i < start + ncontexts; ++i) {
    x = 1;
    PAMI_Context_post(cmi_pami_contexts[i], &work,
                      init_comm_thread, (void*)&x);
    while(x);
  }

  return 0;
}
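/* The while(x) spin above is a handshake: init_comm_thread (posted to each
 * context and run by its progress thread) clears the cookie, so returning from
 * CMI_Progress_init guarantees every comm thread has aliased itself to rank 0
 * and seeded its context-selection RNG before any messages flow. */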
int CMI_Progress_finalize(int start, int ncontexts) {
  int i = 0;
  for (i = start; i < start + ncontexts; ++i)
    cmi_progress_disable (cmi_pami_contexts[i], 0 /*progress all*/);
  PAMI_Extension_close (cmi_ext_progress);
  return 0;
}
#endif
#include "manytomany.c"

void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID)
{
  int n, i, count;

  PAMI_Client_create (clientname, &cmi_pami_client, NULL, 0);
  size_t _n = 1;
#if CMK_PAMI_MULTI_CONTEXT
  //have one context for each group of THREADS_PER_CONTEXT (4) threads
  if ((_Cmi_mynodesize % THREADS_PER_CONTEXT) == 0)
    _n = _Cmi_mynodesize / THREADS_PER_CONTEXT;
  else
    _n = 1 + (_Cmi_mynodesize / THREADS_PER_CONTEXT);
#endif

  cmi_pami_contexts = (pami_context_t *) malloc (sizeof(pami_context_t) * _n);
  pami_result_t rc = PAMI_Context_createv (cmi_pami_client, NULL, 0, cmi_pami_contexts, _n);
  if (rc != PAMI_SUCCESS) {
    fprintf(stderr, "PAMI_Context_createv failed for %zu contexts\n", _n);
    assert(0);
  }
  cmi_pami_numcontexts = _n;

  //fprintf(stderr, "Creating %zu pami contexts\n", _n);

  pami_configuration_t configuration;
  pami_result_t result;

  configuration.name = PAMI_CLIENT_TASK_ID;
  result = PAMI_Client_query(cmi_pami_client, &configuration, 1);
  *myNodeID = configuration.value.intval;

  configuration.name = PAMI_CLIENT_NUM_TASKS;
  result = PAMI_Client_query(cmi_pami_client, &configuration, 1);
  *numNodes = configuration.value.intval;

  pami_dispatch_hint_t options = (pami_dispatch_hint_t) {0};
  pami_dispatch_callback_function pfn;
  for (i = 0; i < _n; ++i) {
    pfn.p2p = pkt_dispatch;
    PAMI_Dispatch_set (cmi_pami_contexts[i],
                       CMI_PAMI_DISPATCH,
                       pfn,
                       NULL,
                       options);

    pfn.p2p = ack_pkt_dispatch;
    PAMI_Dispatch_set (cmi_pami_contexts[i],
                       CMI_PAMI_ACK_DISPATCH,
                       pfn,
                       NULL,
                       options);

    pfn.p2p = rzv_pkt_dispatch;
    PAMI_Dispatch_set (cmi_pami_contexts[i],
                       CMI_PAMI_RZV_DISPATCH,
                       pfn,
                       NULL,
                       options);

    pfn.p2p = short_pkt_dispatch;
    PAMI_Dispatch_set (cmi_pami_contexts[i],
                       CMI_PAMI_SHORT_DISPATCH,
                       pfn,
                       NULL,
                       options);
  }

#if 1
  size_t bytes_out;
  void *buf = malloc(sizeof(long));
  uint32_t retval;
  Kernel_MemoryRegion_t k_mregion;
  retval = Kernel_CreateMemoryRegion (&k_mregion, buf, sizeof(long));
  assert(retval == 0);
  for (i = 0; i < _n; ++i) {
    cmi_pami_memregion[i].baseVA = k_mregion.BaseVa;
    PAMI_Memregion_create (cmi_pami_contexts[i],
                           k_mregion.BaseVa,
                           k_mregion.Bytes,
                           &bytes_out,
                           &cmi_pami_memregion[i].mregion);
  }
  free(buf);
#endif
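  /* Note (a BG/Q-specific assumption, not stated here): Kernel_CreateMemoryRegion
   * is believed to return the containing kernel memory region for the queried
   * address -- typically spanning the whole process heap -- rather than just the
   * sizeof(long) probe buffer. That would explain why the probe buffer can be
   * freed immediately while the PAMI memregions created from k_mregion remain
   * valid for the rendezvous RDMA transfers below. */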
  //fprintf(stderr, "%d Initializing Converse PAMI machine layer on %d tasks\n", _Cmi_mynode, _Cmi_numnodes);

  ///////////---------------------------------/////////////////////
  //////////----------- Initialize Barrier -------////////////////
  size_t               num_algorithm[2];
  pami_algorithm_t    *always_works_algo = NULL;
  pami_metadata_t     *always_works_md   = NULL;
  pami_algorithm_t    *must_query_algo   = NULL;
  pami_metadata_t     *must_query_md     = NULL;
  pami_xfer_type_t     xfer_type         = PAMI_XFER_BARRIER;

  /* Docs01: Get the World Geometry */
  result = PAMI_Geometry_world (cmi_pami_client, &world_geometry);
  if (result != PAMI_SUCCESS)
  {
    fprintf (stderr, "Error. Unable to get world geometry: result = %d\n", result);
    return;
  }

  result = PAMI_Geometry_algorithms_num(world_geometry,
                                        xfer_type,
                                        (size_t*)num_algorithm);

  if (result != PAMI_SUCCESS || num_algorithm[0] == 0)
  {
    fprintf (stderr,
             "Error. Unable to query algorithms, or no algorithms available: result = %d\n",
             result);
    return;
  }

  always_works_algo = (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*num_algorithm[0]);
  always_works_md   = (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*num_algorithm[0]);
  must_query_algo   = (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*num_algorithm[1]);
  must_query_md     = (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*num_algorithm[1]);

  /* Docs05: Query the algorithm lists */
  result = PAMI_Geometry_algorithms_query(world_geometry,
                                          xfer_type,
                                          always_works_algo,
                                          always_works_md,
                                          num_algorithm[0],
                                          must_query_algo,
                                          must_query_md,
                                          num_algorithm[1]);

  int opt_alg = 0, nalg = 0;
  for (nalg = 0; nalg < num_algorithm[0]; ++nalg)
    if (strstr(always_works_md[nalg].name, "GI") != NULL) {
      opt_alg = nalg;
      break;
    }

  if ((_Cmi_mynode == 0) && (!quietMode))
    printf ("Choosing optimized barrier algorithm name %s\n",
            always_works_md[opt_alg].name);

  pami_barrier.cb_done   = pami_barrier_done;
  pami_barrier.cookie    = (void*) &pami_barrier_flag;
  pami_barrier.algorithm = always_works_algo[opt_alg];

  /* Docs06: Check the result of the algorithm query */
  if (result != PAMI_SUCCESS)
  {
    fprintf (stderr, "Error. Unable to query the barrier algorithm: result = %d\n", result);
    return;
  }

  CmiNetworkBarrier(0);
  CmiNetworkBarrier(0);
  CmiNetworkBarrier(0);

  /* checksum flag */
  if (CmiGetArgFlag(*argv, "+checksum")) {
#if CMK_ERROR_CHECKING
    checksum_flag = 1;
    if (_Cmi_mynode == 0) CmiPrintf("Charm++: CheckSum checking enabled!\n");
#else
    if (_Cmi_mynode == 0) CmiPrintf("Charm++: +checksum ignored in optimized version!\n");
#endif
  }

#if SPECIFIC_PCQUEUE && CMK_SMP
  //if(CmiMyPe() == 0)
  //  printf(" in L2Atomic Queue\n");
  LRTSQueuePreInit();
  //reserve space for the pe queues and the node queue first
  int actualNodeSize = 64 / Kernel_ProcessCount();
  CmiMemAllocInit_bgq ((char*)l2atomicbuf +
                       (QUEUE_NUMS)*sizeof(L2AtomicState),
                       2*actualNodeSize*sizeof(L2AtomicState));
#endif

#if CMK_PERSISTENT_COMM
  _initPersistent(cmi_pami_contexts, _n);
#endif

  //Initialize the manytomany api
  _cmidirect_m2m_initialize (cmi_pami_contexts, _n);
}
void LrtsPreCommonInit(int everReturn)
{
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvInitialize(int, uselock);
  CpvAccess(uselock) = 1;
#endif
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
  if (CmiMyRank() == 0) {
    CMI_Progress_init(0, cmi_pami_numcontexts);
  }
#endif
}

void LrtsPostCommonInit(int everReturn)
{
  //printf ("before calling CmiBarrier() \n");
  CmiBarrier();
}

void LrtsPostNonLocal() {}

void LrtsDrainResources()
{
  while (MSGQLEN() > 0 || ORECVS() > 0) {
    LrtsAdvanceCommunication(0);
  }
  CmiNodeBarrier();
}

void LrtsExit()
{
  int rank0 = 0;
  CmiBarrier();
  if (CmiMyRank() == 0) {
    rank0 = 1;
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
    CMI_Progress_finalize(0, cmi_pami_numcontexts);
#endif
    PAMI_Context_destroyv(cmi_pami_contexts, cmi_pami_numcontexts);
    PAMI_Client_destroy(&cmi_pami_client);
  }
  CmiNodeBarrier();
  if (!CharmLibInterOperate) {
#if CMK_SMP
    if (rank0) {
      Delay(100000);
      exit(0);
    }
    else
      pthread_exit(0);
#else
    exit(0);
#endif
  }
}

void LrtsAbort(const char *message) {
  assert(0);
}
INLINE_KEYWORD void LrtsBeginIdle() {}

INLINE_KEYWORD void LrtsStillIdle() {}

void LrtsNotifyIdle()
{
#if CMK_SMP && CMK_PAMI_MULTI_CONTEXT
#if !CMK_ENABLE_ASYNC_PROGRESS && SPECIFIC_QUEUE
  //Wait on the atomic queue to get a message with very low core
  //overheads. One thread calls advance more frequently.
  ////spin wait for 2-4us when idle
  ////process node queue messages every 10us
  ////Idle cores will only use one LMQ slot and an int sum
  if ((CmiMyRank() % THREADS_PER_CONTEXT) == 0)
  { LRTSQueueSpinWait(CmiMyRecvQueue(), 10); }
#endif
#if 0 && SPECIFIC_QUEUE && CMK_NODE_QUEUE_AVAILABLE
  else
  { LRTSQueueSpinWait(CmiMyRecvQueue(), 1000); }
#endif
#endif
}
pami_result_t machine_send_handoff (pami_context_t context, void *msg);
void machine_send (pami_context_t    context,
                   int               node,
                   int               rank,
                   int               size,
                   char            * msg,
                   int               to_lock) __attribute__((always_inline));

CmiCommHandle LrtsSendFunc(int node, int destPE, int size, char *msg, int to_lock)
{
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
  //int c = myrand(&r_seed) % cmi_pami_numcontexts;
  int c = node % cmi_pami_numcontexts;
  pami_context_t my_context = cmi_pami_contexts[c];
  CmiMsgHeaderBasic *hdr = (CmiMsgHeaderBasic *)msg;
  hdr->dstnode = node;
  hdr->size    = size;

  PAMI_Context_post(my_context, (pami_work_t *)hdr->work,
                    machine_send_handoff, msg);
#else
  pami_context_t my_context = MY_CONTEXT();
  machine_send (my_context, node, CMI_DEST_RANK(msg), size, msg, to_lock);
#endif
  return 0;
}

#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
pami_result_t machine_send_handoff (pami_context_t context, void *msg) {
  CmiMsgHeaderBasic *hdr = (CmiMsgHeaderBasic *)msg;
  int node = hdr->dstnode;
  int rank = hdr->rank;
  int size = hdr->size;

  //As this executes on the comm thread, no locking is necessary
  machine_send(context, node, rank, size, msg, 0);
  return PAMI_SUCCESS;
}
#endif
void machine_send (pami_context_t    context,
                   int               node,
                   int               rank,
                   int               size,
                   char            * msg,
                   int               to_lock)
{
  CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
  CMI_MSG_SIZE(msg) = size;
  CMI_SET_CHECKSUM(msg, size);

  pami_endpoint_t target;

#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  to_lock = CpvAccess(uselock);
#endif

#if CMK_PAMI_MULTI_CONTEXT && CMK_NODE_QUEUE_AVAILABLE
  size_t dst_context = (rank != DGRAM_NODEMESSAGE) ? (rank >> LTPS) : (rand_r(&r_seed) % cmi_pami_numcontexts);
  //Choose a context at random:
  //size_t dst_context = myrand(&r_seed) % cmi_pami_numcontexts;
#else
  size_t dst_context = 0;
#endif
  PAMI_Endpoint_create (cmi_pami_client, (pami_task_t)node, dst_context, &target);

  //fprintf (stderr, "Calling PAMI send to %d magic %d size %d\n", node, CMI_MAGIC(msg), size);
  if (CMI_LIKELY(size < SHORT_CUTOFF)) {
    pami_send_immediate_t parameters;

    parameters.dispatch = CMI_PAMI_DISPATCH;
    if (CMI_LIKELY(CMI_BROADCAST_ROOT(msg) == 0))
#if CMK_NODE_QUEUE_AVAILABLE
      if (CMI_LIKELY(rank != DGRAM_NODEMESSAGE))
#endif
        //use the short dispatch if this is neither a bcast nor an SMP node message
        parameters.dispatch = CMI_PAMI_SHORT_DISPATCH;

    parameters.header.iov_base = NULL; //&rank;
    parameters.header.iov_len  = 0;    //sizeof(int);
    parameters.data.iov_base   = msg;
    parameters.data.iov_len    = size;
    parameters.dest = target;

    if (to_lock)
      PAMIX_CONTEXT_LOCK(context);

    PAMI_Send_immediate (context, &parameters);

    if (to_lock)
      PAMIX_CONTEXT_UNLOCK(context);
    CmiFree(msg);
  }
  else if (size < EAGER_CUTOFF) {
    pami_send_t parameters;
    parameters.send.dispatch        = CMI_PAMI_DISPATCH;
    parameters.send.header.iov_base = NULL; //&rank;
    parameters.send.header.iov_len  = 0;    //sizeof(int);
    parameters.send.data.iov_base   = msg;
    parameters.send.data.iov_len    = size;
    parameters.events.cookie        = msg;
    parameters.events.local_fn      = send_done;
    parameters.events.remote_fn     = NULL;
    memset(&parameters.send.hints, 0, sizeof(parameters.send.hints));
    parameters.send.dest = target;

    if (to_lock)
      PAMIX_CONTEXT_LOCK(context);
    INCR_MSGQLEN();
    PAMI_Send (context, &parameters);
    if (to_lock)
      PAMIX_CONTEXT_UNLOCK(context);
  }
  else {
    CmiPAMIRzv_t rzv;
    rzv.bytes       = size;
    rzv.buffer      = msg;
    rzv.offset      = (size_t)msg - (size_t)cmi_pami_memregion[0].baseVA;
    rzv.dst_context = dst_context;

    pami_send_immediate_t parameters;
    parameters.dispatch        = CMI_PAMI_RZV_DISPATCH;
    parameters.header.iov_base = &rzv;
    parameters.header.iov_len  = sizeof(rzv);
    parameters.data.iov_base   = &cmi_pami_memregion[0].mregion;
    parameters.data.iov_len    = sizeof(pami_memregion_t);
    parameters.dest = target;

    if (to_lock)
      PAMIX_CONTEXT_LOCK(context);

    PAMI_Send_immediate (context, &parameters);

    if (to_lock)
      PAMIX_CONTEXT_UNLOCK(context);
  }
}
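/* Send-path summary (sizes in bytes): messages below SHORT_CUTOFF (128) go out
 * via PAMI_Send_immediate and are freed right away; messages below EAGER_CUTOFF
 * (4096) use a buffered PAMI_Send, with send_done freeing the message on local
 * completion; anything larger takes the rendezvous path, publishing only the
 * header and the sender's memregion so the receiver can PAMI_Rget the payload. */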
#if !CMK_ENABLE_ASYNC_PROGRESS
//threads have to progress the contexts themselves
void LrtsAdvanceCommunication(int whenidle) {
  pami_context_t my_context = MY_CONTEXT();

#if CMK_SMP
  if (PAMIX_CONTEXT_TRYLOCK(my_context))
  {
    PAMI_Context_advance(my_context, 1);
    PAMIX_CONTEXT_UNLOCK(my_context);
  }
#else
  PAMI_Context_advance(my_context, 1);
#endif
}
#endif

static pami_result_t machine_network_barrier(pami_context_t my_context,
                                             int to_lock)
{
  pami_result_t result = PAMI_SUCCESS;
  if (to_lock)
    PAMIX_CONTEXT_LOCK(my_context);
  result = PAMI_Collective(my_context, &pami_barrier);
  if (to_lock)
    PAMIX_CONTEXT_UNLOCK(my_context);

  if (result != PAMI_SUCCESS)
    fprintf (stderr, "Error. Unable to issue collective. result = %d\n", result);

  return result;
}
pami_result_t network_barrier_handoff(pami_context_t context, void *msg)
{
  return machine_network_barrier(context, 0);
}

void LrtsBarrier()
{
  CmiNetworkBarrier(1);
}

static void CmiNetworkBarrier(int async) {
  pami_context_t my_context = cmi_pami_contexts[0];
  pami_barrier_flag = 1;
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
  if (async) {
    pami_work_t work;
    PAMI_Context_post(my_context, &work, network_barrier_handoff, NULL);
    while (pami_barrier_flag);
    //fprintf (stderr, "After Network Barrier\n");
  }
  else
#endif
  {
    machine_network_barrier(my_context, 1);
    PAMIX_CONTEXT_LOCK(my_context);
    while (pami_barrier_flag)
      PAMI_Context_advance (my_context, 100);
    PAMIX_CONTEXT_UNLOCK(my_context);
  }
}
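/* Barrier usage sketch: LrtsBarrier() always runs the network barrier on
 * context 0. With async progress enabled, the collective is posted to the
 * progress thread and the caller spins on pami_barrier_flag; otherwise the
 * caller advances the context itself until pami_barrier_done clears the flag. */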
static void sendAck (pami_context_t      context,
                     CmiPAMIRzvRecv_t  * recv)
{
  pami_send_immediate_t parameters;
  parameters.dispatch        = CMI_PAMI_ACK_DISPATCH;
  parameters.header.iov_base = &recv->src_buffer;
  parameters.header.iov_len  = sizeof(void *);
  parameters.data.iov_base   = NULL;
  parameters.data.iov_len    = 0;
  parameters.dest = recv->src_ep;

  //Called from advance, hence we don't need a mutex
  PAMI_Send_immediate (context, &parameters);
}

void rzv_recv_done (pami_context_t     ctxt,
                    void             * clientdata,
                    pami_result_t      result)
{
  CmiPAMIRzvRecv_t recv = *(CmiPAMIRzvRecv_t *)clientdata;
  recv_done(ctxt, recv.msg, PAMI_SUCCESS);
  sendAck(ctxt, &recv);
}
void rzv_pkt_dispatch (pami_context_t       context,
                       void               * clientdata,
                       const void         * header_addr,
                       size_t               header_size,
                       const void         * pipe_addr,
                       size_t               pipe_size,
                       pami_endpoint_t      origin,
                       pami_recv_t        * recv)
{
  INCR_ORECVS();

  CmiPAMIRzv_t *rzv_hdr = (CmiPAMIRzv_t *) header_addr;
  CmiAssert (header_size == sizeof(CmiPAMIRzv_t));
  int alloc_size = rzv_hdr->bytes;
  char *buffer = (char *)CmiAlloc(alloc_size + sizeof(CmiPAMIRzvRecv_t));
  //char *buffer=(char*)CmiAlloc(alloc_size+sizeof(CmiPAMIRzvRecv_t)+sizeof(int));
  //*(int *)(buffer+alloc_size) = *(int *)header_addr;
  CmiAssert (recv == NULL);

  CmiPAMIRzvRecv_t *rzv_recv = (CmiPAMIRzvRecv_t *)(buffer + alloc_size);
  rzv_recv->msg        = buffer;
  rzv_recv->src_ep     = origin;
  rzv_recv->src_buffer = rzv_hdr->buffer;

  CmiAssert (pipe_addr != NULL);
  pami_memregion_t *mregion = (pami_memregion_t *) pipe_addr;
  CmiAssert (pipe_size == sizeof(pami_memregion_t));

  //Rzv injection fifos are on the 17th core, shared by all contexts
  pami_rget_simple_t rget;
  rget.rma.dest    = origin;
  rget.rma.bytes   = rzv_hdr->bytes;
  rget.rma.cookie  = rzv_recv;
  rget.rma.done_fn = rzv_recv_done;
  rget.rma.hints.buffer_registered = PAMI_HINT_ENABLE;
  rget.rma.hints.use_rdma          = PAMI_HINT_ENABLE;
  rget.rdma.local.mr      = &cmi_pami_memregion[rzv_hdr->dst_context].mregion;
  rget.rdma.local.offset  = (size_t)buffer -
                            (size_t)cmi_pami_memregion[rzv_hdr->dst_context].baseVA;
  rget.rdma.remote.mr     = mregion; //from the message payload
  rget.rdma.remote.offset = rzv_hdr->offset;

  //printf ("starting rget\n");
  PAMI_Rget (context, &rget);
}
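/* Offsets on both sides of the rget are region-relative: the sender computed
 * rzv.offset against its own cmi_pami_memregion[0].baseVA, and the receiver
 * computes rdma.local.offset against the baseVA of the region it registered
 * for dst_context, so each memregion handle resolves to the intended bytes. */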
void ack_pkt_dispatch (pami_context_t       context,
                       void               * clientdata,
                       const void         * header_addr,
                       size_t               header_size,
                       const void         * pipe_addr,
                       size_t               pipe_size,
                       pami_endpoint_t      origin,
                       pami_recv_t        * recv)
{
  char **buf = (char **)header_addr;
  CmiFree (*buf);
}
/*==========================================================*/

/* Optional routines which could use common code shared with
   other machine layer implementations. */

/* MULTICAST/VECTOR SENDING FUNCTIONS

 * Depending on some flags, some other delivery functions may be needed.
 */

#if ! CMK_MULTICAST_LIST_USE_COMMON_CODE

void LrtsSyncListSendFn(int npes, int *pes, int size, char *msg) {
  char *copymsg;
  copymsg = (char *)CmiAlloc(size);
  CmiMemcpy(copymsg, msg, size);
  CmiFreeListSendFn(npes, pes, size, copymsg);
}

typedef struct ListMulticastVec_t {
  int   *pes;
  int    npes;
  char  *msg;
  int    size;
} ListMulticastVec;
void machineFreeListSendFn(pami_context_t context,
                           int            npes,
                           int          * pes,
                           int            size,
                           char         * msg);

pami_result_t machineFreeList_handoff(pami_context_t context, void *cookie)
{
  ListMulticastVec *lvec = (ListMulticastVec *) cookie;
  machineFreeListSendFn(context, lvec->npes, lvec->pes, lvec->size, lvec->msg);
  CmiFree(cookie);
  return PAMI_SUCCESS;
}

void LrtsFreeListSendFn(int npes, int *pes, int size, char *msg) {
  //printf("%d: In Free List Send Fn imm %d\n", CmiMyPe(), CmiIsImmediate(msg));

  CMI_SET_BROADCAST_ROOT(msg, 0);
  CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
  CmiMsgHeaderBasic *hdr = (CmiMsgHeaderBasic *)msg;
  hdr->size = size;

  //Fast path
  if (npes == 1) {
    CMI_DEST_RANK(msg) = CmiRankOf(pes[0]);
    LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes[0]), CmiMyPartition()), pes[0], size, msg, 1);
    return;
  }

  pami_context_t my_context = MY_CONTEXT();
#if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
  ListMulticastVec *lvec = (ListMulticastVec *)
    CmiAlloc(sizeof(ListMulticastVec) + sizeof(int)*npes);
  lvec->pes = (int*)((char*)lvec + sizeof(ListMulticastVec));
  int i = 0;
  for (i = 0; i < npes; i++)
    lvec->pes[i] = pes[i];
  lvec->npes = npes;
  lvec->msg  = msg;
  lvec->size = size;
  PAMI_Context_post(my_context, (pami_work_t*)hdr->work,
                    machineFreeList_handoff, lvec);
#else
  machineFreeListSendFn(my_context, npes, pes, size, msg);
#endif
}
void machineFreeListSendFn(pami_context_t my_context, int npes, int *pes, int size, char *msg) {
  int i;
  char *copymsg;
#if CMK_SMP
  //deliver local copies through the node's peer queues
  for (i = 0; i < npes; i++) {
    if (CmiNodeOf(pes[i]) == CmiMyNode()) {
      copymsg = (char *)CmiAlloc(size);
      CmiAssert(copymsg != NULL);
      CmiMemcpy(copymsg, msg, size);
      CmiSendPeer(CmiRankOf(pes[i]), size, copymsg);
    }
  }
#else
  for (i = 0; i < npes; i++) {
    if (CmiNodeOf(pes[i]) == CmiMyNode()) {
      CmiSyncSend(pes[i], size, msg);
    }
  }
#endif

  PAMIX_CONTEXT_LOCK(my_context);
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 0;
#endif

  for (i = 0; i < npes; i++) {
    if (CmiNodeOf(pes[i]) == CmiMyNode())
      ; //local destinations were handled above
    else if (i < npes - 1) {
#if !CMK_SMP
      CmiReference(msg);
      copymsg = msg;
#else
      copymsg = (char *)CmiAlloc(size);
      CmiAssert(copymsg != NULL);
      CmiMemcpy(copymsg, msg, size);
#endif
      CMI_DEST_RANK(copymsg) = CmiRankOf(pes[i]);
      LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes[i]), CmiMyPartition()), pes[i], size, copymsg, 0);
    }
  }

  //the last remote destination receives the original message instead of a copy
  if (npes && CmiNodeOf(pes[npes-1]) != CmiMyNode()) {
    CMI_DEST_RANK(msg) = CmiRankOf(pes[npes-1]);
    LrtsSendFunc(CmiGetNodeGlobal(CmiNodeOf(pes[npes-1]), CmiMyPartition()), pes[npes-1], size, msg, 0);
  }
  else
    CmiFree(msg);

  PAMIX_CONTEXT_UNLOCK(my_context);
#if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
  CpvAccess(uselock) = 1;
#endif
}
CmiCommHandle LrtsAsyncListSendFn(int npes, int *pes, int size, char *msg) {
  CmiAbort("CmiAsyncListSendFn not implemented.");
  return (CmiCommHandle) 0;
}
#endif

#include "cmimemcpy_qpx.h"

#if CMK_PERSISTENT_COMM
#include "machine-persistent.c"
#endif