netlrts & verbs: remove unused CmiIdleState declaration
[charm.git] / src / arch / verbs / machine.c
blob5addd36731b01d781882036671c6a4109bd09d6f
2 /** @file
3 * Basic NET-LRTS implementation of Converse machine layer
4 * @ingroup NET
5 */
7 /** @defgroup NET
8 * NET implementation of machine layer, ethernet in particular
9 * @ingroup Machine
11 * THE DATAGRAM STREAM
13 * Messages are sent using UDP datagrams. The sender allocates a
14 * struct for each datagram to be sent. These structs stick around
15 * until slightly after the datagram is acknowledged.
17 * Datagrams are transmitted node-to-node (as opposed to pe-to-pe).
18 * Each node has an OtherNode struct for every other node in the
19 * system. The OtherNode struct contains:
21 * send_queue (all datagram-structs not yet transmitted)
22 * send_window (all datagram-structs transmitted but not ack'd)
24 * When an acknowledgement comes in, all packets in the send-window
25 * are either marked as acknowledged or pushed back into the send
26 * queue for retransmission.
28 * THE OUTGOING MESSAGE
30 * When you send or broadcast a message, the first thing the system
31 * does is create an OutgoingMsg struct to represent the
32 * operation. The OutgoingMsg contains a very direct expression
33 * of what you want to do:
35 * OutgoingMsg:
37 * size --- size of message in bytes
38 * data --- pointer to the buffer containing the message
39 * src --- processor which sent the message
40 * dst --- destination processor (-1=broadcast, -2=broadcast all)
41 * freemode --- see below.
42 * refcount --- see below.
44 * The OutgoingMsg is kept around until the transmission is done, then
45 * it is garbage collected --- the refcount and freemode fields are
46 * to assist garbage collection.
48 * The freemode indicates which kind of buffer-management policy was
49 * used (sync, async, or freeing). The sync policy is handled
50 * superficially by immediately converting sync sends into freeing
51 * sends. Thus, the freemode can either be 'A' (async) or 'F'
52 * (freeing). If the freemode is 'F', then garbage collection
53 * involves freeing the data and the OutgoingMsg structure itself. If
54 * the freemode is 'A', then the only cleanup is to change the
55 * freemode to 'X', a condition which is then detectable by
56 * CmiAsyncMsgSent. In this case, the actual freeing of the
57 * OutgoingMsg is done by CmiReleaseCommHandle.
59 * When the transmission is initiated, the system computes how many
60 * datagrams need to be sent, total. This number is stored in the
61 * refcount field. Each time a datagram is delivered, the refcount
62 * is decremented; when it reaches zero, cleanup is performed. There
63 * are two exceptions to this rule. Exception 1: if the OutgoingMsg
64 * is a send (not a broadcast) and can be performed with shared
65 * memory, the entire datagram system is bypassed, the message is
66 * simply delivered and freed, not using the refcount mechanism at
67 * all. Exception 2: If the message is a broadcast, then part of the
68 * broadcast that can be done via shared memory is performed prior to
69 * initiating the datagram/refcount system.
71 * DATAGRAM FORMATS AND MESSAGE FORMATS
73 * Datagrams have this format:
75 * srcpe (16 bits) --- source processor number.
76 * magic ( 8 bits) --- magic number to make sure DG is good.
77 * dstrank ( 8 bits) --- destination processor rank.
78 * seqno (32 bits) --- packet sequence number.
79 * data (XX byte) --- user data.
81 * The only reason the srcpe is in there is because the receiver needs
82 * to know which receive window to use. The dstrank field is needed
83 * because transmission is node-to-node. Once the message is
84 * assembled by the node, it must be delivered to the appropriate PE.
85 * The dstrank field is used to encode certain special-case scenarios.
86 * If the dstrank is DGRAM_BROADCAST, the transmission is a broadcast,
87 * and should be delivered to all processors in the node. If the dstrank
88 * is DGRAM_ACKNOWLEDGE, the datagram is an acknowledgement datagram, in
89 * which case the srcpe is the number of the acknowledger, the seqno is
90 * always zero, and the user data is a list of the seqno's being
91 * acknowledged. There may be other dstrank codes for special functions.
93 * To send a message, one chops it up into datagrams and stores those
94 * datagrams in a send-queue. These outgoing datagrams aren't stored
95 * in the explicit format shown above. Instead, they are stored as
96 * ImplicitDgrams, which contain the datagram header and a pointer to
97 * the user data (which is in the user message buffer, which is in the
98 * OutgoingMsg). At transmission time these are combined together.
100 * The combination of the datagram header with the user's data is
101 * performed right in the user's message buffer. Note that the
102 * datagram header is exactly 64 bits. One simply overwrites 64 bits
103 * of the user's message with a datagram header, sends the datagram
104 * straight from the user's message buffer, then restores the user's
105 * buffer to its original state. There is a small problem with the
106 * first datagram of the message: one needs 64 bits of space to store
107 * the datagram header. To make sure this space is there, we added a
108 * 64-bit unused space to the front of the Cmi message header. In
109 * addition to this, we also add 32 bits to the Cmi message header
110 * to make room for a length-field, making it possible to identify
111 * message boundaries.
113 * CONCURRENCY CONTROL
115 * This has changed recently.
117 * EFFICIENCY NOTES
119 * The sender-side does little copying. The async and freeing send
120 * routines do no copying at all. The sync send routines copy the
121 * message, then use the freeing-send routines. The other alternative
122 * is to not copy the message, and use the async send mechanism
123 * combined with a blocking wait. Blocking wait seems like a bad
124 * idea, since it could take a VERY long time to get all those
125 * datagrams out the door.
127 * The receiver side, unfortunately, must copy. To avoid copying,
128 * it would have to receive directly into a preallocated message buffer.
129 * Unfortunately, this can't work: there's no way to know how much
130 * memory to preallocate, and there's no way to know which datagram
131 * is coming next. Thus, we receive into fixed-size (large) datagram
132 * buffers. These are then inspected, and the messages extracted from
133 * them.
135 * Note that we are allocating a large number of structs: OutgoingMsg's,
136 * ImplicitDgrams, ExplicitDgrams. By design, each of these structs
137 * is a fixed-size structure. Thus, we can do memory allocation by
138 * simply keeping a linked-list of unused structs around. The only
139 * place where expensive memory allocation is performed is in the
140 * sync routines.
142 * Since the datagrams from one node to another are fully ordered,
143 * there is slightly more ordering than is needed: in theory, the
144 * datagrams of one message don't need to be ordered relative to the
145 * datagrams of another. This was done to simplify the sequencing
146 * mechanisms: implementing a fully-ordered stream is much simpler
147 * than a partially-ordered one. It also makes it possible to
148 * modularize, layering the message transmitter on top of the
149 * datagram-sequencer. In other words, it was just easier this way.
150 * Hopefully, this won't cause serious degradation: LAN's rarely get
151 * datagrams out of order anyway.
153 * A potential efficiency problem is the lack of message-combining.
154 * One datagram could conceivably contain several messages. This
155 * might be more efficient; it's not clear how much overhead is
156 * involved in sending a short datagram. Message-combining isn't
157 * really ``integrated'' into the design of this software, but you
158 * could fudge it as follows. Whenever you pull a short datagram from
159 * the send-queue, check the next one to see if it's also a short
160 * datagram. If so, pack them together into a ``combined'' datagram.
161 * At the receive side, simply check for ``combined'' datagrams, and
162 * treat them as if they were simply two datagrams. This would
163 * require extra copying. I have no idea if this would be worthwhile.
165 *****************************************************************************/
168 * @addtogroup NET
169 * @{
172 /*****************************************************************************
174 * Include Files
176 ****************************************************************************/
178 #define _GNU_SOURCE 1
179 #include <stdarg.h> /*<- was <varargs.h>*/
181 #define CMK_USE_PRINTF_HACK 0
182 #if CMK_USE_PRINTF_HACK
183 /*HACK: turn printf into CmiPrintf, by just defining our own
184 external symbol "printf". This may be more trouble than it's worth,
185 since the only advantage is that it works properly with +syncprint.
187 This version *won't* work with fprintf(stdout,...) or C++ or Fortran I/O,
188 because they don't call printf. Has to be defined up here because we probably
189 haven't properly guessed this compiler's prototype for "printf".
/* Replacement for the C library's printf that forwards the format string and
   argument list to InternalPrintf.  Only compiled when CMK_USE_PRINTF_HACK
   is nonzero. */
static void InternalPrintf(const char *f, va_list l);
int printf(const char *fmt, ...) {
	va_list p;
	va_start(p, fmt);
	InternalPrintf(fmt,p);
	va_end(p);
	/* The number of characters written is not tracked; a fixed
	   non-negative value is returned instead. */
	return 10;
}
199 #endif
202 #include "converse.h"
203 #include "memory-isomalloc.h"
205 #include <stdio.h>
206 #include <stdlib.h>
207 #include <ctype.h>
208 #include <fcntl.h>
209 #include <errno.h>
210 #include <setjmp.h>
211 #include <signal.h>
212 #include <string.h>
213 #include <unistd.h>
215 /* define machine debug */
216 #include "machine.h"
218 /******************* Producer-Consumer Queues ************************/
219 #include "pcqueue.h"
221 #include "machine-smp.h"
223 #include "machine-lrts.h"
224 #include "machine-common-core.c"
226 #if CMK_USE_KQUEUE
227 #include <sys/event.h>
228 int _kq = -1;
229 #endif
231 #if CMK_USE_POLL
232 #include <poll.h>
233 #endif
235 #if CMK_MULTICORE
236 int Cmi_commthread = 0;
237 #endif
239 #include "conv-ccs.h"
240 #include "ccs-server.h"
241 #include "sockRoutines.h"
243 #if defined(_WIN32) && ! defined(__CYGWIN__)
244 /*For windows systems:*/
245 # include <windows.h>
246 # include <wincon.h>
247 # include <sys/types.h>
248 # include <sys/timeb.h>
249 # define fdopen _fdopen
250 # define SIGBUS -1 /*These signals don't exist in Win32*/
251 # define SIGKILL -1
252 # define SIGQUIT -1
253 /*# define SIGTERM -1*/ /* VC++ ver 8 now has SIGTERM */
255 #else /*UNIX*/
256 # include <pwd.h>
257 # include <unistd.h>
258 # include <fcntl.h>
259 # include <sys/file.h>
260 #endif
262 #if CMK_PERSISTENT_COMM
263 #include "machine-persistent.c"
264 #endif
266 #define PRINTBUFSIZE 16384
268 #ifdef __ONESIDED_IMPL
269 #ifdef __ONESIDED_NO_HARDWARE
270 int putSrcHandler;
271 int putDestHandler;
272 int getSrcHandler;
273 int getDestHandler;
274 #include "conv-onesided.c"
275 #endif
276 #endif
278 static void CommunicationServerNet(int withDelayMs, int where);
279 //static void CommunicationServer(int withDelayMs);
281 void CmiHandleImmediate();
282 extern int CmemInsideMem();
283 extern void CmemCallWhenMemAvail();
285 static unsigned int dataport=0;
286 static SOCKET dataskt;
288 extern void TokenUpdatePeriodic();
289 extern void getAvailSysMem();
291 static int Lrts_numNodes;
292 static int Lrts_myNode;
294 /****************************************************************************
296 * Handling Errors
298 * Errors should be handled by printing a message on stderr and
299 * calling exit(1). Nothing should be sent to charmrun, no attempt at
300 * communication should be made. The other processes will notice the
301 * abnormal termination and will deal with it.
303 * Rationale: if an error triggers an attempt to send a message,
304 * the attempt to send a message is likely to trigger another error,
305 * leading to an infinite loop and a process that spins instead of
306 * shutting down.
308 *****************************************************************************/
310 static int machine_initiated_shutdown=0;
311 static int already_in_signal_handler=0;
313 static void CmiDestroyLocks();
315 void MachineExit();
317 static void machine_exit(int status)
319 MACHSTATE(3," machine_exit");
320 machine_initiated_shutdown=1;
322 CmiDestroyLocks(); /* destroy locks to prevent dead locking */
323 EmergencyExit();
325 MachineExit();
326 exit(status);
329 static void charmrun_abort(const char*);
/* Hand msg to charmrun_abort, then leave through machine_exit with status 1. */
static void KillEveryone(const char *msg)
{
  charmrun_abort(msg);
  machine_exit(1);
}
/*
 * Abort with a numeric error code: formats "[pe] Fatal error #n", passes it
 * to charmrun_abort, then leaves through machine_exit with status 1.
 *
 * n: error code to report.
 */
static void KillEveryoneCode(int n)
{
  /* Two %d conversions plus about twenty literal characters; 100 bytes is
     ample as long as int is no wider than 64 bits. */
  char _s[100];
  sprintf(_s, "[%d] Fatal error #%d\n", CmiMyPe(), n);
  charmrun_abort(_s);
  machine_exit(1);
}
346 CpvExtern(int, freezeModeFlag);
348 static int Cmi_truecrash;
350 static void KillOnAllSigs(int sigNo)
352 const char *sig="unknown signal";
353 const char *suggestion="";
354 if (machine_initiated_shutdown ||
355 already_in_signal_handler)
356 machine_exit(1); /*Don't infinite loop if there's a signal during a signal handler-- just die.*/
357 already_in_signal_handler=1;
359 #if CMK_CCS_AVAILABLE
360 if (CpvAccess(cmiArgDebugFlag)) {
361 int reply = 0;
362 CpdNotify(CPD_SIGNAL,sigNo);
363 #if ! CMK_BIGSIM_CHARM
364 CcsSendReplyNoError(4,&reply);/*Send an empty reply if not*/
365 CpvAccess(freezeModeFlag) = 1;
366 CpdFreezeModeScheduler();
367 #else
368 CpdFreeze();
369 #endif
371 #endif
373 CmiDestroyLocks();
375 if (sigNo==SIGSEGV) {
376 sig="segmentation violation";
377 suggestion="Try running with '++debug', or linking with '-memory paranoid' (memory paranoid requires '+netpoll' at runtime).";
379 if (sigNo==SIGFPE) {
380 sig="floating point exception";
381 suggestion="Check for integer or floating-point division by zero.";
383 if (sigNo==SIGBUS) {
384 sig="bus error";
385 suggestion="Check for misaligned reads or writes to memory.";
387 if (sigNo==SIGILL) {
388 sig="illegal instruction";
389 suggestion="Check for calls to uninitialized function pointers.";
391 if (sigNo==SIGKILL) sig="caught signal KILL";
392 if (sigNo==SIGQUIT) sig="caught signal QUIT";
393 if (sigNo==SIGTERM) sig="caught signal TERM";
394 MACHSTATE1(5," Caught signal %s ",sig);
395 /*ifdef this part*/
396 #ifdef __FAULT__
397 if(sigNo == SIGKILL || sigNo == SIGQUIT || sigNo == SIGTERM){
398 CmiPrintf("[%d] Caught but ignoring signal\n",CmiMyPe());
399 } else
400 #endif
402 Cmi_truecrash = 0;
403 CmiAbortHelper("Caught Signal", sig, suggestion, 0, 1);
407 static void machine_atexit_check(void)
409 if (!machine_initiated_shutdown)
410 CmiAbort("unexpected call to exit by user program. Must use CkExit, not exit!");
411 #if 0 /*Wait for the user to press any key (for Win32 debugging)*/
412 fgetc(stdin);
413 #endif
416 #if !defined(_WIN32) || defined(__CYGWIN__)
417 static void HandleUserSignals(int signum)
419 int condnum = ((signum==SIGUSR1) ? CcdSIGUSR1 : CcdSIGUSR2);
420 CcdRaiseCondition(condnum);
422 #endif
424 /*****************************************************************************
426 * Utility routines for network machine interface.
428 *****************************************************************************/
/*
  Horrific #defines to hide the differences between select() and poll().

  CMK_PIPE_DECL(delayMs)      declares the local state the other macros use
  CMK_PIPE_SUB                argument list forwarding that state to a helper
  CMK_PIPE_PARAM              matching parameter list for such a helper
  CMK_PIPE_ADDREAD/ADDWRITE   register a descriptor before the call
  CMK_PIPE_CALL()             wait for activity, at most delayMs milliseconds
  CMK_PIPE_CHECKREAD/WRITE    test a descriptor after the call

  The delay is split into whole seconds plus a sub-second remainder so that
  tv_usec / tv_nsec stay in range for delays of one second or more.
 */
#if CMK_USE_POLL /*poll() version*/
# define CMK_PIPE_DECL(delayMs)  \
        struct pollfd fds[10]; \
        int nFds_sto=0; int *nFds=&nFds_sto; \
        int pollDelayMs=(delayMs);
# define CMK_PIPE_SUB fds,nFds
/* Expands to two statements: the poll() call, then a reset of the slot
   counter so the CHECK macros can walk the array from the start. */
# define CMK_PIPE_CALL() poll(fds, *nFds, pollDelayMs); *nFds=0

# define CMK_PIPE_PARAM struct pollfd *fds,int *nFds
# define CMK_PIPE_ADDREAD(rd_fd) \
        do {fds[*nFds].fd=(rd_fd); fds[*nFds].events=POLLIN; (*nFds)++;} while(0)
# define CMK_PIPE_ADDWRITE(wr_fd) \
        do {fds[*nFds].fd=(wr_fd); fds[*nFds].events=POLLOUT; (*nFds)++;} while(0)
/* The descriptor argument is ignored: each CHECK consumes the next slot, so
   CHECKs must be issued in the same order as the matching ADDs. */
# define CMK_PIPE_CHECKREAD(rd_fd) (fds[(*nFds)++].revents&POLLIN)
# define CMK_PIPE_CHECKWRITE(wr_fd) (fds[(*nFds)++].revents&POLLOUT)

#elif CMK_USE_KQUEUE /* kqueue version */

# define CMK_PIPE_DECL(delayMs) \
        if (_kq == -1) _kq = kqueue(); \
        struct kevent ke_sto; \
        struct kevent* ke = &ke_sto; \
        struct timespec tmo; \
        tmo.tv_sec = (delayMs)/1000; tmo.tv_nsec = ((delayMs)%1000)*1000000L;
# define CMK_PIPE_SUB ke
# define CMK_PIPE_CALL() kevent(_kq, NULL, 0, ke, 1, &tmo)

# define CMK_PIPE_PARAM struct kevent* ke
/* sizeof(*ke): clear the whole kevent structure, not just pointer-size bytes. */
# define CMK_PIPE_ADDREAD(rd_fd) \
        do { EV_SET(ke, (rd_fd), EVFILT_READ, EV_ADD, 0, 10, NULL); \
                kevent(_kq, ke, 1, NULL, 0, NULL); memset(ke, 0, sizeof(*ke));} while(0)
# define CMK_PIPE_ADDWRITE(wr_fd) \
        do { EV_SET(ke, (wr_fd), EVFILT_WRITE, EV_ADD, 0, 10, NULL); \
                kevent(_kq, ke, 1, NULL, 0, NULL); memset(ke, 0, sizeof(*ke));} while(0)
# define CMK_PIPE_CHECKREAD(rd_fd) (ke->ident == (rd_fd) && ke->filter == EVFILT_READ)
# define CMK_PIPE_CHECKWRITE(wr_fd) (ke->ident == (wr_fd) && ke->filter == EVFILT_WRITE)

#else /*select() version*/

# define CMK_PIPE_DECL(delayMs) \
        fd_set rfds_sto,wfds_sto;\
        fd_set *rfds=&rfds_sto,*wfds=&wfds_sto; struct timeval tmo; \
        FD_ZERO(rfds); FD_ZERO(wfds); \
        tmo.tv_sec=(delayMs)/1000; tmo.tv_usec=1000*((delayMs)%1000);
# define CMK_PIPE_SUB rfds,wfds
# define CMK_PIPE_CALL() select(FD_SETSIZE, rfds, wfds, NULL, &tmo)

# define CMK_PIPE_PARAM fd_set *rfds,fd_set *wfds
# define CMK_PIPE_ADDREAD(rd_fd) FD_SET(rd_fd,rfds)
# define CMK_PIPE_ADDWRITE(wr_fd) FD_SET(wr_fd,wfds)
# define CMK_PIPE_CHECKREAD(rd_fd) FD_ISSET(rd_fd,rfds)
# define CMK_PIPE_CHECKWRITE(wr_fd) FD_ISSET(wr_fd,wfds)
#endif
/*
 * Called after the wait primitive has reported failure.  On UNIX an
 * interrupted call (EINTR) is tolerated and anything else is fatal.
 */
static void CMK_PIPE_CHECKERR(void) {
#if defined(_WIN32) && !defined(__CYGWIN__)
  /* Win32 socket seems to randomly return inexplicable errors
     here-- WSAEINVAL, WSAENOTSOCK-- yet everything is actually OK.
     Nothing is reported or done about them. */
#else /*UNIX machine*/
  if (errno == EINTR)
    return;
  KillEveryone("Socket error in CheckSocketsReady!\n");
#endif
}
501 static void CmiStdoutFlush(void);
502 static int CmiStdoutNeedsService(void);
503 static void CmiStdoutService(void);
504 static void CmiStdoutAdd(CMK_PIPE_PARAM);
505 static void CmiStdoutCheck(CMK_PIPE_PARAM);
/*
 * Return the current time of day in seconds as a double (millisecond
 * resolution from _ftime on Win32, microsecond resolution from gettimeofday
 * elsewhere).  A gettimeofday failure is fatal.
 */
double GetClock(void)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
  struct _timeb now;
  _ftime(&now);
  return (now.time * 1.0 + now.millitm * 1.0E-3);
#else
  struct timeval now;
  if (gettimeofday(&now, NULL) < 0) {
    perror("gettimeofday");
    KillEveryoneCode(9343112);
  }
  return (now.tv_sec * 1.0 + now.tv_usec * 1.0E-6);
#endif
}
523 /***********************************************************************
525 * Abort function:
527 ************************************************************************/
529 static int Cmi_truecrash;
530 static int already_aborting=0;
531 void LrtsAbort(const char *message)
533 if (already_aborting) machine_exit(1);
534 already_aborting=1;
535 MACHSTATE1(5,"CmiAbort(%s)",message);
537 /*Send off any remaining prints*/
538 CmiStdoutFlush();
540 if(Cmi_truecrash) {
541 printf("CHARM++ FATAL ERROR: %s\n", message);
542 *(int *)NULL = 0; /*Write to null, causing bus error*/
543 } else {
544 charmrun_abort(message);
545 machine_exit(1);
550 /******************************************************************************
552 * CmiEnableAsyncIO
554 * The net and tcp versions use a bunch of unix processes talking to each
555 * other via file descriptors. We need for a signal SIGIO to be generated
556 * each time a message arrives, making it possible to write a signal
557 * handler to handle the messages. The vast majority of unixes can,
558 * in fact, do this. However, there isn't any standard for how this is
559 * supposed to be done, so each version of UNIX has a different set of
560 * calls to turn this signal on. So, there is like one version here for
561 * every major brand of UNIX.
563 *****************************************************************************/
#if CMK_ASYNC_USE_F_SETFL_AND_F_SETOWN
#include <fcntl.h>
/*
 * Ask for signal-driven I/O on fd: this process becomes the owner that
 * receives the signal and O_ASYNC is added to the descriptor's status flags.
 * The existing flags are read first so that flags set earlier (for example
 * O_NONBLOCK) are kept.  Any failure is reported and ends the process.
 */
void CmiEnableAsyncIO(int fd)
{
  int flags;
  if ( fcntl(fd, F_SETOWN, getpid()) < 0 ) {
    CmiError("setting socket owner: %s\n", strerror(errno)) ;
    exit(1);
  }
  flags = fcntl(fd, F_GETFL, 0);
  if ( flags < 0 || fcntl(fd, F_SETFL, flags | O_ASYNC) < 0 ) {
    CmiError("setting socket async: %s\n", strerror(errno)) ;
    exit(1);
  }
}
#else
/* No supported way to enable signal-driven I/O in this configuration. */
void CmiEnableAsyncIO(int fd) { }
#endif
/* We should probably have a set of "CMK_NONBLOCK_USE_..." defines here:*/
#if !defined(_WIN32) || defined(__CYGWIN__)
/*
 * Put fd into non-blocking mode.  O_NONBLOCK is OR-ed into the current
 * status flags so that flags set earlier (for example O_ASYNC) are kept.
 * Any failure is reported and ends the process.
 */
void CmiEnableNonblockingIO(int fd) {
  int flags = fcntl(fd, F_GETFL, 0);
  if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
    CmiError("setting nonblocking IO: %s\n", strerror(errno)) ;
    exit(1);
  }
}
#else
/* Nothing is done on native Win32. */
void CmiEnableNonblockingIO(int fd) { }
#endif
596 /******************************************************************************
598 * Configuration Data
600 * This data is all read in from the NETSTART variable (provided by the
601 * charmrun) and from the command-line arguments. Once read in, it is never
602 * modified.
604 *****************************************************************************/
606 static skt_ip_t Cmi_self_IP;
607 static skt_ip_t Cmi_charmrun_IP; /*Address of charmrun machine*/
608 static int Cmi_charmrun_port;
609 static int Cmi_charmrun_pid;
610 static int Cmi_charmrun_fd=-1;
611 /* Magic number to be used for sanity check in message header */
612 static int Cmi_net_magic;
614 static int Cmi_netpoll;
615 static int Cmi_asyncio;
616 static int Cmi_idlepoll;
617 static int Cmi_syncprint;
618 static int Cmi_print_stats = 0;
620 #if ! CMK_SMP && ! defined(_WIN32)
621 /* parse forks only used in non-smp mode */
622 static void parse_forks(void) {
623 char *forkstr;
624 int nread;
625 int forks;
626 int i,pid;
627 forkstr=getenv("CmiMyForks");
628 if(forkstr!=0) { /* charmrun */
629 nread = sscanf(forkstr,"%d",&forks);
630 for(i=1;i<=forks;i++) { /* by default forks = 0 */
631 pid=fork();
632 if(pid<0) CmiAbort("Fork returned an error");
633 if(pid==0) { /* forked process */
634 /* reset mynode,pe & exit loop */
635 Lrts_myNode+=i;
636 _Cmi_mype+=i;
637 break;
642 #endif
643 static void parse_magic(void)
645 char* nm;
646 int nread;
647 nm = getenv("NETMAGIC");
648 if (nm!=0)
649 {/*Read values set by Charmrun*/
650 nread = sscanf(nm, "%d",&Cmi_net_magic);
653 static void parse_netstart(void)
655 char *ns;
656 int nread;
657 int port;
658 ns = getenv("NETSTART");
659 if (ns!=0)
660 {/*Read values set by Charmrun*/
661 char Cmi_charmrun_name[1024];
662 nread = sscanf(ns, "%d%s%d%d%d",
663 &Lrts_myNode,
664 Cmi_charmrun_name, &Cmi_charmrun_port,
665 &Cmi_charmrun_pid, &port);
666 Cmi_charmrun_IP=skt_lookup_ip(Cmi_charmrun_name);
668 if (nread!=5) {
669 fprintf(stderr,"Error parsing NETSTART '%s'\n",ns);
670 exit(1);
672 } else
673 {/*No charmrun-- set flag values for standalone operation*/
674 Lrts_myNode=0;
675 Cmi_charmrun_IP=_skt_invalid_ip;
676 Cmi_charmrun_port=0;
677 Cmi_charmrun_pid=0;
678 dataport = -1;
680 #if CMK_USE_IBVERBS | CMK_USE_IBUD
681 char *cmi_num_nodes = getenv("CmiNumNodes");
682 if(cmi_num_nodes != NULL){
683 sscanf(cmi_num_nodes,"%d",&Lrts_numNodes);
685 #endif
688 static void extract_common_args(char **argv)
690 if (CmiGetArgFlagDesc(argv,"+stats","Print network statistics at shutdown"))
691 Cmi_print_stats = 1;
695 /******************************************************************************
697 * Packet Performance Logging
699 * This module is designed to give a detailed log of the packets and their
700 * acknowledgements, for performance tuning. It can be disabled.
702 *****************************************************************************/
#define LOGGING 0

#if LOGGING

/* Number of entries in the circular packet log. */
#define LOG_CAPACITY 50000
/* Number of most recent entries written by printLog(). */
#define LOG_TAIL 5000

/* One logged packet event. */
typedef struct logent {
  double time;
  int seqno;
  int srcpe;
  int dstpe;
  int kind;
} *logent;


logent log;   /* the ring buffer itself */
int log_pos;  /* next slot to write */
int log_wrap; /* set once the ring has wrapped at least once */

/* Allocate the ring buffer and reset the write position. */
static void log_init(void)
{
  log = (logent)malloc(LOG_CAPACITY * sizeof(struct logent));
  _MEMCHECK(log);
  log_pos = 0;
  log_wrap = 0;
}

/* Write every valid entry, in storage order, to the file log.<node>. */
static void log_done(void)
{
  char logname[100]; FILE *f; int i, size;
  sprintf(logname, "log.%d", Lrts_myNode);
  f = fopen(logname, "w");
  if (f==0) KillEveryone("fopen problem");
  if (log_wrap) size = LOG_CAPACITY; else size=log_pos;
  for (i=0; i<size; i++) {
    logent ent = log+i;
    fprintf(f, "%1.4f %d %c %d %d\n",
            ent->time, ent->srcpe, ent->kind, ent->dstpe, ent->seqno);
  }
  fclose(f);
}

/*
 * Write the LOG_TAIL most recent entries, oldest first, to log.<node>.
 * Only the first call does anything.
 */
void printLog(void)
{
  char logname[100]; FILE *f; int i, j;
  static int logged = 0;
  if (logged)
      return;
  logged = 1;
  CmiPrintf("Logging: %d\n", Lrts_myNode);
  sprintf(logname, "log.%d", Lrts_myNode);
  f = fopen(logname, "w");
  if (f==0) KillEveryone("fopen problem");
  for (i = LOG_TAIL; i; i--)
  {
    j = log_pos - i;
    if (j < 0)
    {
      /* Before the first wrap there is nothing that far back. */
      if (!log_wrap)
        continue;
      /* Older entries live at the end of the ring. */
      j += LOG_CAPACITY;
    }
    {
      logent ent = log+j;
      fprintf(f, "%1.4f %d %c %d %d\n",
              ent->time, ent->srcpe, ent->kind, ent->dstpe, ent->seqno);
    }
  }
  fclose(f);
  CmiPrintf("Done Logging: %d\n", Lrts_myNode);
}

/* Record one event, wrapping to the start when the ring is full. */
#define LOG(t,s,k,d,q) { if (log_pos==LOG_CAPACITY) { log_pos=0; log_wrap=1;} { logent ent=log+log_pos; ent->time=t; ent->srcpe=s; ent->kind=k; ent->dstpe=d; ent->seqno=q; log_pos++; }}

#endif

#if !LOGGING

#define log_init() /*empty*/
#define log_done() /*empty*/
#define printLog() /*empty*/
#define LOG(t,s,k,d,q) /*empty*/

#endif
789 /******************************************************************************
791 * Node state
793 *****************************************************************************/
795 static CmiNodeLock Cmi_scanf_mutex;
796 static double Cmi_clock;
797 static double Cmi_check_delay = 3.0;
798 int inProgress[128];
/** Mechanism to prevent dual locking when comm-layer functions, including prints,
 * are called recursively. (UN)LOCK_IF_AVAILABLE is used before and after a code piece
 * which is guaranteed not to make any recursive locking calls. (UN)LOCK_AND_(UN)SET
 * is used before and after a code piece that may make recursive locking calls.
 *
 * LOCK_AND_SET / UNLOCK_AND_UNSET expand to more than one statement and use a
 * variable named acqLock that the caller must have in scope, so they should
 * only be used as stand-alone statements inside a braced block.
 */

#define LOCK_IF_AVAILABLE() \
  if(inProgress[CmiMyRank()] == 0) { \
    CmiCommLock(); \
  }

#define UNLOCK_IF_AVAILABLE() \
  if(inProgress[CmiMyRank()] == 0) { \
    CmiCommUnlock(); \
  }

#define LOCK_AND_SET() \
    if(inProgress[CmiMyRank()] == 0) { \
      CmiCommLock(); \
      acqLock = 1; \
    } \
    inProgress[CmiMyRank()]++;

#define UNLOCK_AND_UNSET() \
    if(acqLock != 0) { \
      CmiCommUnlock(); \
      acqLock = 0; \
    } \
    inProgress[CmiMyRank()]--;
831 /******************************************************************************
833 * OS Threads
834 * SMP implementation moved to machine-smp.c
835 *****************************************************************************/
837 /************************ No kernel SMP threads ***************/
838 #if !CMK_SMP
/* Nesting count of memory "locks"; CommunicationInterrupt does nothing while it is nonzero. */
static volatile int memflag=0;

/* Enter a memory-locked region. */
void CmiMemLockNet()
{
  memflag++;
}

/* Leave a memory-locked region. */
void CmiMemUnlockNet()
{
  memflag--;
}
/* Flag standing in for the communication lock when there are no kernel threads. */
static volatile int comm_flag=0;
/* Runs dothis when the communication flag is currently set. */
#define CmiCommLockOrElse(dothis) if (comm_flag) dothis
#ifndef MACHLOCK_DEBUG
#  define CmiCommLock()   (comm_flag=1)
#  define CmiCommUnlock() (comm_flag=0)
#else /* Error-checking flag locks */
/* Set the flag, asserting it was clear. */
void CmiCommLock(void)
{
  MACHLOCK_ASSERT(!comm_flag,"CmiCommLock");
  comm_flag=1;
}
/* Clear the flag, asserting it was set. */
void CmiCommUnlock(void)
{
  MACHLOCK_ASSERT(comm_flag,"CmiCommUnlock");
  comm_flag=0;
}
#endif
/* Normally zero; only 1 during SIGIO handling */
/* The type is spelled out: a file-scope definition without one relies on
   implicit int, which has not been valid C since C99. */
int _Cmi_myrank=0;
863 static void CommunicationInterrupt(int ignored)
865 MACHLOCK_ASSERT(!_Cmi_myrank,"CommunicationInterrupt");
866 if (memflag || comm_flag || _immRunning || CmiCheckImmediateLock(0))
867 { /* Already busy inside malloc, comm, or immediate messages */
868 MACHSTATE(5,"--SKIPPING SIGIO--");
869 return;
871 MACHSTATE1(2,"--BEGIN SIGIO comm_mutex_isLocked: %d--", comm_flag)
873 /*Make sure any malloc's we do in here are NOT migratable:*/
874 CmiIsomallocBlockList *oldList=CmiIsomallocBlockListActivate(NULL);
875 /* _Cmi_myrank=1; */
876 CommunicationServerNet(0, COMM_SERVER_FROM_INTERRUPT); /* from interrupt */
877 //CommunicationServer(0); /* from interrupt */
878 /* _Cmi_myrank=0; */
879 CmiIsomallocBlockListActivate(oldList);
881 MACHSTATE(2,"--END SIGIO--")
884 extern void CmiSignal(int sig1, int sig2, int sig3, void (*handler)());
886 static void CmiDestroyLocks()
888 comm_flag = 0;
889 memflag = 0;
892 #endif
894 CpvExtern(int,_charmEpoch);
896 /*Add a message to this processor's receive queue
897 Must be called while holding comm. lock
900 extern double evacTime;
903 /***************************************************************
904 Communication with charmrun:
905 We can send (ctrl_sendone) and receive (ctrl_getone)
906 messages on a TCP socket connected to charmrun.
907 This is used for printfs, CCS, etc; and also for
908 killing ourselves if charmrun dies.
911 /*This flag prevents simultaneous outgoing
912 messages on the charmrun socket. It is protected
913 by the commlock.*/
914 static int Cmi_charmrun_fd_sendflag=0;
916 /* ctrl_sendone */
/*
 * Socket-error callback installed while sending to charmrun: reports the
 * error on stderr and exits with status 1.
 */
static int sendone_abort_fn(int code,const char *msg)
{
  fprintf(stderr,"Socket error %d in ctrl_sendone! %s\n",code,msg);
  machine_exit(1);
  return -1;
}
923 static void ctrl_sendone_nolock(const char *type,
924 const char *data1,int dataLen1,
925 const char *data2,int dataLen2)
927 const void *bufs[3]; int lens[3]; int nBuffers=0;
928 ChMessageHeader hdr;
929 skt_abortFn oldAbort=skt_set_abort(sendone_abort_fn);
930 MACHSTATE1(2,"ctrl_sendone_nolock { type=%s", type);
931 if (Cmi_charmrun_fd==-1)
932 charmrun_abort("ctrl_sendone called in standalone!\n");
933 Cmi_charmrun_fd_sendflag=1;
934 ChMessageHeader_new(type,dataLen1+dataLen2,&hdr);
935 bufs[nBuffers]=&hdr; lens[nBuffers]=sizeof(hdr); nBuffers++;
936 if (dataLen1>0) {bufs[nBuffers]=data1; lens[nBuffers]=dataLen1; nBuffers++;}
937 if (dataLen2>0) {bufs[nBuffers]=data2; lens[nBuffers]=dataLen2; nBuffers++;}
938 skt_sendV(Cmi_charmrun_fd,nBuffers,bufs,lens);
939 Cmi_charmrun_fd_sendflag=0;
940 skt_set_abort(oldAbort);
941 MACHSTATE(2,"} ctrl_sendone_nolock");
/*
 * Same as ctrl_sendone_nolock, but wrapped in LOCK_IF_AVAILABLE /
 * UNLOCK_IF_AVAILABLE so the communication lock is taken unless this rank is
 * already inside a locked comm-layer call.
 */
static void ctrl_sendone_locking(const char *type,
                                 const char *data1,int dataLen1,
                                 const char *data2,int dataLen2)
{
  LOCK_IF_AVAILABLE();
  ctrl_sendone_nolock(type, data1, dataLen1, data2, dataLen2);
  UNLOCK_IF_AVAILABLE();
}
/* Optional memory-usage tracing; off unless MEMORYUSAGE_OUTPUT is predefined nonzero. */
#ifndef MEMORYUSAGE_OUTPUT
#define MEMORYUSAGE_OUTPUT 0
#endif
#if MEMORYUSAGE_OUTPUT
/* how many prints in a second */
#define MEMORYUSAGE_OUTPUT_FREQ 10
static int memoryusage_counter;
/* True when the counter is a multiple of MEMORYUSAGE_OUTPUT_FREQ. */
#define memoryusage_isOutput ((memoryusage_counter%MEMORYUSAGE_OUTPUT_FREQ)==0)
/* Bumps the counter and, on PE 0, prints the PE, the clock and the memory usage. */
#define memoryusage_output {\
  memoryusage_counter++;\
  if(CmiMyPe()==0) printf("-- %d %f %ld --\n", CmiMyPe(), GetClock(), CmiMemoryUsage());}
#endif
965 static double Cmi_check_last;
967 /* if charmrun dies, we finish */
968 static void pingCharmrun(void *ignored)
970 #if MEMORYUSAGE_OUTPUT
971 memoryusage_output;
972 if(memoryusage_isOutput){
973 memoryusage_counter = 0;
974 #else
976 #endif
978 double clock=GetClock();
979 if (clock > Cmi_check_last + Cmi_check_delay) {
980 MACHSTATE1(3,"CommunicationsClock pinging charmrun Cmi_charmrun_fd_sendflag=%d", Cmi_charmrun_fd_sendflag);
981 Cmi_check_last = clock;
982 CmiCommLockOrElse(return;); /*Already busy doing communication*/
983 if (Cmi_charmrun_fd_sendflag) return; /*Busy talking to charmrun*/
984 LOCK_IF_AVAILABLE();
985 ctrl_sendone_nolock("ping",NULL,0,NULL,0); /*Charmrun may have died*/
986 UNLOCK_IF_AVAILABLE();
988 CmiStdoutFlush(); /*Make sure stdout buffer hasn't filled up*/
992 /* periodic charm ping, for gm and netpoll */
993 static void pingCharmrunPeriodic(void *ignored)
995 pingCharmrun(ignored);
996 CcdCallFnAfter((CcdVoidFn)pingCharmrunPeriodic,NULL,1000);
/* Socket-error callback used while aborting: no report, just exit with status 2. */
static int ignore_further_errors(int c,const char *msg)
{
  machine_exit(2);
  return -1;
}
1000 static void charmrun_abort(const char *s)
1002 if (Cmi_charmrun_fd==-1) {/*Standalone*/
1003 fprintf(stderr,"Charm++ fatal error:\n%s\n",s);
1004 CmiPrintStackTrace(0);
1005 abort();
1006 } else {
1007 char msgBuf[80];
1008 skt_set_abort(ignore_further_errors);
1009 if (CmiNumPartitions() == 1) {
1010 sprintf(msgBuf,"Fatal error on PE %d> ",CmiMyPe());
1012 else
1014 sprintf(msgBuf,"Fatal error on Partition %d PE %d> ", CmiMyPartition(), CmiMyPe());
1016 ctrl_sendone_nolock("abort",msgBuf,strlen(msgBuf),s,strlen(s)+1);
1020 /* ctrl_getone */
1022 #ifdef __FAULT__
1023 #include "machine-recover.c"
1024 #endif
/* Forward declaration; the definition is not in this part of the file.
   Called below with the node-table message received from charmrun. */
1026 static void node_addresses_store(ChMessage *msg);
/* Barrier arrival flag: ctrl_getone sets it to 1 on a "barrier" message and
   to 2 on "barrier0"; LrtsBarrier / CmiBarrierZero reset it to 0. */
1028 static int barrierReceived = 0;
/* Receive one control message from charmrun (ChMessage_recv on
 * Cmi_charmrun_fd) and act on its header type:
 *   "die"          - print the message data, then log_done(),
 *                    ConverseCommonExit() and machine_exit(0)
 *   "req_fw"       - (CMK_CCS_AVAILABLE only) convert the CCS request into
 *                    a converse message and push it onto local PE 0
 *   "crashnode",
 *   "initnodetab"  - (__FAULT__ only) crash handling / node table refresh
 *   "barrier"      - barrierReceived = 1
 *   "barrier0"     - barrierReceived = 2
 *   anything else  - charmrun_abort(...) followed by machine_exit(1)
 * The MACHLOCK_ASSERT below indicates the caller is expected to already
 * hold the comm lock. The received message is freed before returning. */
1030 static void ctrl_getone(void)
1032 ChMessage msg;
1033 MACHSTATE(2,"ctrl_getone")
1034 MACHLOCK_ASSERT(comm_mutex_isLocked,"ctrl_getone")
1035 ChMessage_recv(Cmi_charmrun_fd,&msg);
1036 MACHSTATE1(2,"ctrl_getone recv one '%s'", msg.header.type);
1038 if (strcmp(msg.header.type,"die")==0) {
1039 MACHSTATE(2,"ctrl_getone bye bye")
1040 fprintf(stderr,"aborting: %s\n",msg.data);
1041 log_done();
1042 ConverseCommonExit();
1043 machine_exit(0);
1045 #if CMK_CCS_AVAILABLE
1046 else if (strcmp(msg.header.type, "req_fw")==0) {
1047 CcsImplHeader *hdr=(CcsImplHeader *)msg.data;
1048 /*Sadly, I *can't* do a:
1049 CcsImpl_netRequest(hdr,msg.data+sizeof(CcsImplHeader));
1050 here, because I can't send converse messages in the
1051 communication thread. I *can* poke this message into
1052 any convenient processor's queue, though: (OSL, 9/14/2000)
1054 int pe=0;/*<- node-local processor number. Any one will do.*/
1055 void *cmsg=(void *)CcsImpl_ccs2converse(hdr,msg.data+sizeof(CcsImplHeader),NULL);
1056 MACHSTATE(2,"Incoming CCS request");
1057 if (cmsg!=NULL) CmiPushPE(pe,cmsg);
1059 #endif
1060 #ifdef __FAULT__
1061 else if(strcmp(msg.header.type,"crashnode")==0) {
1062 crash_node_handle(&msg);
1064 else if(strcmp(msg.header.type,"initnodetab")==0) {
1065 /** A processor crashed and got recreated. So charmrun sent
1066 across the whole nodetable data to update this processor*/
1067 node_addresses_store(&msg);
1068 // fprintf(stdout,"nodetable added %d\n",CmiMyPe());
1070 #endif
1071 else if(strcmp(msg.header.type,"barrier")==0) {
1072 barrierReceived = 1;
1074 else if(strcmp(msg.header.type,"barrier0")==0) {
1075 barrierReceived = 2;
1077 else {
1078 /* We do not use KillEveryOne here because it calls CmiMyPe(),
1079 * which is not available to the communication thread on an SMP version.
1081 /* CmiPrintf("Unknown message: %s\n", msg.header.type); */
1082 charmrun_abort("ERROR> Unrecognized message from charmrun.\n");
1083 machine_exit(1);
1086 MACHSTATE(2,"ctrl_getone done")
1087 ChMessage_free(&msg);
#if CMK_CCS_AVAILABLE && !NODE_0_IS_CONVHOST
/* Forward a CCS reply to charmrun as a "reply_fw" control message: the
   CcsImplHeader is sent as the first segment and the repLen bytes at
   repData as the second. The data is forwarded to the CCS server via
   charmrun. */
void CcsImpl_reply(CcsImplHeader *hdr,int repLen,const void *repData)
{
  const char *headerBytes = (const char *)hdr;
  const char *payload = repData;

  MACHSTATE(2,"Outgoing CCS reply");
  ctrl_sendone_locking("reply_fw", headerBytes, sizeof(CcsImplHeader),
                       payload, repLen);
  MACHSTATE(1,"Outgoing CCS reply away");
}
#endif
1102 /*****************************************************************************
1104 * CmiPrintf, CmiError, CmiScanf
1106 *****************************************************************************/
1107 static void InternalWriteToTerminal(int isStdErr,const char *str,int len);
1108 static void InternalPrintf(const char *f, va_list l)
1110 ChMessage replymsg;
1111 char *buffer = CmiTmpAlloc(PRINTBUFSIZE);
1112 CmiStdoutFlush();
1113 vsprintf(buffer, f, l);
1114 if(Cmi_syncprint) {
1115 LOCK_IF_AVAILABLE();
1116 ctrl_sendone_nolock("printsyn", buffer,strlen(buffer)+1,NULL,0);
1117 ChMessage_recv(Cmi_charmrun_fd,&replymsg);
1118 ChMessage_free(&replymsg);
1119 UNLOCK_IF_AVAILABLE();
1120 } else {
1121 ctrl_sendone_locking("print", buffer,strlen(buffer)+1,NULL,0);
1123 InternalWriteToTerminal(0,buffer,strlen(buffer));
1124 CmiTmpFree(buffer);
1127 static void InternalError(const char *f, va_list l)
1129 ChMessage replymsg;
1130 char *buffer = CmiTmpAlloc(PRINTBUFSIZE);
1131 CmiStdoutFlush();
1132 vsprintf(buffer, f, l);
1133 if(Cmi_syncprint) {
1134 ctrl_sendone_locking("printerrsyn", buffer,strlen(buffer)+1,NULL,0);
1135 LOCK_IF_AVAILABLE();
1136 ChMessage_recv(Cmi_charmrun_fd,&replymsg);
1137 ChMessage_free(&replymsg);
1138 UNLOCK_IF_AVAILABLE();
1139 } else {
1140 ctrl_sendone_locking("printerr", buffer,strlen(buffer)+1,NULL,0);
1142 InternalWriteToTerminal(1,buffer,strlen(buffer));
1143 CmiTmpFree(buffer);
1146 static int InternalScanf(char *fmt, va_list l)
1148 ChMessage replymsg;
1149 char *ptr[20];
1150 char *p; int nargs, i;
1151 nargs=0;
1152 p=fmt;
1153 while (*p) {
1154 if ((p[0]=='%')&&(p[1]=='*')) { p+=2; continue; }
1155 if ((p[0]=='%')&&(p[1]=='%')) { p+=2; continue; }
1156 if (p[0]=='%') { nargs++; p++; continue; }
1157 if (*p=='\n') *p=' '; p++;
1159 if (nargs > 18) KillEveryone("CmiScanf only does 18 args.\n");
1160 for (i=0; i<nargs; i++) ptr[i]=va_arg(l, char *);
1161 CmiLock(Cmi_scanf_mutex);
1162 if (Cmi_charmrun_fd!=-1)
1163 {/*Send charmrun the format string*/
1164 ctrl_sendone_locking("scanf", fmt, strlen(fmt)+1,NULL,0);
1165 /*Wait for the reply (characters to scan) from charmrun*/
1166 LOCK_IF_AVAILABLE();
1167 ChMessage_recv(Cmi_charmrun_fd,&replymsg);
1168 i = sscanf((char*)replymsg.data, fmt,
1169 ptr[ 0], ptr[ 1], ptr[ 2], ptr[ 3], ptr[ 4], ptr[ 5],
1170 ptr[ 6], ptr[ 7], ptr[ 8], ptr[ 9], ptr[10], ptr[11],
1171 ptr[12], ptr[13], ptr[14], ptr[15], ptr[16], ptr[17]);
1172 ChMessage_free(&replymsg);
1173 UNLOCK_IF_AVAILABLE();
1174 } else
1175 {/*Just do the scanf normally*/
1176 i=scanf(fmt, ptr[ 0], ptr[ 1], ptr[ 2], ptr[ 3], ptr[ 4], ptr[ 5],
1177 ptr[ 6], ptr[ 7], ptr[ 8], ptr[ 9], ptr[10], ptr[11],
1178 ptr[12], ptr[13], ptr[14], ptr[15], ptr[16], ptr[17]);
1180 CmiUnlock(Cmi_scanf_mutex);
1181 return i;
#if CMK_CMIPRINTF_IS_A_BUILTIN

/* printf replacement: routed through InternalPrintf when a charmrun
   connection exists and _writeToStdout is set, else printed on stdout. */
void CmiPrintf(const char *fmt, ...)
{
  va_list args;

  CpdSystemEnter();
  va_start(args, fmt);
  if (Cmi_charmrun_fd == -1 || !_writeToStdout)
    vfprintf(stdout, fmt, args);
  else
    InternalPrintf(fmt, args);
  va_end(args);
  CpdSystemExit();
}

/* Error-output counterpart of CmiPrintf: routed through InternalError when
   a charmrun connection exists, else printed on stderr. */
void CmiError(const char *fmt, ...)
{
  va_list args;

  CpdSystemEnter();
  va_start(args, fmt);
  if (Cmi_charmrun_fd == -1)
    vfprintf(stderr, fmt, args);
  else
    InternalError(fmt, args);
  va_end(args);
  CpdSystemExit();
}

/* scanf replacement: hands the format and arguments to InternalScanf and
   returns its result. */
int CmiScanf(const char *fmt, ...)
{
  va_list args;
  int converted;

  CpdSystemEnter();
  va_start(args, fmt);
  converted = InternalScanf((char *)fmt, args);
  va_end(args);
  CpdSystemExit();
  return converted;
}

#endif
1229 /***************************************************************************
1230 * Output redirection:
1231 * When people don't use CkPrintf, like above, we'd still like to be able
1232 * to collect their output. Thus we make a pipe and dup2 it to stdout,
1233 * which lets us read the characters sent to stdout at our leisure.
1234 ***************************************************************************/
/* State for stdout/stderr redirection; index 0 is stdout, 1 is stderr. */
1236 /*Can read from stdout or stderr using these fd's*/
1237 static int readStdout[2];
1238 static int writeStdout[2]; /*The original stdout/stderr sockets*/
1239 static int serviceStdout[2]; /*(bool) Normally zero; one if service needed.*/
1240 #define readStdoutBufLen (16*1024)
/* One extra byte so a full read can still be zero-terminated. */
1241 static char readStdoutBuf[readStdoutBufLen+1]; /*Protected by comm. lock*/
/* Set to 1 while CmiStdoutServiceOne runs; CmiStdoutFlush returns early when it is set. */
1242 static int servicingStdout;
/* Redirect this process's stdout and stderr so their output can be read
 * back through readStdout[] and forwarded.
 * Returns immediately in standalone mode (Cmi_charmrun_fd == -1).
 * On non-Windows builds, for each of fd 1 and fd 2: a duplicate of the
 * original descriptor is saved in writeStdout[], a UNIX socketpair is
 * created, one end is kept in readStdout[] and the other is dup2()'d over
 * the original fd. The process exits if socketpair or dup2 fails (the dup()
 * result is not checked). In non-SMP builds with Cmi_asyncio set, async I/O
 * is enabled on the write end.
 * On Windows builds the redirection is compiled out and read/write are
 * replaced by stub macros (unless already defined). */
1244 /*Initialization-- should only be called once per node*/
1245 static void CmiStdoutInit(void) {
1246 int i;
1247 if (Cmi_charmrun_fd==-1) return; /* standalone mode */
1249 /*There's some way to do this same thing in windows, but I don't know how*/
1250 #if !defined(_WIN32) || defined(__CYGWIN__)
1251 /*Prevent buffering in stdio library:*/
1252 setbuf(stdout,NULL); setbuf(stderr,NULL);
1254 /*Reopen stdout and stderr fd's as new pipes:*/
1255 for (i=0;i<2;i++) {
1256 int pair[2];
1257 int srcFd=1+i; /* 1 is stdout; 2 is stderr */
1259 /*First, save a copy of the original stdout*/
1260 writeStdout[i]=dup(srcFd);
1261 #if 0
1262 /*Build a pipe to connect to stdout (4kb buffer, but no SIGIO...)*/
1263 if (-1==pipe(pair)) {perror("building stdio redirection pipe"); exit(1);}
1264 #else
1265 /* UNIX socket (16kb default buffer, and works with SIGIO!) */
1266 if (-1==socketpair(PF_UNIX,SOCK_STREAM,0,pair))
1267 {perror("building stdio redirection socketpair"); exit(1);}
1268 #endif
1269 readStdout[i]=pair[0]; /*We get the read end of pipe*/
1270 if (-1==dup2(pair[1],srcFd)) {perror("dup2 redirection pipe"); exit(1);}
1271 //if (-1==dup2(srcFd,pair[1])) {perror("dup2 redirection pipe"); exit(1);}
1273 #if 0 /*Keep writes from blocking. This just drops excess output, which is bad.*/
1274 CmiEnableNonblockingIO(srcFd);
1275 #endif
1276 //NOTSURE #if CMK_SHARED_VARS_UNAVAILABLE
1277 #if !CMK_SMP
1278 if (Cmi_asyncio)
1280 /*No communication thread-- get a SIGIO on each write(), which keeps the buffer clean*/
1281 //CmiEnableAsyncIO(readStdout[i]);
1282 CmiEnableAsyncIO(pair[1]);
1284 #endif
1286 #else
1287 /*Windows system-- just fake reads for now*/
1288 # ifndef read
1289 # define read(x,y,z) 0
1290 # endif
1291 # ifndef write
1292 # define write(x,y,z)
1293 # endif
1294 #endif
1297 /*Sends data to original stdout (e.g., for ++debug or ++in-xterm)*/
1298 static void InternalWriteToTerminal(int isStdErr,const char *str,int len)
1300 write(writeStdout[isStdErr],str,len);
1304 Service this particular stdout pipe.
1305 Must hold comm. lock.
1307 static void CmiStdoutServiceOne(int i) {
1308 int nBytes;
1309 const static char *cmdName[2]={"print","printerr"};
1310 servicingStdout=1;
1311 while(1) {
1312 const char *tooMuchWarn=NULL; int tooMuchLen=0;
1313 if (!skt_select1(readStdout[i],0)) break; /*Nothing to read*/
1314 nBytes=read(readStdout[i],readStdoutBuf,readStdoutBufLen);
1315 if (nBytes<=0) break; /*Nothing to send*/
1317 /*Send these bytes off to charmrun*/
1318 readStdoutBuf[nBytes]=0; /*Zero-terminate read string*/
1319 nBytes++; /*Include zero-terminator in message to charmrun*/
1321 if (nBytes>=readStdoutBufLen-100)
1322 { /*We must have filled up our output pipe-- most output libraries
1323 don't handle this well (e.g., glibc printf just drops the line).*/
1325 tooMuchWarn="\nWARNING: Too much output at once-- possible output discontinuity!\n"
1326 "Use CkPrintf to avoid discontinuity (and this warning).\n\n";
1327 nBytes--; /*Remove terminator from user's data*/
1328 tooMuchLen=strlen(tooMuchWarn)+1;
1330 ctrl_sendone_nolock(cmdName[i],readStdoutBuf,nBytes,
1331 tooMuchWarn,tooMuchLen);
1333 InternalWriteToTerminal(i,readStdoutBuf,nBytes);
1335 servicingStdout=0;
1336 serviceStdout[i]=0; /*This pipe is now serviced*/
1339 /*Service all stdout pipes, whether it looks like they need it
1340 or not. Used when you aren't sure if select() has been called recently.
1341 Must hold comm. lock.
1343 static void CmiStdoutServiceAll(void) {
1344 int i;
1345 for (i=0;i<2;i++) {
1346 if (readStdout[i]==0) continue; /*Pipe not open*/
1347 CmiStdoutServiceOne(i);
/* Service any outstanding stdout pipes; currently this simply services
   all of them. Must hold comm. lock. */
static void CmiStdoutService(void)
{
  CmiStdoutServiceAll();
}
1358 /*Add our pipes to the pile for select() or poll().
1359 Both can be called with or without the comm. lock.
1361 static void CmiStdoutAdd(CMK_PIPE_PARAM) {
1362 int i;
1363 for (i=0;i<2;i++) {
1364 if (readStdout[i]==0) continue; /*Pipe not open*/
1365 CMK_PIPE_ADDREAD(readStdout[i]);
1368 static void CmiStdoutCheck(CMK_PIPE_PARAM) {
1369 int i;
1370 for (i=0;i<2;i++) {
1371 if (readStdout[i]==0) continue; /*Pipe not open*/
1372 if (CMK_PIPE_CHECKREAD(readStdout[i])) serviceStdout[i]=1;
1375 static int CmiStdoutNeedsService(void) {
1376 return (serviceStdout[0]!=0 || serviceStdout[1]!=0);
1379 /*Called every few milliseconds to flush the stdout pipes*/
1380 static void CmiStdoutFlush(void) {
1381 if (servicingStdout) return; /* might be called by SIGALRM */
1382 CmiCommLockOrElse( return; )
1383 LOCK_IF_AVAILABLE();
1384 CmiStdoutServiceAll();
1385 UNLOCK_IF_AVAILABLE();
1388 /***************************************************************************
1389 * Message Delivery:
1391 ***************************************************************************/
1393 #include "machine-dgram.c"
1396 /*****************************************************************************
1398 * node_addresses
1400 * These two functions fill the node-table.
1403 * This node, like all others, first sends its own address to charmrun
1404 * using this command:
1406 * Type: nodeinfo
1407 * Data: Big-endian 4-byte ints
1408 * <my-node #><Dataport>
1410 * When charmrun has all the addresses, he sends this table to me:
1412 * Type: nodes
1413 * Data: Big-endian 4-byte ints
1414 * <number of nodes n>
1415 * <#PEs><IP><Dataport> Node 0
1416 * <#PEs><IP><Dataport> Node 1
1417 * ...
1418 * <#PEs><IP><Dataport> Node n-1
1420 *****************************************************************************/
1422 #if CMK_USE_IBVERBS
1423 void copyInfiAddr(ChInfiAddr *qpList);
1424 #endif
#if CMK_USE_IBVERBS && CMK_IBVERBS_FAST_START
/* Send charmrun a "partinit" control message whose payload is this node's
   number (Lrts_myNode) encoded as a ChMessageInt_t. Uses the non-locking
   send. */
static void send_partial_init()
{
  ChMessageInt_t myNodeMsg = ChMessageInt_new(Lrts_myNode);

  ctrl_sendone_nolock("partinit",
                      (const char *)&myNodeMsg, sizeof(myNodeMsg),
                      NULL, 0);
}
#endif
/* Obtain the node table and pass it to node_addresses_store.
 *
 * Standalone (Cmi_charmrun_fd == -1): a single-node table is fabricated
 * locally. The PE count comes from "+p" (non-SMP builds print usage and
 * exit if it is not 1) or, in SMP builds, from "+ppn" / "++ppn".
 *
 * With charmrun: an "initnode" control message describing this node is
 * sent (including the queue-pair list under CMK_USE_IBVERBS, or IP /
 * dataport and optional IBUD queue-pair info otherwise). The function then
 * waits for the reply with skt_select1 (timeout argument 1200*1000;
 * presumably milliseconds -- TODO confirm) and aborts on timeout, and
 * finally receives the table message.
 *
 * In both cases the message is stored via node_addresses_store and freed. */
1435 /*Note: node_addresses_obtain is called before starting
1436 threads, so no locks are needed (or valid!)*/
1437 static void node_addresses_obtain(char **argv)
1439 ChMessage nodetabmsg; /* info about all nodes*/
1440 MACHSTATE(3,"node_addresses_obtain { ");
1441 if (Cmi_charmrun_fd==-1)
1442 {/*Standalone-- fake a single-node nodetab message*/
1443 int npes=1;
1444 ChSingleNodeinfo *fakeTab;
1445 ChMessage_new("nodeinfo",sizeof(ChSingleNodeinfo),&nodetabmsg);
1446 fakeTab=(ChSingleNodeinfo *)(nodetabmsg.data);
1447 CmiGetArgIntDesc(argv,"+p",&npes,"Set the number of processes to create");
1448 //#if CMK_SHARED_VARS_UNAVAILABLE
1449 #if !CMK_SMP
1450 if (npes!=1) {
1451 fprintf(stderr,
1452 "To use multiple processors, you must run this program as:\n"
1453 " > charmrun +p%d %s <args>\n"
1454 "or build the %s-smp version of Charm++.\n",
1455 npes,argv[0],CMK_MACHINE_NAME);
1456 exit(1);
1458 #else
1459 /* standalone smp version reads ppn */
1460 if (CmiGetArgInt(argv, "+ppn", &_Cmi_mynodesize) ||
1461 CmiGetArgInt(argv, "++ppn", &_Cmi_mynodesize) )
1462 npes = _Cmi_mynodesize;
1463 #endif
1464 /*This is a stupid hack: we expect the *number* of nodes
1465 followed by ChNodeinfo structs; so we use a ChSingleNodeinfo
1466 (which happens to have exactly that layout!) and stuff
1467 a 1 into the "node number" slot
1469 fakeTab->nodeNo=ChMessageInt_new(1); /* <- hack */
1470 fakeTab->info.nPE=ChMessageInt_new(npes);
1471 fakeTab->info.dataport=ChMessageInt_new(0);
1472 fakeTab->info.IP=_skt_invalid_ip;
1474 else
1475 { /*Contact charmrun for machine info.*/
1476 ChSingleNodeinfo me;
1478 me.nodeNo=ChMessageInt_new(Lrts_myNode);
1480 #if CMK_USE_IBVERBS
1482 int qpListSize = (Lrts_numNodes-1)*sizeof(ChInfiAddr);
1483 me.info.qpList = malloc(qpListSize);
1484 copyInfiAddr(me.info.qpList);
1485 MACHSTATE1(3,"me.info.qpList created and copied size %d bytes",qpListSize);
1486 ctrl_sendone_nolock("initnode",(const char *)&me,sizeof(me),(const char *)me.info.qpList,qpListSize);
1487 free(me.info.qpList);
1489 #else
1490 /*The nPE fields are set by charmrun--
1491 these values don't matter.
1492 Set IP in case it is mpiexec mode where charmrun does not have IP yet
1494 me.info.nPE=ChMessageInt_new(0);
1495 /* me.info.IP=_skt_invalid_ip; */
1496 me.info.IP=skt_innode_my_ip();
1497 #if CMK_USE_IBUD
1498 me.info.qp.lid=ChMessageInt_new(context->localAddr.lid);
1499 me.info.qp.qpn=ChMessageInt_new(context->localAddr.qpn);
1500 me.info.qp.psn=ChMessageInt_new(context->localAddr.psn);
1501 MACHSTATE3(3,"IBUD Information lid=%i qpn=%i psn=%i\n",me.info.qp.lid,me.info.qp.qpn,me.info.qp.psn);
1502 #endif
1503 me.info.dataport=ChMessageInt_new(dataport);
1505 /*Send our node info. to charmrun.
1506 CommLock hasn't been initialized yet--
1507 use non-locking version*/
1508 ctrl_sendone_nolock("initnode",(const char *)&me,sizeof(me),NULL,0);
1509 MACHSTATE1(5,"send initnode - dataport:%d", dataport);
1510 #endif //CMK_USE_IBVERBS
1512 MACHSTATE(3,"initnode sent");
1514 /*We get the other node addresses from a message sent
1515 back via the charmrun control port.*/
1516 if (!skt_select1(Cmi_charmrun_fd,1200*1000)){
1517 CmiAbort("Timeout waiting for nodetab!\n");
1519 MACHSTATE(2,"recv initnode {");
1520 ChMessage_recv(Cmi_charmrun_fd,&nodetabmsg);
1521 MACHSTATE(2,"} recv initnode");
1523 //#if CMK_USE_IBVERBS
1524 //#else
1525 node_addresses_store(&nodetabmsg);
1526 ChMessage_free(&nodetabmsg);
1527 //#endif
1528 MACHSTATE(3,"} node_addresses_obtain ");
1532 /***********************************************************************
1533 * DeliverOutgoingMessage()
1535 * This function takes care of delivery of outgoing messages from the
1536 * sender end. Broadcast messages are divided into sets of messages that
1537 * are bound to the local node, and to remote nodes. For local
1538 * transmission, the messages are directly pushed into the recv
1539 * queues. For non-local transmission, the function DeliverViaNetwork()
1540 * is called
1541 ***********************************************************************/
1542 int DeliverOutgoingMessage(OutgoingMsg ogm)
1544 int i, rank, dst; OtherNode node;
1546 int network = 1;
1548 dst = ogm->dst;
1550 int acqLock = 0;
1551 //printf("deliver outgoing message, dest: %d \n", dst);
1552 #if CMK_ERROR_CHECKING
1553 if (dst<0 || dst>=CmiNumPesGlobal())
1554 CmiAbort("Send to out-of-bounds processor!");
1555 #endif
1556 node = nodes_by_pe[dst];
1557 rank = dst - node->nodestart;
1558 if (node->nodestart != Cmi_nodestartGlobal) {
1559 #if !CMK_SMP_NOT_RELAX_LOCK
1560 LOCK_AND_SET();
1561 #endif
1562 DeliverViaNetwork(ogm, node, rank, DGRAM_ROOTPE_MASK, 0);
1563 GarbageCollectMsg(ogm);
1564 #if !CMK_SMP_NOT_RELAX_LOCK
1565 UNLOCK_AND_UNSET();
1566 #endif
1568 #if CMK_MULTICORE
1569 network = 0;
1570 #endif
1571 return network;
1575 * Set up an OutgoingMsg structure for this message.
1577 static OutgoingMsg PrepareOutgoing(int pe,int size,int freemode,char *data) {
1578 OutgoingMsg ogm;
1579 MallocOutgoingMsg(ogm);
1580 MACHSTATE2(2,"Preparing outgoing message for pe %d, size %d",pe,size);
1581 ogm->size = size;
1582 ogm->data = data;
1583 ogm->src = CmiMyPeGlobal();
1584 ogm->dst = pe;
1585 ogm->freemode = freemode;
1586 ogm->refcount = 0;
1587 return (CmiCommHandle)ogm;
1591 /******************************************************************************
1593 * CmiGeneralSend
1595 * Description: This is a generic message sending routine. All the
1596 * converse message send functions are implemented in terms of this
1597 * function. (By setting appropriate flags (eg freemode) that tell
1598 * CmiGeneralSend() how exactly to handle the particular case of
1599 * message send)
1601 *****************************************************************************/
1603 //CmiCommHandle CmiGeneralSend(int pe, int size, int freemode, char *data)
1604 CmiCommHandle LrtsSendFunc(int destNode, int pe, int size, char *data, int freemode)
1606 int sendonnetwork;
1607 OutgoingMsg ogm;
1608 MACHSTATE(1,"CmiGeneralSend {");
1610 CMI_MSG_SIZE(data)=size;
1611 ogm=PrepareOutgoing(pe,size,'F',data);
1613 #if CMK_SMP_NOT_RELAX_LOCK
1614 int acqLock = 0;
1615 LOCK_AND_SET();
1616 #endif
1618 sendonnetwork = DeliverOutgoingMessage(ogm);
1620 #if CMK_SMP_NOT_RELAX_LOCK
1621 UNLOCK_AND_UNSET();
1622 #endif
1624 //#if CMK_SMP
1625 // if (sendonnetwork!=0) /* only call server when we send msg on network in SMP */
1626 // CommunicationServerNet(0, COMM_SERVER_FROM_WORKER);
1627 //#endif
1629 MACHSTATE(1,"} LrtsSend");
1630 return (CmiCommHandle)ogm;
1634 /******************************************************************************
1636 * Comm Handle manipulation.
1638 *****************************************************************************/
1640 #if ! CMK_MULTICAST_LIST_USE_COMMON_CODE
1642 /*****************************************************************************
1644 * NET version List-Cast and Multicast Code
1646 ****************************************************************************/
/* Send msg (len bytes) to every PE in pes[0..npes-1]. Each send takes an
   extra reference on msg first and then uses the send-and-free call, so
   the caller's own reference is left untouched. */
void LrtsSyncListSendFn(int npes, int *pes, int len, char *msg)
{
  int idx = 0;

  while (idx < npes) {
    CmiReference(msg);
    CmiSyncSendAndFree(pes[idx], len, msg);
    idx++;
  }
}
1657 CmiCommHandle LrtsAsyncListSendFn(int npes, int *pes, int len, char *msg)
1659 CmiError("ListSend not implemented.");
1660 return (CmiCommHandle) 0;
/*
  Send msg to every PE in the list, then release the caller's reference.
  Because in all net versions the message buffer is not changed after
  CmiSyncSendAndFree returns, the memory-reference trick is used to avoid
  copying the message for each destination.
*/
void LrtsFreeListSendFn(int npes, int *pes, int len, char *msg)
{
  int idx = 0;

  while (idx < npes) {
    CmiReference(msg);
    CmiSyncSendAndFree(pes[idx], len, msg);
    idx++;
  }
  CmiFree(msg);
}
1678 #endif
/* No-op in this machine layer. */
void LrtsDrainResources()
{
}
/* No-op in this machine layer. */
void LrtsPostNonLocal()
{
}
1685 /* The network progress function is used to poll the network for
1686 messages. This flushes receive buffers on some implementations. */
#if CMK_MACHINE_PROGRESS_DEFINED
/* Network progress hook: runs one pass of CommunicationServerNet with a
   first argument of 0 and the COMM_SERVER_FROM_SMP caller tag. */
void CmiMachineProgressImpl()
{
  CommunicationServerNet(0, COMM_SERVER_FROM_SMP);
}
#endif
1694 void LrtsAdvanceCommunication(int whileidle)
1696 #if CMK_SMP
1697 CommunicationServerNet(0, COMM_SERVER_FROM_SMP);
1698 #else
1699 CommunicationServerNet(0, COMM_SERVER_FROM_WORKER);
1700 #endif
1703 /******************************************************************************
1705 * Main code, Init, and Exit
1707 *****************************************************************************/
1709 #if CMK_BARRIER_USE_COMMON_CODE
1711 /* happen at node level */
1712 /* must be called on every PE including communication processors */
1713 void LrtsBarrier()
1715 int numnodes = CmiNumNodesGlobal();
1716 static int barrier_phase = 0;
1718 if (Cmi_charmrun_fd == -1) return; // standalone
1719 if (numnodes == 1) {
1720 return;
1723 ctrl_sendone_locking("barrier",NULL,0,NULL,0);
1724 while (barrierReceived != 1) {
1725 LOCK_IF_AVAILABLE();
1726 ctrl_getone();
1727 UNLOCK_IF_AVAILABLE();
1729 barrierReceived = 0;
1730 barrier_phase ++;
1734 int CmiBarrierZero()
1736 int i;
1737 int numnodes = CmiNumNodesGlobal();
1738 ChMessage msg;
1740 if (Cmi_charmrun_fd == -1) return 0; // standalone
1741 if (numnodes == 1) {
1742 CmiNodeAllBarrier();
1743 return 0;
1746 if (CmiMyRank() == 0) {
1747 char str[64];
1748 sprintf(str, "%d", CmiMyNodeGlobal());
1749 ctrl_sendone_locking("barrier0",str,strlen(str)+1,NULL,0);
1750 if (CmiMyNodeGlobal() == 0) {
1751 while (barrierReceived != 2) {
1752 LOCK_IF_AVAILABLE();
1753 ctrl_getone();
1754 UNLOCK_IF_AVAILABLE();
1756 barrierReceived = 0;
1760 CmiNodeAllBarrier();
1761 return 0;
1764 #endif
1766 /******************************************************************************
1768 * Main code, Init, and Exit
1770 *****************************************************************************/
1772 void LrtsPreCommonInit(int everReturn)
1774 #if !CMK_SMP
1775 #if !CMK_ASYNC_NOT_NEEDED
1776 if (Cmi_asyncio)
1778 CmiSignal(SIGIO, 0, 0, CommunicationInterrupt);
1779 if (!Cmi_netpoll) {
1780 if (dataskt!=-1) CmiEnableAsyncIO(dataskt);
1781 if (Cmi_charmrun_fd!=-1) CmiEnableAsyncIO(Cmi_charmrun_fd);
1784 #endif
1785 #endif
/* Post-initialization hook.
 * - On PE 0, reports netpoll mode; when shared variables are unavailable
 *   and netpoll is off, aborts if the OS memory allocator is in use.
 * - Resets the memory-usage counter when MEMORYUSAGE_OUTPUT is enabled.
 * - When shared variables are unavailable, registers CommunicationPeriodic
 *   as a periodic callback (every period in netpoll mode, every 10ms
 *   otherwise).
 * - On rank 0 with a charmrun connection, registers CmiStdoutFlush every
 *   10ms and arranges for charmrun pings: via pingCharmrunPeriodic when
 *   async I/O is off, or via a SIGALRM interval timer otherwise.
 * - Sets Cmi_clock from GetClock().
 * - Optional extras under IGET_FLOWCONTROL, CMK_RANDOMLY_CORRUPT_MESSAGES
 *   and __ONESIDED_IMPL / __ONESIDED_NO_HARDWARE.
 * `everReturn` is not used. */
1788 void LrtsPostCommonInit(int everReturn)
1790 /* better to show the status here */
1791 if (CmiMyPe() == 0) {
1792 if (Cmi_netpoll == 1) {
1793 CmiPrintf("Charm++> scheduler running in netpoll mode.\n");
1795 #if CMK_SHARED_VARS_UNAVAILABLE
1796 else {
1797 if (CmiMemoryIs(CMI_MEMORY_IS_OS))
1798 CmiAbort("Charm++ Fatal Error: interrupt mode does not work with default system memory allocator. Run with +netpoll to disable the interrupt.");
1800 #endif
1803 #if MEMORYUSAGE_OUTPUT
1804 memoryusage_counter = 0;
1805 #endif
1807 #if CMK_SHARED_VARS_UNAVAILABLE
1808 if (Cmi_netpoll) /*Repeatedly call CommServer*/
1809 CcdCallOnConditionKeep(CcdPERIODIC,
1810 (CcdVoidFn) CommunicationPeriodic, NULL);
1811 else /*Only need this for retransmits*/
1812 CcdCallOnConditionKeep(CcdPERIODIC_10ms,
1813 (CcdVoidFn) CommunicationPeriodic, NULL);
1814 #endif
1816 if (CmiMyRank()==0 && Cmi_charmrun_fd!=-1) {
1817 CcdCallOnConditionKeep(CcdPERIODIC_10ms, (CcdVoidFn) CmiStdoutFlush, NULL);
1818 #if CMK_SHARED_VARS_UNAVAILABLE
1819 if (!Cmi_asyncio) {
1820 /* gm cannot live with setitimer */
1821 CcdCallFnAfter((CcdVoidFn)pingCharmrunPeriodic,NULL,1000);
1823 else {
1824 /*Occasionally ping charmrun, to test if it's dead*/
1825 struct itimerval i;
1826 CmiSignal(SIGALRM, 0, 0, pingCharmrun);
1827 #if MEMORYUSAGE_OUTPUT
1828 i.it_interval.tv_sec = 0;
1829 i.it_interval.tv_usec = 1000000/MEMORYUSAGE_OUTPUT_FREQ;
1830 i.it_value.tv_sec = 0;
1831 i.it_value.tv_usec = 1000000/MEMORYUSAGE_OUTPUT_FREQ;
1832 #else
1833 i.it_interval.tv_sec = 10;
1834 i.it_interval.tv_usec = 0;
1835 i.it_value.tv_sec = 10;
1836 i.it_value.tv_usec = 0;
1837 #endif
1838 setitimer(ITIMER_REAL, &i, NULL);
1841 #if ! CMK_USE_IBVERBS
1842 /*Occasionally check for retransmissions, outgoing acks, etc.*/
1843 /*no need for GM case */
1844 CcdCallFnAfter((CcdVoidFn)CommunicationsClockCaller,NULL,Cmi_comm_clock_delay);
1845 #endif
1846 #endif
1848 /*Initialize the clock*/
1849 Cmi_clock=GetClock();
1852 #ifdef IGET_FLOWCONTROL
1853 /* Call the function once to determine the amount of physical memory available */
1854 getAvailSysMem();
1855 /* Call the function to periodically call the token adapt function */
1856 CcdCallFnAfter((CcdVoidFn)TokenUpdatePeriodic, NULL, 2000); // magic number of 2000ms
1857 CcdCallOnConditionKeep(CcdPERIODIC_10s, // magic number of PERIOD 10s
1858 (CcdVoidFn) TokenUpdatePeriodic, NULL);
1859 #endif
1861 #ifdef CMK_RANDOMLY_CORRUPT_MESSAGES
1862 srand((int)(1024.0*CmiWallTimer()));
1863 if (CmiMyPe()==0)
1864 CmiPrintf("Charm++: Machine layer will randomly corrupt every %d'th message (rand %d)\n",
1865 CMK_RANDOMLY_CORRUPT_MESSAGES,rand());
1866 #endif
1868 #ifdef __ONESIDED_IMPL
1869 #ifdef __ONESIDED_NO_HARDWARE
1870 putSrcHandler = CmiRegisterHandler((CmiHandler)handlePutSrc);
1871 putDestHandler = CmiRegisterHandler((CmiHandler)handlePutDest);
1872 getSrcHandler = CmiRegisterHandler((CmiHandler)handleGetSrc);
1873 getDestHandler = CmiRegisterHandler((CmiHandler)handleGetDest);
1874 #endif
1875 #endif
1879 void LrtsExit()
1881 int i;
1882 machine_initiated_shutdown=1;
1884 CmiStdoutFlush();
1885 if (Cmi_charmrun_fd==-1) {
1886 exit(0); /*Standalone version-- just leave*/
1887 } else {
1888 Cmi_check_delay = 1.0; /* speed up checking of charmrun */
1889 for(i = 0; i < CmiMyNodeSize(); i++) {
1890 ctrl_sendone_locking("ending",NULL,0,NULL,0); /* this causes charmrun to go away, every PE needs to report */
1892 while(1) CommunicationServerNet(5, COMM_SERVER_FROM_SMP);
1896 static void set_signals(void)
1898 if(!Cmi_truecrash) {
1899 signal(SIGSEGV, KillOnAllSigs);
1900 signal(SIGFPE, KillOnAllSigs);
1901 signal(SIGILL, KillOnAllSigs);
1902 signal(SIGINT, KillOnAllSigs);
1903 signal(SIGTERM, KillOnAllSigs);
1904 signal(SIGABRT, KillOnAllSigs);
1905 # if !defined(_WIN32) || defined(__CYGWIN__) /*UNIX-only signals*/
1906 signal(SIGQUIT, KillOnAllSigs);
1907 signal(SIGBUS, KillOnAllSigs);
1908 # if CMK_HANDLE_SIGUSR
1909 signal(SIGUSR1, HandleUserSignals);
1910 signal(SIGUSR2, HandleUserSignals);
1911 # endif
1912 # endif /*UNIX*/
/* Socket idle function to use before addresses have been obtained: just
   calls sleep(0). During the real program, we idle with CmiYield. */
static void obtain_idleFn(void)
{
  sleep(0);
}
/* Socket abort handler installed by LrtsInit: prints the error code and
   message to stderr and calls machine_exit(1). The -1 return only
   satisfies the handler signature if machine_exit were to return. */
static int net_default_skt_abort(int code,const char *msg)
{
  const int exitStatus = 1;

  fprintf(stderr,"Fatal socket error: code %d-- %s\n",code,msg);
  machine_exit(exitStatus);
  return -1;
}
/* Machine-layer initialization.
 * - Sets defaults for Cmi_netpoll, Cmi_idlepoll, Cmi_truecrash,
 *   Cmi_syncprint and Cmi_asyncio, then applies the command-line flags
 *   (+truecrash, ++debug, +netpoll, +netint, +idlepoll, +idlesleep,
 *   +syncprint, +asyncio, +asynciooff, and +commthread in multicore
 *   builds). Later flags override earlier ones where both are given.
 * - Initializes the socket layer, installs net_default_skt_abort and the
 *   atexit check, parses the charmrun start-up parameters, and creates the
 *   scanf mutex.
 * - If a charmrun IP is known: installs signal handlers, opens the datagram
 *   socket, connects to charmrun and redirects stdout/stderr. Otherwise
 *   runs standalone with dataskt and Cmi_charmrun_fd set to -1.
 * - Calls CmiMachineInit, node_addresses_obtain and CmiCommunicationInit,
 *   switches the socket idle function to CmiYield, sets Cmi_check_delay
 *   (effectively disabled in standalone mode) and clears inProgress[].
 * Outputs: *numNodes = Lrts_numNodes, *myNodeID = Lrts_myNode.
 * *argc is not touched in this function. */
1928 void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID)
1930 int i;
1931 Cmi_netpoll = 0;
1932 #if CMK_NETPOLL
1933 Cmi_netpoll = 1;
1934 #endif
1935 #if CMK_WHEN_PROCESSOR_IDLE_USLEEP
1936 Cmi_idlepoll = 0;
1937 #else
1938 Cmi_idlepoll = 1;
1939 #endif
1940 Cmi_truecrash = 0;
1941 if (CmiGetArgFlagDesc(*argv,"+truecrash","Do not install signal handlers") ||
1942 CmiGetArgFlagDesc(*argv,"++debug",NULL /*meaning: don't show this*/)) Cmi_truecrash = 1;
1943 /* netpoll disable signal */
1944 if (CmiGetArgFlagDesc(*argv,"+netpoll","Do not use SIGIO--poll instead")) Cmi_netpoll = 1;
1945 if (CmiGetArgFlagDesc(*argv,"+netint","Use SIGIO")) Cmi_netpoll = 0;
1946 /* idlepoll: poll instead of sleeping when idle */
1947 if (CmiGetArgFlagDesc(*argv,"+idlepoll","Do not sleep when idle")) Cmi_idlepoll = 1;
1948 /* idlesleep: sleep instead of busy-waiting when idle */
1949 if (CmiGetArgFlagDesc(*argv,"+idlesleep","Make sleep calls when idle")) Cmi_idlepoll = 0;
1950 Cmi_syncprint = CmiGetArgFlagDesc(*argv,"+syncprint", "Flush each CmiPrintf to the terminal");
1952 Cmi_asyncio = 1;
1953 #if CMK_ASYNC_NOT_NEEDED
1954 Cmi_asyncio = 0;
1955 #endif
1956 if (CmiGetArgFlagDesc(*argv,"+asyncio","Use async IO")) Cmi_asyncio = 1;
1957 if (CmiGetArgFlagDesc(*argv,"+asynciooff","Don not use async IO")) Cmi_asyncio = 0;
1958 #if CMK_MULTICORE
1959 if (CmiGetArgFlagDesc(*argv,"+commthread","Use communication thread")) {
1960 Cmi_commthread = 1;
1961 #if CMK_SHARED_VARS_POSIX_THREADS_SMP
1962 _Cmi_sleepOnIdle = 1; /* worker thread go sleep */
1963 #endif
1964 if (CmiMyPe() == 0) CmiPrintf("Charm++> communication thread is launched in multicore version. \n");
1966 #endif
1968 skt_init();
1969 /* use special abort handler instead of default_skt_abort to
1970 prevent exit trapped by atexit_check() due to the exit() call */
1971 skt_set_abort(net_default_skt_abort);
1972 atexit(machine_atexit_check);
1973 parse_netstart();
1974 parse_magic();
1975 #if ! CMK_SMP && ! defined(_WIN32)
1976 /* only get forks in non-smp mode */
1977 parse_forks();
1978 #endif
1979 extract_args(*argv);
1980 log_init();
1981 Cmi_scanf_mutex = CmiCreateLock();
1983 /* NOTE: can not actually call timer before timerInit ! GZ */
1984 MACHSTATE2(5,"Init: (netpoll=%d), (idlepoll=%d)",Cmi_netpoll,Cmi_idlepoll);
1986 skt_set_idle(obtain_idleFn);
1987 if (!skt_ip_match(Cmi_charmrun_IP,_skt_invalid_ip)) {
1988 set_signals();
1989 dataskt=skt_datagram(&dataport, Cmi_os_buffer_size);
1990 MACHSTATE2(5,"skt_connect at dataskt:%d Cmi_charmrun_port:%d",dataskt, Cmi_charmrun_port);
1991 Cmi_charmrun_fd = skt_connect(Cmi_charmrun_IP, Cmi_charmrun_port, 1800);
1992 MACHSTATE2(5,"Opened connection to charmrun at socket %d, dataport=%d", Cmi_charmrun_fd, dataport);
1993 skt_tcp_no_nagle(Cmi_charmrun_fd);
1994 CmiStdoutInit();
1995 } else {/*Standalone operation*/
1996 printf("Charm++: standalone mode (not using charmrun)\n");
1997 dataskt=-1;
1998 Cmi_charmrun_fd=-1;
2001 CmiMachineInit(*argv);
2003 node_addresses_obtain(*argv);
2004 MACHSTATE(5,"node_addresses_obtain done");
2006 CmiCommunicationInit(*argv);
2008 skt_set_idle(CmiYield);
2009 Cmi_check_delay = 1.0+0.25*Lrts_numNodes;
2011 if (Cmi_charmrun_fd==-1) /*Don't bother with check in standalone mode*/
2012 Cmi_check_delay=1.0e30;
2014 for(i = 0; i < _Cmi_mynodesize; i++)
2015 inProgress[i] = 0;
2017 *numNodes = Lrts_numNodes;
2018 *myNodeID = Lrts_myNode;
2022 #if CMK_CELL
2024 #include "spert_ppu.h"
/* Run OffloadAPIProgress() between LOCK_IF_AVAILABLE and
   UNLOCK_IF_AVAILABLE (CMK_CELL builds only). */
void machine_OffloadAPIProgress()
{
  LOCK_IF_AVAILABLE();
  OffloadAPIProgress();
  UNLOCK_IF_AVAILABLE();
}
2031 #endif
2033 void LrtsPrepareEnvelope(char *msg, int size)
2035 CMI_MSG_SIZE(msg) = size;
2038 /*@}*/