3 #include "../sockRoutines.h"
4 #include "../sockRoutines.c"
5 #include "../ccs-auth.h"
6 #include "../ccs-auth.c"
7 #include "../ccs-server.h"
8 #include "../ccs-server.c"
23 #include <sys/bproc.h>
35 #if defined(_WIN32) && !defined(__CYGWIN__)
36 /*Win32 has screwy names for the standard UNIX calls:*/
37 #define getcwd _getcwd
38 #define strdup _strdup
39 #define unlink _unlink
41 #define fdopen _fdopen
42 #define ftruncate _chsize
46 #include <sys/timeb.h>
49 #define SIGBUS -1 /*These signals don't exist in Win32*/
54 #include <pwd.h> /*getcwd*/
59 #define PRINT(a) (arg_quiet ? 1 : printf a)
61 #if CMK_SSH_NOT_NEEDED /*No SSH-- use daemon to start node-programs*/
64 #else /*Use SSH to start node-programs*/
66 #if CMK_SSH_IS_A_COMMAND
74 /*#define DEBUGF(x) printf x*/
78 #define MAXPATHLEN 1024
81 const int MAX_NUM_RETRIES = 3;
84 std::map<SOCKET, int> skt_client_table;
85 std::map<std::string, int> host_sizes;
87 const char *nodetab_name(int i);
88 const char *skt_to_name(SOCKET skt)
90 if (skt_client_table.find(skt) != skt_client_table.end()) {
91 return nodetab_name(skt_client_table[skt]);
96 int skt_to_node(SOCKET skt)
98 if (skt_client_table.find(skt) != skt_client_table.end()) {
99 return skt_client_table[skt];
107 /*Hierarchical-start routines*/
108 int mynodes_start; /* To keep a global node numbering */
112 static double ftTimer;
116 int *ssh_pids = NULL;
118 double GetClock(void)
120 #if defined(_WIN32) && !defined(__CYGWIN__)
123 return (tv.time * 1.0 + tv.millitm * 1.0E-3);
127 ok = gettimeofday(&tv, NULL);
129 perror("gettimeofday");
132 return (tv.tv_sec * 1.0 + tv.tv_usec * 1.0E-6);
136 int probefile(const char *path)
138 FILE *f = fopen(path, "r");
145 const char *mylogin(void)
147 #if defined(_WIN32) && !defined(__CYGWIN__)
148 static char name[100] = {'d', 'u', 'n', 'n', 'o', 0};
149 unsigned int len = 100;
150 GetUserName(name, (LPDWORD) &len);
155 self = getpwuid(getuid());
161 sprintf(cmd, "id -u -n");
164 fscanf(p, "%s", uname);
166 return strdup(uname);
173 return self->pw_name;
177 /**************************************************************************
181 * Sends a single UDP packet to the charm developers notifying them
182 * that charm is in use.
184 **************************************************************************/
186 void ping_developers()
190 /*This is the resolved IP address of elegance.cs.uiuc.edu */
191 skt_ip_t destination_ip = skt_lookup_ip("128.174.241.211");
192 unsigned int destination_port = 6571;
193 struct sockaddr_in addr = skt_build_addr(destination_ip, destination_port);
196 skt = socket(AF_INET, SOCK_DGRAM, 0);
197 if (skt == INVALID_SOCKET)
200 sprintf(info, "%s", mylogin());
202 sendto(skt, info, strlen(info), 0, (struct sockaddr *) &addr, sizeof(addr));
207 /**************************************************************************
209 * Pathfix : alters a path according to a set of rewrite rules
211 *************************************************************************/
213 typedef struct s_pathfixlist {
216 struct s_pathfixlist *next;
219 pathfixlist pathfix_append(char *s1, char *s2, pathfixlist l)
221 pathfixlist pf = (pathfixlist) malloc(sizeof(s_pathfixlist));
228 char *pathfix(const char *path, pathfixlist fixes)
230 char buffer[MAXPATHLEN];
232 char buf2[MAXPATHLEN];
235 strcpy(buffer, path);
239 for (l = fixes; l; l = l->next) {
241 offs = strstr(buffer, l->s1);
244 sprintf(buf2, "%s%s%s", buffer, l->s2, offs + len);
245 strcpy(buffer, buf2);
250 return strdup(buffer);
253 char *pathextfix(const char *path, pathfixlist fixes, char *ext)
255 char *newpath = pathfix(path, fixes);
259 ret = (char *) malloc(strlen(newpath) + strlen(ext) + 2);
260 strcpy(ret, newpath);
266 /****************************************************************************
268 * Miscellaneous minor routines.
270 ****************************************************************************/
/* Tell whether c is a quoting character.
   Returns 1 for a single or double quote, 0 for anything else. */
int is_quote(char c)
{
  switch (c) {
  case '\'':
  case '"':
    return 1;
  default:
    return 0;
  }
}
274 void zap_newline(char *s)
277 p = s + strlen(s) - 1;
280 /* in case of DOS ^m */
286 /* get substring from lo to hi, remove quote chars */
287 char *substr(const char *lo, const char *hi)
293 if (is_quote(*(hi - 1)))
296 res = (char *) malloc(1 + len);
297 memcpy(res, lo, len);
302 int subeqs(const char *lo, const char *hi, const char *str)
304 int len = strlen(str);
307 if (memcmp(lo, str, len))
312 /* advance pointer over blank characters */
313 const char *skipblanks(const char *p)
315 while ((*p == ' ') || (*p == '\t'))
320 /* advance pointer over nonblank characters and a quoted string */
321 const char *skipstuff(const char *p)
324 if (*p && (*p == '\'' || *p == '"')) {
329 while (*p && *p != quote)
332 fprintf(stderr, "ERROR> Unmatched quote in nodelist file.\n");
337 while ((*p) && (*p != ' ') && (*p != '\t'))
343 const char *getenv_ssh()
347 e = getenv("CONV_RSH");
348 return e ? e : SSH_CMD;
352 #if !defined(_WIN32) || defined(__CYGWIN__)
353 char *getenv_display()
355 static char result[100], ipBuf[200];
358 e = getenv("DISPLAY");
364 if ((e[0] == ':') || (strncmp(e, "unix:", 5) == 0)) {
365 sprintf(result, "%s:%s", skt_print_ip(ipBuf, skt_my_ip()), p + 1);
370 char *getenv_display_no_tamper()
372 static char result[100], ipBuf[200];
375 e = getenv("DISPLAY");
387 static unsigned int server_port;
388 static char server_addr[1024]; /* IP address or hostname of charmrun*/
389 static SOCKET server_fd;
390 #if CMK_SHRINK_EXPAND
391 char *create_netstart(int node);
392 char *create_oldnodenames();
394 /*****************************************************************************
396 * PPARAM - obtaining "program parameters" from the user. *
398 *****************************************************************************/
400 typedef struct s_ppdef {
406 } where; /*Where to store result*/
407 const char *lname; /*Argument name on command line*/
409 char type; /*One of i, r, s, f.*/
410 bool initFlag; // if 0 means, user input paramater is inserted. 1 means, it holds a default value
411 struct s_ppdef *next;
416 static int pparam_pos;
417 static const char **pparam_argv;
418 static char pparam_optc = '-';
419 char pparam_error[100];
421 static ppdef pparam_find(const char *lname)
424 for (def = ppdefs; def; def = def->next)
425 if (strcmp(def->lname, lname) == 0)
430 static ppdef pparam_cell(const char *lname)
432 ppdef def = pparam_find(lname);
435 def = (ppdef) malloc(sizeof(s_ppdef));
438 def->doc = "(undocumented)";
440 def->initFlag = true;
445 void pparam_int(int *where, int defValue, const char *arg, const char *doc)
447 ppdef def = pparam_cell(arg);
449 def->where.i = where;
455 void pparam_flag(int *where, int defValue, const char *arg, const char *doc)
457 ppdef def = pparam_cell(arg);
459 def->where.f = where;
465 void pparam_real(double *where, double defValue, const char *arg,
468 ppdef def = pparam_cell(arg);
470 def->where.r = where;
476 void pparam_str(const char **where, const char *defValue, const char *arg,
479 ppdef def = pparam_cell(arg);
481 def->where.s = where;
487 static int pparam_setdef(ppdef def, const char *value)
491 def->initFlag = false;
493 fprintf(stderr, "Option \'%s\' is used more than once. Please remove duplicate arguments for this option\n", def->lname);
499 *def->where.i = strtol(value, &p, 10);
504 *def->where.r = strtod(value, &p);
509 /* Parse input string and convert a literal "\n" into '\n'. */
510 *def->where.s = (char *) calloc(strlen(value) + 1, sizeof(char));
511 char *parsed_value = (char *) *def->where.s;
512 for (int i = 0, j = 0; i < strlen(value); i++) {
513 if (i + 1 < strlen(value)) {
514 if (value[i] == '\\' && value[i + 1] == 'n') {
515 parsed_value[j++] = '\n';
520 parsed_value[j++] = value[i];
525 *def->where.f = strtol(value, &p, 10);
533 int pparam_set(char *lname, char *value)
535 ppdef def = pparam_cell(lname);
536 return pparam_setdef(def, value);
539 const char *pparam_getdef(ppdef def)
541 static char result[100];
544 sprintf(result, "%d", *def->where.i);
547 sprintf(result, "%f", *def->where.r);
550 return *def->where.s ? *def->where.s : "";
552 sprintf(result, "%d", *def->where.f);
558 void pparam_printdocs()
561 int len, maxname, maxdoc;
564 for (def = ppdefs; def; def = def->next) {
565 len = strlen(def->lname);
568 len = strlen(def->doc);
572 fprintf(stderr, "\n");
573 fprintf(stderr, "Charmrun Command-line Parameters:\n");
574 for (def = ppdefs; def; def = def->next) {
575 fprintf(stderr, " %c%c%-*s ", pparam_optc, pparam_optc, maxname,
577 fprintf(stderr, " %-*s [%s]\n", maxdoc, def->doc, pparam_getdef(def));
579 fprintf(stderr, "\n");
582 void pparam_delarg(int i)
585 for (j = i; pparam_argv[j]; j++)
586 pparam_argv[j] = pparam_argv[j + 1];
589 int pparam_countargs(const char **argv)
592 for (argc = 0; argv[argc]; argc++)
597 int pparam_parseopt()
601 const char *opt = pparam_argv[pparam_pos];
602 /* handle ++ by skipping to end */
603 if ((opt[1] == '+') && (opt[2] == 0)) {
604 pparam_delarg(pparam_pos);
605 while (pparam_argv[pparam_pos])
609 /* handle + by itself - an error */
611 sprintf(pparam_error, "Illegal option +\n");
614 /* look up option definition */
616 def = pparam_find(opt + 2);
620 if (strlen(opt) <= 2 || !isalpha(opt[2])) {
622 def = pparam_find(name);
627 sprintf(pparam_error, "Option %s not recognized.", opt);
630 /*Unrecognized + option-- skip it.*/
635 /* handle flag-options */
636 if ((def->type == 'f') && (opt[1] != '+') && (opt[2])) {
637 sprintf(pparam_error, "Option %s should not include a value", opt);
640 if (def->type == 'f') {
642 pparam_delarg(pparam_pos);
645 /* handle non-flag options */
646 if ((opt[1] == '+') || (opt[2] == 0)) {
647 pparam_delarg(pparam_pos);
648 opt = pparam_argv[pparam_pos];
651 if ((opt == 0) || (opt[0] == 0)) {
652 sprintf(pparam_error, "%s must be followed by a value.", opt);
655 ok = pparam_setdef(def, opt);
656 pparam_delarg(pparam_pos);
658 sprintf(pparam_error, "Illegal value for %s", opt);
664 int pparam_parsecmd(char optchr, const char **argv)
668 pparam_optc = optchr;
671 const char *opt = pparam_argv[pparam_pos];
674 if (opt[0] != optchr)
676 else if (pparam_parseopt() < 0)
683 char **dupargv(char **argv)
692 for (argc = 0; argv[argc] != NULL; argc++)
694 copy = (char **) malloc((argc + 2) * sizeof(char *));
699 for (argc = 0; argv[argc] != NULL; argc++) {
700 int len = strlen(argv[argc]);
701 copy[argc] = malloc(sizeof(char) * (len + 1));
702 strcpy(copy[argc], argv[argc]);
710 /****************************************************************************
714 * The following module computes a whole bunch of miscellaneous values, which
715 * are all constant throughout the program. Naturally, this includes the
716 * value of the command-line arguments.
718 *****************************************************************************/
720 #define MAX_LINE_LENGTH 1000
722 const char **arg_argv;
725 int arg_requested_pes;
728 const char *arg_nodelist;
729 const char *arg_nodegroup;
730 const char *arg_runscript; /* script to run the node-program with */
731 const char *arg_charmrunip;
734 int arg_debug_no_pause;
735 int arg_debug_no_xrdb;
738 arg_debug_commands; /* commands that are provided by a ++debug-commands
739 flag. These are passed into gdb. */
741 int arg_quiet; /* omit charmrun standard output */
742 int arg_local; /* start node programs directly by exec on localhost */
743 int arg_batch_spawn; /* control starting node programs, several at a time */
744 int arg_scalable_start;
747 int arg_hierarchical_start;
748 int arg_child_charmrun;
750 int arg_help; /* print help message */
751 int arg_ppn; /* pes per node */
754 #if CMK_SHRINK_EXPAND
759 int arg_shrinkexpand;
760 int arg_charmrun_port;
761 const char *arg_shrinkexpand_basedir;
763 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
764 int arg_read_pes = 0;
769 const char *arg_shell;
771 const char *arg_debugger;
772 const char *arg_xterm;
773 const char *arg_display;
775 const char *arg_mylogin;
778 int arg_mpiexec_no_n;
781 const char *arg_nodeprog_a;
782 const char *arg_nodeprog_r;
787 int arg_server_port = 0;
788 const char *arg_server_auth = NULL;
789 int replay_single = 0;
794 int arg_singlemaster;
798 void arg_init(int argc, const char **argv)
800 static char buf[1024];
802 int i, local_def = 0, j;
803 #if CMK_CHARMRUN_LOCAL
804 local_def = 1; /*++local is the default*/
807 pparam_int(&arg_requested_pes, 1, "p", "number of processes to create");
808 pparam_int(&arg_timeout, 60, "timeout",
809 "seconds to wait per host connection");
810 pparam_flag(&arg_verbose, 0, "verbose", "Print diagnostic messages");
811 pparam_flag(&arg_quiet, 0, "quiet", "Omit non-error runtime messages");
812 pparam_str(&arg_nodelist, 0, "nodelist", "file containing list of nodes");
813 pparam_str(&arg_nodegroup, "main", "nodegroup",
814 "which group of nodes to use");
815 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
816 pparam_int(&arg_read_pes, 0, "readpe",
817 "number of host names to read into the host table");
820 #if CMK_CCS_AVAILABLE
821 pparam_flag(&arg_server, 0, "server", "Enable client-server (CCS) mode");
822 pparam_int(&arg_server_port, 0, "server-port",
823 "Port to listen for CCS requests");
824 pparam_str(&arg_server_auth, 0, "server-auth", "CCS Authentication file");
826 pparam_flag(&arg_local, local_def, "local",
827 "Start node programs locally without daemon");
828 pparam_int(&arg_batch_spawn, 0, "batch", "Launch connections to this many "
829 "node programs at a time, avoiding "
830 "overloading charmrun pe");
831 pparam_flag(&arg_scalable_start, 1, "scalable-start", "scalable start");
833 pparam_flag(&arg_hierarchical_start, 0, "hierarchical-start",
834 "hierarchical start");
835 pparam_flag(&arg_child_charmrun, 0, "child-charmrun", "child charmrun");
837 #if CMK_SHRINK_EXPAND
838 pparam_int(&arg_realloc_pes, 1, "newp", "new number of processes to create");
839 pparam_int(&arg_old_pes, 1, "oldp", "old number of processes to create");
840 pparam_flag(&arg_shrinkexpand, 0, "shrinkexpand", "shrink expand");
841 pparam_int(&arg_charmrun_port, 0, "charmrun_port", "make charmrun listen on this port");
843 pparam_flag(&arg_usehostname, 0, "usehostname",
844 "Send nodes our symbolic hostname instead of IP address");
845 pparam_str(&arg_charmrunip, 0, "useip",
846 "Use IP address provided for charmrun IP");
847 pparam_flag(&arg_mpiexec, 0, "mpiexec", "use mpiexec to start jobs");
848 pparam_flag(&arg_mpiexec_no_n, 0, "mpiexec-no-n", "use mpiexec to start jobs without -n procs");
850 pparam_flag(&arg_debug, 0, "debug",
851 "Run each node under gdb in an xterm window");
852 pparam_flag(&arg_debug_no_pause, 0, "debug-no-pause",
853 "Like debug, except doesn't pause at beginning");
854 pparam_str(&arg_debug_commands, 0, "debug-commands",
855 "Commands to be run inside gdb at startup");
856 pparam_flag(&arg_debug_no_xrdb, 0, "no-xrdb", "Don't check xrdb");
/* When the ++charmdebug flag is used, charmrun listens on its stdin for
   commands and forwards them either to the gdb info program (a child) or to
   the per-processor gdbs. stderr is redirected to stdout, so the two streams
   are mixed together. The stderr channel is reused to forward gdb's replies
   back to the Java debugger. */
864 pparam_flag(&arg_charmdebug, 0, "charmdebug",
865 "Used only when charmrun is started by charmdebug");
868 pparam_int(&arg_maxssh, 16, "maxssh",
869 "Maximum number of ssh's to run at a time");
870 pparam_str(&arg_shell, 0, "remote-shell",
871 "which remote shell to use (default $CONV_RSH or " SSH_CMD);
872 pparam_str(&arg_debugger, 0, "debugger", "which debugger to use");
873 pparam_str(&arg_display, 0, "display", "X Display for xterm");
874 pparam_flag(&arg_ssh_display, 0, "ssh-display",
875 "use own X Display for each ssh session");
876 pparam_flag(&arg_in_xterm, 0, "in-xterm", "Run each node in an xterm window");
877 pparam_str(&arg_xterm, 0, "xterm", "which xterm to use");
880 /* options for Scyld */
881 pparam_int(&arg_startpe, 0, "startpe", "first pe to start job(SCYLD)");
882 pparam_int(&arg_endpe, 1000000, "endpe", "last pe to start job(SCYLD)");
883 pparam_flag(&arg_singlemaster, 0, "singlemaster",
884 "Only assign one process to master node(SCYLD)");
885 pparam_flag(&arg_skipmaster, 0, "skipmaster",
886 "Donot assign any process to master node(SCYLD)");
887 if (arg_skipmaster && arg_singlemaster) {
888 PRINT(("Charmrun> 'singlemaster' is ignored due to 'skipmaster'. \n"));
889 arg_singlemaster = 0;
891 pparam_flag(&arg_debug, 0, "debug", "turn on more verbose debug print");
893 pparam_str(&arg_runscript, 0, "runscript", "script to run node-program with");
894 pparam_flag(&arg_help, 0, "help", "print help messages");
895 pparam_int(&arg_ppn, 0, "ppn", "number of pes per node");
896 pparam_flag(&arg_no_va_rand, 0, "no-va-randomization",
897 "Disables randomization of the virtual address space");
899 arg_argv = dupargv(argv);
902 #if CMK_SHRINK_EXPAND
903 /* move it to a function */
905 saved_argv = (char **) malloc(sizeof(char *) * (saved_argc));
906 for (i = 0; i < saved_argc; i++) {
907 // MACHSTATE1(2,"Parameters %s",Cmi_argvcopy[i]);
908 saved_argv[i] = (char *) argv[i];
912 if (pparam_parsecmd('+', argv) < 0) {
913 fprintf(stderr, "ERROR> syntax: %s\n", pparam_error);
918 /* Check for (but do *not* remove) the "-?", "-h", or "--help" flags */
919 for (i = 0; argv[i]; i++) {
920 if (0 == strcmp(argv[i], "-?") || 0 == strcmp(argv[i], "-h") ||
921 0 == strcmp(argv[i], "--help"))
929 if ( arg_mpiexec_no_n ) arg_mpiexec = arg_mpiexec_no_n;
931 #if CMK_SHRINK_EXPAND
932 if (arg_shrinkexpand) {
933 arg_requested_pes = arg_realloc_pes;
934 printf("\n \nCharmrun> %d Reallocated pes\n \n", arg_requested_pes);
939 if (!arg_hierarchical_start || arg_child_charmrun)
942 (argv) + 1; /*Skip over charmrun (0) here and program name (1) later*/
943 arg_argc = pparam_countargs(arg_argv);
945 fprintf(stderr, "ERROR> You must specify a node-program.\n");
951 if (!arg_hierarchical_start || arg_child_charmrun) {
952 // Removing nodeprogram from the list
956 // Removing charmrun from parameters
960 arg_argv[arg_argc] = malloc(sizeof(char) * strlen("++child-charmrun"));
961 strcpy(arg_argv[arg_argc++], "++child-charmrun");
962 arg_argv[arg_argc] = NULL;
969 if (arg_server_port || arg_server_auth)
972 if (arg_verbose) arg_quiet = 0;
974 if (arg_debug || arg_debug_no_pause || arg_in_xterm) {
975 fprintf(stderr, "Charmrun> scalable start disabled under ++debug and ++in-xterm:\n"
976 "NOTE: will make an SSH connection per process launched,"
977 " instead of per physical node.\n");
978 arg_scalable_start = 0;
981 /*Pass ++debug along to program (used by machine.c)*/
982 arg_argv[arg_argc++] = "++debug";
984 /* pass ++quiet to program */
985 if (arg_quiet) arg_argv[arg_argc++] = "++quiet";
987 /* Check for +replay-detail to know we have to load only one single processor
989 for (i = 0; argv[i]; i++) {
990 if (0 == strcmp(argv[i], "+replay-detail")) {
992 arg_requested_pes = 1;
999 "Warning> ++local cannot be used in bproc version, ignored!\n");
1005 /* Find the current value of the CONV_RSH variable */
1008 arg_shell = "mpiexec";
1010 arg_shell = getenv_ssh();
1013 #if !defined(_WIN32) || defined(__CYGWIN__)
1014 /* Find the current value of the DISPLAY variable */
1016 arg_display = getenv_display_no_tamper();
1019 if ((arg_debug || arg_debug_no_pause || arg_in_xterm) && (arg_display == 0)) {
1020 fprintf(stderr, "ERROR> DISPLAY must be set to use debugging mode\n");
1023 if (arg_debug || arg_debug_no_pause)
1024 arg_timeout = 8 * 60 * 60; /* Wait 8 hours for ++debug */
1026 /* default debugger is gdb */
1028 arg_debugger = "gdb";
1029 /* default xterm is xterm */
1031 arg_xterm = "xterm";
1033 arg_mylogin = mylogin();
1036 /* find the current directory, absolute version */
1038 arg_currdir_a = strdup(buf);
1040 /* find the node-program, absolute version */
1041 arg_nodeprog_r = argv[1];
1043 if (arg_nodeprog_r[0] == '-' || arg_nodeprog_r[0] == '+') {
/* A leading '-' or '+' means this is not a node program; most likely
   the user passed a flag that charmrun does not recognize. */
1047 fprintf(stderr, "Charmrun does not recognize the flag '%s'.\n", arg_nodeprog_r);
1048 if (arg_nodeprog_r[0] == '+')
1049 fprintf(stderr, "Charm++'s flags need to be placed *after* the program name.\n");
1054 #if defined(_WIN32) && !defined(__CYGWIN__)
1055 if (argv[1][1] == ':' ||
1056 argv[1][0] == '\\' && argv[1][1] == '\\') { /*E.g.: "C:\foo\bar.exe*/
1058 if (argv[1][0] == '/') { /*E.g.: "\foo\bar"*/
1060 /*Absolute path to node-program*/
1061 arg_nodeprog_a = argv[1];
1063 sprintf(buf, "%s%s%s", arg_currdir_a, DIRSEP, arg_nodeprog_r);
1064 arg_nodeprog_a = strdup(buf);
1066 if (arg_scalable_start) {
1067 PRINT(("Charmrun> scalable start enabled. \n"));
1071 if (arg_hierarchical_start) {
1072 PRINT(("Charmrun> Hierarchical scalable start enabled. \n"));
1073 if (arg_debug || arg_debug_no_pause) {
1074 fprintf(stderr, "Charmrun> Error: ++hierarchical-start does not support "
1075 "debugging mode. \n");
1079 fprintf(stderr, "Charmrun> Warning: you have enabled verbose output with "
1080 "Hierarchical startup, you may get inconsistent verbose "
1081 "outputs. \n++hierarchial-start does not support verbose "
1085 } else if (arg_child_charmrun) {
1088 "Charmrun> Error: ++child-charmrun is not a user-specified flag. \n");
1093 /*If number of pes per node does not divide number of pes*/
1094 if(arg_requested_pes && arg_ppn){
1095 if(arg_requested_pes % arg_ppn != 0){
1096 if(arg_ppn > arg_requested_pes){
1097 arg_ppn=arg_requested_pes;
1098 fprintf(stderr, "Charmrun> warning: forced ++ppn = +p = %d\n",arg_ppn);
1104 "Charmrun> Error: ++ppn (number of pes per node) does not divide +p (number of pes) \n");
1111 /****************************************************************************
1113 * NODETAB: The nodes file and nodes table.
1115 ****************************************************************************/
1117 static int portOk = 1;
1118 static const char *nodetab_tempName = NULL;
1119 char *nodetab_file_find()
1121 char buffer[MAXPATHLEN];
1123 /* Find a nodes-file as specified by ++nodelist */
1125 const char *path = arg_nodelist;
1126 if (probefile(path))
1127 return strdup(path);
1128 fprintf(stderr, "ERROR> No such nodelist file %s\n", path);
1131 /* Find a nodes-file as specified by getenv("NODELIST") */
1132 if (getenv("NODELIST")) {
1133 char *path = getenv("NODELIST");
1134 if (path && probefile(path))
1135 return strdup(path);
1136 // cppcheck-suppress nullPointer
1137 fprintf(stderr, "ERROR> Cannot find nodelist file %s\n", path);
1140 /* Find a nodes-file by looking under 'nodelist' in the current directory */
1141 if (probefile("./nodelist"))
1142 return strdup("./nodelist");
1143 #if defined(_WIN32) && !defined(__CYGWIN__)
1145 nodetab_tempName = strdup(buffer);
1147 if (getenv("HOME")) {
1148 sprintf(buffer, "%s/.nodelist", getenv("HOME"));
1151 if (!probefile(buffer)) {
1152 /*Create a simple nodelist in the user's home*/
1153 FILE *f = fopen(buffer, "w");
1155 fprintf(stderr, "ERROR> Cannot create a 'nodelist' file.\n");
1158 fprintf(f, "group main\nhost localhost\n");
1161 return strdup(buffer);
1164 typedef struct nodetab_host {
1165 const char *name; /*Host DNS name*/
1166 skt_ip_t ip; /*IP address of host*/
1167 pathfixlist pathfixes;
1168 char *ext; /*FIXME: What the heck is this? OSL 9/8/00*/
1169 int cpus; /* # of physical CPUs*/
1170 int rank; /*Rank of this CPU*/
1171 double speed; /*Relative speed of each CPU*/
1172 int nice; /* process priority */
1173 int forks; /* number of processes to fork on remote node */
1174 /*These fields are set during node-startup*/
1175 int dataport; /*UDP port number*/
1176 SOCKET ctrlfd; /*Connection to control port*/
1178 const char *shell; /*Ssh to use*/
1179 const char *debugger; /*Debugger to use*/
1180 const char *xterm; /*Xterm to use*/
1181 const char *login; /*User login name to use*/
1182 const char *passwd; /*User login password*/
1183 const char *setup; /*Commands to execute on login*/
1195 nodetab_host **nodetab_table;
1198 int *nodetab_rank0_table;
1199 int nodetab_rank0_size;
1201 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1205 void nodetab_reset(nodetab_host *h)
1207 h->name = "SET_H->NAME";
1208 h->ip = _skt_invalid_ip;
1219 h->shell = arg_shell;
1220 h->debugger = arg_debugger;
1221 h->xterm = arg_xterm;
1222 h->login = arg_mylogin;
1228 void nodetab_add(nodetab_host *h)
1231 nodetab_rank0_table[nodetab_rank0_size++] = nodetab_size;
1232 nodetab_table[nodetab_size] = (nodetab_host *) malloc(sizeof(nodetab_host));
1236 skt_print_ip(ips, h->ip);
1237 printf("Charmrun> adding client %d: \"%s\", IP:%s\n", nodetab_size, h->name,
1241 *nodetab_table[nodetab_size++] = *h;
1244 void nodetab_makehost(const char *name, nodetab_host *h)
1246 h->name = strdup(name);
1247 h->ip = skt_innode_lookup_ip(name);
1248 if (skt_ip_match(h->ip, _skt_invalid_ip)) {
1250 /* only the master node is used */
1251 if (!(1 == arg_requested_pes && atoi(name) == -1))
1254 fprintf(stderr, "ERROR> Cannot obtain IP address of %s\n", name);
1260 const char *nodetab_args(const char *args, nodetab_host *h)
1264 while (*args != 0) {
1265 const char *b1 = skipblanks(args), *e1 = skipstuff(b1);
1266 const char *b2 = skipblanks(e1), *e2 = skipstuff(b2);
1268 b1++; /*Skip over "++" on parameters*/
1270 if (subeqs(b1, e1, "login"))
1271 h->login = substr(b2, e2);
1272 else if (subeqs(b1, e1, "passwd"))
1273 h->passwd = substr(b2, e2);
1274 else if (subeqs(b1, e1, "setup"))
1275 h->setup = strdup(b2);
1276 else if (subeqs(b1, e1, "shell"))
1277 h->shell = substr(b2, e2);
1278 else if (subeqs(b1, e1, "debugger"))
1279 h->debugger = substr(b2, e2);
1280 else if (subeqs(b1, e1, "xterm"))
1281 h->xterm = substr(b2, e2);
1284 if (subeqs(b1, e1, "speed"))
1285 h->speed = atof(b2);
1286 else if (subeqs(b1, e1, "cpus")) {
1288 h->cpus = atol(b2); /* ignore if there is ++ppn */
1289 } else if (subeqs(b1, e1, "pathfix")) {
1290 const char *b3 = skipblanks(e2), *e3 = skipstuff(b3);
1291 args = skipblanks(e3);
1293 pathfix_append(substr(b2, e2), substr(b3, e3), h->pathfixes);
1294 e2 = e3; /* for the skipblanks at the end */
1295 } else if (subeqs(b1, e1, "ext"))
1296 h->ext = substr(b2, e2);
1297 else if (subeqs(b1, e1, "nice"))
1301 args = skipblanks(e2);
1306 /* setup nodetab as localhost only */
1307 void nodetab_init_for_local()
1309 int tablesize, i, done = 0;
1312 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1313 if (arg_read_pes == 0) {
1314 arg_read_pes = arg_requested_pes;
1318 tablesize = arg_requested_pes;
1319 nodetab_table = (nodetab_host **) malloc(tablesize * sizeof(nodetab_host *));
1320 nodetab_rank0_table = (int *) malloc(tablesize * sizeof(int));
1321 nodetab_max = tablesize;
1323 nodetab_reset(&group);
1326 #if CMK_SHARED_VARS_UNAVAILABLE
1328 fprintf(stderr, "Warning> Invalid ppn %d in nodelist ignored.\n", arg_ppn);
1332 group.cpus = arg_ppn;
1335 const char *hostname = "127.0.0.1";
1336 for (group.rank = 0; group.rank < arg_ppn; group.rank++) {
1337 nodetab_makehost(hostname, &group);
1338 nodetab_add(&group);
1339 if (++i == arg_requested_pes) {
1345 host_sizes["127.0.0.1"] = (arg_requested_pes + arg_ppn - 1) / arg_ppn;
1349 /* Sets the parent field of hosts to point to their parent charmrun. The root
1350 * charmrun will create children for all hosts which are parent of at least one
1353 int nodes_per_child;
1354 int *nodetab_unique_table;
1355 int nodetab_unique_size;
/* Forward declaration of nodetab_name(); the return type agrees with the
   function's definition (const char *), since a redeclaration that differs
   only in return type is ill-formed. */
const char *nodetab_name(int i);
1357 void nodetab_init_hierarchical_start(void)
1361 nodetab_unique_size = 0;
1362 nodetab_unique_table = (int *) malloc(nodetab_rank0_size * sizeof(int));
1363 while (node_start < nodetab_rank0_size) {
1364 nodetab_unique_table[nodetab_unique_size++] = node_start;
1365 node_name = nodetab_name(node_start);
1368 } while (node_start < nodetab_rank0_size &&
1369 (!strcmp(nodetab_name(node_start), node_name)));
1371 branchfactor = ceil(sqrt(nodetab_unique_size));
1372 nodes_per_child = round(nodetab_unique_size * 1.0 / branchfactor);
1376 #if CMK_SHRINK_EXPAND
1377 int isPresent(const char *names, char **listofnames)
1380 for (k = 0; k < arg_old_pes; k++) {
1381 if (strcmp(names, listofnames[k]) == 0)
1386 void parse_oldnodenames(char **oldnodelist)
1389 ns = getenv("OLDNODENAMES");
1391 char buffer[1024 * 1000];
1392 for (i = 0; i < arg_old_pes; i++) {
1393 oldnodelist[i] = (char *) malloc(100 * sizeof(char));
1394 int nread = sscanf(ns, "%s %[^\n]", oldnodelist[i], buffer);
1404 nodetab_host global, group, host;
1405 char input_line[MAX_LINE_LENGTH];
1406 int rightgroup, i, remain, lineNo;
1407 /* Store the previous host so we can make sure we aren't mixing localhost and
1409 char *prevHostName = NULL;
1410 std::vector< std::pair<int, nodetab_host> > hosts;
1411 std::multimap<int, nodetab_host> binned_hosts;
1413 /* if arg_local is set, ignore the nodelist file */
1414 if (arg_local || arg_mpiexec) {
1415 nodetab_init_for_local();
1419 /* Open the NODES_FILE. */
1420 nodesfile = nodetab_file_find();
1422 fprintf(stderr, "Charmrun> using %s as nodesfile\n", nodesfile);
1423 if (!(f = fopen(nodesfile, "r"))) {
1424 fprintf(stderr, "ERROR> Cannot read %s: %s\n", nodesfile, strerror(errno));
1429 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1430 if (arg_read_pes == 0) {
1431 arg_read_pes = arg_requested_pes;
1434 (nodetab_host **) malloc(arg_read_pes * sizeof(nodetab_host *));
1435 nodetab_rank0_table = (int *) malloc(arg_read_pes * sizeof(int));
1436 nodetab_max = arg_read_pes;
1437 PRINT(("arg_read_pes %d arg_requested_pes %d\n", arg_read_pes,
1438 arg_requested_pes));
1441 (nodetab_host **) malloc(arg_requested_pes * sizeof(nodetab_host *));
1442 nodetab_rank0_table = (int *) malloc(arg_requested_pes * sizeof(int));
1443 nodetab_max = arg_requested_pes;
1446 nodetab_reset(&global);
1448 rightgroup = (strcmp(arg_nodegroup, "main") == 0);
1454 while (fgets(input_line, sizeof(input_line) - 1, f) != 0) {
1455 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1456 if (nodetab_size == arg_read_pes)
1459 if (nodetab_size == arg_requested_pes)
1462 if (input_line[0] == '#')
1464 zap_newline(input_line);
1465 if (!nodetab_args(input_line, &global)) {
1466 /*An option line-- also add options to current group*/
1467 nodetab_args(input_line, &group);
1468 } else { /*Not an option line*/
1469 const char *b1 = skipblanks(input_line), *e1 = skipstuff(b1);
1470 const char *b2 = skipblanks(e1), *e2 = skipstuff(b2);
1471 const char *b3 = skipblanks(e2);
1472 if (subeqs(b1, e1, "host")) {
1474 /* check if we have a previous host, if it's different than our
1475 * current host, and if one of them is localhost */
1476 if (prevHostName && strcmp(b2, prevHostName) &&
1477 (!strcmp(b2, "localhost") ||
1478 !strcmp(prevHostName, "localhost"))) {
1479 fprintf(stderr, "ERROR> Mixing localhost with other hostnames will "
1480 "lead to connection failures.\n");
1481 fprintf(stderr, "ERROR> The problematic line in group %s is: %s\n",
1482 arg_nodegroup, input_line);
1486 nodetab_args(b3, &host);
1488 /* Non-SMP workaround */
1489 int cpus = host.cpus;
1491 for (int rank = 0; rank < cpus; rank++)
1493 for (host.rank = 0; host.rank < host.cpus; host.rank++)
1496 nodetab_makehost(substr(b2, e2), &host);
1497 hosts.push_back(std::make_pair(lineNo, host));
1500 prevHostName = strdup(b2);
1502 } else if (subeqs(b1, e1, "group")) {
1504 nodetab_args(b3, &group);
1505 rightgroup = subeqs(b2, e2, arg_nodegroup);
1506 } else if (b1 != b3) {
1507 fprintf(stderr, "ERROR> unrecognized command in nodesfile:\n");
1508 fprintf(stderr, "ERROR> %s\n", input_line);
1515 if (nodetab_tempName != NULL)
1516 unlink(nodetab_tempName);
1518 if (hosts.size() == 0) {
1519 fprintf(stderr, "ERROR> No hosts in group %s\n", arg_nodegroup);
1523 /*Wrap nodes in table around if there aren't enough yet*/
1524 for (int i = 0; binned_hosts.size() < arg_requested_pes; ++i) {
1525 binned_hosts.insert(hosts[i % hosts.size()]);
1526 host_sizes[hosts[i % hosts.size()].second.name]++;
1529 /* Only increase counter for each new process */
1530 for (std::map<std::string, int>::iterator it = host_sizes.begin();
1531 it != host_sizes.end(); ++it) {
1532 it->second = (it->second + arg_ppn - 1) / arg_ppn;
1535 for (std::multimap<int, nodetab_host>::iterator it = binned_hosts.begin();
1536 it != binned_hosts.end(); ++it) {
1537 nodetab_add(&(it->second));
1541 /*Clip off excess CPUs at end*/
1542 for (i = 0; i < nodetab_size; i++) {
1543 if (nodetab_table[i]->rank == 0)
1544 remain = nodetab_size - i;
1545 if (nodetab_table[i]->cpus > remain)
1546 nodetab_table[i]->cpus = remain;
1549 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1550 loaded_max_pe = arg_requested_pes - 1;
1553 if (arg_hierarchical_start)
1554 nodetab_init_hierarchical_start();
1559 #if CMK_SHRINK_EXPAND
1560 if (arg_shrinkexpand &&
1561 (arg_requested_pes > arg_old_pes)) // modify nodetable ordering
1563 nodetab_host **reordered_nodetab_table =
1564 (nodetab_host **) malloc(arg_requested_pes * sizeof(nodetab_host *));
1565 char **oldnodenames = (char **) malloc(arg_old_pes * sizeof(char *));
1567 parse_oldnodenames(oldnodenames);
1568 int newpes = arg_old_pes;
1571 for (k = 0; k < nodetab_size; k++) {
1572 if (isPresent(nodetab_table[k]->name, oldnodenames))
1573 reordered_nodetab_table[oldpes++] = nodetab_table[k];
1575 reordered_nodetab_table[newpes++] = nodetab_table[k];
1577 free(nodetab_table);
1578 nodetab_table = reordered_nodetab_table;
/* Return the node-table entry stored at index i of nodetab_table, where i
 * is a PE (processor) number.  When nodetab_table has not been set up yet
 * (it is still 0) an error message is written to stderr.
 * NOTE(review): i is not range-checked on the lines visible here. */
1583 /* Given a processor number, look up the nodetab info: */
1584 nodetab_host *nodetab_getinfo(int i)
1586 if (nodetab_table == 0) {
1587 fprintf(stderr, "ERROR> Node table not initialized.\n");
1590 return nodetab_table[i];
/* Node-number flavour of nodetab_getinfo(): the node number i is first
 * translated to a table index through nodetab_rank0_table[i], and that index
 * is then handed to nodetab_getinfo(). */
1593 /* Given a node number, look up the nodetab info: */
1594 nodetab_host *nodetab_getnodeinfo(int i)
1596 return nodetab_getinfo(nodetab_rank0_table[i]);
1599 /*These routines all take *PE* numbers (NOT node numbers!)*/
1600 const char *nodetab_name(int i) { return nodetab_getinfo(i)->name; }
1601 pathfixlist nodetab_pathfixes(int i) { return nodetab_getinfo(i)->pathfixes; }
1602 char *nodetab_ext(int i) { return nodetab_getinfo(i)->ext; }
1603 skt_ip_t nodetab_ip(int i) { return nodetab_getinfo(i)->ip; }
1604 unsigned int nodetab_cpus(int i) { return nodetab_getinfo(i)->cpus; }
1605 unsigned int nodetab_rank(int i) { return nodetab_getinfo(i)->rank; }
1606 int nodetab_dataport(int i) { return nodetab_getinfo(i)->dataport; }
1607 int nodetab_nice(int i) { return nodetab_getinfo(i)->nice; }
1608 SOCKET nodetab_ctrlfd(int i) { return nodetab_getinfo(i)->ctrlfd; }
1610 const char *nodetab_setup(int i) { return nodetab_getinfo(i)->setup; }
1611 const char *nodetab_shell(int i) { return nodetab_getinfo(i)->shell; }
1612 const char *nodetab_debugger(int i) { return nodetab_getinfo(i)->debugger; }
1613 const char *nodetab_xterm(int i) { return nodetab_getinfo(i)->xterm; }
1614 const char *nodetab_login(int i) { return nodetab_getinfo(i)->login; }
1615 const char *nodetab_passwd(int i) { return nodetab_getinfo(i)->passwd; }
1618 /****************************************************************************
1622 * The global list of node PEs, IPs, and port numbers.
1623 * Stored in ChMachineInt_t format so the table can easily be sent
1624 * back to the nodes.
1626 ****************************************************************************/
1628 static ChNodeinfo *nodeinfo_arr; /*Indexed by node number.*/
/* Allocate nodeinfo_arr with room for nodetab_rank0_size ChNodeinfo records
 * (the array is indexed by node number).
 * NOTE(review): the malloc() result is not checked on the visible lines. */
1630 void nodeinfo_allocate(void)
1632 nodeinfo_arr = (ChNodeinfo *) malloc(nodetab_rank0_size * sizeof(ChNodeinfo));
/* Record the registration data sent by one node.
 *   in     - node number plus ChNodeinfo payload (ChMessageInt_t encoded)
 *   ctrlfd - control socket the registration arrived on
 * The node number is checked against [0, nodetab_rank0_size).  The copy of
 * the payload gets its nPE and nProcessesInPhysNode fields filled in from
 * the node table and host_sizes, and is stored in nodeinfo_arr[node].
 * ctrlfd (and, on the path that decodes it, the data port) is copied into
 * the table entry of every PE of the node.  A data port of 0 is reported as
 * "could not initialize network". */
1634 void nodeinfo_add(const ChSingleNodeinfo *in, SOCKET ctrlfd)
1636 int node = ChMessageInt(in->nodeNo);
1637 ChNodeinfo i = in->info;
1640 unsigned int dataport;
1642 if (node < 0 || node >= nodetab_rank0_size) {
1643 fprintf(stderr, "Unexpected node %d registered!\n", node);
1646 nt = nodetab_rank0_table[node]; /*Nodetable index for this node*/
1647 i.nPE = ChMessageInt_new(nodetab_cpus(nt));
1648 i.nProcessesInPhysNode = ChMessageInt_new(host_sizes[nodetab_name(nt)]);
1651 nodetab_getinfo(nt)->ip = i.IP; /* get IP */
1652 i.IP = nodetab_ip(nt);
1654 nodeinfo_arr[node] = i;
1655 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
1656 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
1658 /* PRINT(("Charmrun> client %d connected\n", nt)); */
1660 dataport = ChMessageInt(i.dataport);
1661 if (0 == dataport) {
1662 fprintf(stderr, "Node %d could not initialize network!\n", node);
1665 nodeinfo_arr[node] = i;
1666 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
1667 nodetab_table[nt + pe]->dataport = dataport;
1668 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
1670 nodetab_table[nt + pe]->qp = i.qp;
1675 skt_print_ip(ips, nodetab_ip(nt));
1676 printf("Charmrun> client %d connected (IP=%s data_port=%d)\n", nt, ips,
1679 printf("Charmrun> client %d lid=%d qpn=%i psn=%i\n", nt,
1680 ChMessageInt(i.qp.lid), ChMessageInt(i.qp.qpn),
1681 ChMessageInt(i.qp.psn));
1687 /****************************************************************************
1691 * You can use this module to read the standard input. It supports
1692 * one odd function, input_scanf_chars, which is what makes it useful.
1693 * If you use this module, you may not read stdin yourself.
1695 * void input_init(void)
1696 * char *input_gets(void)
1697 * char *input_scanf_chars(char *fmt)
1699 ****************************************************************************/
/* Read one more line from stdin with fgets() and append it to input_buffer.
 * The buffer is grown with realloc() to hold the old text, the new line and
 * the terminating NUL; the new line is then copied in at offset len.
 * End-of-file on stdin and a failed realloc() are both reported on stderr. */
1706 char *new_input_buffer;
1707 int len = input_buffer ? strlen(input_buffer) : 0;
1709 if (fgets(line, 1023, stdin) == 0) {
1710 fprintf(stderr, "end-of-file on stdin");
1713 new_input_buffer = (char *) realloc(input_buffer, len + strlen(line) + 1);
1714 if (new_input_buffer == NULL) {
1715 // could not realloc
1717 fprintf(stderr, "Charmrun: Realloc failed");
1720 input_buffer = new_input_buffer;
1723 strcpy(input_buffer + len, line);
1726 void input_init() { input_buffer = strdup(""); }
/* Split input_buffer after its first nchars characters.  res is a substr()
 * copy of that prefix; a second substr() call copies the text from offset
 * nchars up to the end of the buffer.
 * NOTE(review): presumably the second copy replaces input_buffer and res is
 * returned -- the lines doing so are not visible here; confirm. */
1728 char *input_extract(int nchars)
1730 char *res = substr(input_buffer, input_buffer + nchars);
1732 substr(input_buffer + nchars, input_buffer + strlen(input_buffer));
/* Find the first newline in input_buffer and extract everything up to and
 * including it (len + 1 characters) with input_extract(). */
1743 p = strchr(input_buffer, '\n');
1748 len = p - input_buffer;
1749 res = input_extract(len + 1);
1754 /*FIXME: I am terrified by this routine. OSL 9/8/00*/
/* Apply a scanf-style format to the buffered stdin text.
 * The current contents of input_buffer are written to a temporary file,
 * fscanf() is run on that file with fmt, and input_extract(pos) supplies the
 * return value.
 * NOTE(review): pos is presumably the file position after the scan, i.e.
 * the number of characters the format consumed -- confirm.
 * NOTE(review): one variant names the temporary file with tmpnam(), which
 * is open to a race between choosing the name and open(); fmt is handed to
 * fscanf() as-is with the same buf repeated for every conversion. */
1755 char *input_scanf_chars(char *fmt)
1765 strcpy(tmp, "/tmp/fnordXXXXXX");
1768 char *tmp = tmpnam(NULL); /*This was once /tmp/fnord*/
1771 fd = open(tmp, O_RDWR | O_CREAT | O_TRUNC, 0664);
1773 fprintf(stderr, "cannot open temp file /tmp/fnord");
1776 file = fdopen(fd, "r+");
1780 len = strlen(input_buffer);
1782 fwrite(input_buffer, len, 1, file);
1786 fscanf(file, fmt, buf, buf, buf, buf, buf, buf, buf, buf, buf, buf, buf,
1787 buf, buf, buf, buf, buf, buf, buf);
1793 return input_extract(pos);
1796 /***************************************************************************
1798 Charmrun forwards CCS requests on to the node-programs' control
1800 ***************************************************************************/
1802 #if CMK_CCS_AVAILABLE
1804 /*The Ccs Server socket became active--
1805 rec'v the message and respond to the request,
1806 by forwarding the request to the appropriate node.
/* Service the CCS server socket: receive one request with
 * CcsServer_recvRequest(), decode its destination PE and length, and forward
 * it to the destination's control socket as a "req_fw" charmrun message
 * (header and request data sent together with skt_sendV).
 * A PE outside (-nodetab_size, nodetab_size) is rejected with an error on
 * stderr and an empty reply unless replay_single is set (this check is
 * compiled only when CMK_BIGSIM_CHARM is off).  A PE below -1 is a
 * multicast: the request length is enlarged by |pe| ChMessageInt_t entries
 * and the first PE is read from the start of reqData. */
1808 void req_ccs_connect(void)
1810 const void *bufs[3];
1813 ChMessageHeader ch; /*Make a charmrun header*/
1814 CcsImplHeader hdr; /*Ccs internal header*/
1816 void *reqData; /*CCS request data*/
1818 if (0 == CcsServer_recvRequest(&h.hdr, &reqData))
1819 return; /*Malformed request*/
1820 pe = ChMessageInt(h.hdr.pe);
1821 reqBytes = ChMessageInt(h.hdr.len);
1824 /*Treat -1 as broadcast and sent to 0 as root of the spanning tree*/
1827 if ((pe <= -nodetab_size || pe >= nodetab_size) && 0 == replay_single) {
1828 /*Treat out of bound values as errors. Helps detecting bugs*/
1829 /* But when virtualized with Bigemulator, we can have more pes than nodetabs */
1830 /* TODO: We should somehow check boundaries also for bigemulator... */
1831 #if !CMK_BIGSIM_CHARM
1832 if (pe == -nodetab_size)
1833 fprintf(stderr, "Invalid processor index in CCS request: are you trying "
1834 "to do a broadcast instead?");
1836 fprintf(stderr, "Invalid processor index in CCS request.");
1837 CcsServer_sendReply(&h.hdr, 0, 0);
1841 } else if (pe < -1) {
1842 /*Treat negative values as multicast to a number of processors specified by
1844 The pes to multicast to follows sits at the beginning of reqData*/
1845 reqBytes -= pe * sizeof(ChMessageInt_t);
1846 pe = ChMessageInt(*(ChMessageInt_t *) reqData);
1849 if (!check_stdio_header(&h.hdr)) {
1852 #if LOOPBACK /*Immediately reply "there's nothing!" (for performance \
1854 CcsServer_sendReply(&h.hdr, 0, 0);
1857 #if CMK_BIGSIM_CHARM
1858 destpe = destpe % nodetab_size;
1862 /*Fill out the charmrun header & forward the CCS request*/
1863 ChMessageHeader_new("req_fw", sizeof(h.hdr) + reqBytes, &h.ch);
1866 lens[0] = sizeof(h);
1869 skt_sendV(nodetab_ctrlfd(destpe), 2, bufs, lens);
1877 Forward the CCS reply (if any) from this client back to the
1878 original network requestor, on the original request socket.
/* Relay a CCS reply that a node-program sent on srcFd.
 * The CCS header is read from srcFd first.  A message shorter than 4 KB, or
 * one whose header has attr.auth set, is received in full and passed to
 * CcsServer_sendReply().  Anything longer is streamed: the total length is
 * sent to hdr.replyFd, then the payload is received and re-sent piece by
 * piece; once a send to the destination has failed, further sends are
 * skipped while receiving continues.  reply_abortFn is installed as the
 * socket abort handler for the streaming path. */
1880 int req_ccs_reply_fw(ChMessage *msg, SOCKET srcFd)
1882 int len = msg->len; /* bytes of data remaining to receive */
1884 /* First pull down the CCS header sent by the client. */
1886 skt_recvN(srcFd, &hdr, sizeof(hdr));
1889 #define m (4 * 1024) /* packets of message to recv/send at once */
1890 if (len < m || hdr.attr.auth) { /* short or authenticated message: grab the
1891 whole thing first */
1892 void *data = malloc(len);
1893 skt_recvN(srcFd, data, len);
1894 CcsServer_sendReply(&hdr, len, data);
1896 } else { /* long messages: packetize (for pipelined sending; a 2x bandwidth
1898 ChMessageInt_t outLen;
1899 int destFd; /* destination for data */
1900 skt_abortFn old = skt_set_abort(reply_abortFn);
1903 destFd = ChMessageInt(hdr.replyFd);
1904 outLen = ChMessageInt_new(len);
1905 skt_sendN(destFd, &outLen, sizeof(outLen)); /* first comes the length */
1911 skt_recvN(srcFd, buf, r);
1912 if (0 == destErrs) /* don't keep sending to dead clients, but *do* clean
1914 destErrs |= skt_sendN(destFd, buf, r);
1926 int req_ccs_reply_fw(ChMessage *msg, SOCKET srcFd) {}
1927 #endif /*CMK_CCS_AVAILABLE*/
1929 /****************************************************************************
1933 * The request servicer accepts connections on a TCP port. The client
1934 * sends a sequence of commands (each is one line). It then closes the
1935 * connection. The server must then contact the client, sending replies.
1937 ****************************************************************************/
1938 /** Macro to switch on the case when charmrun stays up even if
1939 one of the processor crashes*/
1940 /*#define __FAULT__*/
1942 SOCKET *req_clients; /*TCP request sockets for each node*/
1944 SOCKET *charmrun_fds;
1946 int req_nClients; /*Number of entries in above list (==nodetab_rank0_size)*/
1949 /* socket and std streams for the gdb info program */
1950 int gdb_info_pid = 0;
1951 int gdb_info_std[3];
1952 FILE *gdb_stream = NULL;
1955 #define REQ_FAILED -1
/* Reply to a child charmrun.  The reply itself is sent with req_reply();
 * if that does not return REQ_OK nothing further is exchanged.  Otherwise a
 * SOCKET value is read from fd and the same value is written back to fd.
 * NOTE(review): presumably this tells the child which of its own client
 * sockets the reply is for (req_forward_client reads a SOCKET from its
 * parent before replying) -- confirm. */
1958 int req_reply_child(SOCKET fd, const char *type, const char *data, int dataLen)
1961 int status = req_reply(fd, type, data, dataLen);
1962 if (status != REQ_OK)
1965 skt_recvN(fd, (const char *) &clientFd, sizeof(SOCKET));
1966 skt_sendN(fd, (const char *) &clientFd, sizeof(fd));
1971 * @brief This is the only place where charmrun talks back to anyone.
/* Send one reply on fd: a ChMessageHeader built from type and dataLen,
 * followed by dataLen bytes taken from data.
 * fd == INVALID_SOCKET is tested for before anything is sent. */
1973 int req_reply(SOCKET fd, const char *type, const char *data, int dataLen)
1975 ChMessageHeader msg;
1976 if (fd == INVALID_SOCKET)
1978 ChMessageHeader_new(type, dataLen, &msg);
1979 skt_sendN(fd, (const char *) &msg, sizeof(msg));
1980 skt_sendN(fd, data, dataLen);
1984 /* Request handlers:
1985 When a client asks us to do something, these are the
1986 routines that actually respond to the request.
1988 /*Stash this new node's control and data ports.
/* Process a node registration.  msg->data is interpreted as a
 * ChSingleNodeinfo and, after a length check, handed to nodeinfo_add()
 * together with the control socket fd.
 * Two length checks are visible: one expects the structure to be followed
 * by nodetab_rank0_size - 1 ChInfiAddr records, which are copied into a
 * freshly malloc'ed info.qpList; the other expects exactly
 * sizeof(ChSingleNodeinfo).  A mismatch prints "Bad initnode data length"
 * along with msg->data as a possible reason. */
1990 int req_handle_initnode(ChMessage *msg, SOCKET fd)
1994 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
1995 // printf("Charmrun> msg->len %d sizeof(ChSingleNodeinfo) %d
1996 // sizeof(ChInfiAddr) %d
1997 //\n",msg->len,sizeof(ChSingleNodeinfo),sizeof(ChInfiAddr));
1999 sizeof(ChSingleNodeinfo) +
2000 (nodetab_rank0_size - 1) * sizeof(ChInfiAddr)) {
2001 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
2002 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg->data);
2005 nodeInfo->info.qpList =
2006 (ChInfiAddr *) malloc(sizeof(ChInfiAddr) * (nodetab_rank0_size - 1));
2007 memcpy((char *) nodeInfo->info.qpList, &msg->data[sizeof(ChSingleNodeinfo)],
2008 sizeof(ChInfiAddr) * (nodetab_rank0_size - 1));
2009 /* for(i=0;i<nodetab_rank0_size-1;i++){
2010 printf("i %d 0x%0x 0x%0x
2011 0x%0x\n",i,ChMessageInt(nodeInfo->info.qpList[i].lid),ChMessageInt(nodeInfo->info.qpList[i].qpn),ChMessageInt(nodeInfo->info.qpList[i].psn));
2014 if (msg->len != sizeof(ChSingleNodeinfo)) {
2015 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
2016 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg->data);
2020 nodeinfo_add((ChSingleNodeinfo *) msg->data, fd);
2025 * @brief Gets the array of node numbers, IPs, and ports. This is used by the
2027 * to talk to one another.
/* Send the complete node table to fd: a message header, the node count
 * (nodetab_rank0_size) and then the nodeinfo_arr array.
 * msg is not referenced on the visible lines; callers in this file pass
 * NULL for it. */
2029 int req_handle_initnodetab(ChMessage *msg, SOCKET fd)
2031 ChMessageHeader hdr;
2032 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
2033 ChMessageHeader_new(
2035 sizeof(ChMessageInt_t) + sizeof(ChNodeinfo) * nodetab_rank0_size, &hdr);
2036 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
2037 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
2038 skt_sendN(fd, (const char *) nodeinfo_arr,
2039 sizeof(ChNodeinfo) * nodetab_rank0_size);
2045 /* Used for fault tolerance with hierarchical start */
/* Same payload as req_handle_initnodetab() -- node count followed by
 * nodeinfo_arr -- but the message is tagged "initnttab".
 * msg is not referenced on the visible lines. */
2046 int req_handle_initnodetab1(ChMessage *msg, SOCKET fd)
2048 ChMessageHeader hdr;
2049 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
2050 ChMessageHeader_new("initnttab", sizeof(ChMessageInt_t) +
2051 sizeof(ChNodeinfo) * nodetab_rank0_size,
2053 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
2054 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
2055 skt_sendN(fd, (const char *) nodeinfo_arr,
2056 sizeof(ChNodeinfo) * nodetab_rank0_size);
2060 /*Get the array of node numbers, IPs, and ports.
2061 This is used by the node-programs to talk to one another.
2063 static int parent_charmrun_fd = -1;
/* Tell child charmrun number `client` which nodes it is responsible for.
 * The child's range [rank0_start, rank0_finish) is derived from
 * nodetab_unique_table and nodes_per_child; the last child
 * (client == branchfactor - 1) takes every remaining node up to
 * nodetab_rank0_size.  The matching nodetab_rank0_table entries are sent to
 * fd as an "initnodetab" message: range size, total node count, then the
 * entries themselves. */
2064 int req_handle_initnodedistribution(ChMessage *msg, SOCKET fd, int client)
2067 nodes_per_child; /* rounding should help in better load distribution*/
2068 int rank0_start = nodetab_unique_table[client * nodes_per_child];
2070 if (client == branchfactor - 1) {
2071 nodes_to_fork = nodetab_unique_size - client * nodes_per_child;
2072 rank0_finish = nodetab_rank0_size;
2075 nodetab_unique_table[client * nodes_per_child + nodes_to_fork];
2077 ChMessageInt_t *nodemsg = (ChMessageInt_t *) malloc(
2078 (rank0_finish - rank0_start) * sizeof(ChMessageInt_t));
2079 for (k = 0; k < rank0_finish - rank0_start; k++)
2080 nodemsg[k] = ChMessageInt_new(nodetab_rank0_table[rank0_start + k]);
2081 ChMessageHeader hdr;
2082 ChMessageInt_t nNodes = ChMessageInt_new(rank0_finish - rank0_start);
2083 ChMessageInt_t nTotalNodes = ChMessageInt_new(nodetab_rank0_size);
2084 ChMessageHeader_new("initnodetab",
2085 sizeof(ChMessageInt_t) * 2 +
2086 sizeof(ChMessageInt_t) * (rank0_finish - rank0_start),
2088 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
2089 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
2090 skt_sendN(fd, (const char *) &nTotalNodes, sizeof(nTotalNodes));
2091 skt_sendN(fd, (const char *) nodemsg,
2092 (rank0_finish - rank0_start) * sizeof(ChMessageInt_t));
2097 ChSingleNodeinfo *myNodesInfo;
/* Send this charmrun's collected node registrations to its parent:
 * an "initnodetab" header, the node count (nodetab_rank0_size) and the
 * myNodesInfo array of ChSingleNodeinfo records, all written to
 * parent_charmrun_fd. */
2098 int send_myNodeInfo_to_parent()
2100 ChMessageHeader hdr;
2101 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
2102 ChMessageHeader_new("initnodetab",
2103 sizeof(ChMessageInt_t) +
2104 sizeof(ChSingleNodeinfo) * nodetab_rank0_size,
2106 skt_sendN(parent_charmrun_fd, (const char *) &hdr, sizeof(hdr));
2107 skt_sendN(parent_charmrun_fd, (const char *) &nNodes, sizeof(nNodes));
2108 skt_sendN(parent_charmrun_fd, (const char *) myNodesInfo,
2109 sizeof(ChSingleNodeinfo) * nodetab_rank0_size);
/* Pass the node table received from the parent charmrun on to every client
 * of this charmrun.  The parent socket is polled with skt_select1() (timeout
 * argument 1200 * 1000), one message is received from it, and its payload --
 * a node count followed by that many ChNodeinfo records -- is re-sent to
 * each req_clients[] socket as an "initnodetab" message. */
2113 void forward_nodetab_to_children()
2115 /*it just needs to receive and copy the nodetab info if required and send it
2116 * as it is to its nodes */
2117 if (!skt_select1(parent_charmrun_fd, 1200 * 1000)) {
2121 ChMessage_recv(parent_charmrun_fd, &msg);
2123 ChMessageInt_t *nodelistmsg = (ChMessageInt_t *) msg.data;
2124 int nodetab_Nodes = ChMessageInt(nodelistmsg[0]);
2126 for (client = 0; client < nodetab_rank0_size; client++) {
2127 SOCKET fd = req_clients[client];
2128 ChMessageHeader hdr;
2129 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_Nodes);
2130 ChMessageHeader_new("initnodetab", sizeof(ChMessageInt_t) +
2131 sizeof(ChNodeinfo) * nodetab_Nodes,
2133 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
2134 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
2135 skt_sendN(fd, (const char *) (nodelistmsg + 1),
2136 sizeof(ChNodeinfo) * nodetab_Nodes);
2139 /*Parent Charmrun receives the nodetab from child and processes it. msg contain
2140 * array of ChSingleNodeInfo*/
/* Register every node reported by a child charmrun.  msg->data is read as a
 * ChMessageInt_t node count followed by that many ChSingleNodeinfo records;
 * each record is passed to nodeinfo_add() with the child's socket fd. */
2141 void receive_nodeset_from_child(ChMessage *msg, SOCKET fd)
2143 ChMessageInt_t *n32 = (ChMessageInt_t *) msg->data;
2144 int numOfNodes = ChMessageInt(n32[0]);
2145 ChSingleNodeinfo *childNodeInfo = (ChSingleNodeinfo *) (n32 + 1);
2147 for (k = 0; k < numOfNodes; k++)
2148 nodeinfo_add(childNodeInfo + k, fd);
/* Remember fd as the socket of a child charmrun.  The first integer of
 * msg->data is taken as the child's starting node; the slot written in
 * charmrun_fds is that value divided by nodes_per_child. */
2151 void set_sockets_list(ChMessage *msg, SOCKET fd)
2153 ChMessageInt_t *n32 = (ChMessageInt_t *) msg->data;
2154 int node_start = ChMessageInt(n32[0]);
2155 charmrun_fds[node_start / nodes_per_child] = fd;
/* Helper wrapped around printf()/fflush() results: on an error it reports
 * "charmrun WARNING> error in printf" through perror().
 * NOTE(review): the static `warned` flag presumably limits the warning to
 * one occurrence -- the test itself is not visible here; confirm. */
2158 /* Check this return code from "printf". */
2159 static void checkPrintfError(int err)
2162 static int warned = 0;
2164 perror("charmrun WARNING> error in printf");
/* Print msg->data on stdout, flush, and pass the same text to
 * write_stdio_duplicate().  Both the printf() and the fflush() result go
 * through checkPrintfError().  fd is not used on the visible lines. */
2170 int req_handle_print(ChMessage *msg, SOCKET fd)
2172 checkPrintfError(printf("%s", msg->data));
2173 checkPrintfError(fflush(stdout));
2174 write_stdio_duplicate(msg->data);
/* Print msg->data on stderr and pass the same text to
 * write_stdio_duplicate().  fd is not used on the visible lines. */
2178 int req_handle_printerr(ChMessage *msg, SOCKET fd)
2180 fprintf(stderr, "%s", msg->data);
2182 write_stdio_duplicate(msg->data);
/* Synchronous variant of the stdout print request: after printing, flushing
 * and duplicating msg->data, a "printdone" reply is sent back on fd -- via
 * req_reply_child() when arg_hierarchical_start is set, via req_reply()
 * otherwise. */
2186 int req_handle_printsyn(ChMessage *msg, SOCKET fd)
2188 checkPrintfError(printf("%s", msg->data));
2189 checkPrintfError(fflush(stdout));
2190 write_stdio_duplicate(msg->data);
2192 if (arg_hierarchical_start)
2193 req_reply_child(fd, "printdone", "", 1);
2196 req_reply(fd, "printdone", "", 1);
/* Synchronous variant of the stderr print request: msg->data is written to
 * stderr and duplicated, then "printdone" is sent back on fd -- via
 * req_reply_child() when arg_hierarchical_start is set, via req_reply()
 * otherwise. */
2200 int req_handle_printerrsyn(ChMessage *msg, SOCKET fd)
2202 fprintf(stderr, "%s", msg->data);
2204 write_stdio_duplicate(msg->data);
2206 if (arg_hierarchical_start)
2207 req_reply_child(fd, "printdone", "", 1);
2210 req_reply(fd, "printdone", "", 1);
/* Handle an "ending" request.  Shutdown starts once req_ending matches the
 * expected total: nodetab_size, or arg_requested_pes when _FAULT_MLOG_ or
 * _FAULT_CAUSAL_ is defined; in CMK_SHRINK_EXPAND builds no count is
 * compared because only PE 0 sends the request.  Shutdown closes every
 * client socket and prints "Charmrun> Graceful exit."; shrink-expand builds
 * first send a "realloc_ack" message to every client. */
2214 int req_handle_ending(ChMessage *msg, SOCKET fd)
2219 #if CMK_SHRINK_EXPAND
2220 // When using shrink-expand, only PE 0 will send an "ending" request.
2221 #elif (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
2222 if (req_ending == nodetab_size)
2224 if (req_ending == arg_requested_pes)
2227 #if CMK_SHRINK_EXPAND
2229 ChMessage_new("realloc_ack", 0, &ackmsg);
2230 for (i = 0; i < req_nClients; i++) {
2231 ChMessage_send(req_clients[i], &ackmsg);
2235 for (i = 0; i < req_nClients; i++)
2236 skt_close(req_clients[i]);
2238 printf("Charmrun> Graceful exit.\n");
/* Count "barrier" requests in the static barrier_count.  When the count
 * reaches the expected total (arg_requested_pes in one build variant,
 * req_nClients in the other) a "barrier" reply is sent to every client; a
 * reply that does not return REQ_OK is reported on stderr with the client
 * index. */
2244 int req_handle_barrier(ChMessage *msg, SOCKET fd)
2247 static int barrier_count = 0;
2248 static int barrier_phase = 0;
2251 if (barrier_count == arg_requested_pes)
2253 if (barrier_count == req_nClients)
2258 for (i = 0; i < req_nClients; i++)
2259 if (REQ_OK != req_reply(req_clients[i], "barrier", "", 1)) {
2260 fprintf(stderr, "req_handle_barrier socket error: %d\n", i);
/* Barrier variant that releases only node 0.  The sender's PE number is
 * parsed from msg->data with atoi(); once the static count reaches the
 * expected total (arg_requested_pes or req_nClients, depending on the build)
 * a single "barrier0" reply is sent on fd0.
 * NOTE(review): fd0 is presumably the socket remembered for PE 0 -- the
 * assignment is not visible here; confirm. */
2267 int req_handle_barrier0(ChMessage *msg, SOCKET fd)
2270 static int count = 0;
2272 int pe = atoi(msg->data);
2277 if (count == arg_requested_pes)
2279 if (count == req_nClients)
2282 req_reply(fd0, "barrier0", "", 1); /* only send to node 0 */
/* Handle an "abort" request: "Aborting!" and the text in msg->data are
 * written to stderr.  fd is not used on the visible lines. */
2288 void req_handle_abort(ChMessage *msg, SOCKET fd)
2290 /*fprintf(stderr,"req_handle_abort called \n");*/
2292 fprintf(stderr, "Aborting!\n");
2294 fprintf(stderr, "%s\n", msg->data);
/* Handle a "scanf" request.  The format string is forcibly NUL-terminated
 * at msg->len - 1 and handed to input_scanf_chars(); the resulting text
 * (including its terminating NUL) is returned to the requester as
 * "scanf-data" -- via req_reply_child() when arg_hierarchical_start is set,
 * via req_reply() otherwise. */
2298 int req_handle_scanf(ChMessage *msg, SOCKET fd)
2300 char *fmt, *res, *p;
2303 fmt[msg->len - 1] = 0;
2304 res = input_scanf_chars(fmt);
2312 if (arg_hierarchical_start)
2313 req_reply_child(fd, "scanf-data", res, strlen(res) + 1);
2316 req_reply(fd, "scanf-data", res, strlen(res) + 1);
2321 #if CMK_SHRINK_EXPAND
/* Handle a "realloc" (shrink/expand) request by re-executing charmrun.
 * The new PE count is read as an int from the start of msg->data; the old
 * count is arg_requested_pes.  The saved command line is copied into a new
 * argv that is extended -- or, where the option is already present, patched
 * in place -- with ++newp, ++oldp, ++shrinkexpand, ++charmrun_port and
 * +restart <dir>.  dir is "/dev/shm" unless +shrinkexpand_basedir is given.
 * NETSTART and OLDNODENAMES are exported, "realloc_ack" is sent to every
 * client, the server and CCS sockets are closed, and execv() replaces the
 * process image.
 * NOTE(review): sp_buffer2 is 6 bytes, which only fits a port of at most
 * five digits; saved_argv[i+1] is read for +shrinkexpand_basedir without a
 * visible check that i + 1 < saved_argc. */
2322 int req_handle_realloc(ChMessage *msg, SOCKET fd)
2324 printf("Charmrun> Realloc request received %s \n", msg->data);
2326 /* Exec to clear and restart everything, just preserve contents of
2328 int restart_idx = -1, newp_idx = -1, oldp_idx = -1, shrink_expand_idx= -1, charmrun_idx = -1;
2329 int additional_args = 10;
2330 for (int i = 0; i < saved_argc; ++i) {
2331 if (strcmp(saved_argv[i], "+restart") == 0) {
2333 additional_args -= 2;
2335 if(strcmp(saved_argv[i], "++newp") == 0)
2338 additional_args -= 2;
2340 if(strcmp(saved_argv[i], "++oldp") == 0)
2343 additional_args -= 2;
2345 if(strcmp(saved_argv[i], "++shrinkexpand") == 0)
2347 shrink_expand_idx = i;
2348 additional_args -= 1;
2350 if(strcmp(saved_argv[i], "++charmrun_port") == 0)
2353 additional_args -= 2;
2357 const char *dir = "/dev/shm";
2358 for (int i = 0; i < saved_argc; ++i) {
2359 if (strcmp(saved_argv[i], "+shrinkexpand_basedir") == 0) {
2360 dir = saved_argv[i+1];
2366 ret = (const char **) malloc(sizeof(char *) * (saved_argc + additional_args));
2368 int newP = *(int *) (msg->data);
2369 int oldP = arg_requested_pes;
2370 printf("Charmrun> newp = %d oldP = %d \n \n \n", newP, oldP);
2373 for (i = 0; i < saved_argc; i++) {
2374 ret[i] = saved_argv[i];
2379 char sp_buffer[50]; // newP buffer
2380 sprintf(sp_buffer, "%d", newP);
2382 char sp_buffer1[50]; // oldP buffer
2383 sprintf(sp_buffer1, "%d", arg_requested_pes);
2385 char sp_buffer2[6]; // charmrun port
2386 sprintf(sp_buffer2, "%d", server_port);
2388 /* Check that shrink expand parameters don't already exist */
2392 ret[saved_argc + index++] = "++newp";
2393 ret[saved_argc + index++] = sp_buffer;
2396 ret[newp_idx + 1] = sp_buffer;
2400 ret[saved_argc + index++] = "++oldp";
2401 ret[saved_argc + index++] = sp_buffer1;
2404 ret[oldp_idx + 1] = sp_buffer1;
2406 if(shrink_expand_idx == -1)
2408 ret[saved_argc + index++] = "++shrinkexpand";
2411 if(charmrun_idx == -1)
2413 ret[saved_argc + index++] = "++charmrun_port";
2414 ret[saved_argc + index++] = sp_buffer2;
2417 ret[charmrun_idx + 1] = sp_buffer2;
2419 if (restart_idx == -1) {
2420 ret[saved_argc + index++] = "+restart";
2421 ret[saved_argc + index++] = dir;
2422 ret[saved_argc + index++] = NULL;
2424 ret[restart_idx + 1] = dir;
2425 ret[saved_argc + index++] = NULL;
2428 setenv("NETSTART", create_netstart(1), 1);
2429 setenv("OLDNODENAMES", create_oldnodenames(), 1);
2432 ChMessage_new("realloc_ack", 0, &ackmsg);
2433 for (i = 0; i < req_nClients; i++) {
2434 ChMessage_send(req_clients[i], &ackmsg);
2437 skt_client_table.clear();
2438 skt_close(server_fd);
2439 skt_close(CcsServer_fd());
2440 execv(ret[0], (char **)ret);
2441 printf("Should not be here\n");
2449 void restart_node(int crashed_node);
2450 void reconnect_crashed_client(int socket_index, int crashed_node);
2451 void announce_crash(int socket_index, int crashed_node);
2453 static int _last_crash = 0; /* last crashed pe number */
2454 static int _crash_socket_index = 0; /* last restart socket */
2456 static int _crash_socket_charmrun_index = 0; /* last restart socket */
2458 int restarted_pe_id;
2460 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2461 static int numCrashes = 0; /*number of crashes*/
2462 static SOCKET last_crashed_fd = -1;
2466 * @brief Handles an ACK after a crash. Once it has received all the pending
2467 * acks, it sends the nodetab
2468 * table to the crashed node.
/* Count the acknowledgements that follow a crash announcement.  When the
 * static count reaches all-but-one of the participants
 * (nodetab_rank0_size - 1 with arg_hierarchical_start, req_nClients - 1
 * otherwise) the restarted client is sent the node table -- with
 * req_handle_initnodetab1() on the socket at _crash_socket_charmrun_index in
 * the hierarchical case, with req_handle_initnodetab() on the socket at
 * _crash_socket_index otherwise.  Message-logging builds also reset
 * last_crashed_fd to -1. */
2470 int req_handle_crashack(ChMessage *msg, SOCKET fd)
2472 static int count = 0;
2475 if (arg_hierarchical_start) {
2476 if (count == nodetab_rank0_size - 1) {
2477 /* only after everybody else update its nodetab, can this
2478 restarted process continue */
2479 PRINT(("Charmrun> continue node: %d\n", _last_crash));
2480 req_handle_initnodetab1(NULL, req_clients[_crash_socket_charmrun_index]);
2483 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2484 last_crashed_fd = -1;
2492 if (count == req_nClients - 1) {
2493 // only after everybody else update its nodetab, can this restarted process
2495 PRINT(("Charmrun> continue node: %d\n", _last_crash));
2496 req_handle_initnodetab(NULL, req_clients[_crash_socket_index]);
2499 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2500 last_crashed_fd = -1;
2506 /* send initnode to root*/
/* Attach a restarted node to its new control socket.  The node number in
 * msg->data is taken relative to mynodes_start and translated to a node
 * table index nt through nodetab_rank0_table; nt is written back into the
 * message's nodeNo field, and fd becomes the ctrlfd of every PE of that
 * node. */
2507 int set_crashed_socket_id(ChMessage *msg, SOCKET fd)
2509 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
2510 int nt = nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) - mynodes_start];
2511 nodeInfo->nodeNo = ChMessageInt_new(nt);
2512 /* Required for CCS */
2513 /*Nodetable index for this node*/
2515 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
2516 nodetab_table[nt + pe]->ctrlfd = fd;
2520 /* Receives new dataport of restarted process and resends nodetable to
/* Handle the "initnode" message of a restarted process.
 * On the path that reads them, an old and a new PE number are received from
 * fd and the table entry of the new PE is copied over that of the old one.
 * The message then goes through req_handle_initnode().  The position of fd
 * in req_clients is looked up (it is stored in _crash_socket_charmrun_index
 * on the visible path) and the crashed node number from the message is kept
 * in _last_crash.  Finally the node table is re-sent to every client with
 * req_handle_initnodetab(), or the crash is announced with announce_crash().
 * NOTE(review): which of these steps belong to which build configuration is
 * not visible on these lines. */
2522 int req_handle_crash(ChMessage *msg, SOCKET fd)
2525 ChMessageInt_t oldpe, newpe;
2526 skt_recvN(fd, (const char *) &oldpe, sizeof(oldpe));
2527 skt_recvN(fd, (const char *) &newpe, sizeof(newpe));
2528 *nodetab_table[ChMessageInt(oldpe)] = *nodetab_table[ChMessageInt(newpe)];
2530 int status = req_handle_initnode(msg, fd);
2532 for (i = 0; i < req_nClients; i++) {
2533 if (req_clients[i] == fd) {
2537 _crash_socket_charmrun_index = i;
2539 fprintf(stderr, "Root charmrun : Socket %d failed %d\n", fd,
2540 _crash_socket_charmrun_index);
2542 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
2543 int crashed_node = ChMessageInt(nodeInfo->nodeNo);
2544 _last_crash = crashed_node;
2552 /* Already processed, so send*/
2554 for (client = 0; client < req_nClients; client++) {
2555 req_handle_initnodetab(NULL, req_clients[client]);
2558 /*Announce crash to all child charmruns*/
2559 announce_crash(nodetab_rank0_size + 1, crashed_node);
/* React to a failed client socket.
 * The PE whose ctrlfd equals fd is searched for (over
 * [mynodes_start, mynodes_start + nodetab_rank0_size) with
 * arg_hierarchical_start, over [0, nodetab_max) otherwise).  Its node is
 * found by subtracting the PE's rank and matching the result against
 * nodetab_rank0_table.  The node is restarted with restart_node(), the
 * failure is reported on stderr, the slot of fd in req_clients is located,
 * and reconnect_crashed_client() is called for that slot. */
2566 void error_in_req_serve_client(SOCKET fd)
2569 int crashed_node, crashed_pe, node_index, socket_index;
2570 fprintf(stderr, "Socket %d failed \n", fd);
2573 if (arg_hierarchical_start) {
2574 for (i = mynodes_start; i < mynodes_start + nodetab_rank0_size; i++) {
2575 if (nodetab_ctrlfd(i) == fd) {
2583 for (i = 0; i < nodetab_max; i++) {
2584 if (nodetab_ctrlfd(i) == fd) {
2590 #if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
2594 node_index = i - nodetab_rank(crashed_pe);
2595 for (i = 0; i < nodetab_rank0_size; i++) {
2596 if (node_index == nodetab_rank0_table[i]) {
2602 /** should also send a message to all the other processors telling them that
2603 * this guy has crashed*/
2604 /*announce_crash(socket_index,crashed_node);*/
2605 restart_node(crashed_node);
2607 fprintf(stderr, "charmrun says Processor %d failed on Node %d\n", crashed_pe,
2609 /** after the crashed processor has been recreated
2610 it connects to charmrun. That data must now be filled
2611 into the req_nClients array and the nodetab_table*/
2613 for (i = 0; i < req_nClients; i++) {
2614 if (req_clients[i] == fd) {
2619 reconnect_crashed_client(socket_index, crashed_node);
2620 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
/* Route one control-socket request to its handler, keyed on the command
 * name in msg->header.type.
 * With CCS available, "reply_fw" is dispatched before its data is received.
 * For everything else the request data is read with ChMessageData_recv(); a
 * negative status goes to error_in_req_serve_client() (message-logging
 * builds first compare replyFd with last_crashed_fd).  The command name is
 * then matched against the known requests; "initnode" is routed to
 * req_handle_crash(), and an unmatched name is reported as a bad control
 * socket request.
 * NOTE(review): the first DEBUGF passes replyFd although its format string
 * has only one conversion. */
2626 int req_handler_dispatch(ChMessage *msg, SOCKET replyFd)
2628 char *cmd = msg->header.type;
2630 DEBUGF(("Got request '%s'\n", cmd, replyFd));
2631 #if CMK_CCS_AVAILABLE /* CCS *doesn't* want data yet, for faster forwarding */
2632 if (strcmp(cmd, "reply_fw") == 0)
2633 return req_ccs_reply_fw(msg, replyFd);
2636 /* grab request data */
2637 recv_status = ChMessageData_recv(replyFd, msg);
2640 if (!arg_hierarchical_start)
2642 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2643 if (recv_status < 0) {
2644 if (replyFd == last_crashed_fd) {
2647 DEBUGF(("recv_status %d on socket %d \n", recv_status, replyFd));
2648 error_in_req_serve_client(replyFd);
2651 if (recv_status < 0) {
2652 error_in_req_serve_client(replyFd);
2658 if (strcmp(cmd, "ping") == 0)
2660 else if (strcmp(cmd, "print") == 0)
2661 return req_handle_print(msg, replyFd);
2662 else if (strcmp(cmd, "printerr") == 0)
2663 return req_handle_printerr(msg, replyFd);
2664 else if (strcmp(cmd, "printsyn") == 0)
2665 return req_handle_printsyn(msg, replyFd);
2666 else if (strcmp(cmd, "printerrsyn") == 0)
2667 return req_handle_printerrsyn(msg, replyFd);
2668 else if (strcmp(cmd, "scanf") == 0)
2669 return req_handle_scanf(msg, replyFd);
2670 else if (strcmp(cmd, "barrier") == 0)
2671 return req_handle_barrier(msg, replyFd);
2672 else if (strcmp(cmd, "barrier0") == 0)
2673 return req_handle_barrier0(msg, replyFd);
2674 else if (strcmp(cmd, "ending") == 0)
2675 return req_handle_ending(msg, replyFd);
2676 else if (strcmp(cmd, "abort") == 0) {
2677 req_handle_abort(msg, replyFd);
2681 else if (strcmp(cmd, "crash_ack") == 0)
2682 return req_handle_crashack(msg, replyFd);
2684 else if (strcmp(cmd, "initnode") == 0)
2685 return req_handle_crash(msg, replyFd);
2688 #if CMK_SHRINK_EXPAND
2689 else if (strcmp(cmd, "realloc") == 0)
2690 return req_handle_realloc(msg, replyFd);
2694 fprintf(stderr, "Charmrun> Bad control socket request '%s'\n", cmd);
/* Serve one request from client socket fd: receive the message header, hand
 * the message to req_handler_dispatch(), report a processing error on
 * stderr, and release the message with ChMessage_free().  A negative status
 * from the header receive is passed to error_in_req_serve_client(); one of
 * the two visible variants does so only when arg_hierarchical_start is
 * off. */
2702 void req_serve_client(SOCKET fd)
2707 DEBUGF(("Getting message from client...\n"));
2708 recv_status = ChMessageHeader_recv(fd, &msg);
2711 if (!arg_hierarchical_start && recv_status < 0)
2712 error_in_req_serve_client(fd);
2714 if (recv_status < 0) {
2715 error_in_req_serve_client(fd);
2721 DEBUGF(("Message is '%s'\n", msg.header.type));
2722 status = req_handler_dispatch(&msg, fd);
2727 fprintf(stderr, "Charmrun> Error processing control socket request %s\n",
2732 ChMessage_free(&msg);
/* Forward a request from client socket fd up to the parent charmrun.
 * A complete message is received from fd; a negative status goes to
 * error_in_req_serve_client().  "initnode" messages first pass through
 * set_crashed_socket_id().  Every command except "ping" is re-sent to
 * parent_charmrun_fd with req_reply().  For "scanf", "printsyn" and
 * "printerrsyn" the value of fd is sent afterwards; for "initnode" the
 * values crashed_pe_id and restarted_pe_id are sent afterwards. */
2736 void req_forward_root(SOCKET fd)
2741 recv_status = ChMessage_recv(fd, &msg);
2743 char *cmd = msg.header.type;
2746 if (recv_status < 0) {
2747 error_in_req_serve_client(fd);
2751 /*called from reconnect_crashed_client */
2752 if (strcmp(cmd, "initnode") == 0) {
2753 set_crashed_socket_id(&msg, fd);
2757 if (strcmp(cmd, "ping") != 0) {
2758 status = req_reply(parent_charmrun_fd, cmd, msg.data,
2759 ChMessageInt(msg.header.len));
2761 if (strcmp(cmd, "scanf") == 0 || strcmp(cmd, "printsyn") == 0 ||
2762 strcmp(cmd, "printerrsyn") == 0)
2763 skt_sendN(parent_charmrun_fd, (const char *) &fd, sizeof(fd));
2766 if (strcmp(cmd, "initnode") == 0) {
2767 ChMessageInt_t oldpe = ChMessageInt_new(crashed_pe_id);
2768 ChMessageInt_t newpe = ChMessageInt_new(restarted_pe_id);
2769 skt_sendN(parent_charmrun_fd, (const char *) &oldpe, sizeof(oldpe));
2770 skt_sendN(parent_charmrun_fd, (const char *) &newpe, sizeof(newpe));
2782 ChMessage_free(&msg);
/* Deliver a message from the parent charmrun to this charmrun's clients.
 * If receiving from parent_charmrun_fd fails, every client socket is closed.
 * Otherwise the command name selects the recipients:
 *   "barrier"     - every client
 *   "initnodetab" - every client, skipping the one at _crash_socket_index
 *                   when _last_crash is non-zero; current_restart_phase is
 *                   incremented when _last_crash is 0
 *   "crashnode"   - same recipient rule as "initnodetab"
 *   "initnttab"   - only the client at _crash_socket_index, re-tagged as
 *                   "initnodetab"
 *   "req_fw"      - control socket of the PE named in the CCS header
 *   "barrier0"    - control socket of PE 0
 *   anything else - the destination socket value is read from the parent */
2785 void req_forward_client()
2790 recv_status = ChMessage_recv(parent_charmrun_fd, &msg);
2791 if (recv_status < 0) {
2793 for (i = 0; i < req_nClients; i++)
2794 skt_close(req_clients[i]);
2798 char *cmd = msg.header.type;
2800 if (strcmp(cmd, "barrier") == 0) {
2802 for (i = 0; i < req_nClients; i++)
2803 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2804 ChMessageInt(msg.header.len))) {
2810 if (strcmp(cmd, "initnodetab") == 0) {
2811 if (_last_crash == 0)
2812 current_restart_phase++;
2814 for (i = 0; i < req_nClients; i++)
2815 if (_last_crash == 0 || i != _crash_socket_index)
2816 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2817 ChMessageInt(msg.header.len))) {
2823 if (strcmp(cmd, "crashnode") == 0) {
2826 for (i = 0; i < req_nClients; i++)
2827 if (_last_crash == 0 || i != _crash_socket_index)
2828 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2829 ChMessageInt(msg.header.len))) {
2834 if (strcmp(cmd, "initnttab") == 0) {
2836 if (REQ_OK != req_reply(req_clients[_crash_socket_index], "initnodetab",
2837 msg.data, ChMessageInt(msg.header.len))) {
2847 /* CCS forward request */
2848 if (strcmp(cmd, "req_fw") == 0) {
2849 CcsImplHeader *hdr = (CcsImplHeader *) msg.data;
2850 int pe = ChMessageInt(hdr->pe);
2851 fd = nodetab_table[pe]->ctrlfd;
2852 } else if (strcmp(cmd, "barrier0") == 0) {
2853 fd = nodetab_table[0]->ctrlfd;
2855 skt_recvN(parent_charmrun_fd, (const char *) &fd, sizeof(SOCKET));
2857 status = req_reply(fd, cmd, msg.data, ChMessageInt(msg.header.len));
2866 ChMessage_free(&msg);
// ignore_socket_errors: socket abort handler that socket_error_in_poll installs
// (through skt_set_abort) before it reports a failure and closes the clients.
// NOTE(review): the body is not visible in this listing.
2871 int ignore_socket_errors(SOCKET skt, int c, const char *m)
2872 { /*Abandon on further socket errors during error shutdown*/
// socket_error_in_poll: abort handler installed by the polling loops via
// skt_set_abort. Reports which node the failing socket belongs to, closes all
// client sockets and records the current time in ftTimer.
//   skt  - socket that failed (used only to look up the node number and name)
//   code - error code supplied by the caller (not used in the visible lines)
//   msg  - text appended to the diagnostic
2880 /*A socket went bad somewhere! Immediately disconnect,
2881 which kills everybody.
2883 int socket_error_in_poll(SOCKET skt, int code, const char *msg)
2885 /*commenting it for fault tolerance*/
// Any further socket error raised while shutting down goes to the quiet handler.
2888 skt_set_abort(ignore_socket_errors);
2889 const char *name = skt_to_name(skt);
2890 fprintf(stderr, "Charmrun> error on request socket to node %d '%s'--\n"
2892 skt_to_node(skt), name, msg);
2894 for (i = 0; i < req_nClients; i++)
2895 skt_close(req_clients[i]);
// Timestamp taken with GetClock(); presumably used by the fault-tolerance
// restart timing - verify against the users of ftTimer.
2898 ftTimer = GetClock();
// CMK_PIPE_* : one macro vocabulary with two back ends (poll or select) used by
// the request-polling code to declare a descriptor set, add descriptors, wait,
// and test the results.
// NOTE(review): several continuation lines of these macros are missing from
// this listing; the notes here rely only on the lines that are shown.
// poll() back end: CHECKREAD and CHECKWRITE ignore their argument and advance
// through fds[] by position, so checks must be issued in the same order as the
// matching ADDREAD or ADDWRITE calls.
2902 #if CMK_USE_POLL /*poll() version*/
2903 #define CMK_PIPE_DECL(maxn, delayMs) \
2904 static struct pollfd *fds = NULL; \
2906 int *nFds = &nFds_sto; \
2907 int pollDelayMs = delayMs; \
2909 fds = (struct pollfd *) malloc((maxn) * sizeof(struct pollfd));
2910 #define CMK_PIPE_SUB fds, nFds
2911 #define CMK_PIPE_CALL() \
2912 poll(fds, *nFds, pollDelayMs); \
2915 #define CMK_PIPE_PARAM struct pollfd *fds, int *nFds
2916 #define CMK_PIPE_ADDREAD(rd_fd) \
2918 fds[*nFds].fd = rd_fd; \
2919 fds[*nFds].events = POLLIN; \
2922 #define CMK_PIPE_ADDWRITE(wr_fd) \
2924 fds[*nFds].fd = wr_fd; \
2925 fds[*nFds].events = POLLOUT; \
2928 #define CMK_PIPE_CHECKREAD(rd_fd) fds[(*nFds)++].revents &POLLIN
2929 #define CMK_PIPE_CHECKWRITE(wr_fd) fds[(*nFds)++].revents &POLLOUT
2931 #else /*select() version*/
// select() back end: descriptors are tracked by value in fd_set objects, and the
// delay in milliseconds is split into the tv_sec and tv_usec fields of tmo.
2933 #define CMK_PIPE_DECL(maxn, delayMs) \
2934 fd_set rfds_sto, wfds_sto; \
2936 fd_set *rfds = &rfds_sto, *wfds = &wfds_sto; \
2937 struct timeval tmo; \
2940 tmo.tv_sec = delayMs / 1000; \
2941 tmo.tv_usec = 1000 * (delayMs % 1000);
2942 #define CMK_PIPE_SUB rfds, wfds
// NOTE(review): only rfds is handed to select(); wfds is never passed, so
// CMK_PIPE_CHECKWRITE would reflect the bits set by CMK_PIPE_ADDWRITE rather
// than a select() result - confirm this is intended.
2943 #define CMK_PIPE_CALL() select(FD_SETSIZE, rfds, 0, 0, &tmo)
2945 #define CMK_PIPE_PARAM fd_set *rfds, fd_set *wfds
2946 #define CMK_PIPE_ADDREAD(rd_fd) \
2948 assert(nFds < FD_SETSIZE); \
2949 FD_SET(rd_fd, rfds); \
2952 #define CMK_PIPE_ADDWRITE(wr_fd) FD_SET(wr_fd, wfds)
2953 #define CMK_PIPE_CHECKREAD(rd_fd) FD_ISSET(rd_fd, rfds)
2954 #define CMK_PIPE_CHECKWRITE(wr_fd) FD_ISSET(wr_fd, wfds)
// Request-polling routine for the non-hierarchical case.
// NOTE(review): the signature line is not visible in this listing; the DEBUGF
// strings suggest the function is req_poll. Other short lines (declarations,
// braces, returns) are missing too.
// Flow: build a read set from the client sockets, the CCS server socket and
// (with arg_charmdebug) stdin plus two gdb pipes; wait up to 1000 ms; then
// service whatever became readable, in the same order the set was built.
2958 Wait for incoming requests on all client sockets,
2959 and the CCS socket (if present).
// Capacity is req_nClients + 5; the visible adds use at most req_nClients + 4.
2966 CMK_PIPE_DECL(req_nClients + 5, 1000);
2967 for (i = 0; i < req_nClients; i++)
2968 CMK_PIPE_ADDREAD(req_clients[i]);
2969 if (CcsServer_fd() != INVALID_SOCKET)
2970 CMK_PIPE_ADDREAD(CcsServer_fd());
2971 if (arg_charmdebug) {
2972 CMK_PIPE_ADDREAD(0);
2973 CMK_PIPE_ADDREAD(gdb_info_std[1]);
2974 CMK_PIPE_ADDREAD(gdb_info_std[2]);
// Socket failures from here on are routed to socket_error_in_poll.
2977 skt_set_abort(socket_error_in_poll);
2979 DEBUGF(("Req_poll: Calling select...\n"));
2980 status = CMK_PIPE_CALL();
2981 DEBUGF(("Req_poll: Select returned %d...\n", status));
2984 return; /*Nothing to do-- timeout*/
// EINTR and EAGAIN are tested separately; the line below reports a fatal error
// with skt = -1 and code 1359.
2987 if (errno == EINTR || errno == EAGAIN)
2991 socket_error_in_poll(-1, 1359, "Node program terminated unexpectedly!\n");
// Serve each readable client; the loop repeats while skt_select1 reports more
// data and readcount stays positive (the decrement is on a line not shown).
2993 for (i = 0; i < req_nClients; i++)
2994 if (CMK_PIPE_CHECKREAD(req_clients[i])) {
2995 readcount = 10; /*number of successive reads we serve per socket*/
2996 /*This client is ready to read*/
2998 req_serve_client(req_clients[i]);
3000 } while (1 == skt_select1(req_clients[i], 0) && readcount > 0);
3003 if (CcsServer_fd() != INVALID_SOCKET)
3004 if (CMK_PIPE_CHECKREAD(CcsServer_fd())) {
3005 DEBUGF(("Activity on CCS server port...\n"));
// charmdebug mode: commands arrive on stdin; a 5-byte "info:" prefix means the
// rest of the line is forwarded to the gdb info process on gdb_info_std[0].
3009 if (arg_charmdebug) {
3011 if (CMK_PIPE_CHECKREAD(0)) {
3012 int indata = read(0, buf, 5);
3015 fprintf(stderr, "Error reading command (%s)\n", buf);
3016 if (strncmp(buf, "info:", 5) == 0) {
3017 /* Found info command, forward data to gdb info program */
3020 // printf("Command to be forwarded\n");
// NOTE(review): read() returns 0 at end of file, not -1, so this condition
// alone does not stop the loop on EOF - check the hidden lines for a break.
3021 while (read(0, &c, 1) != -1) {
// Flush the collected bytes on newline or once 2045 bytes are buffered.
3023 if (c == '\n' || num >= 2045) {
3024 write(gdb_info_std[0], buf, num);
3030 // printf("Command from charmdebug: %d(%s)\n",indata,buf);
3032 /* All streams from gdb are forwarded to the stderr stream through the FILE
3033 gdb_stream which has been duplicated from stderr */
3034 /* NOTE: gdb_info_std[2] must be flushed before gdb_info_std[1] because the
3035 latter contains the string "(gdb) " ending the synchronization. Also the
3036 std[1] should be read with the else statement. It will not work without.
3038 if (CMK_PIPE_CHECKREAD(gdb_info_std[2])) {
3039 int indata = read(gdb_info_std[2], buf, 100);
3040 /*printf("read data from gdb info stderr %d\n",indata);*/
3043 // printf("printing %s\n",buf);
3045 // fprintf(gdb_stream,"%s",buf);
3048 } else if (CMK_PIPE_CHECKREAD(gdb_info_std[1])) {
3049 int indata = read(gdb_info_std[1], buf, 100);
3050 /*printf("read data from gdb info stdout %d\n",indata);*/
3053 // printf("printing %s\n",buf);
// NOTE(review): buf is printed with %s; its NUL termination after read() is
// presumably done on a line not shown - verify.
3055 fprintf(gdb_stream, "%s", buf);
// req_poll_hierarchical: polling routine used with hierarchical start. It calls
// select() directly on an fd_set holding the client sockets, the CCS server
// socket, the charmdebug descriptors and, for a child charmrun, the socket to
// its parent. Readable clients are served locally or forwarded to the root;
// messages from the parent are forwarded down via req_forward_client.
// NOTE(review): short lines (declarations, braces, else, returns) are missing
// from this listing.
3063 void req_poll_hierarchical()
3070 skt_set_abort(socket_error_in_poll);
3074 FD_ZERO(&rfds); /* clears set of file descriptor */
3075 for (i = 0; i < req_nClients; i++)
3076 FD_SET(req_clients[i], &rfds); /* adds client sockets to rfds set*/
3077 if (CcsServer_fd() != INVALID_SOCKET)
3078 FD_SET(CcsServer_fd(), &rfds);
3079 if (arg_charmdebug) {
3081 FD_SET(gdb_info_std[1], &rfds);
3082 FD_SET(gdb_info_std[2], &rfds);
// A child charmrun also listens for traffic coming down from its parent.
3085 if (arg_child_charmrun)
3086 FD_SET(parent_charmrun_fd, &rfds); /* adds client sockets to rfds set*/
3087 DEBUGF(("Req_poll: Calling select...\n"));
3088 status = select(FD_SETSIZE, &rfds, 0, 0,
3089 &tmo); /* FD_SETSIZE is the maximum number of file
3090 descriptors that a fd_set object can hold
3091 information about, select returns number of
3093 DEBUGF(("Req_poll: Select returned %d...\n", status));
3096 return; /*Nothing to do-- timeout*/
// NOTE(review): socket_error_in_poll is defined with three parameters
// (SOCKET, int, const char *) but is called here with two; the non-hierarchical
// poll routine passes -1 as the socket. This looks like a missing first argument.
3100 socket_error_in_poll(1359, "Node program terminated unexpectedly!\n");
// Up to readcount successive requests per readable client; a child charmrun
// forwards them to the root, otherwise req_serve_client handles them.
3102 for (i = 0; i < req_nClients; i++)
3103 if (FD_ISSET(req_clients[i], &rfds)) {
3104 readcount = 10; /*number of successive reads we serve per socket*/
3105 /*This client is ready to read*/
3107 if (arg_child_charmrun)
3108 req_forward_root(req_clients[i]);
3110 req_serve_client(req_clients[i]);
3112 } while (1 == skt_select1(req_clients[i], 0) && readcount > 0);
3115 if (arg_child_charmrun)
3116 // Forward from root to clients
3117 if (FD_ISSET(parent_charmrun_fd, &rfds)) {
3118 readcount = 10; /*number of successive reads we serve per socket*/
3120 req_forward_client();
3122 } while (1 == skt_select1(parent_charmrun_fd, 0) && readcount > 0);
3125 /*Wait to receive responses and Forward responses */
3126 if (CcsServer_fd() != INVALID_SOCKET)
3127 if (FD_ISSET(CcsServer_fd(), &rfds)) {
3128 DEBUGF(("Activity on CCS server port...\n"));
// charmdebug handling: a 5-byte "info:" prefix on stdin means the rest of the
// line is forwarded to gdb_info_std[0]; gdb output is copied to gdb_stream.
3132 if (arg_charmdebug) {
3134 if (FD_ISSET(0, &rfds)) {
3135 int indata = read(0, buf, 5);
3138 fprintf(stderr, "Error reading command (%s)\n", buf);
3139 if (strncmp(buf, "info:", 5) == 0) {
3140 /* Found info command, forward data to gdb info program */
3143 // printf("Command to be forwarded\n");
// NOTE(review): read() returns 0 at end of file, so testing only for -1 does
// not end this loop on EOF - check the hidden lines for a break.
3144 while (read(0, &c, 1) != -1) {
3146 if (c == '\n' || num >= 2045) {
3147 write(gdb_info_std[0], buf, num);
3153 // printf("Command from charmdebug: %d(%s)\n",indata,buf);
3155 /* All streams from gdb are forwarded to the stderr stream through the FILE
3156 gdb_stream which has been duplicated from stderr */
3157 /* NOTE: gdb_info_std[2] must be flushed before gdb_info_std[1] because the
3158 latter contains the string "(gdb) " ending the synchronization. Also the
3159 std[1] should be read with the else statement. It will not work without.
3161 if (FD_ISSET(gdb_info_std[2], &rfds)) {
3162 int indata = read(gdb_info_std[2], buf, 100);
3163 /*printf("read data from gdb info stderr %d\n",indata);*/
3166 // printf("printing %s\n",buf);
3168 // fprintf(gdb_stream,"%s",buf);
3171 } else if (FD_ISSET(gdb_info_std[1], &rfds)) {
3172 int indata = read(gdb_info_std[1], buf, 100);
3173 /*printf("read data from gdb info stdout %d\n",indata);*/
3176 // printf("printing %s\n",buf);
3178 fprintf(gdb_stream, "%s", buf);
// Connection details of the parent charmrun. parse_netstart fills the port and
// pid from the NETSTART environment variable and resolves the IP from the host
// name found there.
3187 static skt_ip_t parent_charmrun_IP;
3188 static int parent_charmrun_port;
3189 static int parent_charmrun_pid;
// Port and socket of the server opened by init_mynodes through skt_server.
3190 static int dataport;
3191 static SOCKET dataskt;
// Start-up phase counter; the connect code tests it against 1 to decide what a
// root charmrun expects to receive.
3192 int charmrun_phase = 0;
// client_connect_problem: socket abort handler used while clients are
// connecting. Prints the node name associated with skt (via skt_to_name) and
// the supplied message to stderr.
// NOTE(review): the rest of the body is not visible; a FIXME elsewhere in this
// file states that this handler calls exit(1).
3195 int client_connect_problem(SOCKET skt, int code, const char *msg)
3196 { /*Called when something goes wrong during a client connect*/
3197 const char *name = skt_to_name(skt);
3198 fprintf(stderr, "Charmrun> error attaching to node '%s':\n%s\n", name, msg);
// errorcheck_one_client_connect: wait (up to arg_timeout seconds) for one
// node-program to connect to server_fd, accept it into req_clients[client],
// register the socket in skt_client_table and disable Nagle on it.
// NOTE(review): return statements and some declarations are not visible here.
3203 /** return 1 if connection is opened successfully with client**/
3204 int errorcheck_one_client_connect(int client)
3207 /* Child charmruns are already connected - Do we need to connect again*/
3208 if (arg_hierarchical_start && !arg_child_charmrun && charmrun_phase == 1)
3211 /* FIXME: The error printing functions do a table lookup on the socket to
3212 * figure their corresponding host. However, certain failures happen
3213 * before we can associate a socket with a particular client, as in
3214 * skt_select1 below. In that case, we use a workaround to create a
3215 * dummy socket so that the internal error message is printed
// The placeholder key -10 maps to this client so that error reporting can
// resolve a node name before a real socket exists.
3218 SOCKET dummy_skt = -10;
3219 skt_client_table[dummy_skt] = client;
3221 unsigned int clientPort; /*These are actually ignored*/
3224 printf("Charmrun> Waiting for %d-th client to connect.\n", client);
3225 /* FIXME: why are we passing the client as an error code here? */
// arg_timeout is multiplied by 1000 before being handed to skt_select1.
3226 if (0 == skt_select1(server_fd, arg_timeout * 1000))
3227 client_connect_problem(dummy_skt, client,
3228 "Timeout waiting for node-program to connect");
3230 req_clients[client] = skt_accept(server_fd, &clientIP, &clientPort);
3231 skt_client_table[req_clients[client]] = client;
3233 /* FIXME: will this ever be triggered? It seems the skt_abort handler here is
3234 * 'client_connect_problem', which calls exit(1), so we'd exit
3236 if (req_clients[client] == SOCKET_ERROR)
3237 client_connect_problem(dummy_skt, client, "Failure in node accept");
3239 skt_tcp_no_nagle(req_clients[client]);
// read_initnode_one_client: wait (up to arg_timeout seconds) for the first
// message from an already connected client, hand it to req_handle_initnode,
// then release the message.
// NOTE(review): the return-type line is not visible in this listing.
3248 read_initnode_one_client(int client)
3251 if (!skt_select1(req_clients[client], arg_timeout * 1000))
3252 client_connect_problem(req_clients[client], client,
3253 "Timeout on IP request");
3254 ChMessage_recv(req_clients[client], &msg);
3255 req_handle_initnode(&msg, req_clients[client]);
3256 ChMessage_free(&msg);
// req_one_client_partinit (only with CMK_IBVERBS_FAST_START): accept one client
// and read its "partinit" message, whose payload starts with the node number.
3259 #if CMK_IBVERBS_FAST_START
3260 void req_one_client_partinit(int client)
3262 ChMessage partStartMsg;
3265 if (errorcheck_one_client_connect(client)) {
3266 if (!skt_select1(req_clients[client], arg_timeout * 1000))
3267 client_connect_problem(req_clients[client], client,
3268 "Timeout on partial init request");
3270 ChMessage_recv(req_clients[client], &partStartMsg);
3271 clientNode = ChMessageInt(*(ChMessageInt_t *) partStartMsg.data);
// NOTE(review): the message type is validated only by assert, and only after
// the payload was already read; with NDEBUG the check disappears entirely.
3272 assert(strncmp(partStartMsg.header.type, "partinit", 8) == 0);
3273 ChMessage_free(&partStartMsg);
// add_singlenodeinfo_to_mynodeinfo: append the ChSingleNodeinfo carried by msg
// to myNodesInfo, translating its node number through nodetab_rank0_table
// (offset by mynodes_start), and record ctrlfd for every PE of that node.
//   msg    - message whose data is a ChSingleNodeinfo
//   ctrlfd - control socket the message arrived on
3280 /* To keep a global node numbering */
3281 void add_singlenodeinfo_to_mynodeinfo(ChMessage *msg, SOCKET ctrlfd)
3283 /*add to myNodesInfo */
3284 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
3286 /* need to change nodeNo */
3287 myNodesInfo[nodeCount].nodeNo = ChMessageInt_new(
3288 nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) - mynodes_start]);
// nodeCount is advanced here, after both fields of the new entry are written.
3289 myNodesInfo[nodeCount++].info = nodeInfo->info;
3291 /* Required for CCS */
3292 int nt = nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) -
3293 mynodes_start]; /*Nodetable index for this node*/
3295 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
3296 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
// req_set_client_connect (non-hierarchical variant): accept the clients in the
// index range [start, end) and process each one's initnode message as it
// arrives, interleaving accepts with reads instead of handling one client at a
// time. finished[] has end - start entries indexed by (client - start).
// NOTE(review): loop headers, braces and the release of finished are among the
// lines missing from this listing.
3302 /* Original Function, need to check if modifications required*/
3303 void req_set_client_connect(int start, int end)
3309 int *finished; // -1 if client i not finished, otherwise the node id of client i
3310 int curclient, curclientend, curclientstart;
3312 curclient = curclientend = curclientstart = start;
3314 finished = (int *) malloc((end - start) * sizeof(int));
3315 for (i = 0; i < (end - start); i++)
// IBVERBS without fast start: accept everyone first, then run a barrier.
3318 #if CMK_USE_IBVERBS && !CMK_IBVERBS_FAST_START
3319 for (i = start; i < end; i++) {
3320 errorcheck_one_client_connect(curclientend++);
3322 if (req_nClients > 1) {
3323 /* a barrier to make sure infiniband device gets initialized */
3324 for (i = start; i < end; i++)
3325 ChMessage_recv(req_clients[i], &msg);
3326 for (i = start; i < end; i++)
3327 req_reply(req_clients[i], "barrier", "", 1);
3333 /* check server socket for messages */
3334 #if !CMK_USE_IBVERBS || CMK_IBVERBS_FAST_START
// Accept when nothing is pending yet, or when the server socket is readable.
3335 while (curclientstart == curclientend || skt_select1(server_fd, 1) != 0) {
3336 errorcheck_one_client_connect(curclientend++);
3339 /* check appropriate clients for messages */
3340 for (client = curclientstart; client < curclientend; client++)
3341 if (req_clients[client] > 0) {
3342 if (skt_select1(req_clients[client], 1) != 0) {
3343 ChMessage_recv(req_clients[client], &msg);
3344 req_handle_initnode(&msg, req_clients[client]);
// Remember which node id this client reported.
3345 finished[client - start] =
3346 ChMessageInt(((ChSingleNodeinfo *)msg.data)->nodeNo);
// Move curclientstart up to the first client that has not finished yet.
3352 for (i = curclientstart - start; i < (end - start); i++)
3353 if (finished[i] == -1) {
3354 curclientstart = start + i;
3359 ChMessage_free(&msg);
3361 // correct mapping in skt_client_table so that socket points to node using the socket
// NOTE(review): i starts at start but is bounded by (end - start) and indexes
// finished[] directly, while finished[] is filled at (client - start). This is
// only consistent when start == 0; for a later batch it touches the wrong
// entries or none. Expected form: i in [0, end - start) with req_clients[start + i].
3362 for (i = start; i < (end - start); i++)
3363 skt_client_table[req_clients[i]] = finished[i];
// req_set_client_connect (hierarchical-capable variant): accept the clients in
// [start, end) and dispatch each first message according to the start-up mode:
//  - flat start: req_handle_initnode
//  - root charmrun: receive_nodeset_from_child when charmrun_phase == 1, and
//    set_sockets_list on the other visible path
//  - child charmrun: add_singlenodeinfo_to_mynodeinfo
// NOTE(review): else keywords, braces and the release of finished are among the
// lines missing from this listing.
3368 /*int charmrun_phase =0; meaningful for main charmun to decide what to receive*/
3369 void req_set_client_connect(int start, int end)
3375 int *finished; // -1 if client i not finished, otherwise the node id of client i
3376 int curclient, curclientend, curclientstart;
3378 curclient = curclientend = curclientstart = start;
// NOTE(review): malloc results are assigned without a cast here and below,
// which a C++ compiler rejects; the other variant casts - confirm how this
// section is built.
3380 finished = malloc((end - start) * sizeof(int));
3381 for (i = 0; i < (end - start); i++)
// A child charmrun allocates its node-info array on the first batch only.
3384 if (arg_child_charmrun && start == 0)
3385 myNodesInfo = malloc(sizeof(ChSingleNodeinfo) * nodetab_rank0_size);
3387 #if CMK_USE_IBVERBS && !CMK_IBVERBS_FAST_START
3388 for (i = start; i < end; i++) {
3389 errorcheck_one_client_connect(curclientend++);
3391 if (req_nClients > 1) {
3392 /* a barrier to make sure infiniband device gets initialized */
3393 for (i = start; i < end; i++)
3394 ChMessage_recv(req_clients[i], &msg);
3395 for (i = start; i < end; i++)
3396 req_reply(req_clients[i], "barrier", "", 1);
3402 /* check server socket for messages */
3403 #if !CMK_USE_IBVERBS || CMK_IBVERBS_FAST_START
3404 while (curclientstart == curclientend || skt_select1(server_fd, 1) != 0) {
3405 errorcheck_one_client_connect(curclientend++);
3408 /* check appropriate clients for messages */
3409 for (client = curclientstart; client < curclientend; client++)
3410 if (req_clients[client] > 0) {
3411 if (skt_select1(req_clients[client], 1) != 0) {
3412 ChMessage_recv(req_clients[client], &msg);
3413 if (!arg_hierarchical_start)
3414 req_handle_initnode(&msg, req_clients[client]);
3416 if (!arg_child_charmrun) {
3417 if (charmrun_phase == 1)
3418 receive_nodeset_from_child(&msg, req_clients[client]);
3420 set_sockets_list(&msg, req_clients[client]);
3421 // here we need to decide based upon the phase
3422 } else /* hier-start with 2nd leval*/
3423 add_singlenodeinfo_to_mynodeinfo(&msg, req_clients[client]);
3425 finished[client - start] =
3426 ChMessageInt(((ChSingleNodeinfo *)msg.data)->nodeNo);
3432 for (i = curclientstart - start; i < (end - start); i++)
3433 if (finished[i] == -1) {
3434 curclientstart = start + i;
3439 ChMessage_free(&msg);
3441 // correct mapping in skt_client_table so that socket points to node using the socket
// NOTE(review): i starts at start but is bounded by (end - start) and indexes
// finished[] directly, while finished[] is filled at (client - start). This is
// only consistent when start == 0.
3442 for (i = start; i < (end - start); i++)
3443 skt_client_table[req_clients[i]] = finished[i];
// req_one_client_connect: accept a single client and, when the accept step
// reports success, read and process its initnode message.
3449 /* allow one client to connect */
3450 void req_one_client_connect(int client)
3452 if (errorcheck_one_client_connect(
3453 client)) { /*This client has just connected-- fetch his name and IP*/
3454 read_initnode_one_client(client);
// exchange_qpdata_clients: redistribute the queue-pair lists collected in
// nodeinfo_arr. Every rank-0 node gets a qpData array with nodetab_rank0_size
// entries; entry [proc] is taken from nodeinfo_arr[proc].qpList, and each
// qpList is freed once it has been distributed.
// NOTE(review): the handling of count and a conditional inside the inner loop
// are on lines not shown in this listing.
3459 /* Each node has sent the qpn data for all the qpns it has created
3460 This data needs to be sent to all the other nodes
3461 This needs to be done for all nodes
3463 void exchange_qpdata_clients()
3466 for (i = 0; i < nodetab_rank0_size; i++) {
3467 int nt = nodetab_rank0_table[i]; /*Nodetable index for this node*/
3468 nodetab_table[nt]->qpData =
3469 (ChInfiAddr *) malloc(sizeof(ChInfiAddr) * nodetab_rank0_size);
3471 for (proc = 0; proc < nodetab_rank0_size; proc++) {
3473 for (i = 0; i < nodetab_rank0_size; i++) {
3476 int nt = nodetab_rank0_table[i]; /*Nodetable index for this node*/
3477 nodetab_table[nt]->qpData[proc] = nodeinfo_arr[proc].qpList[count];
3478 // printf("Charmrun> nt %d proc %d lid 0x%x qpn
3481 // 0x%x\n",nt,proc,ChMessageInt(nodetab_table[nt]->qpData[proc].lid),ChMessageInt(nodetab_table[nt]->qpData[proc].qpn),ChMessageInt(nodetab_table[nt]->qpData[proc].psn));
3485 free(nodeinfo_arr[proc].qpList);
// send_clients_nodeinfo_qpdata: send every rank-0 node an "initnodetab" message
// made of four writes on its ctrlfd: the header, the node count, the whole
// nodeinfo_arr table, and that node's own qpData array.
3489 void send_clients_nodeinfo_qpdata()
// Payload size = node count field + node table + one ChInfiAddr per node.
3492 int msgSize = sizeof(ChMessageInt_t) +
3493 sizeof(ChNodeinfo) * nodetab_rank0_size +
3494 sizeof(ChInfiAddr) * nodetab_rank0_size;
3495 for (node = 0; node < nodetab_rank0_size; node++) {
3496 int nt = nodetab_rank0_table[node]; /*Nodetable index for this node*/
3497 // printf("Charmrun> Node %d proc %d sending initnodetab
3499 ChMessageHeader hdr;
3500 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
3501 ChMessageHeader_new("initnodetab", msgSize, &hdr);
3502 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) &hdr, sizeof(hdr));
3503 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) &nNodes,
3505 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) nodeinfo_arr,
3506 sizeof(ChNodeinfo) * nodetab_rank0_size);
3507 skt_sendN(nodetab_table[nt]->ctrlfd,
3508 (const char *) &nodetab_table[nt]->qpData[0],
3509 sizeof(ChInfiAddr) * nodetab_rank0_size);
// Timing helpers built on gettimeofday. getthetime stores the current time in
// seconds (with a fractional part) into x. Both macros expand to bare
// statements and use a struct timeval named tim from the caller's scope.
// NOTE(review): the last line of getthetime1 is not visible in this listing.
3515 #define getthetime(x) \
3516 gettimeofday(&tim, NULL); \
3517 x = tim.tv_sec + (tim.tv_usec / 1000000.0);
3518 #define getthetime1(x) \
3519 gettimeofday(&tim, NULL); \
// req_client_connect: non-batched connect path. Sizes req_clients to
// nodetab_rank0_size, marks every slot as -1, accepts all clients, then
// distributes the node table (directly, or through the parent and children
// when arg_hierarchical_start is set).
// NOTE(review): preprocessor lines and braces between the visible statements
// are missing, so which statements are conditional cannot be fully determined.
3521 /*Wait for all the clients to connect to our server port*/
3522 void req_client_connect(void)
3526 if (!arg_hierarchical_start)
3528 nodeinfo_allocate();
3529 req_nClients = nodetab_rank0_size;
3530 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3531 for (client = 0; client < req_nClients; client++)
3532 req_clients[client] = -1;
// Connection failures from here on are reported by client_connect_problem.
3534 skt_set_abort(client_connect_problem);
3536 #if CMK_IBVERBS_FAST_START
3537 for (client = 0; client < req_nClients; client++) {
3538 req_one_client_partinit(client);
3540 for (client = 0; client < req_nClients; client++) {
3541 read_initnode_one_client(client);
3545 req_set_client_connect(0, req_nClients);
3552 printf("Charmrun> All clients connected.\n");
3554 exchange_qpdata_clients();
3555 send_clients_nodeinfo_qpdata();
3558 if (arg_hierarchical_start) {
3559 /* first we need to send data to parent charmrun and then send the nodeinfo
3561 send_myNodeInfo_to_parent();
3562 /*then receive from root */
3563 forward_nodetab_to_children();
3568 for (client = 0; client < req_nClients; client++) {
3569 req_handle_initnodetab(NULL, req_clients[client]);
3574 printf("Charmrun> IP tables sent.\n");
// req_charmrun_connect: connect path of the root charmrun under hierarchical
// start. It expects branchfactor connections, sends each one its node
// distribution through req_handle_initnodedistribution, then runs a second
// req_set_client_connect pass and finally sends the node table to each client.
// NOTE(review): preprocessor lines and braces between the visible statements
// are missing from this listing.
3577 /*Wait for all the clients to connect to our server port, then collect and send
3578 * nodetable to all */
3580 void req_charmrun_connect(void)
3582 // double t1, t2, t3, t4;
3584 nodeinfo_allocate();
3585 req_nClients = branchfactor;
3586 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3587 charmrun_fds = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3588 for (client = 0; client < req_nClients; client++)
3589 req_clients[client] = -1;
3591 skt_set_abort(client_connect_problem);
3593 #if CMK_IBVERBS_FAST_START
3594 for (client = 0; client < req_nClients; client++) {
3595 req_one_client_partinit(client);
3597 for (client = 0; client < req_nClients; client++) {
3598 read_initnode_one_client(client);
3601 // if(!arg_child_charmrun) getthetime(t1);
// First pass over the connecting clients.
3603 req_set_client_connect(0, req_nClients);
3604 // if(!arg_child_charmrun) getthetime(t2); /* also need to process
3605 // received nodesets JIT */
3611 printf("Charmrun> All clients connected.\n");
3613 exchange_qpdata_clients();
3614 send_clients_nodeinfo_qpdata();
3616 for (client = 0; client < req_nClients; client++) {
3617 // add flag to check what leval charmrun it is and what phase
3618 req_handle_initnodedistribution(NULL, charmrun_fds[client], client);
3622 /* Now receive the nodetab from child charmruns*/
3625 skt_set_abort(client_connect_problem);
// Second pass; presumably charmrun_phase is advanced on a hidden line so that
// this pass takes the receive_nodeset_from_child path - verify.
3627 req_set_client_connect(0, req_nClients);
3629 /* Already processed, so send*/
3630 for (client = 0; client < req_nClients; client++) {
3631 req_handle_initnodetab(NULL, req_clients[client]);
3633 // if(!arg_child_charmrun) getthetime(t4);
3636 printf("Charmrun> IP tables sent.\n");
3637 // if(!arg_child_charmrun) printf("Time for charmruns connect= %f , sending
3638 // nodes to fire= %f, node clients connected= %f n ", t2-t1, t3-t2, t4-t3);
// Forward declarations of the ssh start helpers used by the batched path below.
3645 void start_one_node_ssh(int rank0no);
3646 void finish_one_node(int rank0no);
3647 void finish_set_nodes(int start, int stop);
3648 int start_set_node_ssh(int client);
// req_client_start_and_connect: batched start-up. Node programs are launched
// in groups of arg_batch_spawn calls to start_set_node_ssh; each group is
// connected with req_set_client_connect before the next group is started.
// After all groups are connected the node table is distributed and ssh_pids is
// freed.
// NOTE(review): braces, break statements and preprocessor lines are missing
// from this listing.
3650 void req_client_start_and_connect(void)
3653 int batch = arg_batch_spawn; /* fire several at a time */
3654 int clientgroup, clientstart;
3658 if (!arg_hierarchical_start)
3660 nodeinfo_allocate();
3661 req_nClients = nodetab_rank0_size;
3662 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3664 skt_set_abort(client_connect_problem);
3667 while (client < req_nClients) { /* initiate a batch */
3668 clientstart = client;
// start_set_node_ssh returns how many clients it started; client is clamped
// to req_nClients.
3670 for (counter = 0; counter < batch;
3671 counter++) { /* initiate batch number of nodes */
3672 clientgroup = start_set_node_ssh(client);
3673 client += clientgroup;
3674 if (client >= req_nClients) {
3675 client = req_nClients;
3680 /* ssh x11 forwarding will make sure ssh exit */
3681 if (!arg_ssh_display)
3683 finish_set_nodes(clientstart, client);
3685 #if CMK_IBVERBS_FAST_START
3686 for (c = clientstart; c < client; c++) {
3687 req_one_client_partinit(c);
// Connect exactly the clients of this batch: indices [clientstart, client).
3690 req_set_client_connect(clientstart, client);
3694 #if CMK_IBVERBS_FAST_START
3695 for (client = 0; client < req_nClients; client++) {
3696 read_initnode_one_client(client);
3702 printf("Charmrun> All clients connected.\n");
3705 exchange_qpdata_clients();
3706 send_clients_nodeinfo_qpdata();
3709 if (arg_hierarchical_start) {
3710 /* first we need to send data to parent charmrun and then send the nodeinfo
3712 send_myNodeInfo_to_parent();
3713 /*then receive from root */
3714 forward_nodetab_to_children();
3719 for (client = 0; client < req_nClients; client++) {
3720 req_handle_initnodetab(NULL, req_clients[client]);
3725 printf("Charmrun> IP tables sent.\n");
3726 free(ssh_pids); /* done with ssh_pids */
// req_start_server: choose the address charmrun advertises (server_addr), open
// the listening socket with skt_server, and start the CCS server when
// requested. Address choice, in the order visible below: loopback for local
// runs, arg_charmrunip, the CHARMRUN_IP environment variable, loopback when
// the host IP cannot be found, the host name (arg_usehostname, or the IP
// resolves to 127.0.0.1), otherwise the printed IP.
// NOTE(review): some conditions and braces are on lines not shown here.
3731 /*Start the server socket the clients will connect to.*/
3732 void req_start_server(void)
3734 skt_ip_t ip = skt_innode_my_ip();
3736 #if CMK_SHRINK_EXPAND
3737 if (arg_shrinkexpand) { // Need port information
3741 ns = getenv("NETSTART");
3742 if (ns != 0) { /*Read values set by Charmrun*/
3743 int node_num, old_charmrun_pid;
// NOTE(review): about 1 MB of stack for a host name, and the %s conversion
// below has no field width, so the environment string is copied unbounded.
3744 char old_charmrun_name[1024 * 1000];
3745 nread = sscanf(ns, "%d%s%d%d%d", &node_num, old_charmrun_name,
3746 &server_port, &old_charmrun_pid, &port);
3748 fprintf(stderr, "Error parsing NETSTART '%s'\n", ns);
3755 /* local execution, use localhost always */
3756 strcpy(server_addr, "127.0.0.1");
// NOTE(review): the strcpy calls from arg_charmrunip copy command-line and
// environment text without a length check; the size of server_addr is not
// visible here - confirm it cannot overflow.
3757 else if (arg_charmrunip != NULL)
3758 /* user specify the IP at +useip */
3759 strcpy(server_addr, arg_charmrunip);
3760 else if ((arg_charmrunip = getenv("CHARMRUN_IP")) != NULL)
3761 /* user specify the env */
3762 strcpy(server_addr, arg_charmrunip);
3763 else if (skt_ip_match(ip, _skt_invalid_ip)) {
3764 fprintf(stderr, "Charmrun> Warning-- cannot find IP address for your hostname. "
3765 "Using loopback.\n");
3766 strcpy(server_addr, "127.0.0.1");
3767 } else if (arg_usehostname || skt_ip_match(ip, skt_lookup_ip("127.0.0.1")))
3768 /*Use symbolic host name as charmrun address*/
3769 gethostname(server_addr, sizeof(server_addr));
3771 skt_print_ip(server_addr, ip);
3773 #if CMK_SHRINK_EXPAND
3774 server_port = arg_charmrun_port;
// skt_server receives a pointer to server_port; the value is printed afterwards.
3778 server_fd = skt_server(&server_port);
3781 printf("Charmrun> Charmrun = %s, port = %d\n", server_addr, server_port);
3784 #if CMK_CCS_AVAILABLE
// NOTE(review): this condition reduces to
// (!arg_hierarchical_start || !arg_child_charmrun).
3786 if (!arg_hierarchical_start ||
3787 (arg_hierarchical_start && !arg_child_charmrun))
3789 if (arg_server == 1)
3790 CcsServer_new(NULL, &arg_server_port, arg_server_auth);
// unique_node_start: first field of NETSTART, stored by parse_netstart.
3795 int unique_node_start;
// parse_netstart: read the NETSTART environment variable set by the parent
// charmrun. Its five fields fill unique_node_start, the parent's host name,
// parent_charmrun_port, parent_charmrun_pid and port; the host name is then
// resolved into parent_charmrun_IP.
// NOTE(review): the error check on nread and several braces are not visible.
3796 /* Function copied from machine.c file */
3797 void parse_netstart(void)
3802 ns = getenv("NETSTART");
3803 if (ns != 0) { /*Read values set by Charmrun*/
// NOTE(review): about 1 MB of stack, and %s below has no field width.
3804 char parent_charmrun_name[1024 * 1000];
3805 nread = sscanf(ns, "%d%s%d%d%d", &unique_node_start, parent_charmrun_name,
3806 &parent_charmrun_port, &parent_charmrun_pid, &port);
3807 parent_charmrun_IP = skt_lookup_ip(parent_charmrun_name);
3809 nodetab_unique_table[unique_node_start]; /*Works only when
3810 init_hierarchical called in
3814 fprintf(stderr, "Error parsing NETSTART '%s'\n", ns);
// The guard uses bitwise | on the two feature macros; for 0 or 1 values this
// gives the same result as logical OR.
3818 #if CMK_USE_IBVERBS | CMK_USE_IBUD
3819 char *cmi_num_nodes = getenv("CmiNumNodes");
3820 if (cmi_num_nodes != NULL) {
3821 sscanf(cmi_num_nodes, "%d", &_Cmi_numnodes);
// Second integer of the node-list message; stored by my_nodetab_store.
3826 int nodetab_rank0_size_total;
// my_nodetab_store: unpack a node-list message. Integer 0 is the number of
// nodes for this charmrun, integer 1 is stored as nodetab_rank0_size_total,
// and integers 2 onward fill nodetab_rank0_table.
// NOTE(review): no check that the received count fits nodetab_rank0_table is
// visible - confirm where that table is sized.
3827 /* Receive nodes for which I am responsible*/
3828 void my_nodetab_store(ChMessage *msg)
3830 ChMessageInt_t *nodelistmsg = (ChMessageInt_t *) msg->data;
3831 nodetab_rank0_size = ChMessageInt(nodelistmsg[0]);
3832 nodetab_rank0_size_total = ChMessageInt(nodelistmsg[1]);
3834 for (k = 0; k < nodetab_rank0_size; k++) {
3835 nodetab_rank0_table[k] = ChMessageInt(nodelistmsg[k + 2]);
// nodelist_obtain: send an "initnodetab" request carrying unique_node_start to
// the parent charmrun, wait for the reply and store it with my_nodetab_store.
// NOTE(review): preprocessor lines around the send are only partly visible.
3839 /* In hierarchical startup, this function is used by child charmrun to obtains
3840 * the list of nodes for which it is responsible */
3841 void nodelist_obtain(void)
3843 ChMessage nodelistmsg; /* info about all nodes*/
3844 /*Contact charmrun for machine info.*/
3848 /* int qpListSize = (_Cmi_numnodes-1)*sizeof(ChInfiAddr);
3849 me.info.qpList = malloc(qpListSize);
3850 copyInfiAddr(me.info.qpList);
3851 MACHSTATE1(3,"me.info.qpList created and copied size %d bytes",qpListSize);
3852 ctrl_sendone_nolock("initnode",(const char *)&me,sizeof(me),(const char *)me.info.qpList,qpListSize);
3853 free(me.info.qpList);
// The request is a header followed by a single ChMessageInt_t payload.
3856 ChMessageHeader hdr;
3857 ChMessageInt_t node_start = ChMessageInt_new(unique_node_start);
3858 ChMessageHeader_new("initnodetab", sizeof(ChMessageInt_t), &hdr);
3859 skt_sendN(parent_charmrun_fd, (const char *) &hdr, sizeof(hdr));
3860 skt_sendN(parent_charmrun_fd, (const char *) &node_start, sizeof(node_start));
3862 #endif // CMK_USE_IBVERBS
3864 /*We get the other node addresses from a message sent
3865 back via the charmrun control port.*/
// Timeout argument is 1200 * 1000; the failure handling is on hidden lines.
3866 if (!skt_select1(parent_charmrun_fd, 1200 * 1000)) {
3869 ChMessage_recv(parent_charmrun_fd, &nodelistmsg);
3871 my_nodetab_store(&nodelistmsg);
3872 ChMessage_free(&nodelistmsg);
// init_mynodes: when a valid parent charmrun IP is known, open the data server
// socket and connect to the parent (1800 is passed as the last argument of
// skt_connect). The final visible line sets parent_charmrun_fd to -1,
// presumably on the path where no parent IP is known - verify.
3875 void init_mynodes(void)
3878 if (!skt_ip_match(parent_charmrun_IP, _skt_invalid_ip)) {
3879 dataskt = skt_server(&dataport);
3880 parent_charmrun_fd =
3881 skt_connect(parent_charmrun_IP, parent_charmrun_port, 1800);
3883 parent_charmrun_fd = -1;
3890 /****************************************************************************
3894 ****************************************************************************/
// Forward declarations of the node start, stop and debug helpers used by main.
3895 void start_nodes_daemon(void);
3896 void start_nodes_ssh(void);
3897 void start_nodes_mpiexec();
3899 void start_next_level_charmruns(void);
3902 void nodetab_init_for_scyld(void);
3903 void start_nodes_scyld(void);
3905 void start_nodes_local(char **envp);
3906 void kill_nodes(void);
3907 void open_gdb_info(void);
3908 void read_global_segments_size(void);
// Idle callback registered in main with skt_set_idle; it calls sleep(0).
3910 static void fast_idleFn(void) { sleep(0); }
3911 void finish_nodes(void);
// main: charmrun entry point. Visible stages, in order: install the idle
// callback, parse arguments (arg_init), start the timer, set up the node table
// and server port, start node programs by the selected mechanism (daemon,
// scyld, next-level charmruns, batched ssh, local, mpiexec), optionally set up
// charmdebug, wait for clients to connect, then enter request-service mode.
// NOTE(review): this listing drops many short lines (braces, else, #if/#endif,
// single calls), so the exact nesting of the conditions below cannot be
// confirmed from here.
3913 int main(int argc, const char **argv, char **envp)
3917 skt_set_idle(fast_idleFn);
3918 /* CrnSrand((int) time(0)); */
3919 /* notify charm developers that charm is in use */
3922 if (!arg_child_charmrun)
3925 /* Compute the values of all constants */
3926 arg_init(argc, argv);
3928 fprintf(stderr, "Charmrun> charmrun started...\n");
3929 start_timer = GetClock();
3931 /* check scyld configuration */
3935 nodetab_init_for_scyld();
3937 /* Initialize the node-table by reading nodesfile */
3941 /* Start the server port */
3944 /* Initialize the IO module */
3948 /* Hierarchical startup*/
3949 if (arg_child_charmrun) {
3950 init_mynodes(); /* contacts root charmrun and gets list of nodes to start*/
3953 /* start the node processes */
// The CONV_DAEMON environment variable selects the daemon start path.
3954 if (0 != getenv("CONV_DAEMON"))
3955 start_nodes_daemon();
3958 start_nodes_scyld();
3961 PRINT(("Charmrun> IBVERBS version of charmrun\n"));
3965 /* Hierarchical-startup*/
3966 if (arg_hierarchical_start) {
3968 if (!arg_child_charmrun) {
3969 start_next_level_charmruns();
3971 if (!arg_batch_spawn)
3974 req_client_start_and_connect();
3977 start_nodes_local(envp);
3986 if (!arg_batch_spawn) {
3987 #if CMK_SHRINK_EXPAND
3988 // modified rsh in shrink expand, need to launch only new ones,
3989 // preserve some info between new and old
3990 if (!arg_shrinkexpand || (arg_requested_pes > arg_old_pes))
3994 start_nodes_mpiexec();
3999 req_client_start_and_connect();
4001 start_nodes_local(envp);
4005 if (arg_charmdebug) {
4006 #if (defined(_WIN32) && !defined(__CYGWIN__)) || CMK_BPROC
4007 /* Gdb stream (and charmdebug) currently valid only with ssh subsystem */
4009 "Charmdebug is supported currently only with the ssh subsystem\n");
4012 /* Open an additional connection to node 0 with a gdb to grab info */
4013 PRINT(("opening connection with node 0 for info gdb\n"));
4014 read_global_segments_size();
// gdb output is written through a duplicate of descriptor 2 (stderr).
4016 gdb_stream = fdopen(dup(2), "a");
4022 fprintf(stderr, "Charmrun> node programs all started\n");
4024 /* Wait for all clients to connect */
4026 /* Hierarchical startup*/
4027 if (arg_hierarchical_start) {
4029 if (!arg_batch_spawn || (!arg_child_charmrun))
// Root charmrun connects to child charmruns; a child without batch spawn
// connects to its node programs directly.
4033 if (!arg_child_charmrun)
4034 req_charmrun_connect();
4035 else if (!arg_batch_spawn)
4036 req_client_connect();
4043 if (!arg_batch_spawn)
4046 if (!arg_batch_spawn)
4047 req_client_connect();
4053 fprintf(stderr, "Charmrun> node programs all connected\n");
4055 PRINT(("Charmrun> started all node programs in %.3f seconds.\n",
4056 GetClock() - start_timer));
4058 /* enter request-service mode */
4060 if (arg_hierarchical_start)
4062 req_poll_hierarchical();
// create_netstart: format the NETSTART value for a node into a static 1024-byte
// buffer. Fields: node number (or the literal $CmiMyNode in the first form),
// server_addr, server_port, the low 15 bits of this process id, and port.
// Because the buffer is static, each call overwrites the previous result.
// NOTE(review): the condition choosing between the two forms and the return
// statement are on lines not shown; sprintf is unbounded, so a very long
// server_addr could overflow dest.
4069 /*This little snippet creates a NETSTART
4070 environment variable entry for the given node #.
4071 It uses the idiotic "return reference to static buffer"
4072 string return idiom.
4074 char *create_netstart(int node)
4076 static char dest[1024];
4079 sprintf(dest, "$CmiMyNode %s %d %d %d", server_addr, server_port,
4080 getpid() & 0x7FFF, port);
4082 sprintf(dest, "%d %s %d %d %d", node, server_addr, server_port,
4083 getpid() & 0x7FFF, port);
// create_oldnodenames (only with CMK_SHRINK_EXPAND): build a space-separated
// list of all node names from nodetab_table in a static buffer and print it.
4087 #if CMK_SHRINK_EXPAND
4088 /*This little snippet creates a OLDNODENAMES
4089 environment variable entry*/
4090 char *create_oldnodenames()
4092 static char dest1[1024 * 1000];
4094 for (i = 0; i < nodetab_size; i++)
// NOTE(review): dest1 is both the destination and a %s source of this sprintf;
// copying between overlapping objects is undefined behaviour for sprintf. The
// static buffer also keeps its contents across calls, and the append has no
// length limit. Appending at a tracked offset with snprintf would avoid all
// three issues.
4095 sprintf(dest1, "%s %s", dest1, (*nodetab_table[i]).name);
4096 printf("Charmrun> Created oldnames %s \n", dest1);
// start_nodes_daemon: start one node program per rank-0 node by contacting the
// daemon on DAEMON_IP_PORT of each host. The request is a task structure
// (magic, working directory, program path, NETSTART environment entry,
// argument length) followed by the argument string; the daemon answers with a
// one-byte status where 'G' means success.
// NOTE(review): declarations, braces and the error exit are on lines missing
// from this listing.
4100 /* The remainder of charmrun is only concerned with starting all
4101 the node-programs, also known as charmrun clients. We have to
4102 start nodetab_rank0_size processes on the remote machines.
4105 /*Ask the converse daemon running on each machine to start the node-programs.*/
4106 void start_nodes_daemon(void)
4109 char argBuffer[5000]; /*Buffer to hold assembled program arguments*/
4112 /*Set the parts of the task structure that will be the same for all nodes*/
4113 /*Figure out the command line arguments (same for all PEs)*/
// NOTE(review): strcat into the 5000-byte argBuffer has no length check, so a
// long command line could overflow it.
4115 for (i = 0; arg_argv[i]; i++) {
4117 printf("Charmrun> packing arg: %s\n", arg_argv[i]);
4118 strcat(argBuffer, " ");
4119 strcat(argBuffer, arg_argv[i]);
4122 task.magic = ChMessageInt_new(DAEMON_MAGIC);
4124 /*Start up the user program, by sending a message
4125 to PE 0 on each node.*/
4126 for (nodeNumber = 0; nodeNumber < nodetab_rank0_size; nodeNumber++) {
4127 char nodeArgBuffer[5000]; /*Buffer to hold assembled program arguments*/
4129 char *arg_nodeprog_r, *arg_currdir_r;
4130 char statusCode = 'N'; /*Default error code-- network problem*/
4132 int pe0 = nodetab_rank0_table[nodeNumber];
// Working directory and program path are rewritten per node (pathfix and
// pathextfix) and copied into the task; the temporaries are freed afterwards.
// NOTE(review): the strcpy and sprintf calls into task fields are unbounded;
// the field sizes are not visible here.
4134 arg_currdir_r = pathfix(arg_currdir_a, nodetab_pathfixes(nodeNumber));
4135 strcpy(task.cwd, arg_currdir_r);
4136 free(arg_currdir_r);
4137 arg_nodeprog_r = pathextfix(arg_nodeprog_a, nodetab_pathfixes(nodeNumber),
4138 nodetab_ext(nodeNumber));
4139 strcpy(task.pgm, arg_nodeprog_r);
4142 printf("Charmrun> Starting node program %d on '%s' as %s.\n", nodeNumber,
4143 nodetab_name(pe0), arg_nodeprog_r);
4144 free(arg_nodeprog_r);
4145 sprintf(task.env, "NETSTART=%s", create_netstart(nodeNumber));
// A nice value of -100 means none was requested; any other value is appended
// to this node's argument string as +nice.
4147 if (nodetab_nice(nodeNumber) != -100) {
4149 fprintf(stderr, "Charmrun> +nice %d\n", nodetab_nice(nodeNumber));
4150 sprintf(nodeArgBuffer, "%s +nice %d", argBuffer,
4151 nodetab_nice(nodeNumber));
4152 argBuf = nodeArgBuffer;
4155 task.argLength = ChMessageInt_new(strlen(argBuf));
4157 /*Send request out to remote node*/
4158 fd = skt_connect(nodetab_ip(pe0), DAEMON_IP_PORT, 30);
4160 INVALID_SOCKET) { /*Contact! Ask the daemon to start the program*/
4161 skt_sendN(fd, (const char *) &task, sizeof(task));
4162 skt_sendN(fd, (const char *) argBuf, strlen(argBuf));
4163 skt_recvN(fd, &statusCode, sizeof(char));
// statusCode keeps its default 'N' when the daemon could not be reached.
4165 if (statusCode != 'G') { /*Something went wrong--*/
4166 fprintf(stderr, "Error '%c' starting remote node program on %s--\n%s\n",
4167 statusCode, nodetab_name(pe0), daemon_status2msg(statusCode));
4169 } else if (arg_verbose)
4170 printf("Charmrun> Node program %d started.\n", nodeNumber);
4174 #if defined(_WIN32) && !defined(__CYGWIN__)
4175 /*Sadly, interprocess communication on Win32 is quite
4176 different, so we can't use Ssh on win32 yet.
4177 Fall back to the daemon.*/
/* Win32 build: ssh launching simply forwards to the daemon launcher. */
4178 void start_nodes_ssh() { start_nodes_daemon(); }
/* The remaining ssh/mpiexec entry points are empty placeholders here. */
4179 void finish_nodes(void) {}
4180 void start_one_node_ssh(int rank0no) {}
4181 void finish_one_node(int rank0no) {}
4182 void start_nodes_mpiexec() {}
/* Always reports 0 and launches nothing. */
4184 int start_set_node_ssh(int client) { return 0; }
4185 void finish_set_nodes(int start, int stop) {}
/* Appends a block of NUL-separated environment strings after the single
 * string already stored in dest, then writes one extra NUL so the result is
 * terminated by an empty string, and releases oldEnv with
 * FreeEnvironmentStrings().
 * `src` walks the source block one string at a time (presumably starting at
 * oldEnv -- confirm).
 * NOTE(review): no destination size is passed in, so nothing here can stop
 * the copy from running past the end of dest; the visible caller supplies a
 * 10000-byte array. */
4187 void envCat(char *dest, LPTSTR oldEnv)
4190 dest += strlen(dest); // Advance to end of dest
4191 dest++; // Advance past terminating NULL character
4192 while ((*src) != '\0') {
4193 int adv = strlen(src) + 1; // Length of newly-copied string plus NULL
4194 strcpy(dest, src); // Copy another environment string
4195 dest += adv; // Advance past newly-copied string and NULL
4196 src += adv; // Ditto for src
4198 *dest = '\0'; // Paste on final terminating NULL character
4199 FreeEnvironmentStrings(oldEnv);
4202 /* simple version of charmrun that avoids the sshd or charmd, */
4203 /* it spawn the node program just on local machine using exec. */
/* Win32 local launcher: builds one command line from pparam_argv[1] and the
 * following arguments, then calls CreateProcess once per requested PE
 * (arg_requested_pes iterations).  Each child receives an environment block
 * made of "NETSTART=<create_netstart(i)>" followed by the strings returned
 * by GetEnvironmentStrings(), and "." as its working directory.
 * NOTE(review): cmdLine and environment are fixed 10000-byte arrays filled
 * with strcpy/strcat/sprintf/envCat; confirm that an effective length check
 * guards them.
 * NOTE(review): one of the error messages below prints an `int` error code
 * with "%ld" -- confirm whether that branch is compiled and, if so, match
 * the format to the argument type. */
4204 void start_nodes_local(char **env)
4207 PROCESS_INFORMATION pi; /* process Information for the process spawned */
4210 char environment[10000]; /*Doubly-null terminated environment strings*/
4211 char cmdLine[10000]; /*Program command line, including executable name*/
4212 /*Command line too long.*/
4214 if (strlen(pparam_argv[1])+strlen(args) > 10000)
4217 strcpy(cmdLine, pparam_argv[1]);
4218 p = pparam_argv + 2;
4219 while ((*p) != '\0') {
4220 strcat(cmdLine, " ");
4221 strcat(cmdLine, *p);
4225 for (i = 0; i < arg_requested_pes; i++) {
4226 STARTUPINFO si = {0}; /* startup info for the process spawned */
4228 sprintf(environment, "NETSTART=%s", create_netstart(i));
4229 /*Paste all system environment strings */
4230 envCat(environment, GetEnvironmentStrings());
4232 /* Initialise the security attributes for the process
4236 printf("Charmrun> start %d node program on localhost.\n", i);
4238 ret = CreateProcess(NULL, /* application name */
4239 cmdLine, /* command line */
4240 NULL, /*&sa,*/ /* process SA */
4241 NULL, /*&sa,*/ /* thread SA */
4242 FALSE, /* inherit flag */
4244 CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS,
4246 CREATE_NEW_PROCESS_GROUP | CREATE_NEW_CONSOLE,
4248 /* creation flags */
4249 environment, /* environment block */
4250 ".", /* working directory */
4251 &si, /* startup info */
4255 /*Something went wrong! Look up the Windows error code*/
4257 int error=GetLastError();
4258 char statusCode=daemon_err2status(error);
4259 fprintf(logfile,"******************* ERROR *****************\n"
4260 "Error in creating process!\n"
4261 "Error code = %ld-- %s\n\n\n", error,
4262 daemon_status2msg(statusCode));
4265 int error = GetLastError();
4266 fprintf(stderr, "startProcess failed to start process \"%s\" with status: %d\n",
4267 pparam_argv[1], error);
/* Queries bproc for the state of `node` and prints it.
 * Two variants are selected by CMK_BPROC_VERSION: for versions below 4 the
 * single-argument bproc_nodestatus() result is compared with bproc_node_up;
 * otherwise the status is fetched as text into a 128-byte buffer and
 * compared with "up".  In the text variant node -1 (the master) is treated
 * as up without asking bproc. */
4275 int bproc_nodeisup(int node)
4278 #if CMK_BPROC_VERSION < 4
4279 if (bproc_nodestatus(node) == bproc_node_up)
4282 printf("Charmrun> node %d status: %s\n", node, status ? "up" : "down");
4284 char nodestatus[128];
4285 if (node == -1) { /* master node is always up */
4286 strcpy(nodestatus, "up");
4289 if (bproc_nodestatus(node, nodestatus, 128)) {
4290 if (strcmp(nodestatus, "up") == 0)
4294 printf("Charmrun> node %d status: %s\n", node, nodestatus);
4299 /* ++ppn now is supported in both SMP and non SMP version
4300 in SMP, ++ppn specifies number of threads on each node;
4301 in non-SMP, ++ppn specifies number of processes on each node. */
/* Builds the node table for a Scyld/bproc cluster instead of reading a
 * nodelist file.
 * The table is sized to the larger of arg_requested_pes and the node count
 * (bproc_numnodes() + 1, capped through arg_endpe).  Node ids are then
 * scanned from -1 (the master) upward; each usable node contributes up to
 * arg_ppn entries, with the host name being the decimal node id, until
 * arg_requested_pes entries exist.  If the scan ends with fewer entries than
 * requested, existing hosts are reused, wrapping at the original entry count,
 * until the request is met.
 * NOTE(review): neither malloc result is checked before use. */
4302 void nodetab_init_for_scyld()
4304 int maxNodes, i, node, npes, rank;
4308 tablesize = arg_requested_pes;
4309 maxNodes = bproc_numnodes() + 1;
4310 if (arg_endpe < maxNodes)
4311 maxNodes = arg_endpe + 1;
4312 if (maxNodes > tablesize)
4313 tablesize = maxNodes;
4314 nodetab_table = (nodetab_host **) malloc(tablesize * sizeof(nodetab_host *));
4315 nodetab_rank0_table = (int *) malloc(tablesize * sizeof(int));
4316 nodetab_max = tablesize;
4318 nodetab_reset(&group);
4323 #if CMK_SHARED_VARS_UNAVAILABLE
4325 fprintf(stderr,"Warning> Invalid ppn %d in nodelist ignored.\n",
4334 /* check which slave node is available from frompe to endpe */
4336 for (i = -1; i < maxNodes && npes < arg_requested_pes; i++) {
4338 if (!bproc_nodeisup(i))
4340 if (i != -1 && i < arg_startpe)
4342 if (i == -1 && arg_skipmaster)
4343 continue; /* skip master node -1 */
4344 sprintf(hostname, "%d", i);
4345 #if !CMK_SHARED_VARS_UNAVAILABLE
4346 if (npes + arg_ppn > arg_requested_pes)
4347 group.cpus = arg_requested_pes - npes;
4349 group.cpus = arg_ppn;
4351 for (rank = 0; rank < arg_ppn; rank++) {
4352 #if !CMK_SHARED_VARS_UNAVAILABLE
4355 nodetab_makehost(hostname, &group);
4356 nodetab_add(&group);
4357 if (++npes == arg_requested_pes)
4361 if (nodetab_rank0_size == 0) {
4362 fprintf(stderr, "Charmrun> no slave node available!\n");
4366 printf("Charmrun> There are %d slave nodes available.\n",
4367 nodetab_rank0_size - (arg_skipmaster ? 0 : 1));
4369 /* expand node table to arg_requested_pes */
4370 if (arg_requested_pes > npes) {
4371 int orig_size = npes;
4374 if (arg_singlemaster && nodetab_rank0_size > 1 && !arg_skipmaster)
4375 startnode = arg_ppn; /* skip -1 */
4377 while (npes < arg_requested_pes) {
4378 #if !CMK_SHARED_VARS_UNAVAILABLE
4379 if (npes + arg_ppn > arg_requested_pes)
4380 group.cpus = arg_requested_pes - npes;
4382 group.cpus = arg_ppn;
4384 for (rank = 0; rank < arg_ppn; rank++) {
4385 #if !CMK_SHARED_VARS_UNAVAILABLE
4388 nodetab_makehost(nodetab_name(node), &group);
4389 nodetab_add(&group);
4390 if (++node == orig_size)
4392 if (++npes == arg_requested_pes)
/* Launches one node-program per rank-0 entry on a Scyld cluster.  The node
 * id is recovered with atoi() from the host name, the single environment
 * entry is "NETSTART=<create_netstart(i)>", and the program pparam_argv[1]
 * is started either with execve() or with bproc_execmove() on node `nodeno`.
 * NOTE(review): envp[0] is a 256-byte allocation, while create_netstart()
 * formats into a 1024-byte buffer; the sprintf into envp[0] is unbounded.
 * NOTE(review): the malloc result is not checked. */
4399 void start_nodes_scyld(void)
4404 envp[0] = (char *) malloc(256);
4406 for (i = 0; i < nodetab_rank0_size; i++) {
4409 int pe = nodetab_rank0_table[i];
4410 int nodeno = atoi(nodetab_name(pe));
4413 printf("Charmrun> start node program on slave node: %d.\n", nodeno);
4414 sprintf(envp[0], "NETSTART=%s", create_netstart(i));
4420 int fd, fd1 = dup(1);
/* NOTE(review): the condition below assigns open()'s result to fd and tests
 * it for non-zero.  open() reports failure with -1, which is non-zero, so a
 * failed open takes the "success" branch, while a returned descriptor 0 would
 * skip it.  A comparison against -1 is presumably what was intended. */
4421 if (!(arg_debug || arg_debug_no_pause)) { /* debug mode */
4422 if (fd = open("/dev/null", O_RDWR)) {
4429 status = execve(pparam_argv[1], pparam_argv + 1, envp);
4431 fprintf(stderr, "execve failed to start process \"%s\" with status: %d\n",
4432 pparam_argv[1], status);
4434 status = bproc_execmove(nodeno, pparam_argv[1], pparam_argv + 1, envp);
4436 fprintf(stderr, "bproc_execmove failed to start remote process \"%s\" with "
4438 pparam_argv[1], status);
/* Nothing to wait for in this configuration: finish_nodes is a no-op. */
4446 void finish_nodes(void) {}
4449 /*Unix systems can use Ssh normally*/
4450 /********** SSH-ONLY CODE *****************************************/
4454 /* this starts all the node programs. It executes fully in the background. */
4456 /****************************************************************************/
4457 #include <sys/wait.h>
4459 extern char **environ;
/* Drops from the process environment every entry whose text begins with
 * doomedEnv (prefix match through strncmp over strlen(doomedEnv) bytes).
 * `ie` reads through the list and the list is re-terminated at `oe`;
 * presumably entries that do not match are copied forward through `oe` so
 * the array is compacted in place -- confirm. */
4460 void removeEnv(const char *doomedEnv)
4461 { /*Remove a value from the environment list*/
4464 while (*ie != NULL) {
4465 if (0 != strncmp(*ie, doomedEnv, strlen(doomedEnv)))
4469 *oe = NULL; /*NULL-terminate list*/
/* Forks a remote-shell client for node `nodeno` and feeds it startScript on
 * standard input.
 * The argument vector is assembled from pieces of nodetab_shell(nodeno), the
 * host name, "-l <login>", three "-o" options that disable interactive and
 * password authentication and localhost host-key checks, and the remote
 * command "/bin/bash -f" (passed as one argument).
 * In the child the script file is opened, unlinked straight away (the open
 * descriptor keeps it readable), duplicated onto descriptor 0, and the shell
 * is exec'ed.  Callers store the return value in ssh_pids (presumably the
 * child's pid -- confirm). */
4472 int ssh_fork(int nodeno, const char *startScript)
4474 std::vector<const char *> sshargv;
4478 s = nodetab_shell(nodeno);
4481 sshargv.push_back(substr(s, e));
4486 sshargv.push_back(nodetab_name(nodeno));
4487 sshargv.push_back("-l");
4488 sshargv.push_back(nodetab_login(nodeno));
4489 sshargv.push_back("-o");
4490 sshargv.push_back("KbdInteractiveAuthentication=no");
4491 sshargv.push_back("-o");
4492 sshargv.push_back("PasswordAuthentication=no");
4493 sshargv.push_back("-o");
4494 sshargv.push_back("NoHostAuthenticationForLocalhost=yes");
4495 sshargv.push_back("/bin/bash -f");
4496 sshargv.push_back((const char *) NULL);
/* The loop bound size()-1 leaves the trailing NULL entry out of the log.
 * NOTE(review): `n` is int while size() is unsigned (mixed comparison). */
4499 std::string cmd_str = sshargv[0];
4500 for (int n = 1; n < sshargv.size()-1; ++n)
4501 cmd_str += " " + std::string(sshargv[n]);
4502 printf("Charmrun> Starting %s\n", cmd_str.c_str());
4507 perror("ERROR> starting remote shell");
4510 if (pid == 0) { /*Child process*/
/* NOTE(review): the open() result is handed to dup2() without a check; if
 * the script cannot be opened the shell starts with whatever descriptor 0
 * already was. */
4511 int fdScript = open(startScript, O_RDONLY);
4512 /**/ unlink(startScript); /**/
4513 dup2(fdScript, 0); /*Open script as standard input*/
4514 // removeEnv("DISPLAY="); /*No DISPLAY disables ssh's slow X11 forwarding*/
4515 for (int i = 3; i < 1024; i++)
4517 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
4518 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4523 fprintf(stderr, "Charmrun> remote shell (%s:%d) started\n",
4524 nodetab_name(nodeno), nodeno);
/* Writes arguments from argv to f, each one preceded by a single space.
 * NOTE(review): the text is emitted verbatim, with no shell quoting, although
 * it ends up inside generated shell and debugger scripts. */
4528 void fprint_arg(FILE *f, const char **argv)
4531 fprintf(f, " %s", *argv);
/* Emits two script lines: a call to the script's Find helper for `program`,
 * followed by an assignment that copies the helper's $loc result into the
 * shell variable named by `dest`. */
4535 void ssh_Find(FILE *f, const char *program, const char *dest)
4537 fprintf(f, "Find %s\n", program);
4538 fprintf(f, "%s=$loc\n", dest);
/* Writes to f the /bin/sh start-up script for one node-program.
 *   f        - open stream receiving the script text
 *   nodeno   - node table index used for host, debugger, xterm, path fixes,
 *              setup command and nice value
 *   rank0no  - rank-0 index used for CmiMyNode, NETSTART and node info
 *   argv     - program arguments appended to every launch command
 * The generated script defines the helpers Echo, Exit and Find, sources
 * ~/.charmrunrc when present, exports the Charm environment (NETMAGIC,
 * CmiMyNode, NETSTART, CmiMyNodeSize, CmiMyForks, CmiNumNodes, ...), checks
 * that the node-program is executable and that the working directory can be
 * entered, runs the optional per-node setup command, and finally starts the
 * program: inside a debugger in an xterm, inside a plain xterm, or directly.
 * NOTE(review): program path, directory and arguments are interpolated into
 * shell text; some sites wrap them in double quotes and others do not, and
 * none escape embedded quotes.
 * NOTE(review): helper files use predictable names under /tmp built from the
 * remote shell's pid. */
4540 void ssh_script(FILE *f, int nodeno, int rank0no, const char **argv,
4544 char *arg_nodeprog_r, *arg_currdir_r;
4545 const char *dbg = nodetab_debugger(nodeno);
4546 const char *host = nodetab_name(nodeno);
4549 fprintf(f, "#!/bin/sh\n");
4551 fprintf(f, /*Echo: prints out status message*/
4553 " echo 'Charmrun remote shell(%s.%d)>' $*\n"
4556 fprintf(f, /*Exit: exits with return code*/
4558 " if [ $1 -ne 0 ]\n"
4560 " Echo Exiting with error code $1\n"
4562 #if CMK_SSH_KILL /*End by killing ourselves*/
4563 " sleep 5\n" /*Delay until any error messages are flushed*/
4565 #else /*Exit normally*/
4569 fprintf(f, /*Find: locates a binary program in PATH, sets loc*/
4572 " for dir in `echo $PATH | sed -e 's/:/ /g'`\n"
4574 " test -f \"$dir/$1\" && loc=\"$dir/$1\"\n"
4576 " if [ \"x$loc\" = x ]\n"
4578 " Echo $1 not found in your PATH \"($PATH)\"--\n"
4579 " Echo set your path in your ~/.charmrunrc\n"
4585 fprintf(f, "Echo 'remote responding...'\n");
4587 fprintf(f, "test -f \"$HOME/.charmrunrc\" && . \"$HOME/.charmrunrc\"\n");
4588 /* let's leave DISPLAY untouched and rely on X11 forwarding,
4589 changing DISPLAY to charmrun does not always work if X11 forwarding
4592 if (arg_display && !arg_ssh_display)
4593 fprintf(f, "DISPLAY='%s';export DISPLAY\n", arg_display);
4596 if (arg_child_charmrun)
4597 fprintf(f, "NETMAGIC=\"%d\";export NETMAGIC\n",
4598 parent_charmrun_pid & 0x7FFF);
4601 fprintf(f, "NETMAGIC=\"%d\";export NETMAGIC\n", getpid() & 0x7FFF);
4604 fprintf(f, "CmiMyNode=$OMPI_COMM_WORLD_RANK\n");
/* Fall through a list of launcher-specific rank variables; the first one
 * that is non-empty wins, and the script exits with 1 if none is set. */
4605 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$MPIRUN_RANK\n");
4606 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$PMI_RANK\n");
4607 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$PMI_ID\n");
4608 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$MP_CHILD\n");
4609 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$SLURM_PROCID\n");
4610 fprintf(f, "test -z \"$CmiMyNode\" && (Echo Could not detect rank from "
4611 "environment ; Exit 1)\n");
4612 fprintf(f, "export CmiMyNode\n");
4615 else if (arg_hierarchical_start && arg_child_charmrun)
4616 fprintf(f, "CmiMyNode='%d'; export CmiMyNode\n", mynodes_start + rank0no);
4619 fprintf(f, "CmiMyNode='%d'; export CmiMyNode\n", rank0no);
4622 if (arg_hierarchical_start && arg_child_charmrun)
4623 netstart = create_netstart(mynodes_start + rank0no);
4626 netstart = create_netstart(rank0no);
4627 fprintf(f, "NETSTART=\"%s\";export NETSTART\n", netstart);
4629 fprintf(f, "CmiMyNodeSize='%d'; export CmiMyNodeSize\n",
4630 nodetab_getnodeinfo(rank0no)->cpus);
4632 if (restart || arg_mpiexec) /* skip fork */
4633 fprintf(f, "CmiMyForks='%d'; export CmiMyForks\n", 0);
4635 fprintf(f, "CmiMyForks='%d'; export CmiMyForks\n",
4636 nodetab_getnodeinfo(rank0no)->forks);
4639 fprintf(f, "CmiNumNodes=$OMPI_COMM_WORLD_SIZE\n");
/* Same fallback chain as for the rank, this time for the node count. */
4640 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$MPIRUN_NPROCS\n");
4641 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$PMI_SIZE\n");
4642 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$MP_PROCS\n");
4643 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$SLURM_NTASKS\n");
4644 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$SLURM_NPROCS\n");
4645 fprintf(f, "test -z \"$CmiNumNodes\" && (Echo Could not detect node count "
4646 "from environment ; Exit 1)\n");
4647 fprintf(f, "export CmiNumNodes\n");
4650 else if (arg_hierarchical_start && arg_child_charmrun)
4651 fprintf(f, "CmiNumNodes='%d'; export CmiNumNodes\n",
4652 nodetab_rank0_size_total);
4656 fprintf(f, "CmiNumNodes='%d'; export CmiNumNodes\n", nodetab_rank0_size);
4659 fprintf(f, "G95_UNBUFFERED_ALL=TRUE; export G95_UNBUFFERED_ALL\n");
4662 fprintf(f, "GFORTRAN_UNBUFFERED_ALL=YES; export GFORTRAN_UNBUFFERED_ALL\n");
4665 fprintf(f, "MX_MONOTHREAD=1; export MX_MONOTHREAD\n");
4666 /*fprintf(f,"MX_RCACHE=1; export MX_RCACHE\n");*/
4668 #if CMK_AIX && CMK_SMP
4669 fprintf(f, "MALLOCMULTIHEAP=1; export MALLOCMULTIHEAP\n");
4673 printf("Charmrun> Sending \"%s\" to client %d.\n", netstart, rank0no);
4676 "PATH=\"$PATH:/bin:/usr/bin:/usr/X/bin:/usr/X11/bin:/usr/local/bin:"
4677 "/usr/X11R6/bin:/usr/openwin/bin\"\n");
4679 /* find the node-program */
4680 arg_nodeprog_r = pathextfix(arg_nodeprog_a, nodetab_pathfixes(nodeno),
4681 nodetab_ext(nodeno));
4683 /* find the current directory, relative version */
4684 arg_currdir_r = pathfix(arg_currdir_a, nodetab_pathfixes(nodeno));
4687 printf("Charmrun> find the node program \"%s\" at \"%s\" for %d.\n",
4688 arg_nodeprog_r, arg_currdir_r, nodeno);
4690 if (arg_debug || arg_debug_no_pause || arg_in_xterm) {
4691 ssh_Find(f, nodetab_xterm(nodeno), "F_XTERM");
4692 if (!arg_ssh_display && !arg_debug_no_xrdb)
4693 ssh_Find(f, "xrdb", "F_XRDB");
4695 fprintf(f, "Echo 'using xterm' $F_XTERM\n");
4698 if (arg_debug || arg_debug_no_pause) { /*Look through PATH for debugger*/
4699 ssh_Find(f, dbg, "F_DBG");
4701 fprintf(f, "Echo 'using debugger' $F_DBG\n");
4704 if (!arg_ssh_display && !arg_debug_no_xrdb &&
4705 (arg_debug || arg_debug_no_pause || arg_in_xterm)) {
4706 /* if (arg_debug || arg_debug_no_pause || arg_in_xterm) {*/
4707 fprintf(f, "$F_XRDB -query > /dev/null\n");
4708 fprintf(f, "if test $? != 0\nthen\n");
4709 fprintf(f, " Echo 'Cannot contact X Server '$DISPLAY'. You probably'\n");
4710 fprintf(f, " Echo 'need to run xhost to authorize connections.'\n");
4711 fprintf(f, " Echo '(See manual for xhost for security issues)'\n");
4712 fprintf(f, " Echo 'Or try ++batch 1 ++ssh-display to rely on SSH X11 "
4714 fprintf(f, " Exit 1\n");
4718 fprintf(f, "if test ! -x \"%s\"\nthen\n", arg_nodeprog_r);
4719 fprintf(f, " Echo 'Cannot locate this node-program: %s'\n", arg_nodeprog_r);
4720 fprintf(f, " Exit 1\n");
4723 fprintf(f, "cd \"%s\"\n", arg_currdir_r);
/* NOTE(review): this test and the one after the setup command only catch an
 * exit status of exactly 1; any other non-zero status passes unnoticed. */
4724 fprintf(f, "if test $? = 1\nthen\n");
4725 fprintf(f, " Echo 'Cannot propagate this current directory:'\n");
4726 fprintf(f, " Echo '%s'\n", arg_currdir_r);
4727 fprintf(f, " Exit 1\n");
4730 if (strcmp(nodetab_setup(nodeno), "*")) {
4731 fprintf(f, "%s\n", nodetab_setup(nodeno));
4732 fprintf(f, "if test $? = 1\nthen\n");
4733 fprintf(f, " Echo 'this initialization command failed:'\n");
4734 fprintf(f, " Echo '\"%s\"'\n", nodetab_setup(nodeno));
4735 fprintf(f, " Echo 'edit your nodes file to fix it.'\n");
4736 fprintf(f, " Exit 1\n");
4740 fprintf(f, "rm -f /tmp/charmrun_err.$$\n");
4742 fprintf(f, "Echo 'starting node-program...'\n");
4743 /* This is the start of the the run-nodeprogram script */
4746 if (arg_debug || arg_debug_no_pause) {
4747 if (strcmp(dbg, "gdb") == 0 || strcmp(dbg, "idb") == 0) {
/* gdb / idb: a command file is written through a here-document; its first
 * command deletes the file itself once the debugger has read it. */
4748 fprintf(f, "cat > /tmp/charmrun_gdb.$$ << END_OF_SCRIPT\n");
4749 if (strcmp(dbg, "idb") == 0) {
4750 fprintf(f, "set \\$cmdset=\"gdb\"\n");
4752 fprintf(f, "shell /bin/rm -f /tmp/charmrun_gdb.$$\n");
4753 fprintf(f, "handle SIGPIPE nostop noprint\n");
4754 fprintf(f, "handle SIGWINCH nostop noprint\n");
4755 fprintf(f, "handle SIGWAITING nostop noprint\n");
4756 if (arg_debug_commands)
4757 fprintf(f, "%s\n", arg_debug_commands);
4758 fprintf(f, "set args");
4759 fprint_arg(f, argv);
4761 if (arg_debug_no_pause)
4762 fprintf(f, "run\n");
4763 fprintf(f, "END_OF_SCRIPT\n");
4765 fprintf(f, "\"%s\" ", arg_runscript);
4766 fprintf(f, "$F_XTERM");
4767 fprintf(f, " -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4768 if (strcmp(dbg, "idb") == 0)
4769 fprintf(f, " -e $F_DBG \"%s\" -c /tmp/charmrun_gdb.$$ \n", arg_nodeprog_r);
4771 fprintf(f, " -e $F_DBG \"%s\" -x /tmp/charmrun_gdb.$$ \n", arg_nodeprog_r);
4772 } else if (strcmp(dbg, "dbx") == 0) {
4773 fprintf(f, "cat > /tmp/charmrun_dbx.$$ << END_OF_SCRIPT\n");
4774 fprintf(f, "sh /bin/rm -f /tmp/charmrun_dbx.$$\n");
4775 fprintf(f, "dbxenv suppress_startup_message 5.0\n");
4776 fprintf(f, "ignore SIGPOLL\n");
4777 fprintf(f, "ignore SIGPIPE\n");
4778 fprintf(f, "ignore SIGWINCH\n");
4779 fprintf(f, "ignore SIGWAITING\n");
4780 if (arg_debug_commands)
4781 fprintf(f, "%s\n", arg_debug_commands);
4782 fprintf(f, "END_OF_SCRIPT\n");
4784 fprintf(f, "\"%s\" ", arg_runscript);
4785 fprintf(f, "$F_XTERM");
4786 fprintf(f, " -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4787 fprintf(f, " -e $F_DBG %s ", arg_debug_no_pause ? "-r" : "");
4789 fprintf(f, "-c \'runargs ");
4790 fprint_arg(f, argv);
4793 fprintf(f, "-s/tmp/charmrun_dbx.$$ %s", arg_nodeprog_r);
4794 if (arg_debug_no_pause)
4795 fprint_arg(f, argv);
4798 fprintf(stderr, "Unknown debugger: %s.\n Exiting.\n",
4799 nodetab_debugger(nodeno));
4801 } else if (arg_in_xterm) {
4803 fprintf(stderr, "Charmrun> node %d: xterm is %s\n", nodeno,
4804 nodetab_xterm(nodeno));
/* Plain xterm: a small self-deleting wrapper script runs the program,
 * reports its exit code and waits for a line of input before closing. */
4805 fprintf(f, "cat > /tmp/charmrun_inx.$$ << END_OF_SCRIPT\n");
4806 fprintf(f, "#!/bin/sh\n");
4807 fprintf(f, "/bin/rm -f /tmp/charmrun_inx.$$\n");
4808 fprintf(f, "%s", arg_nodeprog_r);
4809 fprint_arg(f, argv);
4811 fprintf(f, "echo 'program exited with code '\\$?\n");
4812 fprintf(f, "read eoln\n");
4813 fprintf(f, "END_OF_SCRIPT\n");
4814 fprintf(f, "chmod 700 /tmp/charmrun_inx.$$\n");
4816 fprintf(f, "\"%s\" ", arg_runscript);
4817 fprintf(f, "$F_XTERM -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4818 fprintf(f, " -sl 5000");
4819 fprintf(f, " -e /tmp/charmrun_inx.$$\n");
4822 fprintf(f, "\"%s\" ", arg_runscript);
4823 if (arg_no_va_rand) {
4825 fprintf(stderr, "Charmrun> setarch -R is used.\n");
4826 fprintf(f, "setarch `uname -m` -R ");
4828 fprintf(f, "\"%s\" ", arg_nodeprog_r);
4829 fprint_arg(f, argv);
4830 if (nodetab_nice(nodeno) != -100) {
4832 fprintf(stderr, "Charmrun> nice -n %d\n", nodetab_nice(nodeno));
4833 fprintf(f, " +nice %d ", nodetab_nice(nodeno));
4835 fprintf(f, "\nres=$?\n");
4836 /* If shared libraries fail to load, the program dies without
4837 calling charmrun back. Since we *have* to close down stdin/out/err,
4838 we have to smuggle this failure information out via a file,
4839 /tmp/charmrun_err.<pid> */
4840 fprintf(f, "if [ $res -eq 127 ]\n"
4842 " ( \n" /* Re-run, spitting out errors from a subshell: */
4845 " ) > /tmp/charmrun_err.$$ 2>&1 \n"
4847 arg_nodeprog_r, arg_nodeprog_r);
4850 /* End the node-program subshell. To minimize the number
4851 of open ports on the front-end, we must close down ssh;
4852 to do this, we have to close stdin, stdout, stderr, and
4853 run the subshell in the background. */
4855 fprintf(f, " < /dev/null 1> /dev/null 2> /dev/null");
4861 fprintf(f, "Echo 'remote shell phase successful.'\n");
4862 fprintf(f, /* Check for startup errors: */
4864 "if [ -r /tmp/charmrun_err.$$ ]\n"
4866 " cat /tmp/charmrun_err.$$ \n"
4867 " rm -f /tmp/charmrun_err.$$ \n"
4870 fprintf(f, "Exit 0\n");
4871 free(arg_currdir_r);
4874 /* use the command "size" to get information about the position of the ".data"
4875 and ".bss" segments inside the program memory */
/* Runs "size -A <node-program>" on node 0 through the remote shell
 * (nodetab_shell(0) host "-l" login command) in a forked child, and waits
 * for that child in the parent.
 * The command buffer is sized as 9 + strlen(path): 8 characters for
 * "size -A " plus the terminating NUL.
 * NOTE(review): the malloc result is not checked. */
4876 void read_global_segments_size()
4878 std::vector<const char *> sshargv;
4882 /* find the node-program */
4884 pathextfix(arg_nodeprog_a, nodetab_pathfixes(0), nodetab_ext(0));
4886 sshargv.push_back(nodetab_shell(0));
4887 sshargv.push_back(nodetab_name(0));
4888 sshargv.push_back("-l");
4889 sshargv.push_back(nodetab_login(0));
4890 tmp = (char *) malloc(sizeof(char) * 9 + strlen(arg_nodeprog_r));
4891 sprintf(tmp, "size -A %s", arg_nodeprog_r);
4892 sshargv.push_back(tmp);
4893 sshargv.push_back((const char *) NULL);
4897 perror("ERROR> getting the size of the global variables segments");
4899 } else if (childPid == 0) {
4902 /*printf("executing: \"%s\" \"%s\" \"%s\" \"%s\"
4903 * \"%s\"\n",sshargv[0],sshargv[1],sshargv[2],sshargv[3],sshargv[4]);*/
4904 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
4905 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4909 /* else we are in the parent */
4911 waitpid(childPid, NULL, 0);
4915 /* open a ssh connection with processor 0 and open a gdb session for info */
/* Forks a remote shell to node 0 that runs "gdb -q <node-program>" and keeps
 * the parent's ends of three descriptor pairs in gdb_info_std: index 0 is
 * the write end used for the child's input (fdin[1]); indices 1 and 2 are
 * the read ends for its output and error streams (fdout[0], fderr[0]).
 * The child's pid is kept in gdb_info_pid.
 * The command buffer is sized as 8 + strlen(path): 7 characters for
 * "gdb -q " plus the terminating NUL.
 * NOTE(review): the malloc result is not checked. */
4916 void open_gdb_info()
4918 std::vector<const char *> sshargv;
4925 /* find the node-program */
4927 pathextfix(arg_nodeprog_a, nodetab_pathfixes(0), nodetab_ext(0));
4929 sshargv.push_back(nodetab_shell(0));
4930 sshargv.push_back(nodetab_name(0));
4931 sshargv.push_back("-l");
4932 sshargv.push_back(nodetab_login(0));
4933 tmp = (char *) malloc(sizeof(char) * 8 + strlen(arg_nodeprog_r));
4934 sprintf(tmp, "gdb -q %s", arg_nodeprog_r);
4935 sshargv.push_back(tmp);
4936 sshargv.push_back((const char *) NULL);
4942 gdb_info_pid = fork();
4943 if (gdb_info_pid < 0) {
4944 perror("ERROR> starting info gdb");
4946 } else if (gdb_info_pid == 0) {
4951 PRINT(("executing: \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", sshargv[0],
4952 sshargv[1], sshargv[2], sshargv[3], sshargv[4]));
4956 for (i = 3; i < 1024; i++)
4958 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
4959 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4963 /* else we are in the parent */
4965 gdb_info_std[0] = fdin[1];
4966 gdb_info_std[1] = fdout[0];
4967 gdb_info_std[2] = fderr[0];
/* Hierarchical start: replaces arg_nodeprog_a with "<its directory>/charmrun"
 * and launches that program through ssh on one host per branch (up to
 * branchfactor of them), stepping through nodetab_unique_table by
 * nodes_per_child.  Each launch writes a start script (first under /tmp,
 * then in the current directory if that fails) and records the value
 * returned by ssh_fork in ssh_pids.
 * NOTE(review): sprintf into the 1024-byte static `buf` is unbounded. */
4973 void start_next_level_charmruns()
4976 static char buf[1024];
/* NOTE(review): strrchr() returns NULL when arg_nodeprog_a contains no '/',
 * and the very next statement writes through the result unchecked.  The
 * write also truncates the original arg_nodeprog_a string in place. */
4977 char *nodeprog_name = strrchr(arg_nodeprog_a, '/');
4978 nodeprog_name[0] = 0;
4979 sprintf(buf, "%s%s%s", arg_nodeprog_a, DIRSEP, "charmrun");
4980 arg_nodeprog_a = strdup(buf);
4985 while (nextIndex < branchfactor) {
4986 /* need to index into unique_table*/
4987 int rank0no = nodetab_unique_table[client];
4988 int pe = nodetab_rank0_table[rank0no];
4990 char startScript[200];
4991 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
4992 f = fopen(startScript, "w");
4994 /* now try current directory */
4995 sprintf(startScript, "charmrun.%d.%d", getpid(), pe);
4996 f = fopen(startScript, "w");
4998 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
5002 ssh_script(f, pe, client, arg_argv, 0);
/* NOTE(review): this allocation sits inside the launch loop; confirm it is
 * guarded so ssh_pids is allocated only once. */
5005 ssh_pids = (int *) malloc(sizeof(int) * branchfactor);
5006 ssh_pids[nextIndex++] = ssh_fork(pe, startScript);
5007 client += nodes_per_child;
/* Launches the node-program for rank-0 entry `rank0no` through ssh.
 * A start script named charmrun.<pid>.<pe> is written under /tmp, or in the
 * current directory when the first fopen fails; ssh_script fills it in and
 * ssh_fork runs it.  The value returned by ssh_fork is stored in
 * ssh_pids[rank0no].
 * NOTE(review): the script name is predictable (pid and pe only) and lives in
 * a shared directory. */
5013 void start_one_node_ssh(int rank0no)
5015 int pe = nodetab_rank0_table[rank0no];
5017 char startScript[200];
5018 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
5019 f = fopen(startScript, "w");
5021 /* now try current directory */
5022 sprintf(startScript, "charmrun.%d.%d", getpid(), pe);
5023 f = fopen(startScript, "w");
5025 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
5029 ssh_script(f, pe, rank0no, arg_argv, 0);
5032 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
5033 ssh_pids[rank0no] = ssh_fork(pe, startScript);
/* Starts the group of rank-0 entries that begins at `client` with a single
 * ssh launch and returns how many entries the group covers.
 * `clientgroup` ends up one past the last entry of the group.  Depending on
 * build options and on arg_scalable_start / arg_hierarchical_start the group
 * is either just `client`, or is extended while the following entries carry
 * the same host name.  The number of additional processes the launched one
 * must fork (group size minus one) is stored in the node info of `client`
 * before start_one_node_ssh(client) is called. */
5036 int start_set_node_ssh(int client)
5038 /* a search function could be inserted here instead of sequential lookup for
5039 * more complex node lists (e.g. interleaving) */
5043 clientgroup = client + 1; /* smp already handles this functionality */
5047 if (!arg_scalable_start && !arg_hierarchical_start)
5048 clientgroup = client + 1; /* only launch 1 core per ssh call */
5050 clientgroup = client;
5052 clientgroup++; /* add one more client to group if not greater than nodes
5053 and shares the same name as client */
5054 if (clientgroup >= nodetab_rank0_size)
5056 if (arg_scalable_start && !arg_hierarchical_start)
5057 if (strcmp(nodetab_name(clientgroup), nodetab_name(client)))
5059 /*Hierarchical-start*/
5060 if (strcmp(nodetab_name(nodetab_rank0_table[clientgroup]),
5061 nodetab_name(nodetab_rank0_table[client])))
5067 if (!arg_scalable_start)
5068 clientgroup = client + 1; /* only launch 1 core per ssh call */
5070 clientgroup = client;
5072 clientgroup++; /* add one more client to group if not greater than nodes
5073 and shares the same name as client */
5074 } while (clientgroup < nodetab_rank0_size &&
5075 (!strcmp(nodetab_getnodeinfo(clientgroup)->name,
5076 nodetab_getnodeinfo(client)->name)));
5081 nodetab_getnodeinfo(client)->forks =
5082 clientgroup - client - 1; /* already have 1 process launching */
5083 start_one_node_ssh(client);
5084 return clientgroup - client; /* return number of entries in group */
/* Launches every node-program through ssh.
 * ssh_pids is allocated with one slot per rank-0 entry.  With
 * CMK_SHRINK_EXPAND and arg_shrinkexpand set, the slots of the clients that
 * already exist (the first arg_old_pes when expanding, the first
 * arg_requested_pes when shrinking) are cleared to 0.  The final loop then
 * walks `client` up to nodetab_rank0_size, advancing by the group size that
 * start_set_node_ssh reports.
 * NOTE(review): the malloc result is not checked.
 * NOTE(review): the clearing loops index ssh_pids by PE counts while the
 * array is sized by nodetab_rank0_size -- confirm the counts cannot exceed
 * it. */
5087 void start_nodes_ssh()
5089 int client, clientgroup;
5090 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
5093 printf("start_nodes_ssh\n");
5095 #if CMK_SHRINK_EXPAND
5097 printf("start_nodes_rsh %d %d\n", arg_requested_pes, arg_old_pes);
5098 if (arg_shrinkexpand) {
5099 if (arg_requested_pes >= arg_old_pes) { // expand case
5101 printf("Expand %d %d\n", arg_requested_pes, arg_old_pes);
5102 for (client = 0; client < arg_old_pes; client++)
5103 ssh_pids[client] = 0;
5104 } else { // shrink case
5106 printf("Shrink %d %d\n", arg_requested_pes, arg_old_pes);
5107 for (client = 0; client < arg_requested_pes; client++)
5108 ssh_pids[client] = 0;
5112 while (client < nodetab_rank0_size) {
5113 /* start a group of processes per node */
5114 clientgroup = start_set_node_ssh(client);
5115 client += clientgroup;
5119 /* for mpiexec, for once calling mpiexec to start on all nodes */
/* Forks a single launcher process for the mpiexec path.
 * The argument vector is built from pieces of nodetab_shell(0), then
 * "-n <nodetab_rank0_size>" unless arg_mpiexec_no_n is set, then the start
 * script path itself as the program to run.  Unlike ssh_fork, the script is
 * passed as an argument rather than on standard input, and it is not
 * unlinked here (the unlink call is commented out). */
5120 int ssh_fork_one(const char *startScript)
5122 std::vector<const char *> sshargv;
5127 /* figure out size and dynamic allocate */
5128 s = nodetab_shell(0);
5135 s = nodetab_shell(0);
5138 sshargv.push_back(substr(s, e));
5143 if ( ! arg_mpiexec_no_n ) {
5144 sshargv.push_back("-n");
5145 sprintf(npes, "%d", nodetab_rank0_size);
5146 sshargv.push_back(npes);
5148 sshargv.push_back((char *) startScript);
5149 sshargv.push_back((const char *) NULL);
5151 printf("Charmrun> Starting %s %s \n", nodetab_shell(0), startScript);
5155 perror("ERROR> starting mpiexec");
5158 if (pid == 0) { /*Child process*/
5160 /* unlink(startScript); */
5161 // removeEnv("DISPLAY="); /*No DISPLAY disables ssh's slow X11 forwarding*/
5162 for (i = 3; i < 1024; i++)
5164 execvp(sshargv[0], const_cast<char *const *>(&sshargv[0]));
5165 fprintf(stderr, "Charmrun> Couldn't find mpiexec program '%s'!\n",
5170 fprintf(stderr, "Charmrun> mpiexec started\n");
/* mpiexec path: writes one start script, ./charmrun.<pid>, marks it
 * executable for the owner and readable for group and others, fills it in
 * with ssh_script for node 0 / rank 0, and launches it once through
 * ssh_fork_one.
 * NOTE(review): the fallback labelled "now try current directory" formats
 * exactly the same path as the first attempt, so it retries the identical
 * file name.
 * NOTE(review): chmod follows the first fopen directly, before any check of
 * the stream is visible. */
5174 void start_nodes_mpiexec()
5179 char startScript[200];
5180 sprintf(startScript, "./charmrun.%d", getpid());
5181 f = fopen(startScript, "w");
5182 chmod(startScript, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IROTH);
5184 /* now try current directory */
5185 sprintf(startScript, "./charmrun.%d", getpid());
5186 f = fopen(startScript, "w");
5188 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
5192 ssh_script(f, 0, 0, arg_argv, 0);
5194 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
/* The value stored in slot 0 is overwritten by the loop that follows, which
 * starts at index 0; every slot ends up 0, so later code that skips zero
 * entries never waits on the mpiexec process. */
5195 ssh_pids[0] = ssh_fork_one(startScript);
5196 for (i = 0; i < nodetab_rank0_size; i++)
5197 ssh_pids[i] = 0; /* skip finish_nodes */
/* Waits for the remote-shell processes recorded in ssh_pids[start..stop).
 * A slot is cleared to 0 once its process has exited with status 0.  For a
 * non-zero exit status the host and index are reported; the code then
 * distinguishes exit status 255 from all others, and a per-slot counter
 * allows up to MAX_NUM_RETRIES relaunches through start_one_node_ssh before
 * the "too many reconnection attempts" message is printed.
 * NOTE(review): waitpid is called with options 0, so it blocks until the
 * child changes state rather than polling, despite the "check if" wording
 * of the inline comment. */
5200 void finish_set_nodes(int start, int stop)
5202 int status, done, i;
5206 return; /*nothing to do*/
5208 std::vector<int> num_retries(stop - start, 0);
5212 for (i = start; i < stop; i++) { /* check all nodes */
5213 if (ssh_pids[i] != 0) {
5214 done = 0; /* we are not finished yet */
5216 waitpid(ssh_pids[i], &status, 0); /* check if the process is finished */
5217 if (WIFEXITED(status)) {
5218 if (!WEXITSTATUS(status)) { /* good */
5219 ssh_pids[i] = 0; /* process is finished */
5221 host = nodetab_name(nodetab_rank0_table[i]);
5223 "Charmrun> Error %d returned from remote shell (%s:%d)\n",
5224 WEXITSTATUS(status), host, i);
5226 if (WEXITSTATUS(status) != 255)
5229 if (++num_retries[i - start] <= MAX_NUM_RETRIES) {
5230 fprintf(stderr, "Charmrun> Reconnection attempt %d of %d\n",
5231 num_retries[i - start], MAX_NUM_RETRIES);
5232 start_one_node_ssh(i);
5236 "Charmrun> Too many reconnection attempts; bailing out\n");
/* Wait for the launched remote shells: the top-level charmrun of a
 * hierarchical start waits on its branchfactor children, every other
 * configuration waits on all nodetab_rank0_size entries. */
5249 if (arg_hierarchical_start && !arg_child_charmrun)
5250 finish_set_nodes(0, branchfactor);
5253 finish_set_nodes(0, nodetab_rank0_size);
5261 return; /*nothing to do*/
5262 /*Now wait for all the ssh'es to finish*/
/* Each recorded remote shell is sent signal 9 and then reaped with waitpid
 * so that no zombie is left behind.
 * NOTE(review): confirm that slots holding 0 are skipped before kill() is
 * reached; kill(0, 9) would signal the caller's whole process group. */
5263 for (rank0no = 0; rank0no < nodetab_rank0_size; rank0no++) {
5264 const char *host = nodetab_name(nodetab_rank0_table[rank0no]);
5267 printf("Charmrun> waiting for remote shell (%s:%d), pid %d\n", host,
5268 rank0no, ssh_pids[rank0no]);
5269 kill(ssh_pids[rank0no], 9);
5270 waitpid(ssh_pids[rank0no], &status, 0); /*<- no zombies*/
5276 /* find the absolute path for an executable in the path */
/* Searches the directories listed in the PATH environment variable for
 * `target` and builds "<dir>/<target>" for each candidate in a heap buffer
 * of PATH_MAX + strlen(target) + 2 bytes, testing each one with probefile().
 * PATH is duplicated first because strtok() modifies the string it splits.
 * NOTE(review): getenv() returns NULL when PATH is unset, and the result is
 * passed straight to strdup(); that case needs a guard.
 * NOTE(review): strtok keeps hidden static state and skips empty PATH
 * elements; no free() of the duplicated PATH string is visible here. */
5277 char *find_abs_path(const char *target)
5279 char *thepath=getenv("PATH");
5280 char *path=strdup(thepath);
5281 char *subpath=strtok(path,":");
5282 char *abspath=(char*) malloc(PATH_MAX + strlen(target) + 2);
5283 while(subpath!=NULL) {
5284 strcpy(abspath,subpath);
5285 strcat(abspath,"/");
5286 strcat(abspath,target);
5287 if(probefile(abspath)){
5291 subpath=strtok(NULL,":");
5298 /* simple version of charmrun that avoids the sshd or charmd, */
5299 /* it spawn the node program just on local machine using exec. */
/* Unix local launcher: starts every node-program on this machine with
 * execve(), one per rank-0 entry.
 * The incoming environment `env` is copied into a larger array with room for
 * two extra entries, NETSTART and CmiNumNodes (plus MALLOCMULTIHEAP on
 * AIX/SMP builds); the two extra strings are rewritten for each node before
 * it is launched.
 * In debug mode (arg_debug or arg_debug_no_pause) a new argument vector is
 * built that runs
 *     <xterm> -e <debugger> <program> -ex "set args ..." [-ex r]
 * where the xterm binary is located through find_abs_path(); otherwise the
 * original arguments starting at pparam_argv[1] are used unchanged.
 * NOTE(review): envp[envc] and envp[envc + 1] are allocated in two places
 * below; confirm only one of them is compiled or executed, otherwise the
 * first pair leaks.
 * NOTE(review): the 256-byte NETSTART entry is filled with an unbounded
 * sprintf from create_netstart(), whose own buffer is 1024 bytes. */
5300 void start_nodes_local(char **env)
5303 int envc, rank0no, i;
5308 #if CMK_AIX && CMK_SMP
5313 /* copy environ and expanded to hold NETSTART and CmiNumNodes */
5314 for (envc = 0; env[envc]; envc++)
5316 envp = (char **) malloc((envc + 2 + extra + 1) * sizeof(void *));
5317 for (i = 0; i < envc; i++)
5319 envp[envc] = (char *) malloc(256);
5320 envp[envc + 1] = (char *) malloc(256);
5321 #if CMK_AIX && CMK_SMP
5322 envp[envc + 2] = (char *) malloc(256);
5323 sprintf(envp[envc + 2], "MALLOCMULTIHEAP=1");
5325 envp[envc + 2 + extra] = 0;
5326 for (i = 0; i < envc; i++)
5328 envp[envc] = (char *) malloc(256);
5329 envp[envc + 1] = (char *) malloc(256);
5333 /* insert xterm gdb in front of command line and pass args to gdb */
5334 if(arg_debug || arg_debug_no_pause) {
5336 for (dparamc = 0, argstringlen=0; pparam_argv[dparamc]; dparamc++)
5338 if(dparamc>1) argstringlen+=strlen(pparam_argv[dparamc]);
5340 if(arg_debug_no_pause) dparamoutmax+=2;
5342 dparamp = (char **) malloc((dparamoutmax) * sizeof(void *));
5343 char *abs_xterm=find_abs_path(arg_xterm);
5346 fprintf(stderr, "Charmrun> cannot find xterm for gdb, please add it to your path\n");
5349 dparamp[dparamoutc++] = strdup(abs_xterm);
5350 dparamp[dparamoutc++] = strdup("-e");
5351 dparamp[dparamoutc++] = strdup(arg_debugger);
5352 dparamp[dparamoutc++] = strdup(pparam_argv[1]);
5353 dparamp[dparamoutc++] = strdup("-ex");
/* The "set args" string is sized from the summed argument lengths plus one
 * byte per argument for the separating spaces and a fixed allowance for the
 * "set args" prefix and the terminating NUL. */
5354 dparamp[dparamoutc] = (char *) malloc(argstringlen + 11 + dparamc);
5355 strcpy(dparamp[dparamoutc], "set args");
5356 for(int i=2; i< dparamc; i++)
5358 strcat(dparamp[dparamoutc], " ");
5359 strcat(dparamp[dparamoutc], pparam_argv[i]);
5361 if(arg_debug_no_pause)
5363 dparamp[++dparamoutc] = strdup("-ex");
5364 dparamp[++dparamoutc] = strdup("r");
5366 dparamp[++dparamoutc]=0; // null terminate your argv or face the wrath of
5367 // undefined behavior
5370 printf("Charmrun> gdb args : ");
5371 for (i = 0; i < dparamoutc; i++)
5372 printf(" %s ",dparamp[i]);
5378 dparamp=(char **) (pparam_argv+1);
5381 for (rank0no = 0; rank0no < nodetab_rank0_size; rank0no++) {
5384 int pe = nodetab_rank0_table[rank0no];
5387 printf("Charmrun> start %d node program on localhost.\n", pe);
5388 sprintf(envp[envc], "NETSTART=%s", create_netstart(rank0no));
5389 sprintf(envp[envc + 1], "CmiNumNodes=%d", nodetab_rank0_size);
5395 int fd, fd1 = dup(1);
5396 if (-1 != (fd = open("/dev/null", O_RDWR))) {
5401 status = execve(dparamp[0],
5402 const_cast<char *const *>(dparamp), envp);
5405 fprintf(stderr, "execve failed to start process \"%s\" with status: %d\n",
5406 dparamp[0], status);
5411 if(arg_debug || arg_debug_no_pause)
/* Releases the debug argument vector from its last slot (the NULL
 * terminator, for which free() is a no-op) down to slot 0. */
5413 for(;dparamoutc>=0;dparamoutc--) free(dparamp[dparamoutc]);
5417 free(envp[envc + 1]);
5418 #if CMK_AIX && CMK_SMP
5419 free(envp[envc + 2]);
/* Restart phase counter. restart_node() pre-increments it and passes the new
   value on the relaunched program's command line right after
   "+restartaftercrash". */
5426 int current_restart_phase = 1;
/* Forward declarations of the crash-recovery helpers used by restart_node(). */
5428 void refill_nodetab_entry(int crashed_node);
5429 nodetab_host *replacement_host(int pe);
/*
 * restart_node: relaunch the node program for a crashed node through the
 * remote shell.
 *
 * crashed_node - index into nodetab_rank0_table identifying the crashed node.
 *
 * Steps visible in this function:
 *   1. Open a start script named /tmp/charmrun.<charmrun pid>.<pe>.
 *   2. Build restart_argv: every entry of arg_argv followed by
 *      "+restartaftercrash", the incremented current_restart_phase as a
 *      decimal string, "+restartisomalloc", and a terminating NULL.
 *   3. Point the crashed node's nodetab entry at a replacement host
 *      (refill_nodetab_entry) and emit the script with ssh_script().
 *   4. Start the remote shell with ssh_fork() and wait for it with waitpid(),
 *      reporting a non-zero exit status.
 *   5. Print the time elapsed since ftTimer.
 */
5432 * @brief Relaunches a program on the crashed node.
5434 void restart_node(int crashed_node)
5436 int pe = nodetab_rank0_table[crashed_node];
5438 char startScript[200];
5439 int restart_ssh_pid;
5440 const char **restart_argv;
5444 /** write the startScript file to be sent**/
5445 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
/* NOTE(review): no check of the fopen() result is visible here -- confirm
   that a failed open is handled before f is used. */
5446 f = fopen(startScript, "w");
5448 /** add an argument to the argv of the new process
5449 so that the restarting processor knows that it
5450 is a restarting processor */
/* First pass over arg_argv; the resulting i sizes the allocation below. */
5452 while (arg_argv[i] != NULL) {
/* i original arguments + 3 restart arguments + terminating NULL. */
5455 restart_argv = (const char **) malloc(sizeof(char *) * (i + 4));
/* Second pass: copy the original argument pointers (not the strings). */
5457 while (arg_argv[i] != NULL) {
5458 restart_argv[i] = arg_argv[i];
5461 restart_argv[i] = "+restartaftercrash";
/* The phase number is incremented before being formatted. */
5462 sprintf(phase_str, "%d", ++current_restart_phase);
5463 restart_argv[i + 1] = phase_str;
5464 restart_argv[i + 2] = "+restartisomalloc";
5465 restart_argv[i + 3] = NULL;
5467 /** change the nodetable entry of the crashed
5468 processor to connect it to a new one**/
5469 refill_nodetab_entry(crashed_node);
5470 ssh_script(f, pe, crashed_node, restart_argv, 1);
5472 /**start the new processor */
5473 restart_ssh_pid = ssh_fork(pe, startScript);
5474 /**wait for the reply from the new process*/
5476 if (arg_debug_no_pause || arg_debug)
/* NOTE(review): the loop repeats until WIFEXITED(status) is true; if the
   remote shell is terminated by a signal that condition may never hold --
   confirm this cannot spin forever. */
5480 waitpid(restart_ssh_pid, &status, 0);
5481 } while (!WIFEXITED(status));
5482 if (WEXITSTATUS(status) != 0) {
5484 "Charmrun> Error %d returned from new attempted remote shell \n",
5485 WEXITSTATUS(status));
/* Report the elapsed time measured from ftTimer. */
5489 PRINT(("Charmrun finished launching new process in %fs\n",
5490 GetClock() - ftTimer));
5493 void refill_nodetab_entry(int crashed_node)
5495 int pe = nodetab_rank0_table[crashed_node];
5496 nodetab_host *h = nodetab_table[pe];
5497 *h = *(replacement_host(pe));
5498 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
5499 fprintf(stderr, "Charmrun>>> New pe %d is on host %s \n", pe,
/*
 * replacement_host: choose the nodetab entry that takes over for processor
 * `pe` after a crash and return a pointer to it (nodetab_table[x]).
 *
 * Two variants are selected at compile time:
 *   - with _FAULT_MLOG_ or _FAULT_CAUSAL_ defined, the candidate index is
 *     (loaded_max_pe + 1) % arg_read_pes and the choice is logged to stderr;
 *   - otherwise the index is drawn with rand(), from the rank-0 table when
 *     arg_hierarchical_start is set, or from the whole node table.
 */
5504 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
5505 nodetab_host *replacement_host(int pe)
/* Candidate index: one past loaded_max_pe, wrapped into [0, arg_read_pes). */
5507 int x = loaded_max_pe + 1;
5509 x = x % arg_read_pes;
5512 * x = rand()%nodetab_size;
5514 fprintf(stderr, "Charmrun>>> replacing pe %d with %d host %s with %s \n", pe,
5515 x, nodetab_name(pe), nodetab_name(x));
5516 return nodetab_table[x];
/* Variant used when neither fault-tolerance macro is defined. */
5519 nodetab_host *replacement_host(int pe)
/* Hierarchical start: pick a random entry of the rank-0 table and record it
   in restarted_pe_id. */
5524 if (arg_hierarchical_start) {
5525 x = nodetab_rank0_table[rand() % nodetab_rank0_size];
5527 restarted_pe_id = x;
/* Otherwise pick uniformly over all nodetab entries. */
5530 x = rand() % nodetab_size;
5532 return nodetab_table[x];
/*
 * reconnect_crashed_client: accept the connection from a relaunched node
 * program and bring it back into the running job.
 *
 * socket_index - slot in req_clients that receives the new socket.
 * crashed_node - node number recorded for the new socket in skt_client_table
 *                and announced to the other clients.
 *
 * Sequence visible in this function:
 *   1. Wait on server_fd (skt_select1, arg_timeout * 1000) and accept.
 *   2. Wait for data on the new socket.
 *   3. When arg_hierarchical_start is set, forward the request with
 *      req_forward_root() and record the crash.
 *   4. Receive the init message, check that its length equals
 *      sizeof(ChSingleNodeinfo), and register it with nodeinfo_add().
 *   5. Call req_handle_initnodetab() for every other client, then
 *      announce_crash().
 *   6. Record crashed_node/socket_index in _last_crash/_crash_socket_index
 *      and free the message.
 * Failures in steps 1-2 are reported through client_connect_problem().
 */
5537 * @brief Reconnects a crashed node. It waits for the I-tuple from the just
5538 * relaunched program. It also:
5539 * i) Broadcast the nodetabtable to every other node.
5540 * ii) Announces the crash to every other node.
5542 void reconnect_crashed_client(int socket_index, int crashed_node)
5545 unsigned int clientPort;
5547 ChSingleNodeinfo *in;
/* A return of 0 from skt_select1 is treated as a timeout. */
5548 if (0 == skt_select1(server_fd, arg_timeout * 1000)) {
5549 client_connect_problem(
5550 socket_index, socket_index,
5551 "Timeout waiting for restarted node-program to connect");
5553 req_clients[socket_index] = skt_accept(server_fd, &clientIP, &clientPort);
/* NOTE(review): the socket-to-node map is updated before the SOCKET_ERROR
   check below, so a failed accept is also recorded -- confirm this is
   harmless. */
5554 skt_client_table[req_clients[socket_index]] = crashed_node;
5556 if (req_clients[socket_index] == SOCKET_ERROR) {
5557 client_connect_problem(socket_index, socket_index,
5558 "Failure in restarted node accept");
/* Wait until the restarted program has sent its request. */
5561 if (!skt_select1(req_clients[socket_index], arg_timeout * 1000)) {
5562 client_connect_problem(socket_index, socket_index,
5563 "Timeout on IP request for restarted processor");
/* Hierarchical start: the request is forwarded with req_forward_root(). */
5567 if (arg_hierarchical_start) {
5568 req_forward_root(req_clients[socket_index]);
/* NOTE(review): a previous crash is detected by _last_crash != 0; if 0 is a
   valid node number a crash of that node would go unnoticed -- confirm. */
5569 if (_last_crash != 0) {
5570 fprintf(stderr, "ERROR> Charmrun detected multiple crashes.\n");
5574 _last_crash = crashed_node;
5575 _crash_socket_index = socket_index;
/* Read the init message sent by the restarted program. */
5579 ChMessage_recv(req_clients[socket_index], &msg);
5580 if (msg.len != sizeof(ChSingleNodeinfo)) {
5581 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
5582 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg.data);
5584 fprintf(stderr, "socket_index %d crashed_node %d reconnected fd %d \n",
5585 socket_index, crashed_node, req_clients[socket_index]);
5587 /** update the nodetab entry corresponding to
5588 this node, skip the restarted one */
5589 in = (ChSingleNodeinfo *) msg.data;
5590 nodeinfo_add(in, req_clients[socket_index]);
/* Every client except the restarted one gets req_handle_initnodetab(). */
5591 for (i = 0; i < req_nClients; i++) {
5592 if (i != socket_index) {
5593 req_handle_initnodetab(NULL, req_clients[i]);
5597 /* tell every one there is a crash */
5598 announce_crash(socket_index, crashed_node);
/* Same multiple-crash check as in the hierarchical branch above. */
5599 if (_last_crash != 0) {
5600 fprintf(stderr, "ERROR> Charmrun detected multiple crashes.\n");
5603 _last_crash = crashed_node;
5604 _crash_socket_index = socket_index;
5605 /*holds the restarted process until I got ack back from
5606 everyone in req_handle_crashack
5607 now the restarted one can only continue until
5608 req_handle_crashack calls req_handle_initnodetab(socket_index)
5609 req_handle_initnodetab(NULL,req_clients[socket_index]); */
5610 ChMessage_free(&msg);
5615 * @brief Sends a message announcing the crash to every other node. This message
5617 * trigger fault tolerance methods.
5619 void announce_crash(int socket_index, int crashed_node)
5622 ChMessageHeader hdr;
5623 ChMessageInt_t crashNo = ChMessageInt_new(crashed_node);
5624 ChMessageHeader_new("crashnode", sizeof(ChMessageInt_t), &hdr);
5625 for (i = 0; i < req_nClients; i++) {
5626 if (i != socket_index) {
5627 skt_sendN(req_clients[i], (const char *) &hdr, sizeof(hdr));
5628 skt_sendN(req_clients[i], (const char *) &crashNo,
5629 sizeof(ChMessageInt_t));
5636 #endif /*CMK_USE_SSH*/