3 #include "../sockRoutines.h"
4 #include "../sockRoutines.c"
5 #include "../ccs-auth.h"
6 #include "../ccs-auth.c"
7 #include "../ccs-server.h"
8 #include "../ccs-server.c"
23 #include <sys/bproc.h>
35 #if defined(_WIN32) && !defined(__CYGWIN__)
36 /*Win32 has screwy names for the standard UNIX calls:*/
37 #define getcwd _getcwd
38 #define strdup _strdup
39 #define unlink _unlink
41 #define fdopen _fdopen
42 #define ftruncate _chsize
46 #include <sys/timeb.h>
49 #define SIGBUS -1 /*These signals don't exist in Win32*/
54 #include <pwd.h> /*getpwuid*/
59 #if CMK_SSH_NOT_NEEDED /*No SSH-- use daemon to start node-programs*/
62 #else /*Use SSH to start node-programs*/
66 #elif !defined(__CYGWIN__)
69 #include <w32api/rpc.h>
71 #if CMK_SSH_IS_A_COMMAND
79 /*#define DEBUGF(x) printf x*/
83 #define MAXPATHLEN 1024
86 const int MAX_NUM_RETRIES = 3;
89 std::map<SOCKET, int> skt_client_table;
90 std::map<std::string, int> host_sizes;
92 const char *nodetab_name(int i);
/* Map a client socket to a node name: when the socket is present in
 * skt_client_table, the index stored for it is passed to nodetab_name().
 * The result for an unknown socket is on lines not shown in this listing. */
93 const char *skt_to_name(SOCKET skt)
95 if (skt_client_table.find(skt) != skt_client_table.end()) {
/* NOTE(review): find() followed by operator[] looks the key up twice; the
 * iterator returned by find() could be reused. */
96 return nodetab_name(skt_client_table[skt]);
/* Map a client socket to the index stored for it in skt_client_table. Only
 * the found path is visible in this listing; the value returned for an
 * unknown socket is not shown. */
101 int skt_to_node(SOCKET skt)
103 if (skt_client_table.find(skt) != skt_client_table.end()) {
104 return skt_client_table[skt];
112 /*Hierarchical-start routines*/
113 int mynodes_start; /* To keep a global node numbering */
117 static double ftTimer;
121 int *ssh_pids = NULL;
/* Return the current time as a double, in seconds: the Win32 branch adds
 * tv.millitm scaled by 1.0E-3 to tv.time, the other branch calls
 * gettimeofday() and adds tv.tv_usec scaled by 1.0E-6 to tv.tv_sec. */
123 double GetClock(void)
125 #if defined(_WIN32) && !defined(__CYGWIN__)
128 return (tv.time * 1.0 + tv.millitm * 1.0E-3);
132 ok = gettimeofday(&tv, NULL);
/* Presumably the failure path of the gettimeofday() call above; the test on
 * ok is not part of this listing. */
134 perror("gettimeofday");
137 return (tv.tv_sec * 1.0 + tv.tv_usec * 1.0E-6);
/* Probe path by trying to open it for reading. How the outcome is reported
 * and where the stream is closed is on lines not shown in this listing. */
141 int probefile(const char *path)
143 FILE *f = fopen(path, "r");
/* Return the login name of the user running charmrun.
 *  - Win32: GetUserName() fills a static 100-byte buffer that starts out
 *    holding the text dunno.
 *  - Otherwise: getpwuid(getuid()) is consulted and self->pw_name returned.
 *    A second visible path runs the command id -u -n and returns a strdup()
 *    copy of the word it reads -- presumably the fallback when getpwuid()
 *    fails (the condition is not shown). */
150 const char *mylogin(void)
152 #if defined(_WIN32) && !defined(__CYGWIN__)
153 static char name[100] = {'d', 'u', 'n', 'n', 'o', 0};
154 unsigned int len = 100;
155 GetUserName(name, (LPDWORD) &len);
160 self = getpwuid(getuid());
166 sprintf(cmd, "id -u -n");
/* NOTE(review): the %s conversion has no field width, so the read is limited
 * only by the size of uname, whose declaration is not visible here --
 * confirm it is large enough or add a width. */
169 fscanf(p, "%s", uname);
171 return strdup(uname);
178 return self->pw_name;
182 /**************************************************************************
186 * Sends a single UDP packet to the charm developers notifying them
187 * that charm is in use.
189 **************************************************************************/
/* Build the destination address (fixed IP literal, port 6571), open a
 * datagram socket and send the current login name with one sendto() call. */
191 void ping_developers()
195 /*This is the resolved IP address of elegance.cs.uiuc.edu */
196 skt_ip_t destination_ip = skt_lookup_ip("128.174.241.211");
197 unsigned int destination_port = 6571;
198 struct sockaddr_in addr = skt_build_addr(destination_ip, destination_port);
201 skt = socket(AF_INET, SOCK_DGRAM, 0);
202 if (skt == INVALID_SOCKET)
/* NOTE(review): unbounded copy into info (its size is declared on a line not
 * shown); snprintf would make the bound explicit. */
205 sprintf(info, "%s", mylogin());
/* The sendto() result is not examined on this line. */
207 sendto(skt, info, strlen(info), 0, (struct sockaddr *) &addr, sizeof(addr));
212 /**************************************************************************
214 * Pathfix : alters a path according to a set of rewrite rules
216 *************************************************************************/
/* Node of a singly linked list of path rewrite rules, chained through next.
 * pathfix() also reads members s1 (the text searched for with strstr) and
 * s2 (the text written into the rewritten path); their declarations are on
 * lines not shown. */
218 typedef struct s_pathfixlist {
221 struct s_pathfixlist *next;
/* Allocate a new rewrite-rule node with malloc(). How s1, s2 and l are
 * stored in it, and which node is returned, is on lines not shown.
 * NOTE(review): no NULL check of the malloc() result is visible. */
224 pathfixlist pathfix_append(char *s1, char *s2, pathfixlist l)
226 pathfixlist pf = (pathfixlist) malloc(sizeof(s_pathfixlist));
/* Apply the rewrite rules in fixes to path and return a strdup() copy of the
 * result. For each rule, l->s1 is looked up in the working buffer with
 * strstr(); a new string is then assembled from the buffer, l->s2 and the
 * text at offs + len, and copied back into the working buffer. */
233 char *pathfix(const char *path, pathfixlist fixes)
235 char buffer[MAXPATHLEN];
237 char buf2[MAXPATHLEN];
/* NOTE(review): the strcpy/sprintf calls below do not limit their output to
 * MAXPATHLEN; a long path or replacement overflows these stack buffers. */
240 strcpy(buffer, path);
244 for (l = fixes; l; l = l->next) {
246 offs = strstr(buffer, l->s1);
249 sprintf(buf2, "%s%s%s", buffer, l->s2, offs + len);
250 strcpy(buffer, buf2);
255 return strdup(buffer);
/* Run pathfix() on path, then copy the result into a fresh malloc() buffer
 * sized for the fixed path plus ext plus two extra bytes. How ext is
 * appended and whether newpath is released is on lines not shown. */
258 char *pathextfix(const char *path, pathfixlist fixes, char *ext)
260 char *newpath = pathfix(path, fixes);
264 ret = (char *) malloc(strlen(newpath) + strlen(ext) + 2);
265 strcpy(ret, newpath);
271 /****************************************************************************
273 * Miscellaneous minor routines.
275 ****************************************************************************/
/* Return 1 when c is a single or double quote character, 0 otherwise. */
int is_quote(char c)
{
  switch (c) {
  case '\'':
  case '"':
    return 1;
  default:
    return 0;
  }
}
/* Presumably strips the line terminator from the end of s: the visible code
 * starts from the last character, and the original comment mentions the DOS
 * carriage return. The actual tests and writes are on lines not shown. */
279 void zap_newline(char *s)
/* NOTE(review): for an empty string this points one byte before s; confirm
 * the code that follows guards against that. */
282 p = s + strlen(s) - 1;
285 /* in case of DOS ^m */
291 /* get substring from lo to hi, remove quote chars */
/* The result is a fresh malloc() buffer of 1 + len bytes into which len
 * bytes starting at lo are copied; how len is derived and where the
 * terminator is written is on lines not shown. */
292 char *substr(const char *lo, const char *hi)
/* NOTE(review): reads hi[-1]; for an empty range (lo == hi) that is the byte
 * before the substring. */
298 if (is_quote(*(hi - 1)))
301 res = (char *) malloc(1 + len);
302 memcpy(res, lo, len);
/* Compare the text starting at lo with the NUL-terminated string str using
 * memcmp() over strlen(str) bytes. How hi takes part in the comparison and
 * which values are returned is on lines not shown. */
307 int subeqs(const char *lo, const char *hi, const char *str)
309 int len = strlen(str);
312 if (memcmp(lo, str, len))
317 /* advance pointer over blank characters */
/* Only the space and tab characters count as blank here. */
318 const char *skipblanks(const char *p)
320 while ((*p == ' ') || (*p == '\t'))
325 /* advance pointer over nonblank characters and a quoted string */
/* A leading single or double quote starts a quoted token that is scanned up
 * to the matching quote character (or the end of the string, where an
 * unmatched-quote error is printed to stderr). Otherwise the scan stops at
 * the first space, tab or NUL. */
326 const char *skipstuff(const char *p)
329 if (*p && (*p == '\'' || *p == '"')) {
334 while (*p && *p != quote)
337 fprintf(stderr, "ERROR> Unmatched quote in nodelist file.\n");
342 while ((*p) && (*p != ' ') && (*p != '\t'))
/* Return the remote-shell command: the value of the CONV_RSH environment
 * variable when it is set, otherwise the compile-time SSH_CMD. */
348 const char *getenv_ssh()
352 e = getenv("CONV_RSH");
353 return e ? e : SSH_CMD;
357 #if !defined(_WIN32) || defined(__CYGWIN__)
/* Derive an X display string from the DISPLAY environment variable. When
 * DISPLAY starts with a colon or with the prefix unix: the visible code
 * formats this host IP (skt_my_ip), a colon and the text after p into the
 * static result buffer.
 * NOTE(review): that sprintf into result[100] is unbounded and DISPLAY comes
 * from the environment; snprintf would be safer. */
358 char *getenv_display()
360 static char result[100], ipBuf[200];
363 e = getenv("DISPLAY");
369 if ((e[0] == ':') || (strncmp(e, "unix:", 5) == 0)) {
370 sprintf(result, "%s:%s", skt_print_ip(ipBuf, skt_my_ip()), p + 1);
/* Variant of getenv_display that, going by its name, leaves the DISPLAY
 * value unmodified. Only the static buffers and the getenv() call are
 * visible here, so confirm against the full source. */
375 char *getenv_display_no_tamper()
377 static char result[100], ipBuf[200];
380 e = getenv("DISPLAY");
392 /*****************************************************************************
394 * PPARAM - obtaining "program parameters" from the user. *
396 *****************************************************************************/
/* One registered command-line parameter. Definitions are chained through
 * next and matched by lname in pparam_find(); pparam_cell() also writes a
 * doc member whose declaration is on a line not shown. */
398 typedef struct s_ppdef {
404 } where; /*Where to store result*/
405 const char *lname; /*Argument name on command line*/
407 char type; /*One of i, r, s, f.*/
408 struct s_ppdef *next;
413 static int pparam_pos;
414 static const char **pparam_argv;
415 static char pparam_optc = '-';
416 char pparam_error[100];
/* Walk the ppdefs list and compare each entry name with lname using
 * strcmp(); the return statements are on lines not shown. */
418 static ppdef pparam_find(const char *lname)
421 for (def = ppdefs; def; def = def->next)
422 if (strcmp(def->lname, lname) == 0)
/* Look up the definition for lname. The visible code also allocates a new
 * s_ppdef with malloc() and gives it a placeholder doc string -- presumably
 * only when the lookup found nothing (the condition is not shown). */
427 static ppdef pparam_cell(const char *lname)
429 ppdef def = pparam_find(lname);
432 def = (ppdef) malloc(sizeof(s_ppdef));
435 def->doc = "(undocumented)";
/* Register an integer parameter named arg: obtains its cell via
 * pparam_cell() and points def->where.i at the caller storage. Handling of
 * defValue and doc is on lines not shown. */
441 void pparam_int(int *where, int defValue, const char *arg, const char *doc)
443 ppdef def = pparam_cell(arg);
445 def->where.i = where;
/* Register a flag parameter named arg: obtains its cell via pparam_cell()
 * and points def->where.f at the caller storage. Handling of defValue and
 * doc is on lines not shown. */
451 void pparam_flag(int *where, int defValue, const char *arg, const char *doc)
453 ppdef def = pparam_cell(arg);
455 def->where.f = where;
/* Register a floating-point parameter named arg: obtains its cell via
 * pparam_cell() and points def->where.r at the caller storage. Handling of
 * defValue and the doc text is on lines not shown. */
461 void pparam_real(double *where, double defValue, const char *arg,
464 ppdef def = pparam_cell(arg);
466 def->where.r = where;
/* Register a string parameter named arg: obtains its cell via pparam_cell()
 * and points def->where.s at the caller storage. Handling of defValue and
 * the doc text is on lines not shown. */
471 void pparam_str(const char **where, const char *defValue, const char *arg,
474 ppdef def = pparam_cell(arg);
476 def->where.s = where;
/* Parse value and store it through def->where: strtol() in base 10 for the
 * .i and .f targets, strtod() for .r, and a freshly calloc()ed copy for .s
 * in which a backslash followed by n becomes a newline character. The
 * dispatch on the parameter type and the return value are on lines not
 * shown. */
482 static int pparam_setdef(ppdef def, const char *value)
487 *def->where.i = strtol(value, &p, 10);
492 *def->where.r = strtod(value, &p);
497 /* Parse input string and convert a literal "\n" into '\n'. */
498 *def->where.s = (char *) calloc(strlen(value) + 1, sizeof(char));
499 char *parsed_value = (char *) *def->where.s;
/* NOTE(review): strlen(value) is re-evaluated in the loop condition and
 * again inside the body; hoisting it into a local would avoid the repeated
 * scans. */
500 for (int i = 0, j = 0; i < strlen(value); i++) {
501 // fprintf(stderr, "i = %d, j = %d, value[i] = \n", i, j);
502 if (i + 1 < strlen(value)) {
503 if (value[i] == '\\' && value[i + 1] == 'n') {
504 parsed_value[j++] = '\n';
509 parsed_value[j++] = value[i];
514 *def->where.f = strtol(value, &p, 10);
/* Set the parameter lname from the string value: fetches its cell via
 * pparam_cell() and returns the result of pparam_setdef(). */
522 int pparam_set(char *lname, char *value)
524 ppdef def = pparam_cell(lname);
525 return pparam_setdef(def, value);
/* Render the current value of a parameter as text. The .i, .r and .f values
 * are formatted into the function-static result buffer (presumably the
 * value returned for those kinds); a string parameter yields the stored
 * pointer, or an empty string when that pointer is null. */
528 const char *pparam_getdef(ppdef def)
530 static char result[100];
533 sprintf(result, "%d", *def->where.i);
/* NOTE(review): the f conversion prints every integral digit of a double,
 * so a very large value needs more than the 100 bytes of result. */
536 sprintf(result, "%f", *def->where.r);
539 return *def->where.s ? *def->where.s : "";
541 sprintf(result, "%d", *def->where.f);
/* Print the parameter help table to stderr. A first pass over ppdefs
 * measures the lname and doc lengths (presumably to compute the column
 * widths maxname and maxdoc); a second pass prints one line per parameter
 * with the doubled option character, the padded name, the padded doc text
 * and the current value in brackets. */
547 void pparam_printdocs()
550 int len, maxname, maxdoc;
553 for (def = ppdefs; def; def = def->next) {
554 len = strlen(def->lname);
557 len = strlen(def->doc);
561 fprintf(stderr, "\n");
562 fprintf(stderr, "Charmrun Command-line Parameters:\n");
563 for (def = ppdefs; def; def = def->next) {
564 fprintf(stderr, " %c%c%-*s ", pparam_optc, pparam_optc, maxname,
566 fprintf(stderr, " %-*s [%s]\n", maxdoc, def->doc, pparam_getdef(def));
568 fprintf(stderr, "\n");
/* Delete entry i from pparam_argv by shifting every following entry,
 * including the terminating null pointer, one slot down. */
571 void pparam_delarg(int i)
574 for (j = i; pparam_argv[j]; j++)
575 pparam_argv[j] = pparam_argv[j + 1];
/* Count the entries of argv up to its terminating null pointer. */
578 int pparam_countargs(const char **argv)
581 for (argc = 0; argv[argc]; argc++)
/* Parse the option at pparam_argv[pparam_pos]. Cases visible in this
 * listing: a bare doubled option character ends option processing, a lone
 * option character is an error, the name is looked up with pparam_find(),
 * flag options must not carry a value, and other options may take their
 * value from the following argument. Error text is left in pparam_error;
 * the return values are on lines not shown. */
586 int pparam_parseopt()
590 const char *opt = pparam_argv[pparam_pos];
591 /* handle ++ by skipping to end */
592 if ((opt[1] == '+') && (opt[2] == 0)) {
593 pparam_delarg(pparam_pos);
594 while (pparam_argv[pparam_pos])
598 /* handle + by itself - an error */
600 sprintf(pparam_error, "Illegal option +\n");
603 /* look up option definition */
605 def = pparam_find(opt + 2);
609 if (strlen(opt) <= 2 || !isalpha(opt[2])) {
611 def = pparam_find(name);
/* NOTE(review): opt comes from the command line and pparam_error holds 100
 * bytes; this and the other sprintf calls that embed opt are unbounded. */
616 sprintf(pparam_error, "Option %s not recognized.", opt);
619 /*Unrecognized + option-- skip it.*/
624 /* handle flag-options */
625 if ((def->type == 'f') && (opt[1] != '+') && (opt[2])) {
626 sprintf(pparam_error, "Option %s should not include a value", opt);
629 if (def->type == 'f') {
631 pparam_delarg(pparam_pos);
634 /* handle non-flag options */
635 if ((opt[1] == '+') || (opt[2] == 0)) {
636 pparam_delarg(pparam_pos);
637 opt = pparam_argv[pparam_pos];
640 if ((opt == 0) || (opt[0] == 0)) {
/* NOTE(review): inside this branch opt is either null or empty, so the
 * message cannot name the option and a null pointer may reach the s
 * conversion; the option name should be saved before opt is advanced. */
641 sprintf(pparam_error, "%s must be followed by a value.", opt);
644 ok = pparam_setdef(def, opt);
645 pparam_delarg(pparam_pos);
647 sprintf(pparam_error, "Illegal value for %s", opt);
/* Parse the command line in argv: optchr becomes the option character, and
 * each argument whose first character equals optchr is handed to
 * pparam_parseopt(), whose negative result is treated specially (presumably
 * as failure -- the statement is not shown). */
653 int pparam_parsecmd(char optchr, const char **argv)
657 pparam_optc = optchr;
660 const char *opt = pparam_argv[pparam_pos];
663 if (opt[0] != optchr)
665 else if (pparam_parseopt() < 0)
/* Copy the null-terminated argument vector argv: count the entries,
 * allocate room for argc + 2 pointers, then duplicate each string with
 * malloc() and strcpy(). Termination of the copy and the return statement
 * are on lines not shown. */
672 char **dupargv(char **argv)
681 for (argc = 0; argv[argc] != NULL; argc++)
683 copy = (char **) malloc((argc + 2) * sizeof(char *));
688 for (argc = 0; argv[argc] != NULL; argc++) {
689 int len = strlen(argv[argc]);
/* NOTE(review): unlike the allocation of copy above, this malloc() result is
 * assigned without a cast, which is accepted by C but not by C++ -- confirm
 * how this branch is compiled. */
690 copy[argc] = malloc(sizeof(char) * (len + 1));
691 strcpy(copy[argc], argv[argc]);
699 /****************************************************************************
703 * The following module computes a whole bunch of miscellaneous values, which
704 * are all constant throughout the program. Naturally, this includes the
705 * value of the command-line arguments.
707 *****************************************************************************/
709 #define MAX_LINE_LENGTH 1000
711 const char **arg_argv;
714 int arg_requested_pes;
717 const char *arg_nodelist;
718 const char *arg_nodegroup;
719 const char *arg_runscript; /* script to run the node-program with */
720 const char *arg_charmrunip;
723 int arg_debug_no_pause;
724 int arg_debug_no_xrdb;
727 arg_debug_commands; /* commands that are provided by a ++debug-commands
728 flag. These are passed into gdb. */
730 int arg_local; /* start node programs directly by exec on localhost */
731 int arg_batch_spawn; /* control starting node programs, several at a time */
732 int arg_scalable_start;
735 int arg_hierarchical_start;
736 int arg_child_charmrun;
738 int arg_help; /* print help message */
739 int arg_ppn; /* pes per node */
742 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
743 int arg_read_pes = 0;
748 const char *arg_shell;
750 const char *arg_debugger;
751 const char *arg_xterm;
752 const char *arg_display;
754 const char *arg_mylogin;
757 int arg_mpiexec_no_n;
760 const char *arg_nodeprog_a;
761 const char *arg_nodeprog_r;
766 int arg_server_port = 0;
767 const char *arg_server_auth = NULL;
768 int replay_single = 0;
773 int arg_singlemaster;
/* Register every charmrun command-line parameter with the pparam_* helpers,
 * parse argv with + as the option character, and derive the remaining arg_*
 * globals (remote shell, display, debugger, login, current directory and
 * the relative and absolute node-program path). Inconsistent combinations
 * are reported on stderr or stdout. Several sections sit in preprocessor
 * branches whose directives are not part of this listing. */
777 void arg_init(int argc, const char **argv)
779 static char buf[1024];
781 int i, local_def = 0;
782 #if CMK_CHARMRUN_LOCAL
783 local_def = 1; /*++local is the default*/
786 pparam_int(&arg_requested_pes, 1, "p", "number of processes to create");
787 pparam_int(&arg_timeout, 60, "timeout",
788 "seconds to wait per host connection");
789 pparam_flag(&arg_verbose, 0, "verbose", "Print diagnostic messages");
790 pparam_str(&arg_nodelist, 0, "nodelist", "file containing list of nodes");
791 pparam_str(&arg_nodegroup, "main", "nodegroup",
792 "which group of nodes to use");
793 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
794 pparam_int(&arg_read_pes, 0, "readpe",
795 "number of host names to read into the host table");
798 #if CMK_CCS_AVAILABLE
799 pparam_flag(&arg_server, 0, "server", "Enable client-server (CCS) mode");
800 pparam_int(&arg_server_port, 0, "server-port",
801 "Port to listen for CCS requests");
802 pparam_str(&arg_server_auth, 0, "server-auth", "CCS Authentication file");
804 pparam_flag(&arg_local, local_def, "local",
805 "Start node programs locally without daemon");
806 pparam_int(&arg_batch_spawn, 0, "batch", "Launch connections to this many "
807 "node programs at a time, avoiding "
808 "overloading charmrun pe");
809 pparam_flag(&arg_scalable_start, 1, "scalable-start", "scalable start");
811 pparam_flag(&arg_hierarchical_start, 0, "hierarchical-start",
812 "hierarchical start");
813 pparam_flag(&arg_child_charmrun, 0, "child-charmrun", "child charmrun");
815 pparam_flag(&arg_usehostname, 0, "usehostname",
816 "Send nodes our symbolic hostname instead of IP address");
817 pparam_str(&arg_charmrunip, 0, "useip",
818 "Use IP address provided for charmrun IP");
819 pparam_flag(&arg_mpiexec, 0, "mpiexec", "use mpiexec to start jobs");
820 pparam_flag(&arg_mpiexec_no_n, 0, "mpiexec-no-n", "use mpiexec to start jobs without -n procs");
822 pparam_flag(&arg_debug, 0, "debug",
823 "Run each node under gdb in an xterm window");
824 pparam_flag(&arg_debug_no_pause, 0, "debug-no-pause",
825 "Like debug, except doesn't pause at beginning");
826 pparam_str(&arg_debug_commands, 0, "debug-commands",
827 "Commands to be run inside gdb at startup");
828 pparam_flag(&arg_debug_no_xrdb, 0, "no-xrdb", "Don't check xrdb");
830 /* When the ++charmdebug flag is used, charmrun listens from its stdin for
831 commands, and forwards them to the gdb info program (a child), or to the
832 processor gdbs. The stderr is redirected to the stdout, so the two streams
833 are mixed together. The channel for stderr is reused to forward the replies
834 of gdb back to the java debugger. */
836 pparam_flag(&arg_charmdebug, 0, "charmdebug",
837 "Used only when charmrun is started by charmdebug");
840 pparam_int(&arg_maxssh, 16, "maxssh",
841 "Maximum number of ssh's to run at a time");
842 pparam_str(&arg_shell, 0, "remote-shell",
843 "which remote shell to use (default $CONV_RSH or " SSH_CMD);
844 pparam_str(&arg_debugger, 0, "debugger", "which debugger to use");
845 pparam_str(&arg_display, 0, "display", "X Display for xterm");
846 pparam_flag(&arg_ssh_display, 0, "ssh-display",
847 "use own X Display for each ssh session");
848 pparam_flag(&arg_in_xterm, 0, "in-xterm", "Run each node in an xterm window");
849 pparam_str(&arg_xterm, 0, "xterm", "which xterm to use");
852 /* options for Scyld */
853 pparam_int(&arg_startpe, 0, "startpe", "first pe to start job(SCYLD)");
854 pparam_int(&arg_endpe, 1000000, "endpe", "last pe to start job(SCYLD)");
855 pparam_flag(&arg_singlemaster, 0, "singlemaster",
856 "Only assign one process to master node(SCYLD)");
857 pparam_flag(&arg_skipmaster, 0, "skipmaster",
858 "Donot assign any process to master node(SCYLD)");
/* NOTE(review): this test runs before pparam_parsecmd() further down has
 * looked at the command line, so it can only see the values the two flags
 * hold right after registration -- confirm whether it should move to after
 * the parse. */
859 if (arg_skipmaster && arg_singlemaster) {
860 printf("Charmrun> 'singlemaster' is ignored due to 'skipmaster'. \n");
861 arg_singlemaster = 0;
863 pparam_flag(&arg_debug, 0, "debug", "turn on more verbose debug print");
865 pparam_str(&arg_runscript, 0, "runscript", "script to run node-program with");
866 pparam_flag(&arg_help, 0, "help", "print help messages");
867 pparam_int(&arg_ppn, 0, "ppn", "number of pes per node");
868 pparam_flag(&arg_no_va_rand, 0, "no-va-randomization",
869 "Disables randomization of the virtual address space");
871 arg_argv = dupargv(argv);
874 if (pparam_parsecmd('+', argv) < 0) {
875 fprintf(stderr, "ERROR> syntax: %s\n", pparam_error);
880 /* Check for (but do *not* remove) the "-?", "-h", or "--help" flags */
881 for (i = 0; argv[i]; i++) {
882 if (0 == strcmp(argv[i], "-?") || 0 == strcmp(argv[i], "-h") ||
883 0 == strcmp(argv[i], "--help"))
891 if ( arg_mpiexec_no_n ) arg_mpiexec = arg_mpiexec_no_n;
894 if (!arg_hierarchical_start || arg_child_charmrun)
897 (argv) + 1; /*Skip over charmrun (0) here and program name (1) later*/
898 arg_argc = pparam_countargs(arg_argv);
900 fprintf(stderr, "ERROR> You must specify a node-program.\n");
906 if (!arg_hierarchical_start || arg_child_charmrun) {
907 // Removing nodeprogram from the list
911 // Removing charmrun from parameters
/* NOTE(review): strlen() does not count the terminating NUL, so this buffer
 * is one byte too small for the strcpy() on the next line; the size needs
 * strlen(...) + 1. */
915 arg_argv[arg_argc] = malloc(sizeof(char) * strlen("++child-charmrun"));
916 strcpy(arg_argv[arg_argc++], "++child-charmrun");
917 arg_argv[arg_argc] = NULL;
924 if (arg_server_port || arg_server_auth)
927 if (arg_debug || arg_debug_no_pause) {
928 fprintf(stderr, "Charmrun> scalable start disabled under ++debug:\n"
929 "NOTE: will make an SSH connection per process launched,"
930 " instead of per physical node.\n");
931 arg_scalable_start = 0;
933 /*Pass ++debug along to program (used by machine.c)*/
934 arg_argv[arg_argc++] = "++debug";
937 /* Check for +replay-detail to know we have to load only one single processor
939 for (i = 0; argv[i]; i++) {
940 if (0 == strcmp(argv[i], "+replay-detail")) {
942 arg_requested_pes = 1;
949 "Warning> ++local cannot be used in bproc version, ignored!\n");
955 /* Find the current value of the CONV_RSH variable */
958 arg_shell = "mpiexec";
960 arg_shell = getenv_ssh();
963 /* Find the current value of the DISPLAY variable */
965 arg_display = getenv_display_no_tamper();
966 if ((arg_debug || arg_debug_no_pause || arg_in_xterm) && (arg_display == 0)) {
967 fprintf(stderr, "ERROR> DISPLAY must be set to use debugging mode\n");
970 if (arg_debug || arg_debug_no_pause)
971 arg_timeout = 8 * 60 * 60; /* Wait 8 hours for ++debug */
973 /* default debugger is gdb */
975 arg_debugger = "gdb";
976 /* default xterm is xterm */
980 arg_mylogin = mylogin();
983 /* find the current directory, absolute version */
985 arg_currdir_a = strdup(buf);
987 /* find the node-program, absolute version */
988 arg_nodeprog_r = argv[1];
990 if (arg_nodeprog_r[0] == '-' || arg_nodeprog_r[0] == '+') {
991 /*If it starts with - or +, it ain't a node program.
992 Chances are, the user screwed up and passed some
993 unknown flag to charmrun*/
994 printf("Charmrun does not recognize the flag '%s'.\n", arg_nodeprog_r);
995 if (arg_nodeprog_r[0] == '+')
996 printf("Charm++'s flags need to be placed *after* the program name.\n");
1001 #if defined(_WIN32) && !defined(__CYGWIN__)
1002 if (argv[1][1] == ':' ||
1003 argv[1][0] == '\\' && argv[1][1] == '\\') { /*E.g.: "C:\foo\bar.exe*/
1005 if (argv[1][0] == '/') { /*E.g.: "\foo\bar"*/
1007 /*Absolute path to node-program*/
1008 arg_nodeprog_a = argv[1];
/* NOTE(review): buf holds 1024 bytes and this sprintf is unbounded; a long
 * working directory plus program path overflows it. */
1010 sprintf(buf, "%s%s%s", arg_currdir_a, DIRSEP, arg_nodeprog_r);
1011 arg_nodeprog_a = strdup(buf);
1013 if (arg_scalable_start) {
1014 printf("Charmrun> scalable start enabled. \n");
1018 if (arg_hierarchical_start) {
1019 printf("Charmrun> Hierarchical scalable start enabled. \n");
1020 if (arg_debug || arg_debug_no_pause) {
1021 fprintf(stderr, "Charmrun> Error: ++hierarchical-start does not support "
1022 "debugging mode. \n");
1026 fprintf(stderr, "Charmrun> Warning: you have enabled verbose output with "
1027 "Hierarchical startup, you may get inconsistent verbose "
1028 "outputs. \n++hierarchial-start does not support verbose "
1032 } else if (arg_child_charmrun) {
1035 "Charmrun> Error: ++child-charmrun is not a user-specified flag. \n");
1040 /*If number of pes per node does not divide number of pes*/
1041 if(arg_requested_pes && arg_ppn){
1042 if(arg_requested_pes % arg_ppn != 0){
1045 "Charmrun> Error: ++ppn (number of pes per node) does not divide +p (number of pes) \n");
1051 /****************************************************************************
1053 * NODETAB: The nodes file and nodes table.
1055 ****************************************************************************/
1057 static int portOk = 1;
1058 static const char *nodetab_tempName = NULL;
/* Locate the nodelist file and return its path as a strdup() copy. Sources
 * are tried in this order: the ++nodelist argument (arg_nodelist), the
 * NODELIST environment variable, ./nodelist in the current directory, and
 * finally a per-user file (a .nodelist under HOME on the non-Win32 branch)
 * which is created with a single localhost entry when it does not exist. */
1059 char *nodetab_file_find()
1061 char buffer[MAXPATHLEN];
1063 /* Find a nodes-file as specified by ++nodelist */
1065 const char *path = arg_nodelist;
1066 if (probefile(path))
1067 return strdup(path);
1068 fprintf(stderr, "ERROR> No such nodelist file %s\n", path);
1071 /* Find a nodes-file as specified by getenv("NODELIST") */
1072 if (getenv("NODELIST")) {
1073 char *path = getenv("NODELIST");
1074 if (path && probefile(path))
1075 return strdup(path);
1076 // cppcheck-suppress nullPointer
1077 fprintf(stderr, "ERROR> Cannot find nodelist file %s\n", path);
1080 /* Find a nodes-file by looking under 'nodelist' in the current directory */
1081 if (probefile("./nodelist"))
1082 return strdup("./nodelist");
1083 #if defined(_WIN32) && !defined(__CYGWIN__)
1085 nodetab_tempName = strdup(buffer);
1087 if (getenv("HOME")) {
/* NOTE(review): unbounded sprintf of an environment value into a
 * MAXPATHLEN buffer. */
1088 sprintf(buffer, "%s/.nodelist", getenv("HOME"));
1091 if (!probefile(buffer)) {
1092 /*Create a simple nodelist in the user's home*/
1093 FILE *f = fopen(buffer, "w");
1095 fprintf(stderr, "ERROR> Cannot create a 'nodelist' file.\n");
1098 fprintf(f, "group main\nhost localhost\n");
1101 return strdup(buffer);
/* One entry of nodetab_table. It holds the host identity (name, ip), its
 * placement data (cpus, rank, speed, nice, forks), the launch settings
 * (shell, debugger, xterm, login, passwd, setup) and the connection fields
 * that are filled in during node startup. */
1104 typedef struct nodetab_host {
1105 const char *name; /*Host DNS name*/
1106 skt_ip_t ip; /*IP address of host*/
1107 pathfixlist pathfixes;
1108 char *ext; /*FIXME: What the heck is this? OSL 9/8/00*/
1109 int cpus; /* # of physical CPUs*/
1110 int rank; /*Rank of this CPU*/
1111 double speed; /*Relative speed of each CPU*/
1112 int nice; /* process priority */
1113 int forks; /* number of processes to fork on remote node */
1114 /*These fields are set during node-startup*/
1115 int dataport; /*UDP port number*/
1116 SOCKET ctrlfd; /*Connection to control port*/
1118 const char *shell; /*Ssh to use*/
1119 const char *debugger; /*Debugger to use*/
1120 const char *xterm; /*Xterm to use*/
1121 const char *login; /*User login name to use*/
1122 const char *passwd; /*User login password*/
1123 const char *setup; /*Commands to execute on login*/
1135 nodetab_host **nodetab_table;
1138 int *nodetab_rank0_table;
1139 int nodetab_rank0_size;
1141 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
/* Reset *h to defaults: a placeholder name, the invalid IP, and the shell,
 * debugger, xterm and login taken from the corresponding arg_* globals.
 * The remaining fields are set on lines not shown in this listing. */
1145 void nodetab_reset(nodetab_host *h)
1147 h->name = "SET_H->NAME";
1148 h->ip = _skt_invalid_ip;
1159 h->shell = arg_shell;
1160 h->debugger = arg_debugger;
1161 h->xterm = arg_xterm;
1162 h->login = arg_mylogin;
/* Append a copy of *h to nodetab_table: a nodetab_host is malloc()ed for
 * slot nodetab_size, *h is copied into it and nodetab_size is incremented.
 * The visible code also records the slot in nodetab_rank0_table (the
 * guarding condition is not shown) and prints the client index, name and
 * IP (presumably only in verbose mode). */
1168 void nodetab_add(nodetab_host *h)
1171 nodetab_rank0_table[nodetab_rank0_size++] = nodetab_size;
1172 nodetab_table[nodetab_size] = (nodetab_host *) malloc(sizeof(nodetab_host));
1176 skt_print_ip(ips, h->ip);
1177 printf("Charmrun> adding client %d: \"%s\", IP:%s\n", nodetab_size, h->name,
1181 *nodetab_table[nodetab_size++] = *h;
/* Fill in h->name with a strdup() copy of name and h->ip with the result of
 * skt_innode_lookup_ip(). A lookup that yields _skt_invalid_ip leads to the
 * Cannot obtain IP address error; one visible path exempts the case of a
 * single requested PE with a name that atoi() reads as -1. What happens
 * when nodetab_size equals nodetab_max is on a line not shown. */
1184 void nodetab_makehost(const char *name, nodetab_host *h)
1186 h->name = strdup(name);
1187 h->ip = skt_innode_lookup_ip(name);
1188 if (skt_ip_match(h->ip, _skt_invalid_ip)) {
1190 /* only the master node is used */
1191 if (!(1 == arg_requested_pes && atoi(name) == -1))
1194 fprintf(stderr, "ERROR> Cannot obtain IP address of %s\n", name);
1198 if (nodetab_size == nodetab_max)
/* Parse keyword/value pairs from the nodelist text args into *h. Each round
 * takes two blank-separated tokens (b1..e1 is the keyword, b2..e2 the
 * value) and then continues after the value. Keywords visible here: login,
 * passwd, setup, shell, debugger, xterm, speed, cpus, pathfix (which takes
 * a third token), ext and nice. The return value is on lines not shown. */
1202 const char *nodetab_args(const char *args, nodetab_host *h)
1206 while (*args != 0) {
1207 const char *b1 = skipblanks(args), *e1 = skipstuff(b1);
1208 const char *b2 = skipblanks(e1), *e2 = skipstuff(b2);
1210 b1++; /*Skip over "++" on parameters*/
1212 if (subeqs(b1, e1, "login"))
1213 h->login = substr(b2, e2);
1214 else if (subeqs(b1, e1, "passwd"))
1215 h->passwd = substr(b2, e2);
1216 else if (subeqs(b1, e1, "setup"))
/* Unlike the other string options, setup copies everything from b2 to the
 * end of the line rather than just the b2..e2 token. */
1217 h->setup = strdup(b2);
1218 else if (subeqs(b1, e1, "shell"))
1219 h->shell = substr(b2, e2);
1220 else if (subeqs(b1, e1, "debugger"))
1221 h->debugger = substr(b2, e2);
1222 else if (subeqs(b1, e1, "xterm"))
1223 h->xterm = substr(b2, e2);
1226 if (subeqs(b1, e1, "speed"))
1227 h->speed = atof(b2);
1228 else if (subeqs(b1, e1, "cpus")) {
1230 h->cpus = atol(b2); /* ignore if there is ++ppn */
1231 } else if (subeqs(b1, e1, "pathfix")) {
1232 const char *b3 = skipblanks(e2), *e3 = skipstuff(b3);
1233 args = skipblanks(e3);
1235 pathfix_append(substr(b2, e2), substr(b3, e3), h->pathfixes);
1236 e2 = e3; /* for the skipblanks at the end */
1237 } else if (subeqs(b1, e1, "ext"))
1238 h->ext = substr(b2, e2);
1239 else if (subeqs(b1, e1, "nice"))
1243 args = skipblanks(e2);
1248 /* setup nodetab as localhost only */
/* Build the node table for a purely local run: the tables are sized for
 * arg_requested_pes entries and filled with hosts named 127.0.0.1, arg_ppn
 * ranks at a time, until arg_requested_pes entries exist. host_sizes for
 * 127.0.0.1 is set to arg_requested_pes divided by arg_ppn, rounded up.
 * NOTE(review): the division assumes arg_ppn is nonzero at that point; its
 * registered default is 0, so confirm the adjustment on the lines not
 * shown. */
1249 void nodetab_init_for_local()
1251 int tablesize, i, done = 0;
1254 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1255 if (arg_read_pes == 0) {
1256 arg_read_pes = arg_requested_pes;
1260 tablesize = arg_requested_pes;
1261 nodetab_table = (nodetab_host **) malloc(tablesize * sizeof(nodetab_host *));
1262 nodetab_rank0_table = (int *) malloc(tablesize * sizeof(int));
1263 nodetab_max = tablesize;
1265 nodetab_reset(&group);
1268 #if CMK_SHARED_VARS_UNAVAILABLE
1270 fprintf(stderr, "Warning> Invalid ppn %d in nodelist ignored.\n", arg_ppn);
1274 group.cpus = arg_ppn;
1277 const char *hostname = "127.0.0.1";
1278 for (group.rank = 0; group.rank < arg_ppn; group.rank++) {
1279 nodetab_makehost(hostname, &group);
1280 nodetab_add(&group);
1281 if (++i == arg_requested_pes) {
1287 host_sizes["127.0.0.1"] = (arg_requested_pes + arg_ppn - 1) / arg_ppn;
1291 /* Sets the parent field of hosts to point to their parent charmrun. The root
1292 * charmrun will create children for all hosts which are parent of at least one
1295 int nodes_per_child;
1296 int *nodetab_unique_table;
1297 int nodetab_unique_size;
1298 char *nodetab_name(int i);
/* Build nodetab_unique_table for hierarchical start: walk the rank-0 nodes,
 * record the first index of every run of consecutive nodes that share a
 * host name, then set branchfactor to the rounded-up square root of the
 * number of runs and nodes_per_child to that number divided by
 * branchfactor, rounded to nearest.
 * NOTE(review): with no rank-0 nodes branchfactor is 0 and the division
 * below is undefined for the int conversion; presumably callers guarantee
 * at least one node -- confirm. */
1299 void nodetab_init_hierarchical_start(void)
1303 nodetab_unique_size = 0;
1304 nodetab_unique_table = (int *) malloc(nodetab_rank0_size * sizeof(int));
1305 while (node_start < nodetab_rank0_size) {
1306 nodetab_unique_table[nodetab_unique_size++] = node_start;
1307 node_name = nodetab_name(node_start);
1310 } while (node_start < nodetab_rank0_size &&
1311 (!strcmp(nodetab_name(node_start), node_name)));
1313 branchfactor = ceil(sqrt(nodetab_unique_size));
1314 nodes_per_child = round(nodetab_unique_size * 1.0 / branchfactor);
/* Body of the routine that builds the node table; its signature line is not
 * part of this listing. With ++local or mpiexec it defers to
 * nodetab_init_for_local(). Otherwise it opens the nodelist file, reads it
 * line by line (option lines, host lines and group lines), collects the
 * hosts of the requested group, wraps around the host list until
 * arg_requested_pes entries exist, adds them with nodetab_add(), clips the
 * cpus field of the trailing entries and optionally prepares hierarchical
 * start. */
1322 nodetab_host global, group, host;
1323 char input_line[MAX_LINE_LENGTH];
1324 int rightgroup, i, remain, lineNo;
1325 /* Store the previous host so we can make sure we aren't mixing localhost and
1327 char *prevHostName = NULL;
1328 std::vector< std::pair<int, nodetab_host> > hosts;
1329 std::multimap<int, nodetab_host> binned_hosts;
1331 /* if arg_local is set, ignore the nodelist file */
1332 if (arg_local || arg_mpiexec) {
1333 nodetab_init_for_local();
1337 /* Open the NODES_FILE. */
1338 nodesfile = nodetab_file_find();
1340 fprintf(stderr, "Charmrun> using %s as nodesfile\n", nodesfile);
1341 if (!(f = fopen(nodesfile, "r"))) {
1342 fprintf(stderr, "ERROR> Cannot read %s: %s\n", nodesfile, strerror(errno));
1347 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1348 if (arg_read_pes == 0) {
1349 arg_read_pes = arg_requested_pes;
1352 (nodetab_host **) malloc(arg_read_pes * sizeof(nodetab_host *));
1353 nodetab_rank0_table = (int *) malloc(arg_read_pes * sizeof(int));
1354 nodetab_max = arg_read_pes;
1355 fprintf(stderr, "arg_read_pes %d arg_requested_pes %d\n", arg_read_pes,
1359 (nodetab_host **) malloc(arg_requested_pes * sizeof(nodetab_host *));
1360 nodetab_rank0_table = (int *) malloc(arg_requested_pes * sizeof(int));
1361 nodetab_max = arg_requested_pes;
1364 nodetab_reset(&global);
1366 rightgroup = (strcmp(arg_nodegroup, "main") == 0);
/* fgets() already reserves room for the terminating NUL, so the - 1 only
 * shortens the usable line length by one character. */
1372 while (fgets(input_line, sizeof(input_line) - 1, f) != 0) {
1373 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1374 if (nodetab_size == arg_read_pes)
1377 if (nodetab_size == arg_requested_pes)
1380 if (input_line[0] == '#')
1382 zap_newline(input_line);
1383 if (!nodetab_args(input_line, &global)) {
1384 /*An option line-- also add options to current group*/
1385 nodetab_args(input_line, &group);
1386 } else { /*Not an option line*/
1387 const char *b1 = skipblanks(input_line), *e1 = skipstuff(b1);
1388 const char *b2 = skipblanks(e1), *e2 = skipstuff(b2);
1389 const char *b3 = skipblanks(e2);
1390 if (subeqs(b1, e1, "host")) {
/* NOTE(review): b2 points into input_line and is not terminated at e2, so
 * the strcmp() calls below compare the host name together with any options
 * that follow it on the line; a localhost entry with trailing options is
 * not recognised as localhost. */
1392 /* check if we have a previous host, if it's different than our
1393 * current host, and if one of them is localhost */
1394 if (prevHostName && strcmp(b2, prevHostName) &&
1395 (!strcmp(b2, "localhost") ||
1396 !strcmp(prevHostName, "localhost"))) {
1397 fprintf(stderr, "ERROR> Mixing localhost with other hostnames will "
1398 "lead to connection failures.\n");
1399 fprintf(stderr, "ERROR> The problematic line in group %s is: %s\n",
1400 arg_nodegroup, input_line);
1404 nodetab_args(b3, &host);
1406 /* Non-SMP workaround */
1407 int cpus = host.cpus;
1409 for (int rank = 0; rank < cpus; rank++)
1411 for (host.rank = 0; host.rank < host.cpus; host.rank++)
1414 nodetab_makehost(substr(b2, e2), &host);
1415 hosts.push_back(std::make_pair(lineNo, host));
/* NOTE(review): each host line assigns a new strdup() copy here; no free()
 * of the previous copy is visible in this listing. */
1418 prevHostName = strdup(b2);
1420 } else if (subeqs(b1, e1, "group")) {
1422 nodetab_args(b3, &group);
1423 rightgroup = subeqs(b2, e2, arg_nodegroup);
1424 } else if (b1 != b3) {
1425 fprintf(stderr, "ERROR> unrecognized command in nodesfile:\n");
1426 fprintf(stderr, "ERROR> %s\n", input_line);
1433 if (nodetab_tempName != NULL)
1434 unlink(nodetab_tempName);
1436 if (hosts.size() == 0) {
1437 fprintf(stderr, "ERROR> No hosts in group %s\n", arg_nodegroup);
1441 /*Wrap nodes in table around if there aren't enough yet*/
1442 for (int i = 0; binned_hosts.size() < arg_requested_pes; ++i) {
1443 binned_hosts.insert(hosts[i % hosts.size()]);
1444 host_sizes[hosts[i % hosts.size()].second.name]++;
1447 /* Only increase counter for each new process */
1448 for (std::map<std::string, int>::iterator it = host_sizes.begin();
1449 it != host_sizes.end(); ++it) {
1450 it->second = (it->second + arg_ppn - 1) / arg_ppn;
1453 for (std::multimap<int, nodetab_host>::iterator it = binned_hosts.begin();
1454 it != binned_hosts.end(); ++it) {
1455 nodetab_add(&(it->second));
1459 /*Clip off excess CPUs at end*/
1460 for (i = 0; i < nodetab_size; i++) {
1461 if (nodetab_table[i]->rank == 0)
1462 remain = nodetab_size - i;
1463 if (nodetab_table[i]->cpus > remain)
1464 nodetab_table[i]->cpus = remain;
1467 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
1468 loaded_max_pe = arg_requested_pes - 1;
1471 if (arg_hierarchical_start)
1472 nodetab_init_hierarchical_start();
1478 /* Given a processor number, look up the nodetab info: */
/* Return entry i of nodetab_table after checking that the table has been
 * allocated. NOTE(review): no range check of i is visible in this
 * listing. */
1479 nodetab_host *nodetab_getinfo(int i)
1481 if (nodetab_table == 0) {
1482 fprintf(stderr, "ERROR> Node table not initialized.\n");
1485 return nodetab_table[i];
1488 /* Given a node number, look up the nodetab info: */
/* Translate node number i through nodetab_rank0_table and return the
 * nodetab_getinfo() entry for the resulting table index. */
1489 nodetab_host *nodetab_getnodeinfo(int i)
1491 return nodetab_getinfo(nodetab_rank0_table[i]);
1494 /*These routines all take *PE* numbers (NOT node numbers!)*/
1495 const char *nodetab_name(int i) { return nodetab_getinfo(i)->name; }
1496 pathfixlist nodetab_pathfixes(int i) { return nodetab_getinfo(i)->pathfixes; }
1497 char *nodetab_ext(int i) { return nodetab_getinfo(i)->ext; }
1498 skt_ip_t nodetab_ip(int i) { return nodetab_getinfo(i)->ip; }
1499 unsigned int nodetab_cpus(int i) { return nodetab_getinfo(i)->cpus; }
1500 unsigned int nodetab_rank(int i) { return nodetab_getinfo(i)->rank; }
1501 int nodetab_dataport(int i) { return nodetab_getinfo(i)->dataport; }
1502 int nodetab_nice(int i) { return nodetab_getinfo(i)->nice; }
1503 SOCKET nodetab_ctrlfd(int i) { return nodetab_getinfo(i)->ctrlfd; }
1505 const char *nodetab_setup(int i) { return nodetab_getinfo(i)->setup; }
1506 const char *nodetab_shell(int i) { return nodetab_getinfo(i)->shell; }
1507 const char *nodetab_debugger(int i) { return nodetab_getinfo(i)->debugger; }
1508 const char *nodetab_xterm(int i) { return nodetab_getinfo(i)->xterm; }
1509 const char *nodetab_login(int i) { return nodetab_getinfo(i)->login; }
1510 const char *nodetab_passwd(int i) { return nodetab_getinfo(i)->passwd; }
1513 /****************************************************************************
1517 * The global list of node PEs, IPs, and port numbers.
1518 * Stored in ChMachineInt_t format so the table can easily be sent
1519 * back to the nodes.
1521 ****************************************************************************/
1523 static ChNodeinfo *nodeinfo_arr; /*Indexed by node number.*/
/*
 * Allocates nodeinfo_arr with room for nodetab_rank0_size ChNodeinfo records.
 * NOTE(review): the malloc() result is not checked here.
 */
1525 void nodeinfo_allocate(void)
1527 nodeinfo_arr = (ChNodeinfo *) malloc(nodetab_rank0_size * sizeof(ChNodeinfo));
/*
 * Records the registration data received for one node.
 *   in     - node number plus the ChNodeinfo the node reported.
 *   ctrlfd - control socket to remember for every PE of that node.
 * The copied info gets nPE and nProcessesInPhysNode filled in from the node
 * table, is stored in nodeinfo_arr[node], and the socket (plus dataport and
 * qp in the later section) is written to each of the node's PE entries.
 * NOTE(review): the store into nodeinfo_arr and the per-PE loop appear twice;
 * presumably these are alternative preprocessor branches -- confirm.
 */
1529 void nodeinfo_add(const ChSingleNodeinfo *in, SOCKET ctrlfd)
1531 int node = ChMessageInt(in->nodeNo);
1532 ChNodeinfo i = in->info;
1535 unsigned int dataport;
/* Only node numbers inside [0, nodetab_rank0_size) are accepted. */
1537 if (node < 0 || node >= nodetab_rank0_size) {
1538 fprintf(stderr, "Unexpected node %d registered!\n", node);
1541 nt = nodetab_rank0_table[node]; /*Nodetable index for this node*/
1542 i.nPE = ChMessageInt_new(nodetab_cpus(nt));
1543 i.nProcessesInPhysNode = ChMessageInt_new(host_sizes[nodetab_name(nt)]);
1546 nodetab_getinfo(nt)->ip = i.IP; /* get IP */
1547 i.IP = nodetab_ip(nt);
1549 nodeinfo_arr[node] = i;
1550 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
1551 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
1553 /* printf("Charmrun> client %d connected\n", nt); */
/* A reported data port of 0 is treated as a network-initialization failure. */
1555 dataport = ChMessageInt(i.dataport);
1556 if (0 == dataport) {
1557 fprintf(stderr, "Node %d could not initialize network!\n", node);
1560 nodeinfo_arr[node] = i;
1561 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
1562 nodetab_table[nt + pe]->dataport = dataport;
1563 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
1565 nodetab_table[nt + pe]->qp = i.qp;
1570 skt_print_ip(ips, nodetab_ip(nt));
1571 printf("Charmrun> client %d connected (IP=%s data_port=%d)\n", nt, ips,
1574 printf("Charmrun> client %d lid=%d qpn=%i psn=%i\n", nt,
1575 ChMessageInt(i.qp.lid), ChMessageInt(i.qp.qpn),
1576 ChMessageInt(i.qp.psn));
1582 /****************************************************************************
1586 * You can use this module to read the standard input. It supports
1587 * one odd function, input_scanf_chars, which is what makes it useful.
1588 * if you use this module, you may not read stdin yourself.
1590 * void input_init(void)
1591 * char *input_gets(void)
1592 * char *input_scanf_chars(char *fmt)
1594 ****************************************************************************/
/*
 * Appends one line read from stdin to input_buffer.
 * fgets() is limited to 1023 bytes per call; the buffer is grown with
 * realloc() to hold the old text, the new line and the terminator, and the
 * line is copied in at the old end (offset len).
 * End-of-file and allocation failure are reported on stderr.
 */
1601 char *new_input_buffer;
1602 int len = input_buffer ? strlen(input_buffer) : 0;
1604 if (fgets(line, 1023, stdin) == 0) {
1605 fprintf(stderr, "end-of-file on stdin");
1608 new_input_buffer = (char *) realloc(input_buffer, len + strlen(line) + 1);
1609 if (new_input_buffer == NULL) {
1610 // could not realloc
1612 fprintf(stderr, "Charmrun: Realloc failed");
1615 input_buffer = new_input_buffer;
1618 strcpy(input_buffer + len, line);
1621 void input_init() { input_buffer = strdup(""); }
/*
 * Splits input_buffer at offset nchars: the first nchars characters are
 * copied out with substr() into res, and a second substr() call copies the
 * text from nchars up to the end of the buffer.
 * NOTE(review): nchars is not compared against strlen(input_buffer) here.
 */
1623 char *input_extract(int nchars)
1625 char *res = substr(input_buffer, input_buffer + nchars);
1627 substr(input_buffer + nchars, input_buffer + strlen(input_buffer));
/*
 * Line extraction: find the first newline in input_buffer and pull out
 * everything up to and including it (len + 1 characters) with
 * input_extract().
 */
1638 p = strchr(input_buffer, '\n');
1643 len = p - input_buffer;
1644 res = input_extract(len + 1);
1649 /*FIXME: I am terrified by this routine. OSL 9/8/00*/
/*
 * Applies the scanf-style format fmt to the buffered stdin text: the
 * contents of input_buffer are written to a temporary file, fscanf() is run
 * over that file, and the consumed prefix is returned via
 * input_extract(pos).
 * NOTE(review): fmt is handed straight to fscanf() with all 18 conversion
 * arguments pointing at the same buf; a format with more conversions, or
 * with unbounded string widths, would be unsafe. req_handle_scanf() passes
 * message data as fmt -- confirm it is trusted.
 * NOTE(review): tmpnam() followed by open() is race-prone; the
 * "/tmp/fnordXXXXXX" template suggests an mkstemp-style path exists for
 * another configuration -- confirm.
 */
1650 char *input_scanf_chars(char *fmt)
1660 strcpy(tmp, "/tmp/fnordXXXXXX");
1663 char *tmp = tmpnam(NULL); /*This was once /tmp/fnord*/
1666 fd = open(tmp, O_RDWR | O_CREAT | O_TRUNC, 0664);
1668 fprintf(stderr, "cannot open temp file /tmp/fnord");
1671 file = fdopen(fd, "r+");
1675 len = strlen(input_buffer);
1677 fwrite(input_buffer, len, 1, file);
1681 fscanf(file, fmt, buf, buf, buf, buf, buf, buf, buf, buf, buf, buf, buf,
1682 buf, buf, buf, buf, buf, buf, buf);
1688 return input_extract(pos);
1691 /***************************************************************************
1693 Charmrun forwards CCS requests on to the node-programs' control
1695 ***************************************************************************/
1697 #if CMK_CCS_AVAILABLE
1699 /*The Ccs Server socket became active--
1700 rec'v the message and respond to the request,
1701 by forwarding the request to the appropriate node.
/*
 * Receives one CCS request with CcsServer_recvRequest() and forwards it, as
 * a "req_fw" charmrun message, to the control socket of the destination PE.
 * The destination PE is validated first: values at or beyond +/-nodetab_size
 * (when replay_single is 0) are answered with an empty reply, and values
 * below -1 are handled as a multicast whose PE list leads reqData.
 */
1703 void req_ccs_connect(void)
1705 const void *bufs[3];
1708 ChMessageHeader ch; /*Make a charmrun header*/
1709 CcsImplHeader hdr; /*Ccs internal header*/
1711 void *reqData; /*CCS request data*/
1713 if (0 == CcsServer_recvRequest(&h.hdr, &reqData))
1714 return; /*Malformed request*/
1715 pe = ChMessageInt(h.hdr.pe);
1716 reqBytes = ChMessageInt(h.hdr.len);
1719 /*Treat -1 as broadcast and sent to 0 as root of the spanning tree*/
1722 if ((pe <= -nodetab_size || pe >= nodetab_size) && 0 == replay_single) {
1723 /*Treat out of bound values as errors. Helps detecting bugs*/
1724 /* But when virtualized with Bigemulator, we can have more pes than nodetabs */
1725 /* TODO: We should somehow check boundaries also for bigemulator... */
1726 #if !CMK_BIGSIM_CHARM
1727 if (pe == -nodetab_size)
1728 fprintf(stderr, "Invalid processor index in CCS request: are you trying "
1729 "to do a broadcast instead?");
1731 fprintf(stderr, "Invalid processor index in CCS request.");
1732 CcsServer_sendReply(&h.hdr, 0, 0);
1736 } else if (pe < -1) {
1737 /*Treat negative values as multicast to a number of processors specified by
1739 The pes to multicast to follows sits at the beginning of reqData*/
/* pe is negative on this branch, so the subtraction enlarges reqBytes by
 * |pe| * sizeof(ChMessageInt_t); the first listed PE becomes the target. */
1740 reqBytes -= pe * sizeof(ChMessageInt_t);
1741 pe = ChMessageInt(*(ChMessageInt_t *) reqData);
1744 if (!check_stdio_header(&h.hdr)) {
1747 #if LOOPBACK /*Immediately reply "there's nothing!" (for performance \
1749 CcsServer_sendReply(&h.hdr, 0, 0);
1752 #if CMK_BIGSIM_CHARM
1753 destpe = destpe % nodetab_size;
1757 /*Fill out the charmrun header & forward the CCS request*/
1758 ChMessageHeader_new("req_fw", sizeof(h.hdr) + reqBytes, &h.ch);
1761 lens[0] = sizeof(h);
1764 skt_sendV(nodetab_ctrlfd(destpe), 2, bufs, lens);
1772 Forward the CCS reply (if any) from this client back to the
1773 original network requestor, on the original request socket.
/*
 * Forwards a CCS reply arriving on srcFd to the original requester.
 *   msg   - header of the incoming message; msg->len is the byte count
 *           still to be read from srcFd.
 *   srcFd - socket of the client that produced the reply.
 * Messages shorter than m (4 KB) or flagged hdr.attr.auth are read whole and
 * passed to CcsServer_sendReply(); longer ones are relayed to hdr.replyFd in
 * pieces after sending their length, with send errors accumulated in
 * destErrs so that sending stops once the destination has failed.
 */
1775 int req_ccs_reply_fw(ChMessage *msg, SOCKET srcFd)
1777 int len = msg->len; /* bytes of data remaining to receive */
1779 /* First pull down the CCS header sent by the client. */
1781 skt_recvN(srcFd, &hdr, sizeof(hdr));
1784 #define m (4 * 1024) /* packets of message to recv/send at once */
1785 if (len < m || hdr.attr.auth) { /* short or authenticated message: grab the
1786 whole thing first */
/* NOTE(review): the malloc() result is used without a null check here. */
1787 void *data = malloc(len);
1788 skt_recvN(srcFd, data, len);
1789 CcsServer_sendReply(&hdr, len, data);
1791 } else { /* long messages: packetize (for pipelined sending; a 2x bandwidth
1793 ChMessageInt_t outLen;
1794 int destFd; /* destination for data */
1795 skt_abortFn old = skt_set_abort(reply_abortFn);
1798 destFd = ChMessageInt(hdr.replyFd);
1799 outLen = ChMessageInt_new(len);
1800 skt_sendN(destFd, &outLen, sizeof(outLen)); /* first comes the length */
1806 skt_recvN(srcFd, buf, r);
1807 if (0 == destErrs) /* don't keep sending to dead clients, but *do* clean
1809 destErrs |= skt_sendN(destFd, buf, r);
1821 int req_ccs_reply_fw(ChMessage *msg, SOCKET srcFd) {}
1822 #endif /*CMK_CCS_AVAILABLE*/
1824 /****************************************************************************
1828 * The request servicer accepts connections on a TCP port. The client
1829 * sends a sequence of commands (each is one line). It then closes the
1830 * connection. The server must then contact the client, sending replies.
1832 ****************************************************************************/
1833 /** Macro to switch on the case when charmrun stays up even if
1834 one of the processor crashes*/
1835 /*#define __FAULT__*/
1837 SOCKET *req_clients; /*TCP request sockets for each node*/
1839 SOCKET *charmrun_fds;
1841 int req_nClients; /*Number of entries in above list (==nodetab_rank0_size)*/
1844 /* socket and std streams for the gdb info program */
1845 int gdb_info_pid = 0;
1846 int gdb_info_std[3];
1847 FILE *gdb_stream = NULL;
1850 #define REQ_FAILED -1
/*
 * Reply variant used when arg_hierarchical_start is set (see the print and
 * scanf handlers). Sends the reply with req_reply(); the status is compared
 * against REQ_OK, then a SOCKET-sized clientFd value is read from fd and
 * written straight back on the same socket.
 * NOTE(review): presumably the value is the client socket a child charmrun
 * attached in req_forward_root() -- confirm.
 * NOTE(review): the receive buffer is cast to const char * although
 * skt_recvN() writes into it.
 */
1853 int req_reply_child(SOCKET fd, const char *type, const char *data, int dataLen)
1856 int status = req_reply(fd, type, data, dataLen);
1857 if (status != REQ_OK)
1860 skt_recvN(fd, (const char *) &clientFd, sizeof(SOCKET));
1861 skt_sendN(fd, (const char *) &clientFd, sizeof(fd));
1866 * @brief This is the only place where charmrun talks back to anyone.
/*
 * Sends one reply message on fd: a ChMessageHeader built from type and
 * dataLen, followed by dataLen bytes taken from data.
 * fd == INVALID_SOCKET is special-cased before anything is sent.
 */
1868 int req_reply(SOCKET fd, const char *type, const char *data, int dataLen)
1870 ChMessageHeader msg;
1871 if (fd == INVALID_SOCKET)
1873 ChMessageHeader_new(type, dataLen, &msg);
1874 skt_sendN(fd, (const char *) &msg, sizeof(msg));
1875 skt_sendN(fd, data, dataLen);
1879 /* Request handlers:
1880 When a client asks us to do something, these are the
1881 routines that actually respond to the request.
1883 /*Stash this new node's control and data ports.
/*
 * Handles an "initnode" registration: checks msg->len against the expected
 * payload size and passes the ChSingleNodeinfo at msg->data, together with
 * fd, to nodeinfo_add().
 * Two length checks appear: one that includes (nodetab_rank0_size - 1)
 * trailing ChInfiAddr records, and one for a bare ChSingleNodeinfo.
 * NOTE(review): presumably these belong to alternative build
 * configurations -- confirm.
 * NOTE(review): the error path prints msg->data with "%s"; confirm the
 * payload is NUL-terminated.
 */
1885 int req_handle_initnode(ChMessage *msg, SOCKET fd)
1889 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
1890 // printf("Charmrun> msg->len %d sizeof(ChSingleNodeinfo) %d
1891 // sizeof(ChInfiAddr) %d
1892 //\n",msg->len,sizeof(ChSingleNodeinfo),sizeof(ChInfiAddr));
1894 sizeof(ChSingleNodeinfo) +
1895 (nodetab_rank0_size - 1) * sizeof(ChInfiAddr)) {
1896 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
1897 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg->data);
/* Copy the ChInfiAddr records that follow the ChSingleNodeinfo in the
 * payload into a freshly allocated qpList array. */
1900 nodeInfo->info.qpList =
1901 (ChInfiAddr *) malloc(sizeof(ChInfiAddr) * (nodetab_rank0_size - 1));
1902 memcpy((char *) nodeInfo->info.qpList, &msg->data[sizeof(ChSingleNodeinfo)],
1903 sizeof(ChInfiAddr) * (nodetab_rank0_size - 1));
1904 /* for(i=0;i<nodetab_rank0_size-1;i++){
1905 printf("i %d 0x%0x 0x%0x
1906 0x%0x\n",i,ChMessageInt(nodeInfo->info.qpList[i].lid),ChMessageInt(nodeInfo->info.qpList[i].qpn),ChMessageInt(nodeInfo->info.qpList[i].psn));
1909 if (msg->len != sizeof(ChSingleNodeinfo)) {
1910 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
1911 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg->data);
1915 nodeinfo_add((ChSingleNodeinfo *) msg->data, fd);
1920 * @brief Gets the array of node numbers, IPs, and ports. This is used by the
1922 * to talk to one another.
/*
 * Sends the complete node table on fd: a message header, the node count
 * (nodetab_rank0_size) as a ChMessageInt_t, then the nodetab_rank0_size
 * ChNodeinfo records held in nodeinfo_arr.
 * msg is not read here; the crash handlers call this with NULL.
 */
1924 int req_handle_initnodetab(ChMessage *msg, SOCKET fd)
1926 ChMessageHeader hdr;
1927 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
1928 ChMessageHeader_new(
1930 sizeof(ChMessageInt_t) + sizeof(ChNodeinfo) * nodetab_rank0_size, &hdr);
1931 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
1932 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
1933 skt_sendN(fd, (const char *) nodeinfo_arr,
1934 sizeof(ChNodeinfo) * nodetab_rank0_size);
1940 /* Used for fault tolerance with hierarchical start */
/*
 * Same payload as req_handle_initnodetab() -- node count followed by the
 * nodeinfo_arr records -- but sent under the message type "initnttab".
 * msg is not read here; req_handle_crashack() calls this with NULL.
 */
1941 int req_handle_initnodetab1(ChMessage *msg, SOCKET fd)
1943 ChMessageHeader hdr;
1944 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
1945 ChMessageHeader_new("initnttab", sizeof(ChMessageInt_t) +
1946 sizeof(ChNodeinfo) * nodetab_rank0_size,
1948 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
1949 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
1950 skt_sendN(fd, (const char *) nodeinfo_arr,
1951 sizeof(ChNodeinfo) * nodetab_rank0_size);
1955 /*Get the array of node numbers, IPs, and ports.
1956 This is used by the node-programs to talk to one another.
1958 static int parent_charmrun_fd = -1;
/*
 * Sends client number `client` the slice of the node table it is given.
 * The slice covers nodetab_rank0_table entries [rank0_start, rank0_finish),
 * derived from nodetab_unique_table and nodes_per_child; the last client
 * (client == branchfactor - 1) takes everything up to nodetab_rank0_size.
 * Wire format: header "initnodetab", slice length, total node count
 * (nodetab_rank0_size), then the slice as ChMessageInt_t values.
 * NOTE(review): nodemsg comes from malloc(); no free() or null check is
 * shown for it -- confirm.
 */
1959 int req_handle_initnodedistribution(ChMessage *msg, SOCKET fd, int client)
1962 nodes_per_child; /* rounding should help in better load distribution*/
1963 int rank0_start = nodetab_unique_table[client * nodes_per_child];
1965 if (client == branchfactor - 1) {
1966 nodes_to_fork = nodetab_unique_size - client * nodes_per_child;
1967 rank0_finish = nodetab_rank0_size;
1970 nodetab_unique_table[client * nodes_per_child + nodes_to_fork];
1972 ChMessageInt_t *nodemsg = (ChMessageInt_t *) malloc(
1973 (rank0_finish - rank0_start) * sizeof(ChMessageInt_t));
1974 for (k = 0; k < rank0_finish - rank0_start; k++)
1975 nodemsg[k] = ChMessageInt_new(nodetab_rank0_table[rank0_start + k]);
1976 ChMessageHeader hdr;
1977 ChMessageInt_t nNodes = ChMessageInt_new(rank0_finish - rank0_start);
1978 ChMessageInt_t nTotalNodes = ChMessageInt_new(nodetab_rank0_size);
1979 ChMessageHeader_new("initnodetab",
1980 sizeof(ChMessageInt_t) * 2 +
1981 sizeof(ChMessageInt_t) * (rank0_finish - rank0_start),
1983 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
1984 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
1985 skt_sendN(fd, (const char *) &nTotalNodes, sizeof(nTotalNodes));
1986 skt_sendN(fd, (const char *) nodemsg,
1987 (rank0_finish - rank0_start) * sizeof(ChMessageInt_t));
1992 ChSingleNodeinfo *myNodesInfo;
/*
 * Sends this process's node records to the parent over parent_charmrun_fd:
 * header "initnodetab", the count nodetab_rank0_size, then that many
 * ChSingleNodeinfo records taken from myNodesInfo.
 */
1993 int send_myNodeInfo_to_parent()
1995 ChMessageHeader hdr;
1996 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
1997 ChMessageHeader_new("initnodetab",
1998 sizeof(ChMessageInt_t) +
1999 sizeof(ChSingleNodeinfo) * nodetab_rank0_size,
2001 skt_sendN(parent_charmrun_fd, (const char *) &hdr, sizeof(hdr));
2002 skt_sendN(parent_charmrun_fd, (const char *) &nNodes, sizeof(nNodes));
2003 skt_sendN(parent_charmrun_fd, (const char *) myNodesInfo,
2004 sizeof(ChSingleNodeinfo) * nodetab_rank0_size);
/*
 * Waits for a message on parent_charmrun_fd (skt_select1 with a timeout
 * argument of 1200 * 1000), receives it, and re-sends its node list
 * unchanged to every socket in req_clients[0 .. nodetab_rank0_size).
 * The first ChMessageInt_t of the payload is the record count; the
 * ChNodeinfo records start right after it (nodelistmsg + 1).
 * NOTE(review): the timeout unit is presumably milliseconds -- confirm.
 */
2008 void forward_nodetab_to_children()
2010 /*it just needs to receive and copy the nodetab info if required and send it
2011 * as it is to its nodes */
2012 if (!skt_select1(parent_charmrun_fd, 1200 * 1000)) {
2016 ChMessage_recv(parent_charmrun_fd, &msg);
2018 ChMessageInt_t *nodelistmsg = (ChMessageInt_t *) msg.data;
2019 int nodetab_Nodes = ChMessageInt(nodelistmsg[0]);
2021 for (client = 0; client < nodetab_rank0_size; client++) {
2022 SOCKET fd = req_clients[client];
2023 ChMessageHeader hdr;
2024 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_Nodes);
2025 ChMessageHeader_new("initnodetab", sizeof(ChMessageInt_t) +
2026 sizeof(ChNodeinfo) * nodetab_Nodes,
2028 skt_sendN(fd, (const char *) &hdr, sizeof(hdr));
2029 skt_sendN(fd, (const char *) &nNodes, sizeof(nNodes));
2030 skt_sendN(fd, (const char *) (nodelistmsg + 1),
2031 sizeof(ChNodeinfo) * nodetab_Nodes);
2034 /*Parent Charmrun receives the nodetab from child and processes it. msg contain
2035 * array of ChSingleNodeInfo*/
/*
 * Unpacks a node set sent by a child: the first ChMessageInt_t of msg->data
 * is the record count, and the ChSingleNodeinfo records follow it. Each
 * record is registered with nodeinfo_add(), using fd as its control socket.
 * NOTE(review): numOfNodes is not checked against the payload length here.
 */
2036 void receive_nodeset_from_child(ChMessage *msg, SOCKET fd)
2038 ChMessageInt_t *n32 = (ChMessageInt_t *) msg->data;
2039 int numOfNodes = ChMessageInt(n32[0]);
2040 ChSingleNodeinfo *childNodeInfo = (ChSingleNodeinfo *) (n32 + 1);
2042 for (k = 0; k < numOfNodes; k++)
2043 nodeinfo_add(childNodeInfo + k, fd);
/*
 * Stores fd in charmrun_fds at the slot computed from the first
 * ChMessageInt_t of msg->data (a starting node number) divided by
 * nodes_per_child.
 * NOTE(review): the resulting index is not range-checked here.
 */
2046 void set_sockets_list(ChMessage *msg, SOCKET fd)
2048 ChMessageInt_t *n32 = (ChMessageInt_t *) msg->data;
2049 int node_start = ChMessageInt(n32[0]);
2050 charmrun_fds[node_start / nodes_per_child] = fd;
2053 /* Check this return code from "printf". */
/*
 * Takes the return value of a printf()/fflush() call (see the print
 * handlers) and reports a failure through perror().
 * NOTE(review): the static `warned` flag presumably limits the warning to
 * one per run -- confirm.
 */
2054 static void checkPrintfError(int err)
2057 static int warned = 0;
2059 perror("charmrun WARNING> error in printf");
/*
 * "print" request: writes msg->data to stdout, flushes it (both results go
 * through checkPrintfError()), and passes the same text to
 * write_stdio_duplicate().
 */
2065 int req_handle_print(ChMessage *msg, SOCKET fd)
2067 checkPrintfError(printf("%s", msg->data));
2068 checkPrintfError(fflush(stdout));
2069 write_stdio_duplicate(msg->data);
/*
 * "printerr" request: writes msg->data to stderr and passes the same text
 * to write_stdio_duplicate().
 */
2073 int req_handle_printerr(ChMessage *msg, SOCKET fd)
2075 fprintf(stderr, "%s", msg->data);
2077 write_stdio_duplicate(msg->data);
/*
 * "printsyn" request: prints msg->data to stdout like req_handle_print(),
 * then acknowledges with a "printdone" reply carrying one byte (the empty
 * string's terminator). The acknowledgement goes through req_reply_child()
 * when arg_hierarchical_start is set and through req_reply() otherwise.
 */
2081 int req_handle_printsyn(ChMessage *msg, SOCKET fd)
2083 checkPrintfError(printf("%s", msg->data));
2084 checkPrintfError(fflush(stdout));
2085 write_stdio_duplicate(msg->data);
2087 if (arg_hierarchical_start)
2088 req_reply_child(fd, "printdone", "", 1);
2091 req_reply(fd, "printdone", "", 1);
/*
 * "printerrsyn" request: prints msg->data to stderr like
 * req_handle_printerr(), then acknowledges with a one-byte "printdone"
 * reply, via req_reply_child() when arg_hierarchical_start is set and via
 * req_reply() otherwise.
 */
2095 int req_handle_printerrsyn(ChMessage *msg, SOCKET fd)
2097 fprintf(stderr, "%s", msg->data);
2099 write_stdio_duplicate(msg->data);
2101 if (arg_hierarchical_start)
2102 req_reply_child(fd, "printdone", "", 1);
2105 req_reply(fd, "printdone", "", 1);
/*
 * "ending" request: compares the running count req_ending with the expected
 * total. The total is nodetab_size when neither _FAULT_MLOG_ nor
 * _FAULT_CAUSAL_ is defined; arg_requested_pes is presumably the alternative
 * branch. Once reached, every socket in req_clients is closed and the
 * graceful-exit message is printed.
 */
2109 int req_handle_ending(ChMessage *msg, SOCKET fd)
2114 #if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
2115 if (req_ending == nodetab_size)
2117 if (req_ending == arg_requested_pes)
2120 for (i = 0; i < req_nClients; i++)
2121 skt_close(req_clients[i]);
2123 printf("Charmrun> Graceful exit.\n");
/*
 * "barrier" request: keeps a static arrival count and, when it reaches the
 * expected total, sends a one-byte "barrier" reply to every socket in
 * req_clients, logging any reply that does not return REQ_OK.
 * NOTE(review): the two totals (arg_requested_pes, req_nClients) are
 * presumably alternative build configurations -- confirm.
 */
2129 int req_handle_barrier(ChMessage *msg, SOCKET fd)
2132 static int barrier_count = 0;
2133 static int barrier_phase = 0;
2136 if (barrier_count == arg_requested_pes)
2138 if (barrier_count == req_nClients)
2143 for (i = 0; i < req_nClients; i++)
2144 if (REQ_OK != req_reply(req_clients[i], "barrier", "", 1)) {
2145 fprintf(stderr, "req_handle_barrier socket error: %d\n", i);
/*
 * "barrier0" request: parses the sender's PE number from msg->data with
 * atoi(), keeps a static arrival count, and once the expected total is
 * reached replies "barrier0" on fd0 only.
 * NOTE(review): the two totals (arg_requested_pes, req_nClients) are
 * presumably alternative build configurations -- confirm.
 */
2152 int req_handle_barrier0(ChMessage *msg, SOCKET fd)
2155 static int count = 0;
2157 int pe = atoi(msg->data);
2162 if (count == arg_requested_pes)
2164 if (count == req_nClients)
2167 req_reply(fd0, "barrier0", "", 1); /* only send to node 0 */
/*
 * "abort" request: announces the abort on stderr and prints the text
 * carried in msg->data.
 */
2173 void req_handle_abort(ChMessage *msg, SOCKET fd)
2175 /*fprintf(stderr,"req_handle_abort called \n");*/
2177 fprintf(stderr, "Aborting!\n");
2179 fprintf(stderr, "%s\n", msg->data);
/*
 * "scanf" request: terminates the format string at offset msg->len - 1,
 * runs it through input_scanf_chars(), and returns the result in a
 * "scanf-data" reply that includes the terminator. The reply goes through
 * req_reply_child() when arg_hierarchical_start is set and through
 * req_reply() otherwise.
 * NOTE(review): a message with len == 0 would write fmt[-1] -- confirm the
 * sender always includes at least one byte.
 */
2183 int req_handle_scanf(ChMessage *msg, SOCKET fd)
2185 char *fmt, *res, *p;
2188 fmt[msg->len - 1] = 0;
2189 res = input_scanf_chars(fmt);
2197 if (arg_hierarchical_start)
2198 req_reply_child(fd, "scanf-data", res, strlen(res) + 1);
2201 req_reply(fd, "scanf-data", res, strlen(res) + 1);
2207 void restart_node(int crashed_node);
2208 void reconnect_crashed_client(int socket_index, int crashed_node);
2209 void announce_crash(int socket_index, int crashed_node);
2211 static int _last_crash = 0; /* last crashed pe number */
2212 static int _crash_socket_index = 0; /* last restart socket */
2214 static int _crash_socket_charmrun_index = 0; /* last restart socket */
2216 int restarted_pe_id;
2218 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2219 static int numCrashes = 0; /*number of crashes*/
2220 static SOCKET last_crashed_fd = -1;
2224 * @brief Handles an ACK after a crash. Once it has received all the pending
2225 * acks, it sends the nodetab
2226 * table to the crashed node.
/*
 * "crash_ack" request: keeps a static count of acknowledgements. When the
 * count reaches the threshold, it announces that node _last_crash may
 * continue and sends it the node table:
 *  - with arg_hierarchical_start: threshold nodetab_rank0_size - 1, sent by
 *    req_handle_initnodetab1() to
 *    req_clients[_crash_socket_charmrun_index];
 *  - otherwise: threshold req_nClients - 1, sent by
 *    req_handle_initnodetab() to req_clients[_crash_socket_index].
 * In the _FAULT_MLOG_/_FAULT_CAUSAL_ builds last_crashed_fd is reset to -1.
 */
2228 int req_handle_crashack(ChMessage *msg, SOCKET fd)
2230 static int count = 0;
2233 if (arg_hierarchical_start) {
2234 if (count == nodetab_rank0_size - 1) {
2235 /* only after everybody else update its nodetab, can this
2236 restarted process continue */
2237 printf("Charmrun> continue node: %d\n", _last_crash);
2238 req_handle_initnodetab1(NULL, req_clients[_crash_socket_charmrun_index]);
2241 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2242 last_crashed_fd = -1;
2250 if (count == req_nClients - 1) {
2251 // only after everybody else update its nodetab, can this restarted process
2253 printf("Charmrun> continue node: %d\n", _last_crash);
2254 req_handle_initnodetab(NULL, req_clients[_crash_socket_index]);
2257 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2258 last_crashed_fd = -1;
2264 /* send initnode to root*/
/*
 * Rewrites the node number inside an "initnode" payload and rebinds the
 * node's control socket. The incoming nodeNo, offset by mynodes_start, is
 * looked up in nodetab_rank0_table; the resulting table index nt is written
 * back into the message, and fd becomes the ctrlfd of every PE of that node.
 * NOTE(review): the index into nodetab_rank0_table is not range-checked
 * here.
 */
2265 int set_crashed_socket_id(ChMessage *msg, SOCKET fd)
2267 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
2268 int nt = nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) - mynodes_start];
2269 nodeInfo->nodeNo = ChMessageInt_new(nt);
2270 /* Required for CCS */
2271 /*Nodetable index for this node*/
2273 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
2274 nodetab_table[nt + pe]->ctrlfd = fd;
2278 /* Receives new dataport of restarted process and resends nodetable to
/*
 * Handles the "initnode" message of a restarted process (see
 * req_handler_dispatch()). Two PE numbers (oldpe, newpe) are read from fd
 * and the table entry of newpe is copied over that of oldpe; the message is
 * then processed by req_handle_initnode(). The index of fd in req_clients is
 * remembered in _crash_socket_charmrun_index, the node number in _last_crash,
 * and the node table is re-sent to every client before announce_crash() is
 * called.
 * NOTE(review): oldpe/newpe come off the socket and index nodetab_table
 * without a range check here.
 */
2280 int req_handle_crash(ChMessage *msg, SOCKET fd)
2283 ChMessageInt_t oldpe, newpe;
2284 skt_recvN(fd, (const char *) &oldpe, sizeof(oldpe));
2285 skt_recvN(fd, (const char *) &newpe, sizeof(newpe));
2286 *nodetab_table[ChMessageInt(oldpe)] = *nodetab_table[ChMessageInt(newpe)];
2288 int status = req_handle_initnode(msg, fd);
2290 for (i = 0; i < req_nClients; i++) {
2291 if (req_clients[i] == fd) {
2295 _crash_socket_charmrun_index = i;
2297 fprintf(stdout, "Root charmrun : Socket %d failed %d\n", fd,
2298 _crash_socket_charmrun_index);
2300 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
2301 int crashed_node = ChMessageInt(nodeInfo->nodeNo);
2302 _last_crash = crashed_node;
2310 /* Already processed, so send*/
2312 for (client = 0; client < req_nClients; client++) {
2313 req_handle_initnodetab(NULL, req_clients[client]);
2316 /*Announce crash to all child charmruns*/
2317 announce_crash(nodetab_rank0_size + 1, crashed_node);
/*
 * Called when a control socket fails. Finds the PE whose ctrlfd equals fd
 * (searching this process's own range when arg_hierarchical_start is set,
 * the whole table up to nodetab_max otherwise), derives the node's table
 * index from the PE's rank, restarts the node with restart_node(), locates
 * fd in req_clients, and calls reconnect_crashed_client() for it.
 */
2324 void error_in_req_serve_client(SOCKET fd)
2327 int crashed_node, crashed_pe, node_index, socket_index;
2328 fprintf(stdout, "Socket %d failed \n", fd);
2331 if (arg_hierarchical_start) {
2332 for (i = mynodes_start; i < mynodes_start + nodetab_rank0_size; i++) {
2333 if (nodetab_ctrlfd(i) == fd) {
2341 for (i = 0; i < nodetab_max; i++) {
2342 if (nodetab_ctrlfd(i) == fd) {
2348 #if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
/* Subtracting the PE's rank gives the table index of its node's first PE. */
2352 node_index = i - nodetab_rank(crashed_pe);
2353 for (i = 0; i < nodetab_rank0_size; i++) {
2354 if (node_index == nodetab_rank0_table[i]) {
2360 /** should also send a message to all the other processors telling them that
2361 * this guy has crashed*/
2362 /*announce_crash(socket_index,crashed_node);*/
2363 restart_node(crashed_node);
2365 fprintf(stdout, "charmrun says Processor %d failed on Node %d\n", crashed_pe,
2367 /** after the crashed processor has been recreated
2368 it connects to charmrun. That data must now be filled
2369 into the req_nClients array and the nodetab_table*/
2371 for (i = 0; i < req_nClients; i++) {
2372 if (req_clients[i] == fd) {
2377 reconnect_crashed_client(socket_index, crashed_node);
2378 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
/*
 * Routes one request to its handler based on msg->header.type.
 *   msg     - message whose header has already been received.
 *   replyFd - socket the request arrived on.
 * "reply_fw" is dispatched before the payload is read (CCS builds only);
 * for everything else the payload is fetched with ChMessageData_recv(), a
 * negative status is handed to error_in_req_serve_client(), and the command
 * string selects the handler. Unknown commands are reported on stderr.
 */
2384 int req_handler_dispatch(ChMessage *msg, SOCKET replyFd)
2386 char *cmd = msg->header.type;
/* NOTE(review): the format below has one conversion but two arguments
 * (cmd, replyFd); replyFd is never printed. */
2388 DEBUGF(("Got request '%s'\n", cmd, replyFd));
2389 #if CMK_CCS_AVAILABLE /* CCS *doesn't* want data yet, for faster forwarding */
2390 if (strcmp(cmd, "reply_fw") == 0)
2391 return req_ccs_reply_fw(msg, replyFd);
2394 /* grab request data */
2395 recv_status = ChMessageData_recv(replyFd, msg);
2398 if (!arg_hierarchical_start)
2400 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
2401 if (recv_status < 0) {
2402 if (replyFd == last_crashed_fd) {
2405 DEBUGF(("recv_status %d on socket %d \n", recv_status, replyFd));
2406 error_in_req_serve_client(replyFd);
2409 if (recv_status < 0) {
2410 error_in_req_serve_client(replyFd);
/* Command dispatch: one strcmp() per known request type. */
2416 if (strcmp(cmd, "ping") == 0)
2418 else if (strcmp(cmd, "print") == 0)
2419 return req_handle_print(msg, replyFd);
2420 else if (strcmp(cmd, "printerr") == 0)
2421 return req_handle_printerr(msg, replyFd);
2422 else if (strcmp(cmd, "printsyn") == 0)
2423 return req_handle_printsyn(msg, replyFd);
2424 else if (strcmp(cmd, "printerrsyn") == 0)
2425 return req_handle_printerrsyn(msg, replyFd);
2426 else if (strcmp(cmd, "scanf") == 0)
2427 return req_handle_scanf(msg, replyFd);
2428 else if (strcmp(cmd, "barrier") == 0)
2429 return req_handle_barrier(msg, replyFd);
2430 else if (strcmp(cmd, "barrier0") == 0)
2431 return req_handle_barrier0(msg, replyFd);
2432 else if (strcmp(cmd, "ending") == 0)
2433 return req_handle_ending(msg, replyFd);
2434 else if (strcmp(cmd, "abort") == 0) {
2435 req_handle_abort(msg, replyFd);
2439 else if (strcmp(cmd, "crash_ack") == 0)
2440 return req_handle_crashack(msg, replyFd);
2442 else if (strcmp(cmd, "initnode") == 0)
2443 return req_handle_crash(msg, replyFd);
2448 fprintf(stderr, "Charmrun> Bad control socket request '%s'\n", cmd);
/*
 * Serves one request from fd: receives the message header, hands a negative
 * receive status to error_in_req_serve_client(), dispatches the message
 * through req_handler_dispatch(), reports a processing error on stderr, and
 * frees the message.
 * NOTE(review): the two receive-status checks are presumably alternative
 * preprocessor branches -- confirm.
 */
2456 void req_serve_client(SOCKET fd)
2461 DEBUGF(("Getting message from client...\n"));
2462 recv_status = ChMessageHeader_recv(fd, &msg);
2465 if (!arg_hierarchical_start && recv_status < 0)
2466 error_in_req_serve_client(fd);
2468 if (recv_status < 0) {
2469 error_in_req_serve_client(fd);
2475 DEBUGF(("Message is '%s'\n", msg.header.type));
2476 status = req_handler_dispatch(&msg, fd);
2481 fprintf(stderr, "Charmrun> Error processing control socket request %s\n",
2486 ChMessage_free(&msg);
/*
 * Relays one complete message received on fd up to parent_charmrun_fd.
 *  - "initnode" first goes through set_crashed_socket_id().
 *  - Every command except "ping" is re-sent to the parent with req_reply().
 *  - For "scanf", "printsyn" and "printerrsyn" the value of fd is sent after
 *    the message (req_reply_child() reads a SOCKET-sized value back).
 *  - For "initnode" the pair crashed_pe_id / restarted_pe_id follows, which
 *    matches the two values req_handle_crash() reads.
 * The message is freed at the end.
 */
2490 void req_forward_root(SOCKET fd)
2495 recv_status = ChMessage_recv(fd, &msg);
2497 char *cmd = msg.header.type;
2500 if (recv_status < 0) {
2501 error_in_req_serve_client(fd);
2505 /*called from reconnect_crashed_client */
2506 if (strcmp(cmd, "initnode") == 0) {
2507 set_crashed_socket_id(&msg, fd);
2511 if (strcmp(cmd, "ping") != 0) {
2512 status = req_reply(parent_charmrun_fd, cmd, msg.data,
2513 ChMessageInt(msg.header.len));
2515 if (strcmp(cmd, "scanf") == 0 || strcmp(cmd, "printsyn") == 0 ||
2516 strcmp(cmd, "printerrsyn") == 0)
2517 skt_sendN(parent_charmrun_fd, (const char *) &fd, sizeof(fd));
2520 if (strcmp(cmd, "initnode") == 0) {
2521 ChMessageInt_t oldpe = ChMessageInt_new(crashed_pe_id);
2522 ChMessageInt_t newpe = ChMessageInt_new(restarted_pe_id);
2523 skt_sendN(parent_charmrun_fd, (const char *) &oldpe, sizeof(oldpe));
2524 skt_sendN(parent_charmrun_fd, (const char *) &newpe, sizeof(newpe));
2536 ChMessage_free(&msg);
/*
 * Relays one message received from parent_charmrun_fd down to the local
 * clients. A failed receive closes every socket in req_clients.
 *  - "barrier": re-sent to every client.
 *  - "initnodetab" and "crashnode": re-sent to every client, skipping
 *    _crash_socket_index once _last_crash is non-zero; "initnodetab" also
 *    advances current_restart_phase while _last_crash is 0.
 *  - "initnttab": delivered as "initnodetab" to
 *    req_clients[_crash_socket_index] only.
 *  - "req_fw": sent to the ctrlfd of the PE named in the CCS header.
 *  - "barrier0": sent to the ctrlfd of PE 0.
 *  - In the remaining case the target socket is read from the parent.
 * NOTE(review): hdr->pe indexes nodetab_table without a range check here.
 */
2539 void req_forward_client()
2544 recv_status = ChMessage_recv(parent_charmrun_fd, &msg);
2545 if (recv_status < 0) {
2547 for (i = 0; i < req_nClients; i++)
2548 skt_close(req_clients[i]);
2552 char *cmd = msg.header.type;
2554 if (strcmp(cmd, "barrier") == 0) {
2556 for (i = 0; i < req_nClients; i++)
2557 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2558 ChMessageInt(msg.header.len))) {
2564 if (strcmp(cmd, "initnodetab") == 0) {
2565 if (_last_crash == 0)
2566 current_restart_phase++;
2568 for (i = 0; i < req_nClients; i++)
2569 if (_last_crash == 0 || i != _crash_socket_index)
2570 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2571 ChMessageInt(msg.header.len))) {
2577 if (strcmp(cmd, "crashnode") == 0) {
2580 for (i = 0; i < req_nClients; i++)
2581 if (_last_crash == 0 || i != _crash_socket_index)
2582 if (REQ_OK != req_reply(req_clients[i], cmd, msg.data,
2583 ChMessageInt(msg.header.len))) {
2588 if (strcmp(cmd, "initnttab") == 0) {
2590 if (REQ_OK != req_reply(req_clients[_crash_socket_index], "initnodetab",
2591 msg.data, ChMessageInt(msg.header.len))) {
2601 /* CCS forward request */
2602 if (strcmp(cmd, "req_fw") == 0) {
2603 CcsImplHeader *hdr = (CcsImplHeader *) msg.data;
2604 int pe = ChMessageInt(hdr->pe);
2605 fd = nodetab_table[pe]->ctrlfd;
2606 } else if (strcmp(cmd, "barrier0") == 0) {
2607 fd = nodetab_table[0]->ctrlfd;
2609 skt_recvN(parent_charmrun_fd, (const char *) &fd, sizeof(SOCKET));
2611 status = req_reply(fd, cmd, msg.data, ChMessageInt(msg.header.len));
2620 ChMessage_free(&msg);
/*
 * Socket abort handler that socket_error_in_poll() installs with
 * skt_set_abort() before it starts closing the client sockets.
 */
2625 int ignore_socket_errors(SOCKET skt, int c, const char *m)
2626 { /*Abandon on further socket errors during error shutdown*/
2634 /*A socket went bad somewhere! Immediately disconnect,
2635 which kills everybody.
/*
 * Socket abort handler installed by the polling routines.
 *   skt  - failing socket, translated to a node number and host name with
 *          skt_to_node() / skt_to_name() for the report.
 *   code - error code supplied by the caller.
 *   msg  - text appended to the report.
 * Switches the abort handler to ignore_socket_errors(), reports the failure
 * on stderr, closes every socket in req_clients, and records the current
 * time in ftTimer.
 */
2637 int socket_error_in_poll(SOCKET skt, int code, const char *msg)
2639 /*commenting it for fault tolerance*/
2642 skt_set_abort(ignore_socket_errors);
2643 const char *name = skt_to_name(skt);
2644 fprintf(stderr, "Charmrun> error on request socket to node %d '%s'--\n"
2646 skt_to_node(skt), name, msg);
2648 for (i = 0; i < req_nClients; i++)
2649 skt_close(req_clients[i]);
2652 ftTimer = GetClock();
2656 #if CMK_USE_POLL /*poll() version*/
/*
 * poll()-based implementation of the CMK_PIPE_* macro family.
 *   CMK_PIPE_DECL     - declares the pollfd array `fds` (allocated with
 *                       malloc for maxn entries), the slot counter pointer
 *                       `nFds`, and the delay `pollDelayMs`.
 *   CMK_PIPE_SUB      - the argument pair to pass the state to a callee.
 *   CMK_PIPE_PARAM    - the matching parameter list for such a callee.
 *   CMK_PIPE_CALL     - runs poll() over the first *nFds slots.
 *   CMK_PIPE_ADDREAD  - fills the slot at *nFds with rd_fd / POLLIN.
 *   CMK_PIPE_ADDWRITE - fills the slot at *nFds with wr_fd / POLLOUT.
 * NOTE(review): `fds` is static and assigned from malloc(); confirm it is
 * allocated only once and that the result is checked.
 */
2657 #define CMK_PIPE_DECL(maxn, delayMs) \
2658 static struct pollfd *fds = NULL; \
2660 int *nFds = &nFds_sto; \
2661 int pollDelayMs = delayMs; \
2663 fds = (struct pollfd *) malloc((maxn) * sizeof(struct pollfd));
2664 #define CMK_PIPE_SUB fds, nFds
2665 #define CMK_PIPE_CALL() \
2666 poll(fds, *nFds, pollDelayMs); \
2669 #define CMK_PIPE_PARAM struct pollfd *fds, int *nFds
2670 #define CMK_PIPE_ADDREAD(rd_fd) \
2672 fds[*nFds].fd = rd_fd; \
2673 fds[*nFds].events = POLLIN; \
2676 #define CMK_PIPE_ADDWRITE(wr_fd) \
2678 fds[*nFds].fd = wr_fd; \
2679 fds[*nFds].events = POLLOUT; \
/*
 * Readiness tests for the poll() variant. The fd argument is ignored: each
 * use inspects the pollfd slot at *nFds and then advances *nFds, so every
 * evaluation consumes one slot.
 * The expansion is fully parenthesized so that it stays a single expression
 * when combined with !, ==, && and similar operators; without the
 * parentheses `!CMK_PIPE_CHECKREAD(x)` would parse as `(!revents) & POLLIN`.
 */
#define CMK_PIPE_CHECKREAD(rd_fd) (fds[(*nFds)++].revents & POLLIN)
#define CMK_PIPE_CHECKWRITE(wr_fd) (fds[(*nFds)++].revents & POLLOUT)
2685 #else /*select() version*/
/*
 * select()-based implementation of the CMK_PIPE_* macro family.
 *   CMK_PIPE_DECL      - declares the read/write fd_sets, pointers to them,
 *                        and a timeval `tmo` derived from delayMs
 *                        (whole seconds plus the remainder in microseconds).
 *   CMK_PIPE_SUB/PARAM - argument pair / parameter list for passing the sets.
 *   CMK_PIPE_CALL      - select() over the read set with timeout tmo.
 *   CMK_PIPE_ADDREAD   - asserts nFds < FD_SETSIZE and adds rd_fd to rfds.
 *   CMK_PIPE_ADDWRITE  - adds wr_fd to wfds.
 *   CMK_PIPE_CHECK*    - FD_ISSET tests on the corresponding set.
 * NOTE(review): CMK_PIPE_CALL passes 0 for the write set, so descriptors
 * added with CMK_PIPE_ADDWRITE are never waited on -- confirm this is
 * intended.
 * NOTE(review): delayMs is used unparenthesized in the macro body.
 */
2687 #define CMK_PIPE_DECL(maxn, delayMs) \
2688 fd_set rfds_sto, wfds_sto; \
2690 fd_set *rfds = &rfds_sto, *wfds = &wfds_sto; \
2691 struct timeval tmo; \
2694 tmo.tv_sec = delayMs / 1000; \
2695 tmo.tv_usec = 1000 * (delayMs % 1000);
2696 #define CMK_PIPE_SUB rfds, wfds
2697 #define CMK_PIPE_CALL() select(FD_SETSIZE, rfds, 0, 0, &tmo)
2699 #define CMK_PIPE_PARAM fd_set *rfds, fd_set *wfds
2700 #define CMK_PIPE_ADDREAD(rd_fd) \
2702 assert(nFds < FD_SETSIZE); \
2703 FD_SET(rd_fd, rfds); \
2706 #define CMK_PIPE_ADDWRITE(wr_fd) FD_SET(wr_fd, wfds)
2707 #define CMK_PIPE_CHECKREAD(rd_fd) FD_ISSET(rd_fd, rfds)
2708 #define CMK_PIPE_CHECKWRITE(wr_fd) FD_ISSET(wr_fd, wfds)
2712 Wait for incoming requests on all client sockets,
2713 and the CCS socket (if present).
/*
 * Polling pass over all request sources, built on the CMK_PIPE_* macros
 * with a delay argument of 1000.
 * Registers every client socket, the CCS server socket when it is valid
 * and, under arg_charmdebug, descriptor 0 plus gdb_info_std[1] and
 * gdb_info_std[2]. After the wait it serves each readable client socket
 * (readcount starts at 10 per socket, re-checked with skt_select1), then the
 * CCS port, then the charmdebug and gdb streams.
 * NOTE(review): with the poll() macros CMK_PIPE_CHECKREAD ignores its
 * argument and consumes slots in order. Descriptors are added as
 * 0, gdb_info_std[1], gdb_info_std[2] but checked as
 * 0, gdb_info_std[2], gdb_info_std[1], so the two gdb slots look swapped in
 * that configuration -- confirm.
 * NOTE(review): buf is printed with "%s" after read(); confirm it is
 * NUL-terminated first.
 */
2720 CMK_PIPE_DECL(req_nClients + 5, 1000);
2721 for (i = 0; i < req_nClients; i++)
2722 CMK_PIPE_ADDREAD(req_clients[i]);
2723 if (CcsServer_fd() != INVALID_SOCKET)
2724 CMK_PIPE_ADDREAD(CcsServer_fd());
2725 if (arg_charmdebug) {
2726 CMK_PIPE_ADDREAD(0);
2727 CMK_PIPE_ADDREAD(gdb_info_std[1]);
2728 CMK_PIPE_ADDREAD(gdb_info_std[2]);
2731 skt_set_abort(socket_error_in_poll);
2733 DEBUGF(("Req_poll: Calling select...\n"));
2734 status = CMK_PIPE_CALL();
2735 DEBUGF(("Req_poll: Select returned %d...\n", status));
2738 return; /*Nothing to do-- timeout*/
2741 if (errno == EINTR || errno == EAGAIN)
2745 socket_error_in_poll(-1, 1359, "Node program terminated unexpectedly!\n");
2747 for (i = 0; i < req_nClients; i++)
2748 if (CMK_PIPE_CHECKREAD(req_clients[i])) {
2749 readcount = 10; /*number of successive reads we serve per socket*/
2750 /*This client is ready to read*/
2752 req_serve_client(req_clients[i]);
2754 } while (1 == skt_select1(req_clients[i], 0) && readcount > 0);
2757 if (CcsServer_fd() != INVALID_SOCKET)
2758 if (CMK_PIPE_CHECKREAD(CcsServer_fd())) {
2759 DEBUGF(("Activity on CCS server port...\n"));
2763 if (arg_charmdebug) {
2765 if (CMK_PIPE_CHECKREAD(0)) {
2766 int indata = read(0, buf, 5);
2769 fprintf(stderr, "Error reading command (%s)\n", buf);
2770 if (strncmp(buf, "info:", 5) == 0) {
2771 /* Found info command, forward data to gdb info program */
2774 // printf("Command to be forwarded\n");
2775 while (read(0, &c, 1) != -1) {
2777 if (c == '\n' || num >= 2045) {
2778 write(gdb_info_std[0], buf, num);
2784 // printf("Command from charmdebug: %d(%s)\n",indata,buf);
2786 /* All streams from gdb are forwarded to the stderr stream through the FILE
2787 gdb_stream which has been duplicated from stderr */
2788 /* NOTE: gdb_info_std[2] must be flushed before gdb_info_std[1] because the
2789 latter contains the string "(gdb) " ending the synchronization. Also the
2790 std[1] should be read with the else statement. It will not work without.
2792 if (CMK_PIPE_CHECKREAD(gdb_info_std[2])) {
2793 int indata = read(gdb_info_std[2], buf, 100);
2794 /*printf("read data from gdb info stderr %d\n",indata);*/
2797 // printf("printing %s\n",buf);
2799 // fprintf(gdb_stream,"%s",buf);
2802 } else if (CMK_PIPE_CHECKREAD(gdb_info_std[1])) {
2803 int indata = read(gdb_info_std[1], buf, 100);
2804 /*printf("read data from gdb info stdout %d\n",indata);*/
2807 // printf("printing %s\n",buf);
2809 fprintf(gdb_stream, "%s", buf);
/*
 * Polling pass for hierarchical start, written directly against select().
 * The read set holds every client socket, the CCS server socket when valid,
 * the gdb descriptors under arg_charmdebug and, when arg_child_charmrun is
 * set, parent_charmrun_fd.
 * Readable client sockets are handled by req_forward_root() when
 * arg_child_charmrun is set, with req_serve_client() as the other path;
 * traffic from the parent is handled by req_forward_client(). Each socket
 * starts with readcount = 10 and is re-checked with skt_select1. The CCS
 * port and the charmdebug and gdb streams are handled last.
 * NOTE(review): buf is printed with "%s" after read(); confirm it is
 * NUL-terminated first.
 */
2817 void req_poll_hierarchical()
2824 skt_set_abort(socket_error_in_poll);
2828 FD_ZERO(&rfds); /* clears set of file descriptor */
2829 for (i = 0; i < req_nClients; i++)
2830 FD_SET(req_clients[i], &rfds); /* adds client sockets to rfds set*/
2831 if (CcsServer_fd() != INVALID_SOCKET)
2832 FD_SET(CcsServer_fd(), &rfds);
2833 if (arg_charmdebug) {
2835 FD_SET(gdb_info_std[1], &rfds);
2836 FD_SET(gdb_info_std[2], &rfds);
2839 if (arg_child_charmrun)
2840 FD_SET(parent_charmrun_fd, &rfds); /* adds client sockets to rfds set*/
2841 DEBUGF(("Req_poll: Calling select...\n"));
2842 status = select(FD_SETSIZE, &rfds, 0, 0,
2843 &tmo); /* FD_SETSIZE is the maximum number of file
2844 descriptors that a fd_set object can hold
2845 information about, select returns number of
2847 DEBUGF(("Req_poll: Select returned %d...\n", status));
2850 return; /*Nothing to do-- timeout*/
/* NOTE(review): socket_error_in_poll() is declared with three parameters
 * (skt, code, msg) but is called with two here; the other polling routine
 * passes -1 as the socket. This call looks like it is missing that first
 * argument -- confirm. */
2854 socket_error_in_poll(1359, "Node program terminated unexpectedly!\n");
2856 for (i = 0; i < req_nClients; i++)
2857 if (FD_ISSET(req_clients[i], &rfds)) {
2858 readcount = 10; /*number of successive reads we serve per socket*/
2859 /*This client is ready to read*/
2861 if (arg_child_charmrun)
2862 req_forward_root(req_clients[i]);
2864 req_serve_client(req_clients[i]);
2866 } while (1 == skt_select1(req_clients[i], 0) && readcount > 0);
2869 if (arg_child_charmrun)
2870 // Forward from root to clients
2871 if (FD_ISSET(parent_charmrun_fd, &rfds)) {
2872 readcount = 10; /*number of successive reads we serve per socket*/
2874 req_forward_client();
2876 } while (1 == skt_select1(parent_charmrun_fd, 0) && readcount > 0);
2879 /*Wait to receive responses and Forward responses */
2880 if (CcsServer_fd() != INVALID_SOCKET)
2881 if (FD_ISSET(CcsServer_fd(), &rfds)) {
2882 DEBUGF(("Activity on CCS server port...\n"));
2886 if (arg_charmdebug) {
2888 if (FD_ISSET(0, &rfds)) {
2889 int indata = read(0, buf, 5);
2892 fprintf(stderr, "Error reading command (%s)\n", buf);
2893 if (strncmp(buf, "info:", 5) == 0) {
2894 /* Found info command, forward data to gdb info program */
2897 // printf("Command to be forwarded\n");
2898 while (read(0, &c, 1) != -1) {
2900 if (c == '\n' || num >= 2045) {
2901 write(gdb_info_std[0], buf, num);
2907 // printf("Command from charmdebug: %d(%s)\n",indata,buf);
2909 /* All streams from gdb are forwarded to the stderr stream through the FILE
2910 gdb_stream which has been duplicated from stderr */
2911 /* NOTE: gdb_info_std[2] must be flushed before gdb_info_std[1] because the
2912 latter contains the string "(gdb) " ending the synchronization. Also the
2913 std[1] should be read with the else statement. It will not work without.
2915 if (FD_ISSET(gdb_info_std[2], &rfds)) {
2916 int indata = read(gdb_info_std[2], buf, 100);
2917 /*printf("read data from gdb info stderr %d\n",indata);*/
2920 // printf("printing %s\n",buf);
2922 // fprintf(gdb_stream,"%s",buf);
2925 } else if (FD_ISSET(gdb_info_std[1], &rfds)) {
2926 int indata = read(gdb_info_std[1], buf, 100);
2927 /*printf("read data from gdb info stdout %d\n",indata);*/
2930 // printf("printing %s\n",buf);
2932 fprintf(gdb_stream, "%s", buf);
// Listening endpoint of this charmrun: req_start_server() fills server_addr
// and obtains server_fd / server_port from skt_server(); create_netstart()
// embeds the address and port in the NETSTART string.
2940 static unsigned int server_port;
2941 static char server_addr[1024]; /* IP address or hostname of charmrun*/
2942 static SOCKET server_fd;
// Parent charmrun contact data, parsed from NETSTART by parse_netstart().
2945 static skt_ip_t parent_charmrun_IP;
2946 static int parent_charmrun_port;
2947 static int parent_charmrun_pid;
// Data socket opened by init_mynodes() through skt_server(&dataport).
2948 static int dataport;
2949 static SOCKET dataskt;
// Start-up phase flag; the connect routines compare it against 1.
2950 int charmrun_phase = 0;
// Reports a failure that happened while a node program was attaching. The
// socket is translated to a host name with skt_to_name() for the message.
// The connect routines in this file install it with skt_set_abort().
// NOTE(review): code is not used in the lines visible here, and the end of
// the body (return value / exit) is outside this excerpt.
2953 int client_connect_problem(SOCKET skt, int code, const char *msg)
2954 { /*Called when something goes wrong during a client connect*/
2955 const char *name = skt_to_name(skt);
2956 fprintf(stderr, "Charmrun> error attaching to node '%s':\n%s\n", name, msg);
2961 /** return 1 if connection is opened successfully with client**/
// Waits (up to arg_timeout * 1000, passed to skt_select1) for one incoming
// connection on server_fd, accepts it into req_clients[client], records the
// socket-to-client mapping in skt_client_table and disables Nagle on it.
// Failures are routed to client_connect_problem().
2962 int errorcheck_one_client_connect(int client)
2965 /* Child charmruns are already connected - Do we need to connect again*/
2966 if (arg_hierarchical_start && !arg_child_charmrun && charmrun_phase == 1)
2969 /* FIXME: The error printing functions do a table lookup on the socket to
2970 * figure their corresponding host. However, certain failures happen
2971 * before we can associate a socket with a particular client, as in
2972 * skt_select1 below. In that case, we use a workaround to create a
2973 * dummy socket so that the internal error message is printed
// NOTE(review): the dummy key -10 is inserted into skt_client_table on every
// call; no removal is visible in this excerpt - confirm it is cleaned up or
// harmless.
2976 SOCKET dummy_skt = -10;
2977 skt_client_table[dummy_skt] = client;
2979 unsigned int clientPort; /*These are actually ignored*/
2982 printf("Charmrun> Waiting for %d-th client to connect.\n", client);
2983 /* FIXME: why are we passing the client as an error code here? */
2984 if (0 == skt_select1(server_fd, arg_timeout * 1000))
2985 client_connect_problem(dummy_skt, client,
2986 "Timeout waiting for node-program to connect");
// NOTE(review): the accepted socket is stored in skt_client_table before the
// SOCKET_ERROR check below, so a failed accept would also be recorded.
2988 req_clients[client] = skt_accept(server_fd, &clientIP, &clientPort);
2989 skt_client_table[req_clients[client]] = client;
2991 /* FIXME: will this ever be triggered? It seems the skt_abort handler here is
2992 * 'client_connect_problem', which calls exit(1), so we'd exit
2994 if (req_clients[client] == SOCKET_ERROR)
2995 client_connect_problem(dummy_skt, client, "Failure in node accept");
2997 skt_tcp_no_nagle(req_clients[client]);
// Reads the first message from an already connected client and hands it to
// req_handle_initnode(). If nothing arrives within the skt_select1 timeout
// (arg_timeout * 1000) client_connect_problem() is called first.
// NOTE(review): the return type line and the msg declaration are not visible
// in this excerpt.
3006 read_initnode_one_client(int client)
3009 if (!skt_select1(req_clients[client], arg_timeout * 1000))
3010 client_connect_problem(req_clients[client], client,
3011 "Timeout on IP request");
3012 ChMessage_recv(req_clients[client], &msg);
3013 req_handle_initnode(&msg, req_clients[client]);
3014 ChMessage_free(&msg);
3017 #if CMK_IBVERBS_FAST_START
// Fast-start only: accepts one client and receives its partial-init message.
// The message payload is read as a ChMessageInt_t node number.
// NOTE(review): the payload is decoded before the assert verifies that the
// header type starts with partinit; with NDEBUG the check disappears.
// NOTE(review): what is done with clientNode is not visible in this excerpt.
3018 void req_one_client_partinit(int client)
3020 ChMessage partStartMsg;
3023 if (errorcheck_one_client_connect(client)) {
3024 if (!skt_select1(req_clients[client], arg_timeout * 1000))
3025 client_connect_problem(req_clients[client], client,
3026 "Timeout on partial init request");
3028 ChMessage_recv(req_clients[client], &partStartMsg);
3029 clientNode = ChMessageInt(*(ChMessageInt_t *) partStartMsg.data);
3030 assert(strncmp(partStartMsg.header.type, "partinit", 8) == 0);
3031 ChMessage_free(&partStartMsg);
3038 /* To keep a global node numbering */
// Appends the node info carried by msg to myNodesInfo. The node number in the
// message is rebased by mynodes_start and looked up in nodetab_rank0_table;
// the resulting value is stored as the nodeNo of the new entry. The same
// lookup gives the nodetab index nt, and ctrlfd is recorded for each of the
// nodetab_cpus(nt) entries starting at nodetab_table[nt].
// NOTE(review): nodeCount and pe are declared outside the visible lines; no
// bound check on myNodesInfo is visible here.
3039 void add_singlenodeinfo_to_mynodeinfo(ChMessage *msg, SOCKET ctrlfd)
3041 /*add to myNodesInfo */
3042 ChSingleNodeinfo *nodeInfo = (ChSingleNodeinfo *) msg->data;
3044 /* need to change nodeNo */
3045 myNodesInfo[nodeCount].nodeNo = ChMessageInt_new(
3046 nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) - mynodes_start]);
3047 myNodesInfo[nodeCount++].info = nodeInfo->info;
3049 /* Required for CCS */
3050 int nt = nodetab_rank0_table[ChMessageInt(nodeInfo->nodeNo) -
3051 mynodes_start]; /*Nodetable index for this node*/
3053 for (pe = 0; pe < nodetab_cpus(nt); pe++) {
3054 nodetab_table[nt + pe]->ctrlfd = ctrlfd;
3060 /* Original Function, need to check if modifications required*/
// Accepts the clients numbered start up to (not including) end and collects
// their initnode messages. finished[] has one slot per client in the range
// and stores the node number reported by that client once its message has
// been handled by req_handle_initnode().
// NOTE(review): the outer loop, the initial value written to finished[] and
// any free(finished) are on lines missing from this excerpt.
3061 void req_set_client_connect(int start, int end)
3067 int *finished; // -1 if client i not finished, otherwise the node id of client i
3068 int curclient, curclientend, curclientstart;
3070 curclient = curclientend = curclientstart = start;
3072 finished = (int *) malloc((end - start) * sizeof(int));
3073 for (i = 0; i < (end - start); i++)
3076 #if CMK_USE_IBVERBS && !CMK_IBVERBS_FAST_START
3077 for (i = start; i < end; i++) {
3078 errorcheck_one_client_connect(curclientend++);
3080 if (req_nClients > 1) {
3081 /* a barrier to make sure infiniband device gets initialized */
3082 for (i = start; i < end; i++)
3083 ChMessage_recv(req_clients[i], &msg);
3084 for (i = start; i < end; i++)
3085 req_reply(req_clients[i], "barrier", "", 1);
3091 /* check server socket for messages */
3092 #if !CMK_USE_IBVERBS || CMK_IBVERBS_FAST_START
3093 while (curclientstart == curclientend || skt_select1(server_fd, 1) != 0) {
3094 errorcheck_one_client_connect(curclientend++);
3097 /* check appropriate clients for messages */
// NOTE(review): the > 0 test treats socket value 0 as not connected.
3098 for (client = curclientstart; client < curclientend; client++)
3099 if (req_clients[client] > 0) {
3100 if (skt_select1(req_clients[client], 1) != 0) {
3101 ChMessage_recv(req_clients[client], &msg);
3102 req_handle_initnode(&msg, req_clients[client]);
3103 finished[client - start] =
3104 ChMessageInt(((ChSingleNodeinfo *)msg.data)->nodeNo);
// Advance curclientstart to the first client that has not finished yet.
3110 for (i = curclientstart - start; i < (end - start); i++)
3111 if (finished[i] == -1) {
3112 curclientstart = start + i;
3117 ChMessage_free(&msg);
3119 // correct mapping in skt_client_table so that socket points to node using the socket
// NOTE(review): this loop starts at start but stops at end - start, and it
// indexes finished[] with i although the array was filled with client - start.
// For start == 0 both are equivalent; for start > 0 it looks wrong - confirm.
3120 for (i = start; i < (end - start); i++)
3121 skt_client_table[req_clients[i]] = finished[i];
3126 /*int charmrun_phase =0; meaningful for main charmrun to decide what to receive*/
// Hierarchical-start aware variant: accepts the clients numbered start up to
// (not including) end and dispatches each first message according to the
// role of this process:
//   - no hierarchical start: req_handle_initnode
//   - root charmrun: receive_nodeset_from_child when charmrun_phase == 1,
//     set_sockets_list in the other branch (the else line is not visible)
//   - child charmrun: add_singlenodeinfo_to_mynodeinfo
// finished[] records, per client of the range, the node number it reported.
// NOTE(review): the malloc results are assigned without a cast, which only
// compiles as C - confirm how this variant is built.
// NOTE(review): the outer loop, the initial value written to finished[] and
// any free(finished) are on lines missing from this excerpt.
3127 void req_set_client_connect(int start, int end)
3133 int *finished; // -1 if client i not finished, otherwise the node id of client i
3134 int curclient, curclientend, curclientstart;
3136 curclient = curclientend = curclientstart = start;
3138 finished = malloc((end - start) * sizeof(int));
3139 for (i = 0; i < (end - start); i++)
3142 if (arg_child_charmrun && start == 0)
3143 myNodesInfo = malloc(sizeof(ChSingleNodeinfo) * nodetab_rank0_size);
3145 #if CMK_USE_IBVERBS && !CMK_IBVERBS_FAST_START
3146 for (i = start; i < end; i++) {
3147 errorcheck_one_client_connect(curclientend++);
3149 if (req_nClients > 1) {
3150 /* a barrier to make sure infiniband device gets initialized */
3151 for (i = start; i < end; i++)
3152 ChMessage_recv(req_clients[i], &msg);
3153 for (i = start; i < end; i++)
3154 req_reply(req_clients[i], "barrier", "", 1);
3160 /* check server socket for messages */
3161 #if !CMK_USE_IBVERBS || CMK_IBVERBS_FAST_START
3162 while (curclientstart == curclientend || skt_select1(server_fd, 1) != 0) {
3163 errorcheck_one_client_connect(curclientend++);
3166 /* check appropriate clients for messages */
// NOTE(review): the > 0 test treats socket value 0 as not connected.
3167 for (client = curclientstart; client < curclientend; client++)
3168 if (req_clients[client] > 0) {
3169 if (skt_select1(req_clients[client], 1) != 0) {
3170 ChMessage_recv(req_clients[client], &msg);
3171 if (!arg_hierarchical_start)
3172 req_handle_initnode(&msg, req_clients[client]);
3174 if (!arg_child_charmrun) {
3175 if (charmrun_phase == 1)
3176 receive_nodeset_from_child(&msg, req_clients[client]);
3178 set_sockets_list(&msg, req_clients[client]);
3179 // here we need to decide based upon the phase
3180 } else /* hier-start with 2nd level*/
3181 add_singlenodeinfo_to_mynodeinfo(&msg, req_clients[client]);
3183 finished[client - start] =
3184 ChMessageInt(((ChSingleNodeinfo *)msg.data)->nodeNo);
// Advance curclientstart to the first client that has not finished yet.
3190 for (i = curclientstart - start; i < (end - start); i++)
3191 if (finished[i] == -1) {
3192 curclientstart = start + i;
3197 ChMessage_free(&msg);
3199 // correct mapping in skt_client_table so that socket points to node using the socket
// NOTE(review): this loop starts at start but stops at end - start, and it
// indexes finished[] with i although the array was filled with client - start.
// For start == 0 both are equivalent; for start > 0 it looks wrong - confirm.
3200 for (i = start; i < (end - start); i++)
3201 skt_client_table[req_clients[i]] = finished[i];
3207 /* allow one client to connect */
// Accepts the connection for the given client index and, when
// errorcheck_one_client_connect() reports success, reads its initnode message.
3208 void req_one_client_connect(int client)
3210 if (errorcheck_one_client_connect(
3211 client)) { /*This client has just connected-- fetch his name and IP*/
3212 read_initnode_one_client(client);
3217 /* Each node has sent the qpn data for all the qpns it has created
3218 This data needs to be sent to all the other nodes
3219 This needs to be done for all nodes
// Step 1: every rank-0 node gets a qpData array with nodetab_rank0_size
// ChInfiAddr entries.
// Step 2: for each sender proc, entries of nodeinfo_arr[proc].qpList are
// copied into qpData[proc] of the receiving nodes, after which the sender's
// qpList is freed.
// NOTE(review): how count advances and which node is skipped for a given proc
// is decided on lines missing from this excerpt.
3221 void exchange_qpdata_clients()
3224 for (i = 0; i < nodetab_rank0_size; i++) {
3225 int nt = nodetab_rank0_table[i]; /*Nodetable index for this node*/
3226 nodetab_table[nt]->qpData =
3227 (ChInfiAddr *) malloc(sizeof(ChInfiAddr) * nodetab_rank0_size);
3229 for (proc = 0; proc < nodetab_rank0_size; proc++) {
3231 for (i = 0; i < nodetab_rank0_size; i++) {
3234 int nt = nodetab_rank0_table[i]; /*Nodetable index for this node*/
3235 nodetab_table[nt]->qpData[proc] = nodeinfo_arr[proc].qpList[count];
3236 // printf("Charmrun> nt %d proc %d lid 0x%x qpn
3239 // 0x%x\n",nt,proc,ChMessageInt(nodetab_table[nt]->qpData[proc].lid),ChMessageInt(nodetab_table[nt]->qpData[proc].qpn),ChMessageInt(nodetab_table[nt]->qpData[proc].psn));
3243 free(nodeinfo_arr[proc].qpList);
// Sends every rank-0 node an initnodetab message over its ctrlfd. The payload
// is sent in four pieces: the message header, the node count (nNodes), the
// whole nodeinfo_arr, and that node's own qpData array. msgSize is the sum of
// the three payload sizes that follow the header.
3247 void send_clients_nodeinfo_qpdata()
3250 int msgSize = sizeof(ChMessageInt_t) +
3251 sizeof(ChNodeinfo) * nodetab_rank0_size +
3252 sizeof(ChInfiAddr) * nodetab_rank0_size;
3253 for (node = 0; node < nodetab_rank0_size; node++) {
3254 int nt = nodetab_rank0_table[node]; /*Nodetable index for this node*/
3255 // printf("Charmrun> Node %d proc %d sending initnodetab
3257 ChMessageHeader hdr;
3258 ChMessageInt_t nNodes = ChMessageInt_new(nodetab_rank0_size);
3259 ChMessageHeader_new("initnodetab", msgSize, &hdr);
3260 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) &hdr, sizeof(hdr));
3261 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) &nNodes,
3263 skt_sendN(nodetab_table[nt]->ctrlfd, (const char *) nodeinfo_arr,
3264 sizeof(ChNodeinfo) * nodetab_rank0_size);
3265 skt_sendN(nodetab_table[nt]->ctrlfd,
3266 (const char *) &nodetab_table[nt]->qpData[0],
3267 sizeof(ChInfiAddr) * nodetab_rank0_size);
// Timing helpers. getthetime(x) stores the current wall-clock time, in
// seconds with a fractional microsecond part, into x. Both macros expect a
// struct timeval named tim to be in scope at the point of use, and both
// expand to more than one statement, so they must not be used as the single
// statement of an unbraced if or loop.
3273 #define getthetime(x) \
3274 gettimeofday(&tim, NULL); \
3275 x = tim.tv_sec + (tim.tv_usec / 1000000.0);
3276 #define getthetime1(x) \
3277 gettimeofday(&tim, NULL); \
3279 /*Wait for all the clients to connect to our server port*/
// Non-batch connect path. Sets req_nClients to nodetab_rank0_size, allocates
// req_clients with every slot preset to -1, installs client_connect_problem
// as the socket abort handler and then collects all clients, either with
// req_set_client_connect(0, req_nClients) or, under CMK_IBVERBS_FAST_START,
// with req_one_client_partinit followed by read_initnode_one_client per
// client. Afterwards the node table is distributed: with hierarchical start
// through send_myNodeInfo_to_parent and forward_nodetab_to_children,
// and in the other visible branch through req_handle_initnodetab per client.
// NOTE(review): the preprocessor else/endif lines and the guards around the
// qpdata exchange are not visible in this excerpt.
3280 void req_client_connect(void)
3284 if (!arg_hierarchical_start)
3286 nodeinfo_allocate();
3287 req_nClients = nodetab_rank0_size;
3288 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3289 for (client = 0; client < req_nClients; client++)
3290 req_clients[client] = -1;
3292 skt_set_abort(client_connect_problem);
3294 #if CMK_IBVERBS_FAST_START
3295 for (client = 0; client < req_nClients; client++) {
3296 req_one_client_partinit(client);
3298 for (client = 0; client < req_nClients; client++) {
3299 read_initnode_one_client(client);
3303 req_set_client_connect(0, req_nClients);
3310 printf("Charmrun> All clients connected.\n");
3312 exchange_qpdata_clients();
3313 send_clients_nodeinfo_qpdata();
3316 if (arg_hierarchical_start) {
3317 /* first we need to send data to parent charmrun and then send the nodeinfo
3319 send_myNodeInfo_to_parent();
3320 /*then receive from root */
3321 forward_nodetab_to_children();
3326 for (client = 0; client < req_nClients; client++) {
3327 req_handle_initnodetab(NULL, req_clients[client]);
3332 printf("Charmrun> IP tables sent.\n");
3335 /*Wait for all the clients to connect to our server port, then collect and send
3336 * nodetable to all */
// Root-charmrun side of hierarchical start. The clients here are the child
// charmruns, so req_nClients is branchfactor. Visible sequence:
//   1. collect the children with req_set_client_connect(0, req_nClients)
//   2. call req_handle_initnodedistribution for each child on charmrun_fds[]
//   3. run req_set_client_connect again to receive the nodetabs from them
//   4. call req_handle_initnodetab for each child
// NOTE(review): where charmrun_phase changes between step 1 and step 3, and
// where charmrun_fds[] is filled, is not visible in this excerpt.
3338 void req_charmrun_connect(void)
3340 // double t1, t2, t3, t4;
3342 nodeinfo_allocate();
3343 req_nClients = branchfactor;
3344 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3345 charmrun_fds = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3346 for (client = 0; client < req_nClients; client++)
3347 req_clients[client] = -1;
3349 skt_set_abort(client_connect_problem);
3351 #if CMK_IBVERBS_FAST_START
3352 for (client = 0; client < req_nClients; client++) {
3353 req_one_client_partinit(client);
3355 for (client = 0; client < req_nClients; client++) {
3356 read_initnode_one_client(client);
3359 // if(!arg_child_charmrun) getthetime(t1);
3361 req_set_client_connect(0, req_nClients);
3362 // if(!arg_child_charmrun) getthetime(t2); /* also need to process
3363 // received nodesets JIT */
3369 printf("Charmrun> All clients connected.\n");
3371 exchange_qpdata_clients();
3372 send_clients_nodeinfo_qpdata();
3374 for (client = 0; client < req_nClients; client++) {
3375 // add flag to check what level charmrun it is and what phase
3376 req_handle_initnodedistribution(NULL, charmrun_fds[client], client);
3380 /* Now receive the nodetab from child charmruns*/
3383 skt_set_abort(client_connect_problem);
3385 req_set_client_connect(0, req_nClients);
3387 /* Already processed, so send*/
3388 for (client = 0; client < req_nClients; client++) {
3389 req_handle_initnodetab(NULL, req_clients[client]);
3391 // if(!arg_child_charmrun) getthetime(t4);
3394 printf("Charmrun> IP tables sent.\n");
3395 // if(!arg_child_charmrun) printf("Time for charmruns connect= %f , sending
3396 // nodes to fire= %f, node clients connected= %f n ", t2-t1, t3-t2, t4-t3);
// Forward declarations of the node launch helpers used by the batch start
// routine below; the Win32 build provides empty stubs for them further down.
3403 void start_one_node_ssh(int rank0no);
3404 void finish_one_node(int rank0no);
3405 void finish_set_nodes(int start, int stop);
3406 int start_set_node_ssh(int client);
// Batch-spawn path: launches node programs in batches and connects each batch
// before starting the next. Per batch, start_set_node_ssh() is called up to
// arg_batch_spawn times; its return value is added to client, which is
// clamped to req_nClients. The clients in clientstart up to client are then
// finished with finish_set_nodes() and connected with
// req_set_client_connect(). After all batches the node table is distributed
// the same way as in req_client_connect(), and ssh_pids is freed.
// NOTE(review): the statement guarded by the arg_ssh_display test and the
// preprocessor else/endif lines are not visible in this excerpt.
// NOTE(review): unlike req_client_connect(), no loop presetting req_clients[]
// to -1 is visible here - confirm whether it is on a missing line.
3408 void req_client_start_and_connect(void)
3411 int batch = arg_batch_spawn; /* fire several at a time */
3412 int clientgroup, clientstart;
3416 if (!arg_hierarchical_start)
3418 nodeinfo_allocate();
3419 req_nClients = nodetab_rank0_size;
3420 req_clients = (SOCKET *) malloc(req_nClients * sizeof(SOCKET));
3422 skt_set_abort(client_connect_problem);
3425 while (client < req_nClients) { /* initiate a batch */
3426 clientstart = client;
3428 for (counter = 0; counter < batch;
3429 counter++) { /* initiate batch number of nodes */
3430 clientgroup = start_set_node_ssh(client);
3431 client += clientgroup;
3432 if (client >= req_nClients) {
3433 client = req_nClients;
3438 /* ssh x11 forwarding will make sure ssh exit */
3439 if (!arg_ssh_display)
3441 finish_set_nodes(clientstart, client);
3443 #if CMK_IBVERBS_FAST_START
3444 for (c = clientstart; c < client; c++) {
3445 req_one_client_partinit(c);
3448 req_set_client_connect(clientstart, client);
3452 #if CMK_IBVERBS_FAST_START
3453 for (client = 0; client < req_nClients; client++) {
3454 read_initnode_one_client(client);
3460 printf("Charmrun> All clients connected.\n");
3463 exchange_qpdata_clients();
3464 send_clients_nodeinfo_qpdata();
3467 if (arg_hierarchical_start) {
3468 /* first we need to send data to parent charmrun and then send the nodeinfo
3470 send_myNodeInfo_to_parent();
3471 /*then receive from root */
3472 forward_nodetab_to_children();
3477 for (client = 0; client < req_nClients; client++) {
3478 req_handle_initnodetab(NULL, req_clients[client]);
3483 printf("Charmrun> IP tables sent.\n");
3484 free(ssh_pids); /* done with ssh_pids */
3489 /*Start the server socket the clients will connect to.*/
// Chooses the address advertised to node programs (server_addr) and opens
// the listening socket. Order of preference in the visible code: loopback
// for local runs, arg_charmrunip, the CHARMRUN_IP environment variable,
// loopback when no IP could be found, the host name (arg_usehostname or
// when the IP equals 127.0.0.1), and finally the printed form of the IP.
// When CCS is available and this is not a child charmrun, a CCS server is
// created if arg_server == 1.
// NOTE(review): arg_charmrunip and CHARMRUN_IP are copied with strcpy into
// the 1024-byte server_addr without a length check.
// NOTE(review): the first condition of the if chain is on a line missing
// from this excerpt.
3490 void req_start_server(void)
3492 skt_ip_t ip = skt_innode_my_ip();
3494 /* local execution, use localhost always */
3495 strcpy(server_addr, "127.0.0.1");
3496 else if (arg_charmrunip != NULL)
3497 /* user specify the IP at +useip */
3498 strcpy(server_addr, arg_charmrunip);
3499 else if ((arg_charmrunip = getenv("CHARMRUN_IP")) != NULL)
3500 /* user specify the env */
3501 strcpy(server_addr, arg_charmrunip);
3502 else if (skt_ip_match(ip, _skt_invalid_ip)) {
3503 printf("Charmrun> Warning-- cannot find IP address for your hostname. "
3504 "Using loopback.\n");
3505 strcpy(server_addr, "127.0.0.1");
3506 } else if (arg_usehostname || skt_ip_match(ip, skt_lookup_ip("127.0.0.1")))
3507 /*Use symbolic host name as charmrun address*/
3508 gethostname(server_addr, sizeof(server_addr));
3510 skt_print_ip(server_addr, ip);
3513 server_fd = skt_server(&server_port);
3516 printf("Charmrun> Charmrun = %s, port = %d\n", server_addr, server_port);
3519 #if CMK_CCS_AVAILABLE
3521 if (!arg_hierarchical_start ||
3522 (arg_hierarchical_start && !arg_child_charmrun))
3524 if (arg_server == 1)
3525 CcsServer_new(NULL, &arg_server_port, arg_server_auth);
// Index of this child charmrun, read from the first field of NETSTART.
3530 int unique_node_start;
3531 /* Function copied from machine.c file */
// Parses the NETSTART environment variable. Field order is: node start
// index, parent charmrun host name, parent port, parent pid, port. This is
// the same order create_netstart() writes. The host name is resolved into
// parent_charmrun_IP with skt_lookup_ip().
// NOTE(review): the host name is scanned with an unbounded %s into a
// 1024-byte buffer; the value comes from the environment.
// NOTE(review): the #if below combines its two flags with bitwise |, which
// behaves like || for 0/1 values - confirm that is intended.
3532 void parse_netstart(void)
3537 ns = getenv("NETSTART");
3538 if (ns != 0) { /*Read values set by Charmrun*/
3539 char parent_charmrun_name[1024];
3540 nread = sscanf(ns, "%d%s%d%d%d", &unique_node_start, parent_charmrun_name,
3541 &parent_charmrun_port, &parent_charmrun_pid, &port);
3542 parent_charmrun_IP = skt_lookup_ip(parent_charmrun_name);
3544 nodetab_unique_table[unique_node_start]; /*Works only when
3545 init_hierarchical called in
3549 fprintf(stderr, "Error parsing NETSTART '%s'\n", ns);
3553 #if CMK_USE_IBVERBS | CMK_USE_IBUD
3554 char *cmi_num_nodes = getenv("CmiNumNodes");
3555 if (cmi_num_nodes != NULL) {
3556 sscanf(cmi_num_nodes, "%d", &_Cmi_numnodes);
// Second integer of the node list message (see my_nodetab_store).
3561 int nodetab_rank0_size_total;
3562 /* Receive nodes for which I am responsible*/
// Unpacks a node list message made of ChMessageInt_t values: element 0 is
// the number of nodes for this charmrun (stored in nodetab_rank0_size),
// element 1 goes to nodetab_rank0_size_total, and elements 2 onward fill
// nodetab_rank0_table.
// NOTE(review): no check of the received count against the capacity of
// nodetab_rank0_table or the message length is visible here.
3563 void my_nodetab_store(ChMessage *msg)
3565 ChMessageInt_t *nodelistmsg = (ChMessageInt_t *) msg->data;
3566 nodetab_rank0_size = ChMessageInt(nodelistmsg[0]);
3567 nodetab_rank0_size_total = ChMessageInt(nodelistmsg[1]);
3569 for (k = 0; k < nodetab_rank0_size; k++) {
3570 nodetab_rank0_table[k] = ChMessageInt(nodelistmsg[k + 2]);
3574 /* In hierarchical startup, this function is used by child charmrun to obtains
3575 * the list of nodes for which it is responsible */
// Sends an initnodetab request carrying unique_node_start to the parent
// charmrun, waits for the reply on parent_charmrun_fd (skt_select1 with
// 1200 * 1000) and stores the received list with my_nodetab_store().
// NOTE(review): the action taken when the wait times out is on a line
// missing from this excerpt.
3576 void nodelist_obtain(void)
3578 ChMessage nodelistmsg; /* info about all nodes*/
3579 /*Contact charmrun for machine info.*/
3583 /* int qpListSize = (_Cmi_numnodes-1)*sizeof(ChInfiAddr);
3584 me.info.qpList = malloc(qpListSize);
3585 copyInfiAddr(me.info.qpList);
3586 MACHSTATE1(3,"me.info.qpList created and copied size %d bytes",qpListSize);
3587 ctrl_sendone_nolock("initnode",(const char *)&me,sizeof(me),(const char *)me.info.qpList,qpListSize);
3588 free(me.info.qpList);
3591 ChMessageHeader hdr;
3592 ChMessageInt_t node_start = ChMessageInt_new(unique_node_start);
3593 ChMessageHeader_new("initnodetab", sizeof(ChMessageInt_t), &hdr);
3594 skt_sendN(parent_charmrun_fd, (const char *) &hdr, sizeof(hdr));
3595 skt_sendN(parent_charmrun_fd, (const char *) &node_start, sizeof(node_start));
3597 #endif // CMK_USE_IBVERBS
3599 /*We get the other node addresses from a message sent
3600 back via the charmrun control port.*/
3601 if (!skt_select1(parent_charmrun_fd, 1200 * 1000)) {
3604 ChMessage_recv(parent_charmrun_fd, &nodelistmsg);
3606 my_nodetab_store(&nodelistmsg);
3607 ChMessage_free(&nodelistmsg);
// Child-charmrun initialisation. When a valid parent IP is known, opens the
// data socket (dataskt / dataport) and connects to the parent charmrun; in
// the other visible branch parent_charmrun_fd is set to -1.
// NOTE(review): calls made before and after these lines (for example to
// parse NETSTART or obtain the node list) are not visible in this excerpt.
3610 void init_mynodes(void)
3613 if (!skt_ip_match(parent_charmrun_IP, _skt_invalid_ip)) {
3614 dataskt = skt_server(&dataport);
3615 parent_charmrun_fd =
3616 skt_connect(parent_charmrun_IP, parent_charmrun_port, 1800);
3618 parent_charmrun_fd = -1;
3625 /****************************************************************************
3629 ****************************************************************************/
// Forward declarations for the launch back ends and helpers that main()
// calls; which of them are compiled in depends on preprocessor guards that
// are not visible in this excerpt.
3630 void start_nodes_daemon(void);
3631 void start_nodes_ssh(void);
3632 void start_nodes_mpiexec();
3634 void start_next_level_charmruns(void);
3637 void nodetab_init_for_scyld(void);
3638 void start_nodes_scyld(void);
3640 void start_nodes_local(char **envp);
3641 void kill_nodes(void);
3642 void open_gdb_info(void);
3643 void read_global_segments_size(void);
// Idle callback that main() registers with skt_set_idle(); it only calls
// sleep(0).
3645 static void fast_idleFn(void) { sleep(0); }
3646 void finish_nodes(void);
// Program entry point. Visible order of work: register the idle callback,
// parse arguments (arg_init), record the start time, build the node table,
// start the server port, then launch the node programs with one of the back
// ends (daemon, scyld, hierarchical child charmruns, batch ssh, mpiexec,
// local), optionally set up the gdb info stream for charmdebug, wait for
// all clients to connect, and finally enter the request-service loop.
// NOTE(review): a large share of this function (calls, else branches,
// preprocessor guards) is on lines missing from this excerpt; the summary
// above covers only what is shown.
3648 int main(int argc, const char **argv, char **envp)
3652 skt_set_idle(fast_idleFn);
3653 /* CrnSrand((int) time(0)); */
3654 /* notify charm developers that charm is in use */
3657 if (!arg_child_charmrun)
3660 /* Compute the values of all constants */
3661 arg_init(argc, argv);
3663 fprintf(stderr, "Charmrun> charmrun started...\n");
3664 start_timer = GetClock();
3666 /* check scyld configuration */
3670 nodetab_init_for_scyld();
3672 /* Initialize the node-table by reading nodesfile */
3676 /* Start the server port */
3679 /* Initialize the IO module */
3683 /* Hierarchical startup*/
3684 if (arg_child_charmrun) {
3685 init_mynodes(); /* contacts root charmrun and gets list of nodes to start*/
3688 /* start the node processes */
3689 if (0 != getenv("CONV_DAEMON"))
3690 start_nodes_daemon();
3693 start_nodes_scyld();
3696 printf("Charmrun> IBVERBS version of charmrun\n");
3700 /* Hierarchical-startup*/
3701 if (arg_hierarchical_start) {
3703 if (!arg_child_charmrun) {
3704 start_next_level_charmruns();
3706 if (!arg_batch_spawn)
3709 req_client_start_and_connect();
3712 start_nodes_local(envp);
3721 if (!arg_batch_spawn) {
3723 start_nodes_mpiexec();
3727 req_client_start_and_connect();
3729 start_nodes_local(envp);
3733 if (arg_charmdebug) {
3734 #if (defined(_WIN32) && !defined(__CYGWIN__)) || CMK_BPROC
3735 /* Gdb stream (and charmdebug) currently valid only with ssh subsystem */
3737 "Charmdebug is supported currently only with the ssh subsystem\n");
3740 /* Open an additional connection to node 0 with a gdb to grab info */
3741 printf("opening connection with node 0 for info gdb\n");
3742 read_global_segments_size();
// gdb_stream is a second FILE on a duplicate of descriptor 2 (stderr).
3744 gdb_stream = fdopen(dup(2), "a");
3750 fprintf(stderr, "Charmrun> node programs all started\n");
3752 /* Wait for all clients to connect */
3754 /* Hierarchical startup*/
3755 if (arg_hierarchical_start) {
3757 if (!arg_batch_spawn || (!arg_child_charmrun))
3761 if (!arg_child_charmrun)
3762 req_charmrun_connect();
3763 else if (!arg_batch_spawn)
3764 req_client_connect();
3771 if (!arg_batch_spawn)
3774 if (!arg_batch_spawn)
3775 req_client_connect();
3781 fprintf(stderr, "Charmrun> node programs all connected\n");
3783 fprintf(stderr, "Charmrun> started all node programs in %.3f seconds.\n",
3784 GetClock() - start_timer);
3786 /* enter request-service mode */
3788 if (arg_hierarchical_start)
3790 req_poll_hierarchical();
3797 /*This little snippet creates a NETSTART
3798 environment variable entry for the given node #.
3799 It uses the idiotic "return reference to static buffer"
3800 string return idiom.
// The string has five space-separated fields: node number (or the literal
// text $CmiMyNode in the first variant), server_addr, server_port, the low
// 15 bits of this process id, and port. Each call overwrites the same static
// buffer, so a caller must use or copy the result before calling again.
// NOTE(review): the condition choosing between the two sprintf variants and
// the return statement are on lines missing from this excerpt.
3802 char *create_netstart(int node)
3804 static char dest[1024];
3807 sprintf(dest, "$CmiMyNode %s %d %d %d", server_addr, server_port,
3808 getpid() & 0x7FFF, port);
3810 sprintf(dest, "%d %s %d %d %d", node, server_addr, server_port,
3811 getpid() & 0x7FFF, port);
3815 /* The remainder of charmrun is only concerned with starting all
3816 the node-programs, also known as charmrun clients. We have to
3817 start nodetab_rank0_size processes on the remote machines.
3820 /*Ask the converse daemon running on each machine to start the node-programs.*/
// Builds one argument string from arg_argv, then for every rank-0 node fills
// a task record (working directory, program path, NETSTART environment
// entry, argument length), connects to the daemon on that host at
// DAEMON_IP_PORT, sends the task followed by the argument string and reads
// back a one-byte status. Any status other than G is reported as an error.
// NOTE(review): the arguments are appended with strcat into the 5000-byte
// argBuffer, and the paths / NETSTART are written with strcpy / sprintf into
// task fields whose sizes are not visible here; no length checks are shown.
3821 void start_nodes_daemon(void)
3824 char argBuffer[5000]; /*Buffer to hold assembled program arguments*/
3827 /*Set the parts of the task structure that will be the same for all nodes*/
3828 /*Figure out the command line arguments (same for all PEs)*/
3830 for (i = 0; arg_argv[i]; i++) {
3832 printf("Charmrun> packing arg: %s\n", arg_argv[i]);
3833 strcat(argBuffer, " ");
3834 strcat(argBuffer, arg_argv[i]);
3837 task.magic = ChMessageInt_new(DAEMON_MAGIC);
3839 /*Start up the user program, by sending a message
3840 to PE 0 on each node.*/
3841 for (nodeNumber = 0; nodeNumber < nodetab_rank0_size; nodeNumber++) {
3842 char nodeArgBuffer[5000]; /*Buffer to hold assembled program arguments*/
3844 char *arg_nodeprog_r, *arg_currdir_r;
3845 char statusCode = 'N'; /*Default error code-- network problem*/
3847 int pe0 = nodetab_rank0_table[nodeNumber];
// pathfix / pathextfix return heap strings that are freed after being copied.
3849 arg_currdir_r = pathfix(arg_currdir_a, nodetab_pathfixes(nodeNumber));
3850 strcpy(task.cwd, arg_currdir_r);
3851 free(arg_currdir_r);
3852 arg_nodeprog_r = pathextfix(arg_nodeprog_a, nodetab_pathfixes(nodeNumber),
3853 nodetab_ext(nodeNumber));
3854 strcpy(task.pgm, arg_nodeprog_r);
3857 printf("Charmrun> Starting node program %d on '%s' as %s.\n", nodeNumber,
3858 nodetab_name(pe0), arg_nodeprog_r);
3859 free(arg_nodeprog_r);
3860 sprintf(task.env, "NETSTART=%s", create_netstart(nodeNumber));
// A nice value of -100 means no +nice argument is appended for this node.
3862 if (nodetab_nice(nodeNumber) != -100) {
3864 fprintf(stderr, "Charmrun> +nice %d\n", nodetab_nice(nodeNumber));
3865 sprintf(nodeArgBuffer, "%s +nice %d", argBuffer,
3866 nodetab_nice(nodeNumber));
3867 argBuf = nodeArgBuffer;
3870 task.argLength = ChMessageInt_new(strlen(argBuf));
3872 /*Send request out to remote node*/
3873 fd = skt_connect(nodetab_ip(pe0), DAEMON_IP_PORT, 30);
3875 INVALID_SOCKET) { /*Contact! Ask the daemon to start the program*/
3876 skt_sendN(fd, (const char *) &task, sizeof(task));
3877 skt_sendN(fd, (const char *) argBuf, strlen(argBuf));
3878 skt_recvN(fd, &statusCode, sizeof(char));
3880 if (statusCode != 'G') { /*Something went wrong--*/
3881 fprintf(stderr, "Error '%c' starting remote node program on %s--\n%s\n",
3882 statusCode, nodetab_name(pe0), daemon_status2msg(statusCode));
3884 } else if (arg_verbose)
3885 printf("Charmrun> Node program %d started.\n", nodeNumber);
3889 #if defined(_WIN32) && !defined(__CYGWIN__)
3890 /*Sadly, interprocess communication on Win32 is quite
3891 different, so we can't use Ssh on win32 yet.
3892 Fall back to the daemon.*/
// Native Win32 build: start_nodes_ssh() delegates to start_nodes_daemon();
// the remaining ssh / mpiexec entry points are empty stubs, and
// start_set_node_ssh() reports 0 clients started.
3893 void start_nodes_ssh() { start_nodes_daemon(); }
3894 void finish_nodes(void) {}
3895 void start_one_node_ssh(int rank0no) {}
3896 void finish_one_node(int rank0no) {}
3897 void start_nodes_mpiexec() {}
3899 int start_set_node_ssh(int client) { return 0; }
3900 void finish_set_nodes(int start, int stop) {}
// Appends every string of the environment block oldEnv after the single
// string already stored in dest. Each copied string keeps its terminator and
// one extra terminator is written at the end. oldEnv is released with
// FreeEnvironmentStrings() before returning.
// NOTE(review): dest has no size parameter; the caller in this file passes a
// 10000-byte buffer, so a large environment could overflow it.
// NOTE(review): the declaration of src is on a line missing from this
// excerpt; presumably it points at oldEnv - confirm.
3902 void envCat(char *dest, LPTSTR oldEnv)
3905 dest += strlen(dest); // Advance to end of dest
3906 dest++; // Advance past terminating NULL character
3907 while ((*src) != '\0') {
3908 int adv = strlen(src) + 1; // Length of newly-copied string plus NULL
3909 strcpy(dest, src); // Copy another environment string
3910 dest += adv; // Advance past newly-copied string and NULL
3911 src += adv; // Ditto for src
3913 *dest = '\0'; // Paste on final terminating NULL character
3914 FreeEnvironmentStrings(oldEnv);
3917 /* simple version of charmrun that avoids the sshd or charmd, */
3918 /* it spawn the node program just on local machine using exec. */
// Win32 implementation: joins pparam_argv[1] and the following arguments
// into one command line, then starts arg_requested_pes processes with
// CreateProcess. Each process gets an environment block made of its own
// NETSTART entry followed by the current environment (see envCat), and runs
// in the current directory.
// NOTE(review): the arguments are appended with strcat into the 10000-byte
// cmdLine; the length test that is visible refers to a name (args) whose
// declaration is not shown, and may be inside a commented-out region.
// NOTE(review): the env parameter is not used in the lines visible here.
3919 void start_nodes_local(char **env)
3922 PROCESS_INFORMATION pi; /* process Information for the process spawned */
3925 char environment[10000]; /*Doubly-null terminated environment strings*/
3926 char cmdLine[10000]; /*Program command line, including executable name*/
3927 /*Command line too long.*/
3929 if (strlen(pparam_argv[1])+strlen(args) > 10000)
3932 strcpy(cmdLine, pparam_argv[1]);
3933 p = pparam_argv + 2;
3934 while ((*p) != '\0') {
3935 strcat(cmdLine, " ");
3936 strcat(cmdLine, *p);
3940 for (i = 0; i < arg_requested_pes; i++) {
3941 STARTUPINFO si = {0}; /* startup info for the process spawned */
3943 sprintf(environment, "NETSTART=%s", create_netstart(i));
3944 /*Paste all system environment strings */
3945 envCat(environment, GetEnvironmentStrings());
3947 /* Initialise the security attributes for the process
3951 printf("Charmrun> start %d node program on localhost.\n", i);
3953 ret = CreateProcess(NULL, /* application name */
3954 cmdLine, /* command line */
3955 NULL, /*&sa,*/ /* process SA */
3956 NULL, /*&sa,*/ /* thread SA */
3957 FALSE, /* inherit flag */
3959 CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS,
3961 CREATE_NEW_PROCESS_GROUP | CREATE_NEW_CONSOLE,
3963 /* creation flags */
3964 environment, /* environment block */
3965 ".", /* working directory */
3966 &si, /* startup info */
3970 /*Something went wrong! Look up the Windows error code*/
3972 int error=GetLastError();
3973 char statusCode=daemon_err2status(error);
3974 fprintf(logfile,"******************* ERROR *****************\n"
3975 "Error in creating process!\n"
3976 "Error code = %ld-- %s\n\n\n", error,
3977 daemon_status2msg(statusCode));
3980 int error = GetLastError();
3981 printf("startProcess failed to start process \"%s\" with status: %d\n",
3982 pparam_argv[1], error);
// Reports whether a bproc node is up. Two API generations are handled: for
// CMK_BPROC_VERSION < 4 the status is compared with bproc_node_up; in the
// other branch the status is fetched as text and compared with the string
// up, with node -1 (the master) always treated as up.
// NOTE(review): the status variable, the return statements and the verbosity
// guard around the printf calls are on lines missing from this excerpt.
3990 int bproc_nodeisup(int node)
3993 #if CMK_BPROC_VERSION < 4
3994 if (bproc_nodestatus(node) == bproc_node_up)
3997 printf("Charmrun> node %d status: %s\n", node, status ? "up" : "down");
3999 char nodestatus[128];
4000 if (node == -1) { /* master node is always up */
4001 strcpy(nodestatus, "up");
4004 if (bproc_nodestatus(node, nodestatus, 128)) {
4005 if (strcmp(nodestatus, "up") == 0)
4009 printf("Charmrun> node %d status: %s\n", node, nodestatus);
4014 /* ++ppn now is supported in both SMP and non SMP version
4015 in SMP, ++ppn specifies number of threads on each node;
4016 in non-SMP, ++ppn specifies number of processes on each node. */
// Builds the node table for a Scyld/bproc cluster instead of reading a nodes
// file. The table is sized to the larger of arg_requested_pes and the number
// of candidate nodes. Nodes are scanned from -1 (the master) upward; nodes
// that are down, below arg_startpe, or the master when arg_skipmaster is set
// are skipped. Each accepted node is named by its decimal number and added
// up to arg_ppn times, until arg_requested_pes entries exist. If fewer PEs
// were found than requested, existing nodes are reused to fill the table.
// NOTE(review): many lines (continue / break statements, initialisation of
// npes, node and startnode, closing braces) are missing from this excerpt;
// the description above follows only what is shown.
4017 void nodetab_init_for_scyld()
4019 int maxNodes, i, node, npes, rank;
4023 tablesize = arg_requested_pes;
4024 maxNodes = bproc_numnodes() + 1;
4025 if (arg_endpe < maxNodes)
4026 maxNodes = arg_endpe + 1;
4027 if (maxNodes > tablesize)
4028 tablesize = maxNodes;
4029 nodetab_table = (nodetab_host **) malloc(tablesize * sizeof(nodetab_host *));
4030 nodetab_rank0_table = (int *) malloc(tablesize * sizeof(int));
4031 nodetab_max = tablesize;
4033 nodetab_reset(&group);
4038 #if CMK_SHARED_VARS_UNAVAILABLE
4040 fprintf(stderr,"Warning> Invalid ppn %d in nodelist ignored.\n",
4049 /* check which slave node is available from frompe to endpe */
4051 for (i = -1; i < maxNodes && npes < arg_requested_pes; i++) {
4053 if (!bproc_nodeisup(i))
4055 if (i != -1 && i < arg_startpe)
4057 if (i == -1 && arg_skipmaster)
4058 continue; /* skip master node -1 */
4059 sprintf(hostname, "%d", i);
4060 #if !CMK_SHARED_VARS_UNAVAILABLE
4061 if (npes + arg_ppn > arg_requested_pes)
4062 group.cpus = arg_requested_pes - npes;
4064 group.cpus = arg_ppn;
4066 for (rank = 0; rank < arg_ppn; rank++) {
4067 #if !CMK_SHARED_VARS_UNAVAILABLE
4070 nodetab_makehost(hostname, &group);
4071 nodetab_add(&group);
4072 if (++npes == arg_requested_pes)
4076 if (nodetab_rank0_size == 0) {
4077 fprintf(stderr, "Charmrun> no slave node available!\n");
4081 printf("Charmrun> There are %d slave nodes available.\n",
4082 nodetab_rank0_size - (arg_skipmaster ? 0 : 1));
4084 /* expand node table to arg_requested_pes */
4085 if (arg_requested_pes > npes) {
4086 int orig_size = npes;
4089 if (arg_singlemaster && nodetab_rank0_size > 1 && !arg_skipmaster)
4090 startnode = arg_ppn; /* skip -1 */
4092 while (npes < arg_requested_pes) {
4093 #if !CMK_SHARED_VARS_UNAVAILABLE
4094 if (npes + arg_ppn > arg_requested_pes)
4095 group.cpus = arg_requested_pes - npes;
4097 group.cpus = arg_ppn;
4099 for (rank = 0; rank < arg_ppn; rank++) {
4100 #if !CMK_SHARED_VARS_UNAVAILABLE
4103 nodetab_makehost(nodetab_name(node), &group);
4104 nodetab_add(&group);
4105 if (++node == orig_size)
4107 if (++npes == arg_requested_pes)
// Starts the node program for every rank-0 entry of the node table on a
// Scyld (bproc) cluster. The NETSTART value from create_netstart(i) is passed
// to each program through envp[0]. Both an execve call and a
// bproc_execmove(nodeno, ...) call are present.
// NOTE(review): the embedded line numbers skip, so some statements of this
// function (including the test that chooses between execve and
// bproc_execmove) are not visible in this listing.
4114 void start_nodes_scyld(void)
4119 envp[0] = (char *) malloc(256);
4121 for (i = 0; i < nodetab_rank0_size; i++) {
4124 int pe = nodetab_rank0_table[i];
// The host name is parsed as a decimal bproc node number.
4125 int nodeno = atoi(nodetab_name(pe));
4128 printf("Charmrun> start node program on slave node: %d.\n", nodeno);
4129 sprintf(envp[0], "NETSTART=%s", create_netstart(i));
4135 int fd, fd1 = dup(1);
4136 if (!(arg_debug || arg_debug_no_pause)) { /* debug mode */
// open() reports failure with -1 and may legitimately return 0, so the result
// is compared against -1 instead of being tested for truth.
4137 if (-1 != (fd = open("/dev/null", O_RDWR))) {
4144 status = execve(pparam_argv[1], pparam_argv + 1, envp);
// Reached only when execve returned, which means it failed.
4146 printf("execve failed to start process \"%s\" with status: %d\n",
4147 pparam_argv[1], status);
4149 status = bproc_execmove(nodeno, pparam_argv[1], pparam_argv + 1, envp);
4151 printf("bproc_execmove failed to start remote process \"%s\" with "
4153 pparam_argv[1], status);
// This definition of finish_nodes has an empty body and does nothing.
// NOTE(review): presumably the variant used by the Scyld (bproc) build, since
// it follows start_nodes_scyld - confirm against the enclosing #if.
4161 void finish_nodes(void) {}
4164 /*Unix systems can use Ssh normally*/
4165 /********** SSH-ONLY CODE *****************************************/
4169 /* this starts all the node programs. It executes fully in the background. */
4171 /****************************************************************************/
4172 #include <sys/wait.h>
4174 extern char **environ;
// Removes entries from the process environment list.
// doomedEnv: prefix to match; an entry is compared over the first
//            strlen(doomedEnv) characters only.
// NOTE(review): the declarations of ie and oe and the statement that keeps a
// non-matching entry are not visible in this listing.
4175 void removeEnv(const char *doomedEnv)
4176 { /*Remove a value from the environment list*/
4179 while (*ie != NULL) {
4180 if (0 != strncmp(*ie, doomedEnv, strlen(doomedEnv)))
4184 *oe = NULL; /*NULL-terminate list*/
// Forks a child that runs the remote shell for node nodeno, with the file
// startScript as the standard input of that child.
// nodeno:      node table index used for the shell, host name and login.
// startScript: path of the script file; the child unlinks it after opening.
// NOTE(review): the embedded line numbers skip, so the fork call, the return
// statements and the word-splitting loop are not visible in this listing.
4187 int ssh_fork(int nodeno, const char *startScript)
4189 std::vector<const char *> sshargv;
// The shell setting of the node supplies the leading argv entries.
4193 s = nodetab_shell(nodeno);
4196 sshargv.push_back(substr(s, e));
4201 sshargv.push_back(nodetab_name(nodeno));
4202 sshargv.push_back("-l");
4203 sshargv.push_back(nodetab_login(nodeno));
// The next three options are passed to the remote shell with -o.
4204 sshargv.push_back("-o");
4205 sshargv.push_back("KbdInteractiveAuthentication=no");
4206 sshargv.push_back("-o");
4207 sshargv.push_back("PasswordAuthentication=no");
4208 sshargv.push_back("-o");
4209 sshargv.push_back("NoHostAuthenticationForLocalhost=yes");
4210 sshargv.push_back("/bin/bash -f");
4211 sshargv.push_back((const char *) NULL);
// Join every argument except the trailing NULL into one string for logging.
4214 std::string cmd_str = sshargv[0];
4215 for (int n = 1; n < sshargv.size()-1; ++n)
4216 cmd_str += " " + std::string(sshargv[n]);
4217 printf("Charmrun> Starting %s\n", cmd_str.c_str());
4222 perror("ERROR> starting remote shell");
4225 if (pid == 0) { /*Child process*/
// The script is opened first and unlinked next; the descriptor is then
// duplicated onto fd 0.
4226 int fdScript = open(startScript, O_RDONLY);
4227 /**/ unlink(startScript); /**/
4228 dup2(fdScript, 0); /*Open script as standard input*/
4229 // removeEnv("DISPLAY="); /*No DISPLAY disables ssh's slow X11 forwarding*/
// Loop over descriptors 3..1023; the loop body is not visible here
// (presumably it closes each descriptor).
4230 for (int i = 3; i < 1024; i++)
4232 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
// Reached only when execvp returned, which means it failed.
4233 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4238 fprintf(stderr, "Charmrun> remote shell (%s:%d) started\n",
4239 nodetab_name(nodeno), nodeno);
// Writes arguments from argv to f, each preceded by one space.
// Arguments are written verbatim; no quoting or escaping is applied here.
// NOTE(review): the loop that advances argv is not visible in this listing.
4243 void fprint_arg(FILE *f, const char **argv)
4246 fprintf(f, " %s", *argv);
// Emits two script lines to f: a call of the script function Find for
// program, then an assignment of the script variable loc to the variable
// named by dest.
// f:       script being written.
// program: program name handed to Find.
// dest:    name of the script variable that receives $loc.
4250 void ssh_Find(FILE *f, const char *program, const char *dest)
4252 fprintf(f, "Find %s\n", program);
4253 fprintf(f, "%s=$loc\n", dest);
// Writes to f the shell script that starts the node program on one node.
// The script defines the helper functions Echo, Exit and Find, exports the
// launch environment (NETMAGIC, CmiMyNode, NETSTART, CmiMyNodeSize,
// CmiMyForks, CmiNumNodes), checks the node program and working directory,
// and finally launches the program under a debugger, inside an xterm, or
// directly.
// f:       script being written.
// nodeno:  node table index used for per-node settings (debugger, xterm,
//          path fixes, setup command, nice level).
// rank0no: index passed to create_netstart and nodetab_getnodeinfo.
// argv:    node program arguments, written with fprint_arg.
// NOTE(review): the rest of the parameter list is on a line that is not
// visible here; the body reads a variable named restart. Many other lines of
// this function are also elided from this listing.
4255 void ssh_script(FILE *f, int nodeno, int rank0no, const char **argv,
4259 char *arg_nodeprog_r, *arg_currdir_r;
4260 const char *dbg = nodetab_debugger(nodeno);
4261 const char *host = nodetab_name(nodeno);
// Script prologue: shebang line, then the shell helpers Echo, Exit and Find.
4264 fprintf(f, "#!/bin/sh\n");
4266 fprintf(f, /*Echo: prints out status message*/
4268 " echo 'Charmrun remote shell(%s.%d)>' $*\n"
4271 fprintf(f, /*Exit: exits with return code*/
4273 " if [ $1 -ne 0 ]\n"
4275 " Echo Exiting with error code $1\n"
4277 #if CMK_SSH_KILL /*End by killing ourselves*/
4278 " sleep 5\n" /*Delay until any error messages are flushed*/
4280 #else /*Exit normally*/
4284 fprintf(f, /*Find: locates a binary program in PATH, sets loc*/
4287 " for dir in `echo $PATH | sed -e 's/:/ /g'`\n"
4289 " test -f \"$dir/$1\" && loc=\"$dir/$1\"\n"
4291 " if [ \"x$loc\" = x ]\n"
4293 " Echo $1 not found in your PATH \"($PATH)\"--\n"
4294 " Echo set your path in your ~/.charmrunrc\n"
// First output of the script, then the optional per-user rc file is sourced.
4300 fprintf(f, "Echo 'remote responding...'\n");
4302 fprintf(f, "test -f \"$HOME/.charmrunrc\" && . \"$HOME/.charmrunrc\"\n");
4303 /* let's leave DISPLAY untouched and rely on X11 forwarding,
4304 changing DISPLAY to charmrun does not always work if X11 forwarding
4307 if (arg_display && !arg_ssh_display)
4308 fprintf(f, "DISPLAY='%s';export DISPLAY\n", arg_display);
// NETMAGIC is the low 15 bits of a pid: parent_charmrun_pid for a child
// charmrun, getpid() in the other visible call.
4311 if (arg_child_charmrun)
4312 fprintf(f, "NETMAGIC=\"%d\";export NETMAGIC\n",
4313 parent_charmrun_pid & 0x7FFF);
4316 fprintf(f, "NETMAGIC=\"%d\";export NETMAGIC\n", getpid() & 0x7FFF);
// Rank taken from the launcher environment: the first non-empty value of
// OMPI_COMM_WORLD_RANK, MPIRUN_RANK, PMI_RANK, PMI_ID and MP_CHILD.
// The condition guarding this branch is not visible in this listing.
// NOTE(review): the parenthesized Echo/Exit group runs in a subshell, so
// Exit 1 there may not terminate the script itself - confirm.
4319 fprintf(f, "CmiMyNode=$OMPI_COMM_WORLD_RANK\n");
4320 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$MPIRUN_RANK\n");
4321 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$PMI_RANK\n");
4322 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$PMI_ID\n");
4323 fprintf(f, "test -z \"$CmiMyNode\" && CmiMyNode=$MP_CHILD\n");
4324 fprintf(f, "test -z \"$CmiMyNode\" && (Echo Could not detect rank from "
4325 "environment ; Exit 1)\n");
4326 fprintf(f, "export CmiMyNode\n");
4329 else if (arg_hierarchical_start && arg_child_charmrun)
4330 fprintf(f, "CmiMyNode='%d'; export CmiMyNode\n", mynodes_start + rank0no);
4333 fprintf(f, "CmiMyNode='%d'; export CmiMyNode\n", rank0no);
// A child charmrun of a hierarchical start offsets the index by mynodes_start.
4336 if (arg_hierarchical_start && arg_child_charmrun)
4337 netstart = create_netstart(mynodes_start + rank0no);
4340 netstart = create_netstart(rank0no);
4341 fprintf(f, "NETSTART=\"%s\";export NETSTART\n", netstart);
4343 fprintf(f, "CmiMyNodeSize='%d'; export CmiMyNodeSize\n",
4344 nodetab_getnodeinfo(rank0no)->cpus);
// A restart or an mpiexec launch exports CmiMyForks as 0; the other visible
// call exports the forks value stored in the node info.
4346 if (restart || arg_mpiexec) /* skip fork */
4347 fprintf(f, "CmiMyForks='%d'; export CmiMyForks\n", 0);
4349 fprintf(f, "CmiMyForks='%d'; export CmiMyForks\n",
4350 nodetab_getnodeinfo(rank0no)->forks);
// Node count from the launcher environment: the first non-empty value of
// OMPI_COMM_WORLD_SIZE, MPIRUN_NPROCS, PMI_SIZE and MP_PROCS.
4353 fprintf(f, "CmiNumNodes=$OMPI_COMM_WORLD_SIZE\n");
4354 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$MPIRUN_NPROCS\n");
4355 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$PMI_SIZE\n");
4356 fprintf(f, "test -z \"$CmiNumNodes\" && CmiNumNodes=$MP_PROCS\n");
4357 fprintf(f, "test -z \"$CmiNumNodes\" && (Echo Could not detect node count "
4358 "from environment ; Exit 1)\n");
4359 fprintf(f, "export CmiNumNodes\n");
4362 else if (arg_hierarchical_start && arg_child_charmrun)
4363 fprintf(f, "CmiNumNodes='%d'; export CmiNumNodes\n",
4364 nodetab_rank0_size_total);
4368 fprintf(f, "CmiNumNodes='%d'; export CmiNumNodes\n", nodetab_rank0_size);
4371 fprintf(f, "G95_UNBUFFERED_ALL=TRUE; export G95_UNBUFFERED_ALL\n");
4374 fprintf(f, "GFORTRAN_UNBUFFERED_ALL=YES; export GFORTRAN_UNBUFFERED_ALL\n");
4377 fprintf(f, "MX_MONOTHREAD=1; export MX_MONOTHREAD\n");
4378 /*fprintf(f,"MX_RCACHE=1; export MX_RCACHE\n");*/
4380 #if CMK_AIX && CMK_SMP
4381 fprintf(f, "MALLOCMULTIHEAP=1; export MALLOCMULTIHEAP\n");
4385 printf("Charmrun> Sending \"%s\" to client %d.\n", netstart, rank0no);
4388 "PATH=\"$PATH:/bin:/usr/bin:/usr/X/bin:/usr/X11/bin:/usr/local/bin:"
4389 "/usr/X11R6/bin:/usr/openwin/bin\"\n");
4391 /* find the node-program */
4392 arg_nodeprog_r = pathextfix(arg_nodeprog_a, nodetab_pathfixes(nodeno),
4393 nodetab_ext(nodeno));
4395 /* find the current directory, relative version */
4396 arg_currdir_r = pathfix(arg_currdir_a, nodetab_pathfixes(nodeno));
4399 printf("Charmrun> find the node program \"%s\" at \"%s\" for %d.\n",
4400 arg_nodeprog_r, arg_currdir_r, nodeno);
// When an xterm is involved, the script locates it with Find, and also
// locates xrdb unless ssh display forwarding or the no-xrdb option is set.
4402 if (arg_debug || arg_debug_no_pause || arg_in_xterm) {
4403 ssh_Find(f, nodetab_xterm(nodeno), "F_XTERM");
4404 if (!arg_ssh_display && !arg_debug_no_xrdb)
4405 ssh_Find(f, "xrdb", "F_XRDB");
4407 fprintf(f, "Echo 'using xterm' $F_XTERM\n");
4410 if (arg_debug || arg_debug_no_pause) { /*Look through PATH for debugger*/
4411 ssh_Find(f, dbg, "F_DBG");
4413 fprintf(f, "Echo 'using debugger' $F_DBG\n");
// The script probes the X server with xrdb -query and exits with 1 when the
// probe returns non-zero.
4416 if (!arg_ssh_display && !arg_debug_no_xrdb &&
4417 (arg_debug || arg_debug_no_pause || arg_in_xterm)) {
4418 /* if (arg_debug || arg_debug_no_pause || arg_in_xterm) {*/
4419 fprintf(f, "$F_XRDB -query > /dev/null\n");
4420 fprintf(f, "if test $? != 0\nthen\n");
4421 fprintf(f, " Echo 'Cannot contact X Server '$DISPLAY'. You probably'\n");
4422 fprintf(f, " Echo 'need to run xhost to authorize connections.'\n");
4423 fprintf(f, " Echo '(See manual for xhost for security issues)'\n");
4424 fprintf(f, " Echo 'Or try ++batch 1 ++ssh-display to rely on SSH X11 "
4426 fprintf(f, " Exit 1\n");
// Script-side check that the node program exists and is executable.
4430 fprintf(f, "if test ! -x \"%s\"\nthen\n", arg_nodeprog_r);
4431 fprintf(f, " Echo 'Cannot locate this node-program: %s'\n", arg_nodeprog_r);
4432 fprintf(f, " Exit 1\n");
// Change to the working directory on the remote side.
// NOTE(review): the generated test compares $? with 1 only, so other
// non-zero exit codes of cd are not treated as a failure - confirm intent.
4435 fprintf(f, "cd \"%s\"\n", arg_currdir_r);
4436 fprintf(f, "if test $? = 1\nthen\n");
4437 fprintf(f, " Echo 'Cannot propagate this current directory:'\n");
4438 fprintf(f, " Echo '%s'\n", arg_currdir_r);
4439 fprintf(f, " Exit 1\n");
// A setup value of a single asterisk makes strcmp return 0, so no setup
// command is emitted in that case.
// NOTE(review): the same $? = 1 comparison is used for the setup command.
4442 if (strcmp(nodetab_setup(nodeno), "*")) {
4443 fprintf(f, "%s\n", nodetab_setup(nodeno));
4444 fprintf(f, "if test $? = 1\nthen\n");
4445 fprintf(f, " Echo 'this initialization command failed:'\n");
4446 fprintf(f, " Echo '\"%s\"'\n", nodetab_setup(nodeno));
4447 fprintf(f, " Echo 'edit your nodes file to fix it.'\n");
4448 fprintf(f, " Exit 1\n");
4452 fprintf(f, "rm -f /tmp/charmrun_err.$$\n");
4454 fprintf(f, "Echo 'starting node-program...'\n");
4455 /* This is the start of the run-nodeprogram script */
// Launch modes: under a debugger (gdb, idb or dbx), inside an xterm, or
// directly.
4458 if (arg_debug || arg_debug_no_pause) {
4459 if (strcmp(dbg, "gdb") == 0 || strcmp(dbg, "idb") == 0) {
// gdb and idb: a here-document writes the debugger command file
// /tmp/charmrun_gdb.$$ on the remote side.
4460 fprintf(f, "cat > /tmp/charmrun_gdb.$$ << END_OF_SCRIPT\n");
4461 if (strcmp(dbg, "idb") == 0) {
4462 fprintf(f, "set \\$cmdset=\"gdb\"\n");
4464 fprintf(f, "shell /bin/rm -f /tmp/charmrun_gdb.$$\n");
4465 fprintf(f, "handle SIGPIPE nostop noprint\n");
4466 fprintf(f, "handle SIGWINCH nostop noprint\n");
4467 fprintf(f, "handle SIGWAITING nostop noprint\n");
4468 if (arg_debug_commands)
4469 fprintf(f, "%s\n", arg_debug_commands);
4470 fprintf(f, "set args");
4471 fprint_arg(f, argv);
// With arg_debug_no_pause the command file also contains a run command.
4473 if (arg_debug_no_pause)
4474 fprintf(f, "run\n");
4475 fprintf(f, "END_OF_SCRIPT\n");
// The xterm runs the debugger; idb receives the command file with -c and
// the other visible call passes it with -x.
4477 fprintf(f, "\"%s\" ", arg_runscript);
4478 fprintf(f, "$F_XTERM");
4479 fprintf(f, " -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4480 if (strcmp(dbg, "idb") == 0)
4481 fprintf(f, " -e $F_DBG \"%s\" -c /tmp/charmrun_gdb.$$ \n", arg_nodeprog_r);
4483 fprintf(f, " -e $F_DBG \"%s\" -x /tmp/charmrun_gdb.$$ \n", arg_nodeprog_r);
4484 } else if (strcmp(dbg, "dbx") == 0) {
// dbx: the command file is /tmp/charmrun_dbx.$$ and is passed with -s.
4485 fprintf(f, "cat > /tmp/charmrun_dbx.$$ << END_OF_SCRIPT\n");
4486 fprintf(f, "sh /bin/rm -f /tmp/charmrun_dbx.$$\n");
4487 fprintf(f, "dbxenv suppress_startup_message 5.0\n");
4488 fprintf(f, "ignore SIGPOLL\n");
4489 fprintf(f, "ignore SIGPIPE\n");
4490 fprintf(f, "ignore SIGWINCH\n");
4491 fprintf(f, "ignore SIGWAITING\n");
4492 if (arg_debug_commands)
4493 fprintf(f, "%s\n", arg_debug_commands);
4494 fprintf(f, "END_OF_SCRIPT\n");
4496 fprintf(f, "\"%s\" ", arg_runscript);
4497 fprintf(f, "$F_XTERM");
4498 fprintf(f, " -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4499 fprintf(f, " -e $F_DBG %s ", arg_debug_no_pause ? "-r" : "");
4501 fprintf(f, "-c \'runargs ");
4502 fprint_arg(f, argv);
4505 fprintf(f, "-s/tmp/charmrun_dbx.$$ %s", arg_nodeprog_r);
4506 if (arg_debug_no_pause)
4507 fprint_arg(f, argv);
4510 fprintf(stderr, "Unknown debugger: %s.\n Exiting.\n",
4511 nodetab_debugger(nodeno));
4513 } else if (arg_in_xterm) {
// xterm mode: a small wrapper script /tmp/charmrun_inx.$$ runs the program,
// prints its exit code and waits for a line of input.
4515 fprintf(stderr, "Charmrun> node %d: xterm is %s\n", nodeno,
4516 nodetab_xterm(nodeno));
4517 fprintf(f, "cat > /tmp/charmrun_inx.$$ << END_OF_SCRIPT\n");
4518 fprintf(f, "#!/bin/sh\n");
4519 fprintf(f, "/bin/rm -f /tmp/charmrun_inx.$$\n");
4520 fprintf(f, "%s", arg_nodeprog_r);
4521 fprint_arg(f, argv);
4523 fprintf(f, "echo 'program exited with code '\\$?\n");
4524 fprintf(f, "read eoln\n");
4525 fprintf(f, "END_OF_SCRIPT\n");
4526 fprintf(f, "chmod 700 /tmp/charmrun_inx.$$\n");
4528 fprintf(f, "\"%s\" ", arg_runscript);
4529 fprintf(f, "$F_XTERM -title 'Node %d (%s)' ", nodeno, nodetab_name(nodeno));
4530 fprintf(f, " -sl 5000");
4531 fprintf(f, " -e /tmp/charmrun_inx.$$\n");
// Direct mode: run script prefix, optional setarch -R, program and arguments.
4534 fprintf(f, "\"%s\" ", arg_runscript);
4535 if (arg_no_va_rand) {
4537 fprintf(stderr, "Charmrun> setarch -R is used.\n");
4538 fprintf(f, "setarch `uname -m` -R ");
4540 fprintf(f, "\"%s\" ", arg_nodeprog_r);
4541 fprint_arg(f, argv);
// A nice value other than -100 is appended as a +nice program argument
// (presumably -100 is the marker for no nice level - confirm).
4542 if (nodetab_nice(nodeno) != -100) {
4544 fprintf(stderr, "Charmrun> nice -n %d\n", nodetab_nice(nodeno));
4545 fprintf(f, " +nice %d ", nodetab_nice(nodeno));
4547 fprintf(f, "\nres=$?\n");
4548 /* If shared libraries fail to load, the program dies without
4549 calling charmrun back. Since we *have* to close down stdin/out/err,
4550 we have to smuggle this failure information out via a file,
4551 /tmp/charmrun_err.<pid> */
4552 fprintf(f, "if [ $res -eq 127 ]\n"
4554 " ( \n" /* Re-run, spitting out errors from a subshell: */
4557 " ) > /tmp/charmrun_err.$$ 2>&1 \n"
4559 arg_nodeprog_r, arg_nodeprog_r);
4562 /* End the node-program subshell. To minimize the number
4563 of open ports on the front-end, we must close down ssh;
4564 to do this, we have to close stdin, stdout, stderr, and
4565 run the subshell in the background. */
4567 fprintf(f, " < /dev/null 1> /dev/null 2> /dev/null");
4573 fprintf(f, "Echo 'remote shell phase successful.'\n");
4574 fprintf(f, /* Check for startup errors: */
4576 "if [ -r /tmp/charmrun_err.$$ ]\n"
4578 " cat /tmp/charmrun_err.$$ \n"
4579 " rm -f /tmp/charmrun_err.$$ \n"
4582 fprintf(f, "Exit 0\n");
4583 free(arg_currdir_r);
4586 /* use the command "size" to get information about the position of the ".data"
4587 and ".bss" segments inside the program memory */
// Runs the command size -A on the node program through the remote shell of
// node table entry 0, in a forked child, and waits for that child.
// NOTE(review): the embedded line numbers skip, so the fork call, any pipe
// handling and the parsing of the output are not visible in this listing.
4588 void read_global_segments_size()
4590 std::vector<const char *> sshargv;
4594 /* find the node-program */
4596 pathextfix(arg_nodeprog_a, nodetab_pathfixes(0), nodetab_ext(0));
4598 sshargv.push_back(nodetab_shell(0));
4599 sshargv.push_back(nodetab_name(0));
4600 sshargv.push_back("-l");
4601 sshargv.push_back(nodetab_login(0));
// 9 bytes cover the 8-character command prefix plus the terminating NUL.
4602 tmp = (char *) malloc(sizeof(char) * 9 + strlen(arg_nodeprog_r));
4603 sprintf(tmp, "size -A %s", arg_nodeprog_r);
4604 sshargv.push_back(tmp);
4605 sshargv.push_back((const char *) NULL);
4609 perror("ERROR> getting the size of the global variables segments");
4611 } else if (childPid == 0) {
4614 /*printf("executing: \"%s\" \"%s\" \"%s\" \"%s\"
4615 * \"%s\"\n",sshargv[0],sshargv[1],sshargv[2],sshargv[3],sshargv[4]);*/
4616 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
// Reached only when execvp returned, which means it failed.
4617 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4621 /* else we are in the parent */
4623 waitpid(childPid, NULL, 0);
4627 /* open a ssh connection with processor 0 and open a gdb session for info */
// Forks a child that runs gdb -q on the node program through the remote
// shell of node table entry 0. The child pid is stored in gdb_info_pid and
// three descriptors are stored in gdb_info_std by the parent.
// NOTE(review): the embedded line numbers skip, so the creation of fdin,
// fdout and fderr and the redirection in the child are not visible here.
4628 void open_gdb_info()
4630 std::vector<const char *> sshargv;
4637 /* find the node-program */
4639 pathextfix(arg_nodeprog_a, nodetab_pathfixes(0), nodetab_ext(0));
4641 sshargv.push_back(nodetab_shell(0));
4642 sshargv.push_back(nodetab_name(0));
4643 sshargv.push_back("-l");
4644 sshargv.push_back(nodetab_login(0));
// 8 bytes cover the 7-character command prefix plus the terminating NUL.
4645 tmp = (char *) malloc(sizeof(char) * 8 + strlen(arg_nodeprog_r));
4646 sprintf(tmp, "gdb -q %s", arg_nodeprog_r);
4647 sshargv.push_back(tmp);
4648 sshargv.push_back((const char *) NULL);
4654 gdb_info_pid = fork();
4655 if (gdb_info_pid < 0) {
4656 perror("ERROR> starting info gdb");
4658 } else if (gdb_info_pid == 0) {
4663 printf("executing: \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", sshargv[0],
4664 sshargv[1], sshargv[2], sshargv[3], sshargv[4]);
// Loop over descriptors 3..1023; the loop body is not visible here
// (presumably it closes each descriptor).
4668 for (i = 3; i < 1024; i++)
4670 execvp(sshargv[0], const_cast<char **>(&sshargv[0]));
// Reached only when execvp returned, which means it failed.
4671 fprintf(stderr, "Charmrun> Couldn't find remote shell program '%s'!\n",
4675 /* else we are in the parent */
// The parent keeps index 1 of fdin and index 0 of fdout and fderr
// (presumably the write end and the read ends of three pipes - confirm).
4677 gdb_info_std[0] = fdin[1];
4678 gdb_info_std[1] = fdout[0];
4679 gdb_info_std[2] = fderr[0];
// Hierarchical start: replaces the file name in arg_nodeprog_a with charmrun,
// then writes a start script and forks a remote shell for up to branchfactor
// entries, advancing client by nodes_per_child each time.
// NOTE(review): the embedded line numbers skip, so the initial values of
// nextIndex and client and the fopen failure tests are not visible here.
4685 void start_next_level_charmruns()
4688 static char buf[1024];
// NOTE(review): strrchr returns NULL when the string contains no slash, and
// the next line writes through the result without a check.
4689 char *nodeprog_name = strrchr(arg_nodeprog_a, '/');
// Cut the string at the last slash, then append DIRSEP and charmrun.
4690 nodeprog_name[0] = 0;
4691 sprintf(buf, "%s%s%s", arg_nodeprog_a, DIRSEP, "charmrun");
4692 arg_nodeprog_a = strdup(buf);
4697 while (nextIndex < branchfactor) {
4698 /* need to index into unique_table*/
4699 int rank0no = nodetab_unique_table[client];
4700 int pe = nodetab_rank0_table[rank0no];
4702 char startScript[200];
// The script is created under /tmp first and in the current directory as
// the second attempt.
4703 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
4704 f = fopen(startScript, "w");
4706 /* now try current directory */
4707 sprintf(startScript, "charmrun.%d.%d", getpid(), pe);
4708 f = fopen(startScript, "w");
4710 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
4714 ssh_script(f, pe, client, arg_argv, 0);
// NOTE(review): this allocation sits inside the loop; a guard on the
// preceding elided line presumably makes it run once - confirm.
4717 ssh_pids = (int *) malloc(sizeof(int) * branchfactor);
4718 ssh_pids[nextIndex++] = ssh_fork(pe, startScript);
4719 client += nodes_per_child;
// Writes the start script for one rank-0 entry and forks its remote shell;
// the resulting pid is stored in ssh_pids[rank0no].
// rank0no: index into nodetab_rank0_table and ssh_pids.
// NOTE(review): the embedded line numbers skip, so the fopen failure tests
// and the guard around the ssh_pids allocation are not visible here.
4725 void start_one_node_ssh(int rank0no)
4727 int pe = nodetab_rank0_table[rank0no];
4729 char startScript[200];
// The script is created under /tmp first and in the current directory as
// the second attempt.
4730 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
4731 f = fopen(startScript, "w");
4733 /* now try current directory */
4734 sprintf(startScript, "charmrun.%d.%d", getpid(), pe);
4735 f = fopen(startScript, "w");
4737 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
4741 ssh_script(f, pe, rank0no, arg_argv, 0);
4744 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
4745 ssh_pids[rank0no] = ssh_fork(pe, startScript);
// Starts one remote shell for a group of rank-0 entries that begins at
// client. The group size minus one is stored as the forks count of the first
// entry, start_one_node_ssh(client) is called once, and the group size is
// returned.
// client: index of the first rank-0 entry of the group.
// NOTE(review): the embedded line numbers skip, so the preprocessor
// conditions that select between the grouping variants below are not visible
// in this listing.
4748 int start_set_node_ssh(int client)
4750 /* a search function could be inserted here instead of sequential lookup for
4751 * more complex node lists (e.g. interleaving) */
4755 clientgroup = client + 1; /* smp already handles this functionality */
4759 if (!arg_scalable_start && !arg_hierarchical_start)
4760 clientgroup = client + 1; /* only launch 1 core per ssh call */
4762 clientgroup = client;
4764 clientgroup++; /* add one more client to group if not greater than nodes
4765 and shares the same name as client */
4766 if (clientgroup >= nodetab_rank0_size)
// Scalable start compares names by the index itself; the hierarchical
// variant maps the index through nodetab_rank0_table first.
4768 if (arg_scalable_start && !arg_hierarchical_start)
4769 if (strcmp(nodetab_name(clientgroup), nodetab_name(client)))
4771 /*Hierarchical-start*/
4772 if (strcmp(nodetab_name(nodetab_rank0_table[clientgroup]),
4773 nodetab_name(nodetab_rank0_table[client])))
4779 if (!arg_scalable_start)
4780 clientgroup = client + 1; /* only launch 1 core per ssh call */
4782 clientgroup = client;
4784 clientgroup++; /* add one more client to group if not greater than nodes
4785 and shares the same name as client */
4786 } while (clientgroup < nodetab_rank0_size &&
4787 (!strcmp(nodetab_getnodeinfo(clientgroup)->name,
4788 nodetab_getnodeinfo(client)->name)));
4793 nodetab_getnodeinfo(client)->forks =
4794 clientgroup - client - 1; /* already have 1 process launching */
4795 start_one_node_ssh(client);
4796 return clientgroup - client; /* return number of entries in group */
// Allocates ssh_pids with one slot per rank-0 entry and starts the remote
// shells group by group, advancing by the group size that
// start_set_node_ssh returns.
// NOTE(review): the initial value of client is set on a line that is not
// visible in this listing.
4799 void start_nodes_ssh()
4801 int client, clientgroup;
4802 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
4805 printf("start_nodes_ssh\n");
4807 while (client < nodetab_rank0_size) {
4808 /* start a group of processes per node */
4809 clientgroup = start_set_node_ssh(client);
4810 client += clientgroup;
4814 /* for mpiexec, for once calling mpiexec to start on all nodes */
// Forks a child that executes the launcher named by the shell setting of node
// table entry 0, with startScript as its last argument. Unless
// arg_mpiexec_no_n is set, -n followed by nodetab_rank0_size is passed too.
// startScript: path of the script handed to the launcher.
// NOTE(review): the embedded line numbers skip, so the fork call, the return
// statements and the word-splitting loops are not visible in this listing.
4815 int ssh_fork_one(const char *startScript)
4817 std::vector<const char *> sshargv;
4822 /* figure out size and dynamic allocate */
4823 s = nodetab_shell(0);
4830 s = nodetab_shell(0);
4833 sshargv.push_back(substr(s, e));
4838 if ( ! arg_mpiexec_no_n ) {
4839 sshargv.push_back("-n");
4840 sprintf(npes, "%d", nodetab_rank0_size);
4841 sshargv.push_back(npes);
4843 sshargv.push_back((char *) startScript);
4844 sshargv.push_back((const char *) NULL);
4846 printf("Charmrun> Starting %s %s \n", nodetab_shell(0), startScript);
4850 perror("ERROR> starting mpiexec");
4853 if (pid == 0) { /*Child process*/
4855 /* unlink(startScript); */
4856 // removeEnv("DISPLAY="); /*No DISPLAY disables ssh's slow X11 forwarding*/
// Loop over descriptors 3..1023; the loop body is not visible here
// (presumably it closes each descriptor).
4857 for (i = 3; i < 1024; i++)
4859 execvp(sshargv[0], const_cast<char *const *>(&sshargv[0]));
// Reached only when execvp returned, which means it failed.
4860 fprintf(stderr, "Charmrun> Couldn't find mpiexec program '%s'!\n",
4865 fprintf(stderr, "Charmrun> mpiexec started\n");
// Writes one start script in the current directory, makes it executable and
// launches it once through ssh_fork_one.
// NOTE(review): the embedded line numbers skip, so the fopen failure tests
// are not visible in this listing.
4869 void start_nodes_mpiexec()
4874 char startScript[200];
4875 sprintf(startScript, "./charmrun.%d", getpid());
4876 f = fopen(startScript, "w");
// NOTE(review): chmod runs directly after fopen, before any visible check of
// f. The retry below also builds the same path as the first attempt.
4877 chmod(startScript, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IROTH);
4879 /* now try current directory */
4880 sprintf(startScript, "./charmrun.%d", getpid());
4881 f = fopen(startScript, "w");
4883 fprintf(stderr, "Charmrun> Can not write file %s!\n", startScript);
4887 ssh_script(f, 0, 0, arg_argv, 0);
4889 ssh_pids = (int *) malloc(sizeof(int) * nodetab_rank0_size);
// The loop below starts at index 0, so the pid stored in ssh_pids[0] is
// overwritten with 0 along with every other slot.
4890 ssh_pids[0] = ssh_fork_one(startScript);
4891 for (i = 0; i < nodetab_rank0_size; i++)
4892 ssh_pids[i] = 0; /* skip finish_nodes */
// Waits for the remote shells recorded in ssh_pids[start..stop). A shell
// that exits with status 0 has its slot cleared. A non-zero status is
// reported, and start_one_node_ssh is called again for that entry for at
// most MAX_NUM_RETRIES attempts.
// start: first index to wait for (inclusive).
// stop:  end index (exclusive).
// NOTE(review): the embedded line numbers skip, so the outer loop on done and
// the exit paths are not visible in this listing.
4895 void finish_set_nodes(int start, int stop)
4897 int status, done, i;
4901 return; /*nothing to do*/
// One retry counter per entry of the range, indexed by i - start.
4903 std::vector<int> num_retries(stop - start, 0);
4907 for (i = start; i < stop; i++) { /* check all nodes */
4908 if (ssh_pids[i] != 0) {
4909 done = 0; /* we are not finished yet */
4911 waitpid(ssh_pids[i], &status, 0); /* check if the process is finished */
4912 if (WIFEXITED(status)) {
4913 if (!WEXITSTATUS(status)) { /* good */
4914 ssh_pids[i] = 0; /* process is finished */
4916 host = nodetab_name(nodetab_rank0_table[i]);
4918 "Charmrun> Error %d returned from remote shell (%s:%d)\n",
4919 WEXITSTATUS(status), host, i);
// Status 255 is treated differently from other failures (presumably it marks
// a connection failure of the remote shell that is worth a retry - confirm).
4921 if (WEXITSTATUS(status) != 255)
4924 if (++num_retries[i - start] <= MAX_NUM_RETRIES) {
4925 fprintf(stderr, "Charmrun> Reconnection attempt %d of %d\n",
4926 num_retries[i - start], MAX_NUM_RETRIES);
4927 start_one_node_ssh(i);
4931 "Charmrun> Too many reconnection attempts; bailing out\n");
// NOTE(review): the header of the enclosing function is not visible in this
// listing. The visible body calls finish_set_nodes on the first branchfactor
// entries when this is the top-level charmrun of a hierarchical start, and
// on all nodetab_rank0_size entries in the other call (an else between the
// two calls is presumably among the elided lines).
4944 if (arg_hierarchical_start && !arg_child_charmrun)
4945 finish_set_nodes(0, branchfactor);
4948 finish_set_nodes(0, nodetab_rank0_size);
// NOTE(review): the header of the enclosing function and the condition of
// the early return are not visible in this listing. The visible body sends
// signal 9 to the recorded remote shell pid of every rank-0 entry and then
// reaps it with waitpid.
4956 return; /*nothing to do*/
4957 /*Now wait for all the ssh'es to finish*/
4958 for (rank0no = 0; rank0no < nodetab_rank0_size; rank0no++) {
4959 const char *host = nodetab_name(nodetab_rank0_table[rank0no]);
4962 printf("Charmrun> waiting for remote shell (%s:%d), pid %d\n", host,
4963 rank0no, ssh_pids[rank0no]);
4964 kill(ssh_pids[rank0no], 9);
4965 waitpid(ssh_pids[rank0no], &status, 0); /*<- no zombies*/
4971 /* find the absolute path for an executable in the path */
// Searches the colon-separated directories of the PATH environment variable
// for target, testing each candidate directory/target with probefile.
// target: file name to look for.
// NOTE(review): the statements that run on a hit and after the loop are not
// visible in this listing, so the return value and the ownership of the
// malloc-ed buffers cannot be confirmed from here.
4972 char *find_abs_path(const char *target)
4974 char *thepath=getenv("PATH");
// getenv returns NULL when PATH is unset and strdup(NULL) is undefined, so an
// unset PATH is treated as an empty search list; strtok then finds no entry
// and the loop below is skipped.
4975 char *path=strdup(thepath ? thepath : "");
4976 char *subpath=strtok(path,":");
// NOTE(review): this size assumes that each PATH entry is shorter than
// PATH_MAX - confirm.
4977 char *abspath=(char*) malloc(PATH_MAX + strlen(target) + 2);
4978 while(subpath!=NULL) {
4979 strcpy(abspath,subpath);
4980 strcat(abspath,"/");
4981 strcat(abspath,target);
4982 if(probefile(abspath)){
4986 subpath=strtok(NULL,":");
4993 /* simple version of charmrun that avoids the sshd or charmd, */
4994 /* it spawns the node program just on local machine using exec. */
// Starts one local process per rank-0 entry with execve. The environment env
// is copied and extended with NETSTART and CmiNumNodes entries. In debug
// mode the command line is rebuilt so that an xterm runs the debugger on the
// node program.
// env: NULL-terminated environment list to copy.
// NOTE(review): the embedded line numbers skip, so the fork call, the
// preprocessor branches around the environment copy and several
// declarations are not visible in this listing.
4995 void start_nodes_local(char **env)
4998 int envc, rank0no, i;
5003 #if CMK_AIX && CMK_SMP
5008 /* copy environ and expanded to hold NETSTART and CmiNumNodes */
5009 for (envc = 0; env[envc]; envc++)
// Room for the copied entries, two new entries, the optional extra ones and
// the terminating null pointer.
5011 envp = (char **) malloc((envc + 2 + extra + 1) * sizeof(void *));
5012 for (i = 0; i < envc; i++)
5014 envp[envc] = (char *) malloc(256);
5015 envp[envc + 1] = (char *) malloc(256);
5016 #if CMK_AIX && CMK_SMP
5017 envp[envc + 2] = (char *) malloc(256);
5018 sprintf(envp[envc + 2], "MALLOCMULTIHEAP=1");
5020 envp[envc + 2 + extra] = 0;
5021 for (i = 0; i < envc; i++)
5023 envp[envc] = (char *) malloc(256);
5024 envp[envc + 1] = (char *) malloc(256);
5028 /* insert xterm gdb in front of command line and pass args to gdb */
5029 if(arg_debug || arg_debug_no_pause) {
// Count the arguments and add up the lengths of those after index 1.
5031 for (dparamc = 0, argstringlen=0; pparam_argv[dparamc]; dparamc++)
5033 if(dparamc>1) argstringlen+=strlen(pparam_argv[dparamc]);
5035 if(arg_debug_no_pause) dparamoutmax+=2;
5037 dparamp = (char **) malloc((dparamoutmax) * sizeof(void *));
5038 char *abs_xterm=find_abs_path(arg_xterm);
5041 printf("Charmrun> cannot find xterm for gdb, please add it to your path\n");
// New command line: xterm -e debugger program -ex followed by one string
// that starts with set args and carries the program arguments.
5044 dparamp[dparamoutc++] = strdup(abs_xterm);
5045 dparamp[dparamoutc++] = strdup("-e");
5046 dparamp[dparamoutc++] = strdup(arg_debugger);
5047 dparamp[dparamoutc++] = strdup(pparam_argv[1]);
5048 dparamp[dparamoutc++] = strdup("-ex");
5049 dparamp[dparamoutc] = (char *) malloc(argstringlen + 11 + dparamc);
5050 strcpy(dparamp[dparamoutc], "set args");
5051 for(int i=2; i< dparamc; i++)
5053 strcat(dparamp[dparamoutc], " ");
5054 strcat(dparamp[dparamoutc], pparam_argv[i]);
// Without a pause the debugger also receives -ex r.
5056 if(arg_debug_no_pause)
5058 dparamp[++dparamoutc] = strdup("-ex");
5059 dparamp[++dparamoutc] = strdup("r");
5061 dparamp[++dparamoutc]=0; // null terminate your argv or face the wrath of
5062 // undefined behavior
5065 printf("Charmrun> gdb args : ");
5066 for (i = 0; i < dparamoutc; i++)
5067 printf(" %s ",dparamp[i]);
// Outside debug mode the original argument vector is used from index 1 on.
5073 dparamp=(char **) (pparam_argv+1);
5076 for (rank0no = 0; rank0no < nodetab_rank0_size; rank0no++) {
5079 int pe = nodetab_rank0_table[rank0no];
5082 printf("Charmrun> start %d node program on localhost.\n", pe);
5083 sprintf(envp[envc], "NETSTART=%s", create_netstart(rank0no));
5084 sprintf(envp[envc + 1], "CmiNumNodes=%d", nodetab_rank0_size);
5090 int fd, fd1 = dup(1);
5091 if (-1 != (fd = open("/dev/null", O_RDWR))) {
5096 status = execve(dparamp[0],
5097 const_cast<char *const *>(dparamp), envp);
// Reached only when execve returned, which means it failed.
5100 printf("execve failed to start process \"%s\" with status: %d\n",
5101 dparamp[0], status);
5106 if(arg_debug || arg_debug_no_pause)
// Frees the debug argv strings from index dparamoutc down to 0; the entry at
// dparamoutc is the null terminator set above.
5108 for(dparamoutc; dparamoutc>=0;dparamoutc--) free(dparamp[dparamoutc]);
5112 free(envp[envc + 1]);
5113 #if CMK_AIX && CMK_SMP
5114 free(envp[envc + 2]);
5121 int current_restart_phase = 1;
5123 void refill_nodetab_entry(int crashed_node);
5124 nodetab_host *replacement_host(int pe);
5127 * @brief Relaunches a program on the crashed node.
// Relaunches the node program for a crashed rank-0 entry: writes a start
// script, extends the argument list with restart flags, replaces the node
// table entry through refill_nodetab_entry, forks a remote shell and waits
// for that shell to exit.
// crashed_node: index into nodetab_rank0_table of the crashed entry.
// NOTE(review): the embedded line numbers skip, so some statements of this
// function are not visible in this listing.
5129 void restart_node(int crashed_node)
5131 int pe = nodetab_rank0_table[crashed_node];
5133 char startScript[200];
5134 int restart_ssh_pid;
5135 const char **restart_argv;
5139 /** write the startScript file to be sent**/
5140 sprintf(startScript, "/tmp/charmrun.%d.%d", getpid(), pe);
// NOTE(review): no check of f is visible before it is passed to ssh_script.
5141 f = fopen(startScript, "w");
5143 /** add an argument to the argv of the new process
5144 so that the restarting processor knows that it
5145 is a restarting processor */
5147 while (arg_argv[i] != NULL) {
// Room for the original arguments, three new ones and the terminating NULL.
5150 restart_argv = (const char **) malloc(sizeof(char *) * (i + 4));
5152 while (arg_argv[i] != NULL) {
5153 restart_argv[i] = arg_argv[i];
5156 restart_argv[i] = "+restartaftercrash";
// The restart phase counter is incremented before it is passed along.
5157 sprintf(phase_str, "%d", ++current_restart_phase);
5158 restart_argv[i + 1] = phase_str;
5159 restart_argv[i + 2] = "+restartisomalloc";
5160 restart_argv[i + 3] = NULL;
5162 /** change the nodetable entry of the crashed
5163 processor to connect it to a new one**/
5164 refill_nodetab_entry(crashed_node);
5165 ssh_script(f, pe, crashed_node, restart_argv, 1);
5167 /**start the new processor */
5168 restart_ssh_pid = ssh_fork(pe, startScript);
5169 /**wait for the reply from the new process*/
5171 if (arg_debug_no_pause || arg_debug)
// Keep waiting until the remote shell has exited normally.
5175 waitpid(restart_ssh_pid, &status, 0);
5176 } while (!WIFEXITED(status));
5177 if (WEXITSTATUS(status) != 0) {
5179 "Charmrun> Error %d returned from new attempted remote shell \n",
5180 WEXITSTATUS(status));
5184 printf("Charmrun finished launching new process in %fs\n",
5185 GetClock() - ftTimer);
// Overwrites the node table entry of the crashed node with a copy of the
// host that replacement_host(pe) returns.
// crashed_node: index into nodetab_rank0_table of the crashed entry.
5188 void refill_nodetab_entry(int crashed_node)
5190 int pe = nodetab_rank0_table[crashed_node];
5191 nodetab_host *h = nodetab_table[pe];
// The structure is copied by value into the existing slot, so the pointer
// stored in nodetab_table[pe] stays the same.
5192 *h = *(replacement_host(pe));
5193 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
5194 fprintf(stderr, "Charmrun>>> New pe %d is on host %s \n", pe,
5199 #if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
// Variant compiled when _FAULT_MLOG_ or _FAULT_CAUSAL_ is defined. Picks the
// replacement starting from loaded_max_pe + 1, reduced modulo arg_read_pes,
// and returns that node table entry.
// pe: node table index of the entry being replaced (used for the log line).
// NOTE(review): the embedded line numbers skip, so further adjustments of x
// may exist that are not visible in this listing.
5200 nodetab_host *replacement_host(int pe)
5202 int x = loaded_max_pe + 1;
5204 x = x % arg_read_pes;
5207 * x = rand()%nodetab_size;
5209 fprintf(stderr, "Charmrun>>> replacing pe %d with %d host %s with %s \n", pe,
5210 x, nodetab_name(pe), nodetab_name(x));
5211 return nodetab_table[x];
// Variant compiled when neither _FAULT_MLOG_ nor _FAULT_CAUSAL_ is defined.
// Returns a randomly chosen node table entry. With hierarchical start the
// choice is drawn from nodetab_rank0_table and recorded in restarted_pe_id;
// the other visible assignment draws from all nodetab_size entries.
// pe: node table index of the entry being replaced (not read by the visible
//     lines).
5214 nodetab_host *replacement_host(int pe)
5219 if (arg_hierarchical_start) {
5220 x = nodetab_rank0_table[rand() % nodetab_rank0_size];
5222 restarted_pe_id = x;
5225 x = rand() % nodetab_size;
5227 return nodetab_table[x];
5232 * @brief Reconnects a crashed node. It waits for the I-tuple from the just
5233 * relaunched program. It also:
5234 * i) Broadcast the nodetabtable to every other node.
5235 * ii) Announces the crash to every other node.
// Accepts the connection of a relaunched node program on server_fd, stores
// the new socket in req_clients[socket_index], receives its initial message,
// sends the node table to every other client and announces the crash.
// socket_index: slot in req_clients that receives the new socket.
// crashed_node: node number recorded for the new socket and in _last_crash.
// NOTE(review): the embedded line numbers skip, so the preprocessor
// conditions around the hierarchical branch and the exit calls are not
// visible in this listing.
5237 void reconnect_crashed_client(int socket_index, int crashed_node)
5240 unsigned int clientPort;
5242 ChSingleNodeinfo *in;
// arg_timeout is multiplied by 1000 for skt_select1 (presumably seconds to
// milliseconds - confirm).
5243 if (0 == skt_select1(server_fd, arg_timeout * 1000)) {
5244 client_connect_problem(
5245 socket_index, socket_index,
5246 "Timeout waiting for restarted node-program to connect");
5248 req_clients[socket_index] = skt_accept(server_fd, &clientIP, &clientPort);
// NOTE(review): the socket is entered into skt_client_table before the
// SOCKET_ERROR test below.
5249 skt_client_table[req_clients[socket_index]] = crashed_node;
5251 if (req_clients[socket_index] == SOCKET_ERROR) {
5252 client_connect_problem(socket_index, socket_index,
5253 "Failure in restarted node accept");
5256 if (!skt_select1(req_clients[socket_index], arg_timeout * 1000)) {
5257 client_connect_problem(socket_index, socket_index,
5258 "Timeout on IP request for restarted processor");
5262 if (arg_hierarchical_start) {
5263 req_forward_root(req_clients[socket_index]);
// A non-zero _last_crash means an earlier crash is still recorded.
5264 if (_last_crash != 0) {
5265 fprintf(stderr, "ERROR> Charmrun detected multiple crashes.\n");
5269 _last_crash = crashed_node;
5270 _crash_socket_index = socket_index;
5274 ChMessage_recv(req_clients[socket_index], &msg);
5275 if (msg.len != sizeof(ChSingleNodeinfo)) {
5276 fprintf(stderr, "Charmrun: Bad initnode data length. Aborting\n");
// NOTE(review): msg.data is printed with %s here; confirm that it is
// NUL-terminated when the length check fails.
5277 fprintf(stderr, "Charmrun: possibly because: %s.\n", msg.data);
5279 fprintf(stdout, "socket_index %d crashed_node %d reconnected fd %d \n",
5280 socket_index, crashed_node, req_clients[socket_index]);
5282 /** update the nodetab entry corresponding to
5283 this node, skip the restarted one */
5284 in = (ChSingleNodeinfo *) msg.data;
5285 nodeinfo_add(in, req_clients[socket_index]);
5286 for (i = 0; i < req_nClients; i++) {
5287 if (i != socket_index) {
5288 req_handle_initnodetab(NULL, req_clients[i]);
5292 /* tell every one there is a crash */
5293 announce_crash(socket_index, crashed_node);
5294 if (_last_crash != 0) {
5295 fprintf(stderr, "ERROR> Charmrun detected multiple crashes.\n");
5298 _last_crash = crashed_node;
5299 _crash_socket_index = socket_index;
5300 /*holds the restarted process until I got ack back from
5301 everyone in req_handle_crashack
5302 now the restarted one can only continue until
5303 req_handle_crashack calls req_handle_initnodetab(socket_index)
5304 req_handle_initnodetab(NULL,req_clients[socket_index]); */
5305 ChMessage_free(&msg);
5310 * @brief Sends a message announcing the crash to every other node. This message
5312 * trigger fault tolerance methods.
// Sends a crashnode message to every client except the one at socket_index.
// The message is a header followed by the crashed node number as a
// ChMessageInt_t.
// socket_index: slot in req_clients that is skipped.
// crashed_node: node number carried in the message body.
5314 void announce_crash(int socket_index, int crashed_node)
5317 ChMessageHeader hdr;
5318 ChMessageInt_t crashNo = ChMessageInt_new(crashed_node);
// The header is built once and reused for every recipient.
5319 ChMessageHeader_new("crashnode", sizeof(ChMessageInt_t), &hdr);
5320 for (i = 0; i < req_nClients; i++) {
5321 if (i != socket_index) {
5322 skt_sendN(req_clients[i], (const char *) &hdr, sizeof(hdr));
5323 skt_sendN(req_clients[i], (const char *) &crashNo,
5324 sizeof(ChMessageInt_t));
5331 #endif /*CMK_USE_SSH*/